diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
commit | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch) | |
tree | 599ab169a01f1c86eda9adc774edaedde2f2db5b /lib/Target/PowerPC/PPCFrameLowering.cpp | |
parent | 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff) |
Diffstat (limited to 'lib/Target/PowerPC/PPCFrameLowering.cpp')
-rw-r--r-- | lib/Target/PowerPC/PPCFrameLowering.cpp | 211 |
1 files changed, 181 insertions, 30 deletions
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 8263954994d2..ebfb1ef7f49b 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1,9 +1,8 @@ //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -30,7 +29,6 @@ using namespace llvm; #define DEBUG_TYPE "framelowering" -STATISTIC(NumNoNeedForFrame, "Number of functions without frames"); STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); @@ -73,10 +71,10 @@ static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { } static unsigned computeLinkageSize(const PPCSubtarget &STI) { - if (STI.isDarwinABI() || STI.isPPC64()) + if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64()) return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); - // SVR4 ABI: + // 32-bit SVR4 ABI: return 8; } @@ -446,12 +444,27 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); } +/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum +/// call frame size. Update the MachineFunction object with the stack size. 
+unsigned +PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, + bool UseEstimate) const { + unsigned NewMaxCallFrameSize = 0; + unsigned FrameSize = determineFrameLayout(MF, UseEstimate, + &NewMaxCallFrameSize); + MF.getFrameInfo().setStackSize(FrameSize); + MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); + return FrameSize; +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. -unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, - bool UpdateMF, - bool UseEstimate) const { - MachineFrameInfo &MFI = MF.getFrameInfo(); +unsigned +PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, + bool UseEstimate, + unsigned *NewMaxCallFrameSize) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); // Get the number of bytes to allocate from the FrameInfo unsigned FrameSize = @@ -469,6 +482,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. !MFI.adjustsStack() && // No calls. !MustSaveLR(MF, LR) && // No need to save LR. + !FI->mustSaveTOC() && // No need to save TOC. !RegInfo->hasBasePointer(MF); // No special alignment. // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless @@ -477,10 +491,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Check whether we can skip adjusting the stack pointer (by using red zone) if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { - NumNoNeedForFrame++; // No need for frame - if (UpdateMF) - MFI.setStackSize(0); return 0; } @@ -496,9 +507,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, if (MFI.hasVarSizedObjects()) maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; - // Update maximum call frame size. 
- if (UpdateMF) - MFI.setMaxCallFrameSize(maxCallFrameSize); + // Update the new max call frame size if the caller passes in a valid pointer. + if (NewMaxCallFrameSize) + *NewMaxCallFrameSize = maxCallFrameSize; // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -506,10 +517,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Make sure the frame is aligned. FrameSize = (FrameSize + AlignMask) & ~AlignMask; - // Update frame info. - if (UpdateMF) - MFI.setStackSize(FrameSize); - return FrameSize; } @@ -690,7 +697,7 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); MachineFunction &MF = *(MBB->getParent()); bool HasBP = RegInfo->hasBasePointer(MF); - unsigned FrameSize = determineFrameLayout(MF, false); + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; bool IsLargeFrame = !isInt<16>(NegFrameSize); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -713,6 +720,50 @@ bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { return findScratchRegister(TmpMBB, true); } +bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + + // Abort if there is no register info or function info. + if (!RegInfo || !FI) + return false; + + // Only move the stack update on ELFv2 ABI and PPC64. + if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) + return false; + + // Check the frame size first and return false if it does not fit the + // requirements. + // We need a non-zero frame size as well as a frame that will fit in the red + // zone. This is because by moving the stack pointer update we are now storing + // to the red zone until the stack pointer is updated. 
If we get an interrupt + // inside the prologue but before the stack update we now have a number of + // stores to the red zone and those stores must all fit. + MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned FrameSize = MFI.getStackSize(); + if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) + return false; + + // Frame pointers and base pointers complicate matters so don't do anything + // if we have them. For example having a frame pointer will sometimes require + // a copy of r1 into r31 and that makes keeping track of updates to r1 more + // difficult. + if (hasFP(MF) || RegInfo->hasBasePointer(MF)) + return false; + + // Calls to fast_cc functions use different rules for passing parameters on + // the stack from the ABI and using PIC base in the function imposes + // similar restrictions to using the base pointer. It is not generally safe + // to move the stack pointer update in these situations. + if (FI->hasFastCall() || FI->usesPICBase()) + return false; + + // Finally we can move the stack update if we do not require register + // scavenging. Register scavenging can introduce more spills and so + // may make the frame size larger than we have computed. + return !RegInfo->requiresFrameIndexScavenging(MF); +} + void PPCFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -748,7 +799,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, MBBI = MBB.begin(); // Work out frame sizes. - unsigned FrameSize = determineFrameLayout(MF); + unsigned FrameSize = determineFrameLayoutAndUpdate(MF); int NegFrameSize = -FrameSize; if (!isInt<32>(NegFrameSize)) llvm_unreachable("Unhandled stack size!"); @@ -759,6 +810,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // Check if the link register (LR) must be saved. 
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); + bool MustSaveTOC = FI->mustSaveTOC(); const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); bool MustSaveCR = !MustSaveCRs.empty(); // Do we have a frame pointer and/or base pointer for this function? @@ -770,6 +822,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; + unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2; unsigned ScratchReg = 0; unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) @@ -855,6 +908,45 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert((isPPC64 || !MustSaveCR) && "Prologue CR saving supported only in 64-bit mode"); + // Check if we can move the stack update instruction (stdu) down the prologue + // past the callee saves. Hopefully this will avoid the situation where the + // saves are waiting for the update on the store with update to complete. + MachineBasicBlock::iterator StackUpdateLoc = MBBI; + bool MovingStackUpdateDown = false; + + // Check if we can move the stack update. + if (stackUpdateCanBeMoved(MF)) { + const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); + for (CalleeSavedInfo CSI : Info) { + int FrIdx = CSI.getFrameIdx(); + // If the frame index is not negative the callee saved info belongs to a + // stack object that is not a fixed stack object. We ignore non-fixed + // stack objects because we won't move the stack update pointer past them. + if (FrIdx >= 0) + continue; + + if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { + StackUpdateLoc++; + MovingStackUpdateDown = true; + } else { + // We need all of the Frame Indices to meet these conditions. + // If they do not, abort the whole operation. 
+ StackUpdateLoc = MBBI; + MovingStackUpdateDown = false; + break; + } + } + + // If the operation was not aborted then update the object offset. + if (MovingStackUpdateDown) { + for (CalleeSavedInfo CSI : Info) { + int FrIdx = CSI.getFrameIdx(); + if (FrIdx < 0) + MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); + } + } + } + // If we need to spill the CR and the LR but we don't have two separate // registers available, we must spill them one at a time if (MustSaveCR && SingleScratchReg && MustSaveLR) { @@ -918,7 +1010,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, } if (MustSaveLR) - BuildMI(MBB, MBBI, dl, StoreInst) + BuildMI(MBB, StackUpdateLoc, dl, StoreInst) .addReg(ScratchReg, getKillRegState(true)) .addImm(LROffset) .addReg(SPReg); @@ -986,7 +1078,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, HasSTUX = true; } else if (!isLargeFrame) { - BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg) + BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) .addReg(SPReg) .addImm(NegFrameSize) .addReg(SPReg); @@ -1004,6 +1096,16 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, HasSTUX = true; } + // Save the TOC register after the stack pointer update if a prologue TOC + // save is required for the function. + if (MustSaveTOC) { + assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); + BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) + .addReg(TOCReg, getKillRegState(true)) + .addImm(TOCSaveOffset) + .addReg(SPReg); + } + if (!HasRedZone) { assert(!isPPC64 && "A red zone is always available on PPC64"); if (HasSTUX) { @@ -1205,6 +1307,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, if (PPC::CRBITRCRegClass.contains(Reg)) continue; + if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) + continue; + // For SVR4, don't emit a move for the CR spill slot if we haven't // spilled CRs. 
if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) @@ -1234,6 +1339,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, .addCFIIndex(CFIRegister); } else { int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); + // We have changed the object offset above but we do not want to change + // the actual offsets in the CFI instruction so we have to undo the + // offset change here. + if (MovingStackUpdateDown) + Offset -= NegFrameSize; + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -1380,6 +1491,32 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, unsigned RBReg = SPReg; unsigned SPAdd = 0; + // Check if we can move the stack update instruction up the epilogue + // past the callee saves. This will allow the move to LR instruction + // to be executed before the restores of the callee saves which means + // that the callee saves can hide the latency from the MTLR instruction. + MachineBasicBlock::iterator StackUpdateLoc = MBBI; + if (stackUpdateCanBeMoved(MF)) { + const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); + for (CalleeSavedInfo CSI : Info) { + int FrIdx = CSI.getFrameIdx(); + // If the frame index is not negative the callee saved info belongs to a + // stack object that is not a fixed stack object. We ignore non-fixed + // stack objects because we won't move the update of the stack pointer + // past them. + if (FrIdx >= 0) + continue; + + if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) + StackUpdateLoc--; + else { + // Abort the operation as we can't update all CSR restores. + StackUpdateLoc = MBBI; + break; + } + } + } + if (FrameSize) { // In the prologue, the loaded (or persistent) stack pointer value is // offset by the STDU/STDUX/STWU/STWUX instruction. 
For targets with red @@ -1409,7 +1546,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { if (HasRedZone) { - BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) .addReg(SPReg) .addImm(FrameSize); } else { @@ -1433,7 +1570,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(FPReg); RBReg = FPReg; } - BuildMI(MBB, MBBI, dl, LoadInst, RBReg) + BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) .addImm(0) .addReg(SPReg); } @@ -1466,7 +1603,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // a base register anyway, because it may happen to be R0. bool LoadedLR = false; if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { - BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) + BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) .addImm(LROffset+SPAdd) .addReg(RBReg); LoadedLR = true; @@ -1538,7 +1675,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(TempReg, getKillRegState(i == e-1)); if (MustSaveLR) - BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); + BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization @@ -1732,6 +1869,9 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); + assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || + (Reg != PPC::X2 && Reg != PPC::R2)) && + "Not expecting to try to spill R2 in a function that must save TOC"); if (PPC::GPRCRegClass.contains(Reg) || PPC::SPE4RCRegClass.contains(Reg)) { HasGPSaveArea = true; @@ -1947,7 +2087,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, // the 16-bit immediate. 
We don't know the complete frame size here // because we've not yet computed callee-saved register spills or the // needed alignment padding. - unsigned StackSize = determineFrameLayout(MF, false, true); + unsigned StackSize = determineFrameLayout(MF, true); MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { @@ -2041,6 +2181,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); + PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); + bool MustSaveTOC = FI->mustSaveTOC(); DebugLoc DL; bool CRSpilled = false; MachineInstrBuilder CRMIB; @@ -2071,6 +2213,10 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, continue; } + // The actual spill will happen in the prologue. + if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) + continue; + // Insert the spill to the stack frame. if (IsCRField) { PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); @@ -2198,6 +2344,8 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); + PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); + bool MustSaveTOC = FI->mustSaveTOC(); bool CR2Spilled = false; bool CR3Spilled = false; bool CR4Spilled = false; @@ -2220,6 +2368,9 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) continue; + if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) + continue; + if (Reg == PPC::CR2) { CR2Spilled = true; // The spill slot is associated only with CR2, which is the |