diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 |
| commit | eb11fae6d08f479c0799db45860a98af528fa6e7 (patch) | |
| tree | 44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Target/AArch64/AArch64FrameLowering.cpp | |
| parent | b8a2042aa938069e862750553db0e4d82d25822c (diff) | |
Notes
Diffstat (limited to 'lib/Target/AArch64/AArch64FrameLowering.cpp')
| -rw-r--r-- | lib/Target/AArch64/AArch64FrameLowering.cpp | 313 |
1 files changed, 250 insertions, 63 deletions
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index d66f7b59a4b5..6dc5d19862a9 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -140,8 +140,19 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone", cl::desc("enable use of redzone on AArch64"), cl::init(false), cl::Hidden); +static cl::opt<bool> + ReverseCSRRestoreSeq("reverse-csr-restore-seq", + cl::desc("reverse the CSR restore sequence"), + cl::init(false), cl::Hidden); + STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); +/// This is the biggest offset to the stack pointer we can encode in aarch64 +/// instructions (without using a separate calculation and a temp register). +/// Note that the exception here are vector stores/loads which cannot encode any +/// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()). +static const unsigned DefaultSafeSPDisplacement = 255; + /// Look at each instruction that references stack frames and return the stack /// size limit beyond which some of these instructions will require a scratch /// register during their expansion later. @@ -151,7 +162,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { // realistically that's not a big deal at this stage of the game. for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - if (MI.isDebugValue() || MI.isPseudo() || + if (MI.isDebugInstr() || MI.isPseudo() || MI.getOpcode() == AArch64::ADDXri || MI.getOpcode() == AArch64::ADDSXri) continue; @@ -167,7 +178,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { } } } - return 255; + return DefaultSafeSPDisplacement; } bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { @@ -191,11 +202,25 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); // Retain behavior of always omitting the FP for leaf functions when possible. - return (MFI.hasCalls() && - MF.getTarget().Options.DisableFramePointerElim(MF)) || - MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || - MFI.hasStackMap() || MFI.hasPatchPoint() || - RegInfo->needsStackRealignment(MF); + if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF)) + return true; + if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || + MFI.hasStackMap() || MFI.hasPatchPoint() || + RegInfo->needsStackRealignment(MF)) + return true; + // With large callframes around we may need to use FP to access the scavenging + // emergency spillslot. + // + // Unfortunately some calls to hasFP() like machine verifier -> + // getReservedReg() -> hasFP in the middle of global isel are too early + // to know the max call frame size. Hopefully conservatively returning "true" + // in those cases is fine. + // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs. + if (!MFI.isMaxCallFrameSizeComputed() || + MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement) + return true; + + return false; } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is @@ -349,7 +374,8 @@ static bool windowsRequiresStackProbe(MachineFunction &MF, F.getFnAttribute("stack-probe-size") .getValueAsString() .getAsInteger(0, StackProbeSize); - return StackSizeInBytes >= StackProbeSize; + return (StackSizeInBytes >= StackProbeSize) && + !F.hasFnAttribute("no-stack-arg-probe"); } bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( @@ -388,6 +414,14 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) { + // Ignore instructions that do not operate on SP, i.e. shadow call stack + // instructions. + while (MBBI->getOpcode() == AArch64::STRXpost || + MBBI->getOpcode() == AArch64::LDRXpre) { + assert(MBBI->getOperand(0).getReg() != AArch64::SP); + ++MBBI; + } + unsigned NewOpc; bool NewIsUnscaled = false; switch (MBBI->getOpcode()) { @@ -455,6 +489,14 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, unsigned LocalStackSize) { unsigned Opc = MI.getOpcode(); + + // Ignore instructions that do not operate on SP, i.e. shadow call stack + // instructions. + if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) { + assert(MI.getOperand(0).getReg() != AArch64::SP); + return; + } + (void)Opc; assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi || Opc == AArch64::STRXui || Opc == AArch64::STRDui || @@ -472,6 +514,38 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8); } +static void adaptForLdStOpt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator FirstSPPopI, + MachineBasicBlock::iterator LastPopI) { + // Sometimes (when we restore in the same order as we save), we can end up + // with code like this: + // + // ldp x26, x25, [sp] + // ldp x24, x23, [sp, #16] + // ldp x22, x21, [sp, #32] + // ldp x20, x19, [sp, #48] + // add sp, sp, #64 + // + // In this case, it is always better to put the first ldp at the end, so + // that the load-store optimizer can run and merge the ldp and the add into + // a post-index ldp. + // If we managed to grab the first pop instruction, move it to the end. + if (ReverseCSRRestoreSeq) + MBB.splice(FirstSPPopI, &MBB, LastPopI); + // We should end up with something like this now: + // + // ldp x24, x23, [sp, #16] + // ldp x22, x21, [sp, #32] + // ldp x20, x19, [sp, #48] + // ldp x26, x25, [sp] + // add sp, sp, #64 + // + // and the load-store optimizer can merge the last two instructions into: + // + // ldp x26, x25, [sp], #64 + // +} + void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -485,6 +559,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry(); bool HasFP = hasFP(MF); + // At this point, we're going to decide whether or not the function uses a + // redzone. In most cases, the function doesn't have a redzone so let's + // assume that's false and set it to true in the case that there's a redzone. + AFI->setHasRedZone(false); + // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc DL; @@ -505,9 +584,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, return; // REDZONE: If the stack size is less than 128 bytes, we don't need // to actually allocate. - if (canUseRedZone(MF)) + if (canUseRedZone(MF)) { + AFI->setHasRedZone(true); ++NumRedZoneFunctions; - else { + } else { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); @@ -823,14 +903,32 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + uint64_t AfterCSRPopSize = ArgumentPopSize; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); + // Assume we can't combine the last pop with the sp restore. - if (!CombineSPBump && PrologueSaveSize != 0) - convertCalleeSaveRestoreToSPPrePostIncDec( - MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize); + if (!CombineSPBump && PrologueSaveSize != 0) { + MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); + // Converting the last ldp to a post-index ldp is valid only if the last + // ldp's offset is 0. + const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1); + // If the offset is 0, convert it to a post-index ldp. + if (OffsetOp.getImm() == 0) { + convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII, + PrologueSaveSize); + } else { + // If not, make sure to emit an add after the last ldp. + // We're doing this by transfering the size to be restored from the + // adjustment *before* the CSR pops to the adjustment *after* the CSR + // pops. + AfterCSRPopSize += PrologueSaveSize; + } + } // Move past the restores of the callee-saved registers. + // If we plan on combining the sp bump of the local stack size and the callee + // save stack size, we might need to adjust the CSR save and restore offsets. MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); MachineBasicBlock::iterator Begin = MBB.begin(); while (LastPopI != Begin) { @@ -845,7 +943,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // If there is a single SP update, insert it before the ret and we're done. if (CombineSPBump) { emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, - NumBytes + ArgumentPopSize, TII, + NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy); return; } @@ -857,19 +955,27 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, bool RedZone = canUseRedZone(MF); // If this was a redzone leaf function, we don't need to restore the // stack pointer (but we may need to pop stack args for fastcc). - if (RedZone && ArgumentPopSize == 0) + if (RedZone && AfterCSRPopSize == 0) return; bool NoCalleeSaveRestore = PrologueSaveSize == 0; int StackRestoreBytes = RedZone ? 0 : NumBytes; if (NoCalleeSaveRestore) - StackRestoreBytes += ArgumentPopSize; - emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - StackRestoreBytes, TII, MachineInstr::FrameDestroy); + StackRestoreBytes += AfterCSRPopSize; + // If we were able to combine the local stack pop with the argument pop, // then we're done. - if (NoCalleeSaveRestore || ArgumentPopSize == 0) + bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0; + + // If we're done after this, make sure to help the load store optimizer. + if (Done) + adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI); + + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, + StackRestoreBytes, TII, MachineInstr::FrameDestroy); + if (Done) return; + NumBytes = 0; } @@ -888,9 +994,24 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save // code in the prologue. - if (ArgumentPopSize) - emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, - ArgumentPopSize, TII, MachineInstr::FrameDestroy); + if (AfterCSRPopSize) { + // Find an insertion point for the first ldp so that it goes before the + // shadow call stack epilog instruction. This ensures that the restore of + // lr from x18 is placed after the restore from sp. + auto FirstSPPopI = MBB.getFirstTerminator(); + while (FirstSPPopI != Begin) { + auto Prev = std::prev(FirstSPPopI); + if (Prev->getOpcode() != AArch64::LDRXpre || + Prev->getOperand(0).getReg() == AArch64::SP) + break; + FirstSPPopI = Prev; + } + + adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); + + emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, + AfterCSRPopSize, TII, MachineInstr::FrameDestroy); + } } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for @@ -917,6 +1038,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16; int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize(); bool isFixed = MFI.isFixedObjectIndex(FI); + bool isCSR = !isFixed && MFI.getObjectOffset(FI) >= + -((int)AFI->getCalleeSavedStackSize()); // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs or a dynamically realigned SP (and thus the SP isn't @@ -930,26 +1053,48 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, // Argument access should always use the FP. if (isFixed) { UseFP = hasFP(MF); - } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && - !RegInfo->needsStackRealignment(MF)) { - // Use SP or FP, whichever gives us the best chance of the offset - // being in range for direct access. If the FPOffset is positive, - // that'll always be best, as the SP will be even further away. + } else if (isCSR && RegInfo->needsStackRealignment(MF)) { + // References to the CSR area must use FP if we're re-aligning the stack + // since the dynamically-sized alignment padding is between the SP/BP and + // the CSR area. + assert(hasFP(MF) && "Re-aligned stack must have frame pointer"); + UseFP = true; + } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) { // If the FPOffset is negative, we have to keep in mind that the // available offset range for negative offsets is smaller than for - // positive ones. If we have variable sized objects, we're stuck with - // using the FP regardless, though, as the SP offset is unknown - // and we don't have a base pointer available. If an offset is + // positive ones. If an offset is // available via the FP and the SP, use whichever is closest. - if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 || - (FPOffset >= -256 && Offset > -FPOffset)) + bool FPOffsetFits = FPOffset >= -256; + PreferFP |= Offset > -FPOffset; + + if (MFI.hasVarSizedObjects()) { + // If we have variable sized objects, we can use either FP or BP, as the + // SP offset is unknown. We can use the base pointer if we have one and + // FP is not preferred. If not, we're stuck with using FP. + bool CanUseBP = RegInfo->hasBasePointer(MF); + if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best. + UseFP = PreferFP; + else if (!CanUseBP) // Can't use BP. Forced to use FP. + UseFP = true; + // else we can use BP and FP, but the offset from FP won't fit. + // That will make us scavenge registers which we can probably avoid by + // using BP. If it won't fit for BP either, we'll scavenge anyway. + } else if (FPOffset >= 0) { + // Use SP or FP, whichever gives us the best chance of the offset + // being in range for direct access. If the FPOffset is positive, + // that'll always be best, as the SP will be even further away. UseFP = true; + } else { + // We have the choice between FP and (SP or BP). + if (FPOffsetFits && PreferFP) // If FP is the best fit, use it. + UseFP = true; + } } } - assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) && + assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) && "In the presence of dynamic stack pointer realignment, " - "non-argument objects cannot be accessed through the frame pointer"); + "non-argument/CSR objects cannot be accessed through the frame pointer"); if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); @@ -960,6 +1105,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, if (RegInfo->hasBasePointer(MF)) FrameReg = RegInfo->getBaseRegister(); else { + assert(!MFI.hasVarSizedObjects() && + "Can't use SP when we have var sized objects."); FrameReg = AArch64::SP; // If we're using the red zone for this function, the SP won't actually // be adjusted, so the offsets will be negative. They're also all @@ -1007,7 +1154,8 @@ struct RegPairInfo { static void computeCalleeSaveRegisterPairs( MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) { + const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs, + bool &NeedShadowCallStackProlog) { if (CSI.empty()) return; @@ -1041,6 +1189,15 @@ static void computeCalleeSaveRegisterPairs( RPI.Reg2 = NextReg; } + // If either of the registers to be saved is the lr register, it means that + // we also need to save lr in the shadow call stack. + if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) && + MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) { + if (!MF.getSubtarget<AArch64Subtarget>().isX18Reserved()) + report_fatal_error("Must reserve x18 to use shadow call stack"); + NeedShadowCallStackProlog = true; + } + // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store // pair instructions directly. Assert if we see anything otherwise. @@ -1091,9 +1248,24 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( DebugLoc DL; SmallVector<RegPairInfo, 8> RegPairs; - computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); + bool NeedShadowCallStackProlog = false; + computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, + NeedShadowCallStackProlog); const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (NeedShadowCallStackProlog) { + // Shadow call stack prolog: str x30, [x18], #8 + BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost)) + .addReg(AArch64::X18, RegState::Define) + .addReg(AArch64::LR) + .addReg(AArch64::X18) + .addImm(8) + .setMIFlag(MachineInstr::FrameSetup); + + // This instruction also makes x18 live-in to the entry block. + MBB.addLiveIn(AArch64::X18); + } + for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; ++RPII) { RegPairInfo RPI = *RPII; @@ -1115,13 +1287,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; else StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; - DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI); - if (RPI.isPaired()) - dbgs() << ", " << printReg(Reg2, TRI); - dbgs() << ") -> fi#(" << RPI.FrameIdx; - if (RPI.isPaired()) - dbgs() << ", " << RPI.FrameIdx+1; - dbgs() << ")\n"); + LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI); + if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI); + dbgs() << ") -> fi#(" << RPI.FrameIdx; + if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1; + dbgs() << ")\n"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); if (!MRI.isReserved(Reg1)) @@ -1157,11 +1327,11 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( if (MI != MBB.end()) DL = MI->getDebugLoc(); - computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); + bool NeedShadowCallStackProlog = false; + computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, + NeedShadowCallStackProlog); - for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE; - ++RPII) { - RegPairInfo RPI = *RPII; + auto EmitMI = [&](const RegPairInfo &RPI) { unsigned Reg1 = RPI.Reg1; unsigned Reg2 = RPI.Reg2; @@ -1178,13 +1348,11 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; else LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; - DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI); - if (RPI.isPaired()) - dbgs() << ", " << printReg(Reg2, TRI); - dbgs() << ") -> fi#(" << RPI.FrameIdx; - if (RPI.isPaired()) - dbgs() << ", " << RPI.FrameIdx+1; - dbgs() << ")\n"); + LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI); + if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI); + dbgs() << ") -> fi#(" << RPI.FrameIdx; + if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1; + dbgs() << ")\n"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); if (RPI.isPaired()) { @@ -1200,7 +1368,25 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), MachineMemOperand::MOLoad, 8, 8)); + }; + + if (ReverseCSRRestoreSeq) + for (const RegPairInfo &RPI : reverse(RegPairs)) + EmitMI(RPI); + else + for (const RegPairInfo &RPI : RegPairs) + EmitMI(RPI); + + if (NeedShadowCallStackProlog) { + // Shadow call stack epilog: ldr x30, [x18, #-8]! + BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre)) + .addReg(AArch64::X18, RegState::Define) + .addReg(AArch64::LR, RegState::Define) + .addReg(AArch64::X18) + .addImm(-8) + .setMIFlag(MachineInstr::FrameDestroy); } + return true; } @@ -1275,10 +1461,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, } } - DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; - for (unsigned Reg : SavedRegs.set_bits()) - dbgs() << ' ' << printReg(Reg, RegInfo); - dbgs() << "\n";); + LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; + for (unsigned Reg + : SavedRegs.set_bits()) dbgs() + << ' ' << printReg(Reg, RegInfo); + dbgs() << "\n";); // If any callee-saved registers are used, the frame cannot be eliminated. unsigned NumRegsSpilled = SavedRegs.count(); @@ -1287,7 +1474,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled; - DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); + LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); bool BigStack = (CFSize > EstimatedStackSizeLimit); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) @@ -1301,8 +1488,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // here. if (BigStack) { if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) { - DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo) - << " to get a scratch register.\n"); + LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo) + << " to get a scratch register.\n"); SavedRegs.set(UnspilledCSGPR); // MachO's compact unwind format relies on all registers being stored in // pairs, so if we need to spill one extra for BigStack, then we need to @@ -1322,8 +1509,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, unsigned Align = TRI->getSpillAlignment(RC); int FI = MFI.CreateStackObject(Size, Align, false); RS->addScavengingFrameIndex(FI); - DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI - << " as the emergency spill slot.\n"); + LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI + << " as the emergency spill slot.\n"); } } |
