diff options
Diffstat (limited to 'lib/Target/ARM/ARMLowOverheadLoops.cpp')
-rw-r--r-- | lib/Target/ARM/ARMLowOverheadLoops.cpp | 364 |
1 files changed, 267 insertions, 97 deletions
diff --git a/lib/Target/ARM/ARMLowOverheadLoops.cpp b/lib/Target/ARM/ARMLowOverheadLoops.cpp index cedf3bd3c74e..e1c5a9c3e223 100644 --- a/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -11,8 +11,7 @@ /// The expectation is that the loop contains three pseudo instructions: /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop /// form should be in the preheader, whereas the while form should be in the -/// preheaders only predecessor. TODO: Could DoLoopStart get moved into the -/// pre-preheader? +/// preheaders only predecessor. /// - t2LoopDec - placed within in the loop body. /// - t2LoopEnd - the loop latch terminator. /// @@ -35,6 +34,7 @@ using namespace llvm; namespace { class ARMLowOverheadLoops : public MachineFunctionPass { + MachineFunction *MF = nullptr; const ARMBaseInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr; @@ -52,17 +52,6 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; - bool ProcessLoop(MachineLoop *ML); - - void RevertWhile(MachineInstr *MI) const; - - void RevertLoopDec(MachineInstr *MI) const; - - void RevertLoopEnd(MachineInstr *MI) const; - - void Expand(MachineLoop *ML, MachineInstr *Start, - MachineInstr *Dec, MachineInstr *End, bool Revert); - MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); @@ -71,36 +60,156 @@ namespace { StringRef getPassName() const override { return ARM_LOW_OVERHEAD_LOOPS_NAME; } + + private: + bool ProcessLoop(MachineLoop *ML); + + MachineInstr * IsSafeToDefineLR(MachineInstr *MI); + + bool RevertNonLoops(); + + void RevertWhile(MachineInstr *MI) const; + + bool RevertLoopDec(MachineInstr *MI, bool AllowFlags = false) const; + + void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const; + + void Expand(MachineLoop *ML, MachineInstr *Start, + MachineInstr *InsertPt, MachineInstr *Dec, + MachineInstr *End, bool Revert); + }; } - + char ARMLowOverheadLoops::ID = 0; INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) -bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) { - if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB()) +bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { + const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget()); + if (!ST.hasLOB()) return false; - LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n"); + MF = &mf; + LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"); auto &MLI = getAnalysis<MachineLoopInfo>(); - MRI = &MF.getRegInfo(); - TII = static_cast<const ARMBaseInstrInfo*>( - MF.getSubtarget().getInstrInfo()); - BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF)); + MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); + MRI = &MF->getRegInfo(); + TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo()); + BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(*MF)); BBUtils->computeAllBlockSizes(); - BBUtils->adjustBBOffsetsAfter(&MF.front()); + BBUtils->adjustBBOffsetsAfter(&MF->front()); bool Changed = false; for (auto ML : MLI) { if (!ML->getParentLoop()) Changed |= ProcessLoop(ML); } + Changed |= RevertNonLoops(); return Changed; } +static bool IsLoopStart(MachineInstr &MI) { + return MI.getOpcode() == ARM::t2DoLoopStart || + MI.getOpcode() == ARM::t2WhileLoopStart; +} + +template<typename T> +static MachineInstr* SearchForDef(MachineInstr *Begin, T End, unsigned Reg) { + for(auto &MI : make_range(T(Begin), End)) { + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) + continue; + return &MI; + } + } + return nullptr; +} + +static MachineInstr* SearchForUse(MachineInstr *Begin, + MachineBasicBlock::iterator End, + unsigned Reg) { + for(auto &MI : make_range(MachineBasicBlock::iterator(Begin), End)) { + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + return &MI; + } + } + return nullptr; +} + +// Is it safe to define LR with DLS/WLS? +// LR can defined if it is the operand to start, because it's the same value, +// or if it's going to be equivalent to the operand to Start. +MachineInstr *ARMLowOverheadLoops::IsSafeToDefineLR(MachineInstr *Start) { + + auto IsMoveLR = [](MachineInstr *MI, unsigned Reg) { + return MI->getOpcode() == ARM::tMOVr && + MI->getOperand(0).getReg() == ARM::LR && + MI->getOperand(1).getReg() == Reg && + MI->getOperand(2).getImm() == ARMCC::AL; + }; + + MachineBasicBlock *MBB = Start->getParent(); + unsigned CountReg = Start->getOperand(0).getReg(); + // Walk forward and backward in the block to find the closest instructions + // that define LR. Then also filter them out if they're not a mov lr. + MachineInstr *PredLRDef = SearchForDef(Start, MBB->rend(), ARM::LR); + if (PredLRDef && !IsMoveLR(PredLRDef, CountReg)) + PredLRDef = nullptr; + + MachineInstr *SuccLRDef = SearchForDef(Start, MBB->end(), ARM::LR); + if (SuccLRDef && !IsMoveLR(SuccLRDef, CountReg)) + SuccLRDef = nullptr; + + // We've either found one, two or none mov lr instructions... Now figure out + // if they are performing the equilvant mov that the Start instruction will. + // Do this by scanning forward and backward to see if there's a def of the + // register holding the count value. If we find a suitable def, return it as + // the insert point. Later, if InsertPt != Start, then we can remove the + // redundant instruction. + if (SuccLRDef) { + MachineBasicBlock::iterator End(SuccLRDef); + if (!SearchForDef(Start, End, CountReg)) { + return SuccLRDef; + } else + SuccLRDef = nullptr; + } + if (PredLRDef) { + MachineBasicBlock::reverse_iterator End(PredLRDef); + if (!SearchForDef(Start, End, CountReg)) { + return PredLRDef; + } else + PredLRDef = nullptr; + } + + // We can define LR because LR already contains the same value. + if (Start->getOperand(0).getReg() == ARM::LR) + return Start; + + // We've found no suitable LR def and Start doesn't use LR directly. Can we + // just define LR anyway? + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + + // Not if we've haven't found a suitable mov and LR is live out. + if (LiveRegs.contains(ARM::LR)) + return nullptr; + + // If LR is not live out, we can insert the instruction if nothing else + // uses LR after it. + if (!SearchForUse(Start, MBB->end(), ARM::LR)) + return Start; + + LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for" + << " LR\n"); + return nullptr; +} + bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { bool Changed = false; @@ -111,15 +220,10 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML); - auto IsLoopStart = [](MachineInstr &MI) { - return MI.getOpcode() == ARM::t2DoLoopStart || - MI.getOpcode() == ARM::t2WhileLoopStart; - }; - // Search the given block for a loop start instruction. If one isn't found, // and there's only one predecessor block, search that one too. std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart = - [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { + [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { for (auto &MI : *MBB) { if (IsLoopStart(MI)) return &MI; @@ -165,41 +269,62 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { Dec = &MI; else if (MI.getOpcode() == ARM::t2LoopEnd) End = &MI; - else if (MI.getDesc().isCall()) + else if (IsLoopStart(MI)) + Start = &MI; + else if (MI.getDesc().isCall()) { // TODO: Though the call will require LE to execute again, does this // mean we should revert? Always executing LE hopefully should be // faster than performing a sub,cmp,br or even subs,br. Revert = true; + LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n"); + } - if (!Dec) + if (!Dec || End) continue; - // If we find that we load/store LR between LoopDec and LoopEnd, expect - // that the decremented value has been spilled to the stack. Because - // this value isn't actually going to be produced until the latch, by LE, - // we would need to generate a real sub. The value is also likely to be - // reloaded for use of LoopEnd - in which in case we'd need to perform - // an add because it gets negated again by LE! The other option is to - // then generate the other form of LE which doesn't perform the sub. - if (MI.mayLoad() || MI.mayStore()) - Revert = - MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR; + // If we find that LR has been written or read between LoopDec and + // LoopEnd, expect that the decremented value is being used else where. + // Because this value isn't actually going to be produced until the + // latch, by LE, we would need to generate a real sub. The value is also + // likely to be copied/reloaded for use of LoopEnd - in which in case + // we'd need to perform an add because it gets subtracted again by LE! + // The other option is to then generate the other form of LE which doesn't + // perform the sub. + for (auto &MO : MI.operands()) { + if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() && + MO.getReg() == ARM::LR) { + LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI); + Revert = true; + break; + } + } } if (Dec && End && Revert) break; } + LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; + if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; + if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;); + if (!Start && !Dec && !End) { LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n"); return Changed; - } if (!(Start && Dec && End)) { - report_fatal_error("Failed to find all loop components"); + } else if (!(Start && Dec && End)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n"); + return false; } - if (!End->getOperand(1).isMBB() || - End->getOperand(1).getMBB() != ML->getHeader()) - report_fatal_error("Expected LoopEnd to target Loop Header"); + if (!End->getOperand(1).isMBB()) + report_fatal_error("Expected LoopEnd to target basic block"); + + // TODO Maybe there's cases where the target doesn't have to be the header, + // but for now be safe and revert. + if (End->getOperand(1).getMBB() != ML->getHeader()) { + LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n"); + Revert = true; + } // The WLS and LE instructions have 12-bits for the label offset. WLS // requires a positive offset, while LE uses negative. @@ -216,41 +341,57 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { Revert = true; } - LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start - << " - Found Loop Dec: " << *Dec - << " - Found Loop End: " << *End); + MachineInstr *InsertPt = Revert ? nullptr : IsSafeToDefineLR(Start); + if (!InsertPt) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); + Revert = true; + } else + LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt); - Expand(ML, Start, Dec, End, Revert); + Expand(ML, Start, InsertPt, Dec, End, Revert); return true; } // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a // beq that branches to the exit branch. -// FIXME: Need to check that we're not trashing the CPSR when generating the -// cmp. We could also try to generate a cbz if the value in LR is also in +// TODO: We could also try to generate a cbz if the value in LR is also in // another low register. void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI); MachineBasicBlock *MBB = MI->getParent(); MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri)); - MIB.addReg(ARM::LR); + MIB.add(MI->getOperand(0)); MIB.addImm(0); MIB.addImm(ARMCC::AL); - MIB.addReg(ARM::CPSR); + MIB.addReg(ARM::NoRegister); + + MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); + unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? + ARM::tBcc : ARM::t2Bcc; - // TODO: Try to use tBcc instead - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc)); + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); MIB.add(MI->getOperand(1)); // branch target MIB.addImm(ARMCC::EQ); // condition code MIB.addReg(ARM::CPSR); MI->eraseFromParent(); } -// TODO: Check flags so that we can possibly generate a tSubs or tSub. -void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { +bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI, + bool AllowFlags) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI); MachineBasicBlock *MBB = MI->getParent(); + + // If nothing uses or defines CPSR between LoopDec and LoopEnd, use a t2SUBS. + bool SetFlags = false; + if (AllowFlags) { + if (auto *Def = SearchForDef(MI, MBB->end(), ARM::CPSR)) { + if (!SearchForUse(MI, MBB->end(), ARM::CPSR) && + Def->getOpcode() == ARM::t2LoopEnd) + SetFlags = true; + } + } + MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); MIB.addDef(ARM::LR); @@ -258,28 +399,39 @@ void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { MIB.add(MI->getOperand(2)); MIB.addImm(ARMCC::AL); MIB.addReg(0); - MIB.addReg(0); + + if (SetFlags) { + MIB.addReg(ARM::CPSR); + MIB->getOperand(5).setIsDef(true); + } else + MIB.addReg(0); + MI->eraseFromParent(); + return SetFlags; } // Generate a subs, or sub and cmp, and a branch instead of an LE. -// FIXME: Need to check that we're not trashing the CPSR when generating -// the cmp. -void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const { +void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI); - // Create cmp MachineBasicBlock *MBB = MI->getParent(); - MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(ARM::t2CMPri)); - MIB.addReg(ARM::LR); - MIB.addImm(0); - MIB.addImm(ARMCC::AL); - MIB.addReg(ARM::CPSR); + // Create cmp + if (!SkipCmp) { + MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII->get(ARM::t2CMPri)); + MIB.addReg(ARM::LR); + MIB.addImm(0); + MIB.addImm(ARMCC::AL); + MIB.addReg(ARM::NoRegister); + } + + MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); + unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? + ARM::tBcc : ARM::t2Bcc; - // TODO Try to use tBcc instead. // Create bne - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc)); + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); MIB.add(MI->getOperand(1)); // branch target MIB.addImm(ARMCC::NE); // condition code MIB.addReg(ARM::CPSR); @@ -287,33 +439,13 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const { } void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start, + MachineInstr *InsertPt, MachineInstr *Dec, MachineInstr *End, bool Revert) { - auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) { - // The trip count should already been held in LR since the instructions - // within the loop can only read and write to LR. So, there should be a - // mov to setup the count. WLS/DLS perform this move, so find the original - // and delete it - inserting WLS/DLS in its place. - MachineBasicBlock *MBB = Start->getParent(); - MachineInstr *InsertPt = Start; - for (auto &I : MRI->def_instructions(ARM::LR)) { - if (I.getParent() != MBB) - continue; - - // Always execute. - if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL) - continue; - - // Only handle move reg, if the trip count it will need moving into a reg - // before the setup instruction anyway. - if (!I.getDesc().isMoveReg() || - !I.getOperand(1).isIdenticalTo(Start->getOperand(0))) - continue; - InsertPt = &I; - break; - } - + auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start, + MachineInstr *InsertPt) { + MachineBasicBlock *MBB = InsertPt->getParent(); unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ? ARM::t2DLS : ARM::t2WLS; MachineInstrBuilder MIB = @@ -369,16 +501,54 @@ void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start, RevertWhile(Start); else Start->eraseFromParent(); - RevertLoopDec(Dec); - RevertLoopEnd(End); + bool FlagsAlreadySet = RevertLoopDec(Dec, true); + RevertLoopEnd(End, FlagsAlreadySet); } else { - Start = ExpandLoopStart(ML, Start); + Start = ExpandLoopStart(ML, Start, InsertPt); RemoveDeadBranch(Start); End = ExpandLoopEnd(ML, Dec, End); RemoveDeadBranch(End); } } +bool ARMLowOverheadLoops::RevertNonLoops() { + LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n"); + bool Changed = false; + + for (auto &MBB : *MF) { + SmallVector<MachineInstr*, 4> Starts; + SmallVector<MachineInstr*, 4> Decs; + SmallVector<MachineInstr*, 4> Ends; + + for (auto &I : MBB) { + if (IsLoopStart(I)) + Starts.push_back(&I); + else if (I.getOpcode() == ARM::t2LoopDec) + Decs.push_back(&I); + else if (I.getOpcode() == ARM::t2LoopEnd) + Ends.push_back(&I); + } + + if (Starts.empty() && Decs.empty() && Ends.empty()) + continue; + + Changed = true; + + for (auto *Start : Starts) { + if (Start->getOpcode() == ARM::t2WhileLoopStart) + RevertWhile(Start); + else + Start->eraseFromParent(); + } + for (auto *Dec : Decs) + RevertLoopDec(Dec); + + for (auto *End : Ends) + RevertLoopEnd(End); + } + return Changed; +} + FunctionPass *llvm::createARMLowOverheadLoopsPass() { return new ARMLowOverheadLoops(); } |