aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp1096
1 files changed, 1096 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
new file mode 100644
index 000000000000..e9ffee28fcd2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
@@ -0,0 +1,1096 @@
+//===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass does a few optimisations related to Tail predicated loops
+/// and MVE VPT blocks before register allocation is performed. For VPT blocks
+/// the goal is to maximize the sizes of the blocks that will be created by the
+/// MVE VPT Block Insertion pass (which runs after register allocation). For
+/// tail predicated loops we transform the loop into something that will
+/// hopefully make the backend ARMLowOverheadLoops pass's job easier.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MVETailPredUtils.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-mve-vpt-opts"
+
+static cl::opt<bool>
+MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
+ cl::desc("Enable merging Loop End and Dec instructions."),
+ cl::init(true));
+
+static cl::opt<bool>
+SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
+ cl::desc("Enable setting lr as a predicate in tail predication regions."),
+ cl::init(true));
+
+namespace {
+class MVETPAndVPTOptimisations : public MachineFunctionPass {
+public:
+ static char ID;
+ const Thumb2InstrInfo *TII;
+ MachineRegisterInfo *MRI;
+
+ MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addPreserved<MachineLoopInfoWrapperPass>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addPreserved<MachineDominatorTreeWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return "ARM MVE TailPred and VPT Optimisation Pass";
+ }
+
+private:
+ bool LowerWhileLoopStart(MachineLoop *ML);
+ bool MergeLoopEnd(MachineLoop *ML);
+ bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
+ MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
+ MachineInstr &Instr,
+ MachineOperand &User,
+ Register Target);
+ bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
+ bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
+ bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
+ bool ConvertVPSEL(MachineBasicBlock &MBB);
+ bool HintDoLoopStartReg(MachineBasicBlock &MBB);
+ MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
+ MachineInstr *LoopStart);
+};
+
+char MVETPAndVPTOptimisations::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
+ "ARM MVE TailPred and VPT Optimisations pass", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
+ "ARM MVE TailPred and VPT Optimisations pass", false, false)
+
+static MachineInstr *LookThroughCOPY(MachineInstr *MI,
+ MachineRegisterInfo *MRI) {
+ while (MI && MI->getOpcode() == TargetOpcode::COPY &&
+ MI->getOperand(1).getReg().isVirtual())
+ MI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ return MI;
+}
+
+// Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
+// corresponding PHI that make up a low overhead loop. Only handles 'do' loops
+// at the moment, returning a t2DoLoopStart in LoopStart.
+static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
+ MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
+ MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
+ MachineBasicBlock *Header = ML->getHeader();
+ MachineBasicBlock *Latch = ML->getLoopLatch();
+ if (!Header || !Latch) {
+ LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
+ return false;
+ }
+
+ // Find the loop end from the terminators.
+ LoopEnd = nullptr;
+ for (auto &T : Latch->terminators()) {
+ if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
+ LoopEnd = &T;
+ break;
+ }
+ if (T.getOpcode() == ARM::t2LoopEndDec &&
+ T.getOperand(2).getMBB() == Header) {
+ LoopEnd = &T;
+ break;
+ }
+ }
+ if (!LoopEnd) {
+ LLVM_DEBUG(dbgs() << " no LoopEnd\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
+
+ // Find the dec from the use of the end. There may be copies between
+ // instructions. We expect the loop to loop like:
+ // $vs = t2DoLoopStart ...
+ // loop:
+ // $vp = phi [ $vs ], [ $vd ]
+ // ...
+ // $vd = t2LoopDec $vp
+ // ...
+ // t2LoopEnd $vd, loop
+ if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
+ LoopDec = LoopEnd;
+ else {
+ LoopDec =
+ LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
+ if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
+ LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
+ return false;
+ }
+ }
+ LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
+
+ LoopPhi =
+ LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
+ if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
+ LoopPhi->getNumOperands() != 5 ||
+ (LoopPhi->getOperand(2).getMBB() != Latch &&
+ LoopPhi->getOperand(4).getMBB() != Latch)) {
+ LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
+
+ Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
+ ? LoopPhi->getOperand(3).getReg()
+ : LoopPhi->getOperand(1).getReg();
+ LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
+ if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
+ LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
+ LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
+ LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
+
+ return true;
+}
+
+static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
+ "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
+
+ // Subs
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
+ MIB.add(MI->getOperand(0));
+ MIB.add(MI->getOperand(1));
+ MIB.addImm(0);
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(ARM::NoRegister);
+ MIB.addReg(ARM::CPSR, RegState::Define);
+
+ // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
+ for (MachineInstr &I : MBB->terminators()) {
+ if (I.getOpcode() == ARM::t2WhileLoopStart) {
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
+ MIB.add(MI->getOperand(1)); // branch target
+ MIB.addImm(ARMCC::EQ);
+ MIB.addReg(ARM::CPSR);
+ I.eraseFromParent();
+ break;
+ }
+ }
+
+ MI->eraseFromParent();
+}
+
+// The Hardware Loop insertion and ISel Lowering produce the pseudos for the
+// start of a while loop:
+// %a:gprlr = t2WhileLoopSetup %Cnt
+// t2WhileLoopStart %a, %BB
+// We want to convert those to a single instruction which, like t2LoopEndDec and
+// t2DoLoopStartTP is both a terminator and produces a value:
+// %a:grplr: t2WhileLoopStartLR %Cnt, %BB
+//
+// Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
+// t2WhileLoopStart are not valid past regalloc.
+bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
+ LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
+ << ML->getHeader()->getName() << "\n");
+
+ MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
+ if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
+ return false;
+
+ if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
+ return false;
+
+ Register LR = LoopStart->getOperand(0).getReg();
+ auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
+ return MI.getOpcode() == ARM::t2WhileLoopStart;
+ });
+ if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
+ RevertWhileLoopSetup(LoopStart, TII);
+ RevertLoopDec(LoopStart, TII);
+ RevertLoopEnd(LoopStart, TII);
+ return true;
+ }
+
+ MachineInstrBuilder MI =
+ BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
+ TII->get(ARM::t2WhileLoopStartLR), LR)
+ .add(LoopStart->getOperand(1))
+ .add(WLSIt->getOperand(1));
+ (void)MI;
+ LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
+
+ WLSIt->eraseFromParent();
+ LoopStart->eraseFromParent();
+ return true;
+}
+
+// Return true if this instruction is invalid in a low overhead loop, usually
+// because it clobbers LR.
+static bool IsInvalidTPInstruction(MachineInstr &MI) {
+ return MI.isCall() || isLoopStart(MI);
+}
+
+// Starting from PreHeader, search for invalid instructions back until the
+// LoopStart block is reached. If invalid instructions are found, the loop start
+// is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
+// return the new DLS LoopStart if updated.
+MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
+ MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
+ SmallVector<MachineBasicBlock *> Worklist;
+ SmallPtrSet<MachineBasicBlock *, 4> Visited;
+ Worklist.push_back(PreHeader);
+ Visited.insert(LoopStart->getParent());
+
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = Worklist.pop_back_val();
+ if (Visited.count(MBB))
+ continue;
+
+ for (MachineInstr &MI : *MBB) {
+ if (!IsInvalidTPInstruction(MI))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
+
+ // Create a t2DoLoopStart at the end of the preheader.
+ MachineInstrBuilder MIB =
+ BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
+ LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
+ MIB.add(LoopStart->getOperand(0));
+ MIB.add(LoopStart->getOperand(1));
+
+ // Make sure to remove the kill flags, to prevent them from being invalid.
+ LoopStart->getOperand(1).setIsKill(false);
+
+ // Revert the t2WhileLoopStartLR to a CMP and Br.
+ RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
+ return MIB;
+ }
+
+ Visited.insert(MBB);
+ for (auto *Pred : MBB->predecessors())
+ Worklist.push_back(Pred);
+ }
+ return LoopStart;
+}
+
+// This function converts loops with t2LoopEnd and t2LoopEnd instructions into
+// a single t2LoopEndDec instruction. To do that it needs to make sure that LR
+// will be valid to be used for the low overhead loop, which means nothing else
+// is using LR (especially calls) and there are no superfluous copies in the
+// loop. The t2LoopEndDec is a branching terminator that produces a value (the
+// decrement) around the loop edge, which means we need to be careful that they
+// will be valid to allocate without any spilling.
+bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
+ if (!MergeEndDec)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
+ << "\n");
+
+ MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
+ if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
+ return false;
+
+ // Check if there is an illegal instruction (a call) in the low overhead loop
+ // and if so revert it now before we get any further. While loops also need to
+ // check the preheaders, but can be reverted to a DLS loop if needed.
+ auto *PreHeader = ML->getLoopPreheader();
+ if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
+ LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
+
+ for (MachineBasicBlock *MBB : ML->blocks()) {
+ for (MachineInstr &MI : *MBB) {
+ if (IsInvalidTPInstruction(MI)) {
+ LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
+ if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
+ RevertDoLoopStart(LoopStart, TII);
+ else
+ RevertWhileLoopStartLR(LoopStart, TII);
+ RevertLoopDec(LoopDec, TII);
+ RevertLoopEnd(LoopEnd, TII);
+ return true;
+ }
+ }
+ }
+
+ // Remove any copies from the loop, to ensure the phi that remains is both
+ // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
+ // that cannot spill, we need to be careful what remains in the loop.
+ Register PhiReg = LoopPhi->getOperand(0).getReg();
+ Register DecReg = LoopDec->getOperand(0).getReg();
+ Register StartReg = LoopStart->getOperand(0).getReg();
+ // Ensure the uses are expected, and collect any copies we want to remove.
+ SmallVector<MachineInstr *, 4> Copies;
+ auto CheckUsers = [&Copies](Register BaseReg,
+ ArrayRef<MachineInstr *> ExpectedUsers,
+ MachineRegisterInfo *MRI) {
+ SmallVector<Register, 4> Worklist;
+ Worklist.push_back(BaseReg);
+ while (!Worklist.empty()) {
+ Register Reg = Worklist.pop_back_val();
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
+ if (llvm::is_contained(ExpectedUsers, &MI))
+ continue;
+ if (MI.getOpcode() != TargetOpcode::COPY ||
+ !MI.getOperand(0).getReg().isVirtual()) {
+ LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
+ return false;
+ }
+ Worklist.push_back(MI.getOperand(0).getReg());
+ Copies.push_back(&MI);
+ }
+ }
+ return true;
+ };
+ if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
+ !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
+ !CheckUsers(StartReg, {LoopPhi}, MRI)) {
+ // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
+ if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
+ RevertWhileLoopStartLR(LoopStart, TII);
+ RevertLoopDec(LoopDec, TII);
+ RevertLoopEnd(LoopEnd, TII);
+ return true;
+ }
+ return false;
+ }
+
+ MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
+ MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
+ MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
+
+ if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
+ LoopPhi->getOperand(3).setReg(StartReg);
+ LoopPhi->getOperand(1).setReg(DecReg);
+ } else {
+ LoopPhi->getOperand(1).setReg(StartReg);
+ LoopPhi->getOperand(3).setReg(DecReg);
+ }
+
+ SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
+ if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) {
+ // If the LoopEnd falls through, need to insert a t2B to the fall-through
+ // block so that the non-analyzable t2LoopEndDec doesn't fall through.
+ MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator();
+ BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B))
+ .addMBB(&*MBBI)
+ .add(predOps(ARMCC::AL));
+ }
+
+ // Replace the loop dec and loop end as a single instruction.
+ MachineInstrBuilder MI =
+ BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
+ TII->get(ARM::t2LoopEndDec), DecReg)
+ .addReg(PhiReg)
+ .add(LoopEnd->getOperand(1));
+ (void)MI;
+ LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
+
+ LoopDec->eraseFromParent();
+ LoopEnd->eraseFromParent();
+ for (auto *MI : Copies)
+ MI->eraseFromParent();
+ return true;
+}
+
+// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
+// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
+// instruction, making the backend ARMLowOverheadLoops passes job of finding the
+// VCTP operand much simpler.
+bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
+ MachineDominatorTree *DT) {
+ LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
+ << ML->getHeader()->getName() << "\n");
+
+ // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
+ // in the loop.
+ MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
+ if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
+ return false;
+ if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
+ LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
+ return false;
+
+ SmallVector<MachineInstr *, 4> VCTPs;
+ SmallVector<MachineInstr *, 4> MVEInstrs;
+ for (MachineBasicBlock *BB : ML->blocks()) {
+ for (MachineInstr &MI : *BB)
+ if (isVCTP(&MI))
+ VCTPs.push_back(&MI);
+ else if (findFirstVPTPredOperandIdx(MI) != -1)
+ MVEInstrs.push_back(&MI);
+ }
+
+ if (VCTPs.empty()) {
+ LLVM_DEBUG(dbgs() << " no VCTPs\n");
+ return false;
+ }
+
+ // Check all VCTPs are the same.
+ MachineInstr *FirstVCTP = *VCTPs.begin();
+ for (MachineInstr *VCTP : VCTPs) {
+ LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
+ if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
+ VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
+ LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
+ return false;
+ }
+ }
+
+ // Check for the register being used can be setup before the loop. We expect
+ // this to be:
+ // $vx = ...
+ // loop:
+ // $vp = PHI [ $vx ], [ $vd ]
+ // ..
+ // $vpr = VCTP $vp
+ // ..
+ // $vd = t2SUBri $vp, #n
+ // ..
+ Register CountReg = FirstVCTP->getOperand(1).getReg();
+ if (!CountReg.isVirtual()) {
+ LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
+ return false;
+ }
+ MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
+ if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
+ Phi->getNumOperands() != 5 ||
+ (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
+ Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
+ LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
+ return false;
+ }
+ CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
+ ? Phi->getOperand(3).getReg()
+ : Phi->getOperand(1).getReg();
+
+ // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
+ // the preheader and add the new CountReg to it. We attempt to place it late
+ // in the preheader, but may need to move that earlier based on uses.
+ MachineBasicBlock *MBB = LoopStart->getParent();
+ MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
+ for (MachineInstr &Use :
+ MRI->use_instructions(LoopStart->getOperand(0).getReg()))
+ if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
+ !DT->dominates(ML->getHeader(), Use.getParent())) {
+ LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
+ return false;
+ }
+
+ unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
+ ? ARM::t2DoLoopStartTP
+ : ARM::t2WhileLoopStartTP;
+ MachineInstrBuilder MI =
+ BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
+ .add(LoopStart->getOperand(0))
+ .add(LoopStart->getOperand(1))
+ .addReg(CountReg);
+ if (NewOpc == ARM::t2WhileLoopStartTP)
+ MI.add(LoopStart->getOperand(2));
+ LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
+ << *MI.getInstr());
+ MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
+ LoopStart->eraseFromParent();
+
+ if (SetLRPredicate) {
+ // Each instruction in the loop needs to be using LR as the predicate from
+ // the Phi as the predicate.
+ Register LR = LoopPhi->getOperand(0).getReg();
+ for (MachineInstr *MI : MVEInstrs) {
+ int Idx = findFirstVPTPredOperandIdx(*MI);
+ MI->getOperand(Idx + 2).setReg(LR);
+ }
+ }
+
+ return true;
+}
+
+// Returns true if Opcode is any VCMP Opcode.
+static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
+
+// Returns true if a VCMP with this Opcode can have its operands swapped.
+// There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
+// and VCMPr instructions (since the r is always on the right).
+static bool CanHaveSwappedOperands(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return true;
+ case ARM::MVE_VCMPf32:
+ case ARM::MVE_VCMPf16:
+ case ARM::MVE_VCMPf32r:
+ case ARM::MVE_VCMPf16r:
+ case ARM::MVE_VCMPi8r:
+ case ARM::MVE_VCMPi16r:
+ case ARM::MVE_VCMPi32r:
+ case ARM::MVE_VCMPu8r:
+ case ARM::MVE_VCMPu16r:
+ case ARM::MVE_VCMPu32r:
+ case ARM::MVE_VCMPs8r:
+ case ARM::MVE_VCMPs16r:
+ case ARM::MVE_VCMPs32r:
+ return false;
+ }
+}
+
+// Returns the CondCode of a VCMP Instruction.
+static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
+ assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
+ return ARMCC::CondCodes(Instr.getOperand(3).getImm());
+}
+
+// Returns true if Cond is equivalent to a VPNOT instruction on the result of
+// Prev. Cond and Prev must be VCMPs.
+static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
+ assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
+
+ // Opcodes must match.
+ if (Cond.getOpcode() != Prev.getOpcode())
+ return false;
+
+ MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
+ MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
+
+ // If the VCMP has the opposite condition with the same operands, we can
+ // replace it with a VPNOT
+ ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
+ ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
+ if (ExpectedCode == GetCondCode(Prev))
+ if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
+ return true;
+ // Check again with operands swapped if possible
+ if (!CanHaveSwappedOperands(Cond.getOpcode()))
+ return false;
+ ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
+ return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
+ CondOP2.isIdenticalTo(PrevOP1);
+}
+
+// Returns true if Instr writes to VCCR.
+static bool IsWritingToVCCR(MachineInstr &Instr) {
+ if (Instr.getNumOperands() == 0)
+ return false;
+ MachineOperand &Dst = Instr.getOperand(0);
+ if (!Dst.isReg())
+ return false;
+ Register DstReg = Dst.getReg();
+ if (!DstReg.isVirtual())
+ return false;
+ MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
+ const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
+ return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
+}
+
+// Transforms
+// <Instr that uses %A ('User' Operand)>
+// Into
+// %K = VPNOT %Target
+// <Instr that uses %K ('User' Operand)>
+// And returns the newly inserted VPNOT.
+// This optimization is done in the hopes of preventing spills/reloads of VPR by
+// reducing the number of VCCR values with overlapping lifetimes.
+MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
+ MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
+ Register Target) {
+ Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
+
+ MachineInstrBuilder MIBuilder =
+ BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
+ .addDef(NewResult)
+ .addReg(Target);
+ addUnpredicatedMveVpredNOp(MIBuilder);
+
+ // Make the user use NewResult instead, and clear its kill flag.
+ User.setReg(NewResult);
+ User.setIsKill(false);
+
+ LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): ";
+ MIBuilder.getInstr()->dump());
+
+ return *MIBuilder.getInstr();
+}
+
+// Moves a VPNOT before its first user if an instruction that uses Reg is found
+// in-between the VPNOT and its user.
+// Returns true if there is at least one user of the VPNOT in the block.
+static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Iter,
+ Register Reg) {
+ assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
+ assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
+ "The VPNOT cannot be predicated");
+
+ MachineInstr &VPNOT = *Iter;
+ Register VPNOTResult = VPNOT.getOperand(0).getReg();
+ Register VPNOTOperand = VPNOT.getOperand(1).getReg();
+
+ // Whether the VPNOT will need to be moved, and whether we found a user of the
+ // VPNOT.
+ bool MustMove = false, HasUser = false;
+ MachineOperand *VPNOTOperandKiller = nullptr;
+ for (; Iter != MBB.end(); ++Iter) {
+ if (MachineOperand *MO =
+ Iter->findRegisterUseOperand(VPNOTOperand, /*TRI=*/nullptr,
+ /*isKill*/ true)) {
+ // If we find the operand that kills the VPNOTOperand's result, save it.
+ VPNOTOperandKiller = MO;
+ }
+
+ if (Iter->findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != -1) {
+ MustMove = true;
+ continue;
+ }
+
+ if (Iter->findRegisterUseOperandIdx(VPNOTResult, /*TRI=*/nullptr) == -1)
+ continue;
+
+ HasUser = true;
+ if (!MustMove)
+ break;
+
+ // Move the VPNOT right before Iter
+ LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: ";
+ Iter->dump());
+ MBB.splice(Iter, &MBB, VPNOT.getIterator());
+ // If we move the instr, and its operand was killed earlier, remove the kill
+ // flag.
+ if (VPNOTOperandKiller)
+ VPNOTOperandKiller->setIsKill(false);
+
+ break;
+ }
+ return HasUser;
+}
+
+// This optimisation attempts to reduce the number of overlapping lifetimes of
+// VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
+// this replaces
+// %A:vccr = (something)
+// %B:vccr = VPNOT %A
+// %Foo = (some op that uses %B)
+// %Bar = (some op that uses %A)
+// With
+// %A:vccr = (something)
+// %B:vccr = VPNOT %A
+// %Foo = (some op that uses %B)
+// %TMP2:vccr = VPNOT %B
+// %Bar = (some op that uses %A)
+bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
+ SmallVector<MachineInstr *, 4> DeadInstructions;
+ bool Modified = false;
+
+ while (Iter != End) {
+ Register VCCRValue, OppositeVCCRValue;
+ // The first loop looks for 2 unpredicated instructions:
+ // %A:vccr = (instr) ; A is stored in VCCRValue
+ // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue
+ for (; Iter != End; ++Iter) {
+ // We're only interested in unpredicated instructions that write to VCCR.
+ if (!IsWritingToVCCR(*Iter) ||
+ getVPTInstrPredicate(*Iter) != ARMVCC::None)
+ continue;
+ Register Dst = Iter->getOperand(0).getReg();
+
+ // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
+ // found what we were looking for.
+ if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
+ Iter->findRegisterUseOperandIdx(VCCRValue, /*TRI=*/nullptr) != -1) {
+ // Move the VPNOT closer to its first user if needed, and ignore if it
+ // has no users.
+ if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
+ continue;
+
+ OppositeVCCRValue = Dst;
+ ++Iter;
+ break;
+ }
+
+ // Else, just set VCCRValue.
+ VCCRValue = Dst;
+ }
+
+ // If the first inner loop didn't find anything, stop here.
+ if (Iter == End)
+ break;
+
+ assert(VCCRValue && OppositeVCCRValue &&
+ "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
+ "stopped before the end of the block!");
+ assert(VCCRValue != OppositeVCCRValue &&
+ "VCCRValue should not be equal to OppositeVCCRValue!");
+
+ // LastVPNOTResult always contains the same value as OppositeVCCRValue.
+ Register LastVPNOTResult = OppositeVCCRValue;
+
+ // This second loop tries to optimize the remaining instructions.
+ for (; Iter != End; ++Iter) {
+ bool IsInteresting = false;
+
+ if (MachineOperand *MO =
+ Iter->findRegisterUseOperand(VCCRValue, /*TRI=*/nullptr)) {
+ IsInteresting = true;
+
+ // - If the instruction is a VPNOT, it can be removed, and we can just
+ // replace its uses with LastVPNOTResult.
+ // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
+ if (Iter->getOpcode() == ARM::MVE_VPNOT) {
+ Register Result = Iter->getOperand(0).getReg();
+
+ MRI->replaceRegWith(Result, LastVPNOTResult);
+ DeadInstructions.push_back(&*Iter);
+ Modified = true;
+
+ LLVM_DEBUG(dbgs()
+ << "Replacing all uses of '" << printReg(Result)
+ << "' with '" << printReg(LastVPNOTResult) << "'\n");
+ } else {
+ MachineInstr &VPNOT =
+ ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
+ Modified = true;
+
+ LastVPNOTResult = VPNOT.getOperand(0).getReg();
+ std::swap(VCCRValue, OppositeVCCRValue);
+
+ LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
+ << "' with '" << printReg(LastVPNOTResult)
+ << "' in instr: " << *Iter);
+ }
+ } else {
+ // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
+ // instead as they contain the same value.
+ if (MachineOperand *MO = Iter->findRegisterUseOperand(
+ OppositeVCCRValue, /*TRI=*/nullptr)) {
+ IsInteresting = true;
+
+ // This is pointless if LastVPNOTResult == OppositeVCCRValue.
+ if (LastVPNOTResult != OppositeVCCRValue) {
+ LLVM_DEBUG(dbgs() << "Replacing usage of '"
+ << printReg(OppositeVCCRValue) << "' with '"
+ << printReg(LastVPNOTResult) << " for instr: ";
+ Iter->dump());
+ MO->setReg(LastVPNOTResult);
+ Modified = true;
+ }
+
+ MO->setIsKill(false);
+ }
+
+ // If this is an unpredicated VPNOT on
+ // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
+ if (Iter->getOpcode() == ARM::MVE_VPNOT &&
+ getVPTInstrPredicate(*Iter) == ARMVCC::None) {
+ Register VPNOTOperand = Iter->getOperand(1).getReg();
+ if (VPNOTOperand == LastVPNOTResult ||
+ VPNOTOperand == OppositeVCCRValue) {
+ IsInteresting = true;
+
+ std::swap(VCCRValue, OppositeVCCRValue);
+ LastVPNOTResult = Iter->getOperand(0).getReg();
+ }
+ }
+ }
+
+ // If this instruction was not interesting, and it writes to VCCR, stop.
+ if (!IsInteresting && IsWritingToVCCR(*Iter))
+ break;
+ }
+ }
+
+ for (MachineInstr *DeadInstruction : DeadInstructions)
+ DeadInstruction->eraseFromParent();
+
+ return Modified;
+}
+
+// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
+bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
+ SmallVector<MachineInstr *, 4> DeadInstructions;
+
+ // The last VCMP that we have seen and that couldn't be replaced.
+ // This is reset when an instruction that writes to VCCR/VPR is found, or when
+ // a VCMP is replaced with a VPNOT.
+ // We'll only replace VCMPs with VPNOTs when this is not null, and when the
+ // current VCMP is the opposite of PrevVCMP.
+ MachineInstr *PrevVCMP = nullptr;
+ // If we find an instruction that kills the result of PrevVCMP, we save the
+ // operand here to remove the kill flag in case we need to use PrevVCMP's
+ // result.
+ MachineOperand *PrevVCMPResultKiller = nullptr;
+
+ for (MachineInstr &Instr : MBB.instrs()) {
+ if (PrevVCMP) {
+ if (MachineOperand *MO =
+ Instr.findRegisterUseOperand(PrevVCMP->getOperand(0).getReg(),
+ /*TRI=*/nullptr, /*isKill*/ true)) {
+ // If we come accross the instr that kills PrevVCMP's result, record it
+ // so we can remove the kill flag later if we need to.
+ PrevVCMPResultKiller = MO;
+ }
+ }
+
+ // Ignore predicated instructions.
+ if (getVPTInstrPredicate(Instr) != ARMVCC::None)
+ continue;
+
+ // Only look at VCMPs
+ if (!IsVCMP(Instr.getOpcode())) {
+ // If the instruction writes to VCCR, forget the previous VCMP.
+ if (IsWritingToVCCR(Instr))
+ PrevVCMP = nullptr;
+ continue;
+ }
+
+ if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
+ PrevVCMP = &Instr;
+ continue;
+ }
+
+ // The register containing the result of the VCMP that we're going to
+ // replace.
+ Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
+
+ // Build a VPNOT to replace the VCMP, reusing its operands.
+ MachineInstrBuilder MIBuilder =
+ BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
+ .add(Instr.getOperand(0))
+ .addReg(PrevVCMPResultReg);
+ addUnpredicatedMveVpredNOp(MIBuilder);
+ LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
+ MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
+ Instr.dump());
+
+ // If we found an instruction that uses, and kills PrevVCMP's result,
+ // remove the kill flag.
+ if (PrevVCMPResultKiller)
+ PrevVCMPResultKiller->setIsKill(false);
+
+ // Finally, mark the old VCMP for removal and reset
+ // PrevVCMP/PrevVCMPResultKiller.
+ DeadInstructions.push_back(&Instr);
+ PrevVCMP = nullptr;
+ PrevVCMPResultKiller = nullptr;
+ }
+
+ for (MachineInstr *DeadInstruction : DeadInstructions)
+ DeadInstruction->eraseFromParent();
+
+ return !DeadInstructions.empty();
+}
+
+bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
+ MachineDominatorTree *DT) {
+ // Scan through the block, looking for instructions that use constants moves
+ // into VPR that are the negative of one another. These are expected to be
+ // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
+ // mask is kept it or and VPNOT's of it are added or reused as we scan through
+ // the function.
+ unsigned LastVPTImm = 0;
+ Register LastVPTReg = 0;
+ SmallSet<MachineInstr *, 4> DeadInstructions;
+
+ for (MachineInstr &Instr : MBB.instrs()) {
+ // Look for predicated MVE instructions.
+ int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
+ if (PIdx == -1)
+ continue;
+ Register VPR = Instr.getOperand(PIdx + 1).getReg();
+ if (!VPR.isVirtual())
+ continue;
+
+ // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
+ MachineInstr *Copy = MRI->getVRegDef(VPR);
+ if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
+ !Copy->getOperand(1).getReg().isVirtual() ||
+ MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
+ LastVPTReg = 0;
+ continue;
+ }
+ Register GPR = Copy->getOperand(1).getReg();
+
+ // Find the Immediate used by the copy.
+ auto getImm = [&](Register GPR) -> unsigned {
+ MachineInstr *Def = MRI->getVRegDef(GPR);
+ if (Def && (Def->getOpcode() == ARM::t2MOVi ||
+ Def->getOpcode() == ARM::t2MOVi16))
+ return Def->getOperand(1).getImm();
+ return -1U;
+ };
+ unsigned Imm = getImm(GPR);
+ if (Imm == -1U) {
+ LastVPTReg = 0;
+ continue;
+ }
+
+ unsigned NotImm = ~Imm & 0xffff;
+ if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
+ MRI->clearKillFlags(LastVPTReg);
+ Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
+ if (MRI->use_empty(VPR)) {
+ DeadInstructions.insert(Copy);
+ if (MRI->hasOneUse(GPR))
+ DeadInstructions.insert(MRI->getVRegDef(GPR));
+ }
+ LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
+ VPR = LastVPTReg;
+ } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
+ // We have found the not of a previous constant. Create a VPNot of the
+ // earlier predicate reg and use it instead of the copy.
+ Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
+ auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
+ TII->get(ARM::MVE_VPNOT), NewVPR)
+ .addReg(LastVPTReg);
+ addUnpredicatedMveVpredNOp(VPNot);
+
+ // Use the new register and check if the def is now dead.
+ Instr.getOperand(PIdx + 1).setReg(NewVPR);
+ if (MRI->use_empty(VPR)) {
+ DeadInstructions.insert(Copy);
+ if (MRI->hasOneUse(GPR))
+ DeadInstructions.insert(MRI->getVRegDef(GPR));
+ }
+ LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
+ << Instr);
+ VPR = NewVPR;
+ }
+
+ LastVPTImm = Imm;
+ LastVPTReg = VPR;
+ }
+
+ for (MachineInstr *DI : DeadInstructions)
+ DI->eraseFromParent();
+
+ return !DeadInstructions.empty();
+}
+
+// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
+// somewhat blunt approximation to allow tail predicated with vpsel
+// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
+// different semantics under tail predication. Until that is modelled we just
+// convert to a VMOVT (via a predicated VORR) instead.
+bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
+ bool HasVCTP = false;
+ SmallVector<MachineInstr *, 4> DeadInstructions;
+
+ for (MachineInstr &MI : MBB.instrs()) {
+ if (isVCTP(&MI)) {
+ HasVCTP = true;
+ continue;
+ }
+
+ if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
+ continue;
+
+ MachineInstrBuilder MIBuilder =
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(1))
+ .addImm(ARMVCC::Then)
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5))
+ .add(MI.getOperand(2));
+ // Silence unused variable warning in release builds.
+ (void)MIBuilder;
+ LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
+ dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
+ DeadInstructions.push_back(&MI);
+ }
+
+ for (MachineInstr *DeadInstruction : DeadInstructions)
+ DeadInstruction->eraseFromParent();
+
+ return !DeadInstructions.empty();
+}
+
+// Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
+// the instruction may be removable as a noop.
+bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineInstr &MI : MBB.instrs()) {
+ if (MI.getOpcode() != ARM::t2DoLoopStart)
+ continue;
+ Register R = MI.getOperand(1).getReg();
+ MachineFunction *MF = MI.getParent()->getParent();
+ MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
+ Changed = true;
+ }
+ return Changed;
+}
+
+bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
+
+ if (!STI.isThumb2() || !STI.hasLOB())
+ return false;
+
+ TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+ MRI = &Fn.getRegInfo();
+ MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+ MachineDominatorTree *DT =
+ &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+
+ LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
+ << "********** Function: " << Fn.getName() << '\n');
+
+ bool Modified = false;
+ for (MachineLoop *ML : MLI->getLoopsInPreorder()) {
+ Modified |= LowerWhileLoopStart(ML);
+ Modified |= MergeLoopEnd(ML);
+ Modified |= ConvertTailPredLoop(ML, DT);
+ }
+
+ for (MachineBasicBlock &MBB : Fn) {
+ Modified |= HintDoLoopStartReg(MBB);
+ Modified |= ReplaceConstByVPNOTs(MBB, DT);
+ Modified |= ReplaceVCMPsByVPNOTs(MBB);
+ Modified |= ReduceOldVCCRValueUses(MBB);
+ Modified |= ConvertVPSEL(MBB);
+ }
+
+ LLVM_DEBUG(dbgs() << "**************************************\n");
+ return Modified;
+}
+
+/// createMVETPAndVPTOptimisationsPass
+FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
+ return new MVETPAndVPTOptimisations();
+}