diff options
Diffstat (limited to 'llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp')
-rw-r--r-- | llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp | 464 |
1 files changed, 464 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp new file mode 100644 index 000000000000..382ddd4572c7 --- /dev/null +++ b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp @@ -0,0 +1,464 @@ +//===-- MVEVPTOptimisationsPass.cpp ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This pass does a few optimisations related to MVE VPT blocks before +/// register allocation is performed. The goal is to maximize the sizes of the +/// blocks that will be created by the MVE VPT Block Insertion pass (which runs +/// after register allocation). The first optimisation done by this pass is the +/// replacement of "opposite" VCMPs with VPNOTs, so the Block Insertion pass +/// can delete them later to create larger VPT blocks. +/// The second optimisation replaces re-uses of old VCCR values with VPNOTs when +/// inside a block of predicated instructions. This is done to avoid +/// spill/reloads of VPR in the middle of a block, which prevents the Block +/// Insertion pass from creating large blocks. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "Thumb2InstrInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/Debug.h" +#include <cassert> + +using namespace llvm; + +#define DEBUG_TYPE "arm-mve-vpt-opts" + +namespace { +class MVEVPTOptimisations : public MachineFunctionPass { +public: + static char ID; + const Thumb2InstrInfo *TII; + MachineRegisterInfo *MRI; + + MVEVPTOptimisations() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &Fn) override; + + StringRef getPassName() const override { + return "ARM MVE VPT Optimisation Pass"; + } + +private: + MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB, + MachineInstr &Instr, + MachineOperand &User, + Register Target); + bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB); + bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB); +}; + +char MVEVPTOptimisations::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(MVEVPTOptimisations, DEBUG_TYPE, + "ARM MVE VPT Optimisations pass", false, false) + +// Returns true if Opcode is any VCMP Opcode. +static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; } + +// Returns true if a VCMP with this Opcode can have its operands swapped. +// There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs, +// and VCMPr instructions (since the r is always on the right). +static bool CanHaveSwappedOperands(unsigned Opcode) { + switch (Opcode) { + default: + return true; + case ARM::MVE_VCMPf32: + case ARM::MVE_VCMPf16: + case ARM::MVE_VCMPf32r: + case ARM::MVE_VCMPf16r: + case ARM::MVE_VCMPi8r: + case ARM::MVE_VCMPi16r: + case ARM::MVE_VCMPi32r: + case ARM::MVE_VCMPu8r: + case ARM::MVE_VCMPu16r: + case ARM::MVE_VCMPu32r: + case ARM::MVE_VCMPs8r: + case ARM::MVE_VCMPs16r: + case ARM::MVE_VCMPs32r: + return false; + } +} + +// Returns the CondCode of a VCMP Instruction. +static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) { + assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP"); + return ARMCC::CondCodes(Instr.getOperand(3).getImm()); +} + +// Returns true if Cond is equivalent to a VPNOT instruction on the result of +// Prev. Cond and Prev must be VCMPs. +static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) { + assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode())); + + // Opcodes must match. + if (Cond.getOpcode() != Prev.getOpcode()) + return false; + + MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2); + MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2); + + // If the VCMP has the opposite condition with the same operands, we can + // replace it with a VPNOT + ARMCC::CondCodes ExpectedCode = GetCondCode(Cond); + ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode); + if (ExpectedCode == GetCondCode(Prev)) + if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2)) + return true; + // Check again with operands swapped if possible + if (!CanHaveSwappedOperands(Cond.getOpcode())) + return false; + ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode); + return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) && + CondOP2.isIdenticalTo(PrevOP1); +} + +// Returns true if Instr writes to VCCR. +static bool IsWritingToVCCR(MachineInstr &Instr) { + if (Instr.getNumOperands() == 0) + return false; + MachineOperand &Dst = Instr.getOperand(0); + if (!Dst.isReg()) + return false; + Register DstReg = Dst.getReg(); + if (!DstReg.isVirtual()) + return false; + MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo(); + const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg); + return RegClass && (RegClass->getID() == ARM::VCCRRegClassID); +} + +// Transforms +// <Instr that uses %A ('User' Operand)> +// Into +// %K = VPNOT %Target +// <Instr that uses %K ('User' Operand)> +// And returns the newly inserted VPNOT. +// This optimization is done in the hopes of preventing spills/reloads of VPR by +// reducing the number of VCCR values with overlapping lifetimes. +MachineInstr &MVEVPTOptimisations::ReplaceRegisterUseWithVPNOT( + MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User, + Register Target) { + Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target)); + + MachineInstrBuilder MIBuilder = + BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT)) + .addDef(NewResult) + .addReg(Target); + addUnpredicatedMveVpredNOp(MIBuilder); + + // Make the user use NewResult instead, and clear its kill flag. + User.setReg(NewResult); + User.setIsKill(false); + + LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): "; + MIBuilder.getInstr()->dump()); + + return *MIBuilder.getInstr(); +} + +// Moves a VPNOT before its first user if an instruction that uses Reg is found +// in-between the VPNOT and its user. +// Returns true if there is at least one user of the VPNOT in the block. +static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, + MachineBasicBlock::iterator Iter, + Register Reg) { + assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!"); + assert(getVPTInstrPredicate(*Iter) == ARMVCC::None && + "The VPNOT cannot be predicated"); + + MachineInstr &VPNOT = *Iter; + Register VPNOTResult = VPNOT.getOperand(0).getReg(); + Register VPNOTOperand = VPNOT.getOperand(1).getReg(); + + // Whether the VPNOT will need to be moved, and whether we found a user of the + // VPNOT. + bool MustMove = false, HasUser = false; + MachineOperand *VPNOTOperandKiller = nullptr; + for (; Iter != MBB.end(); ++Iter) { + if (MachineOperand *MO = + Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) { + // If we find the operand that kills the VPNOTOperand's result, save it. + VPNOTOperandKiller = MO; + } + + if (Iter->findRegisterUseOperandIdx(Reg) != -1) { + MustMove = true; + continue; + } + + if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1) + continue; + + HasUser = true; + if (!MustMove) + break; + + // Move the VPNOT right before Iter + LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: "; + Iter->dump()); + MBB.splice(Iter, &MBB, VPNOT.getIterator()); + // If we move the instr, and its operand was killed earlier, remove the kill + // flag. + if (VPNOTOperandKiller) + VPNOTOperandKiller->setIsKill(false); + + break; + } + return HasUser; +} + +// This optimisation attempts to reduce the number of overlapping lifetimes of +// VCCR values by replacing uses of old VCCR values with VPNOTs. For example, +// this replaces +// %A:vccr = (something) +// %B:vccr = VPNOT %A +// %Foo = (some op that uses %B) +// %Bar = (some op that uses %A) +// With +// %A:vccr = (something) +// %B:vccr = VPNOT %A +// %Foo = (some op that uses %B) +// %TMP2:vccr = VPNOT %B +// %Bar = (some op that uses %A) +bool MVEVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) { + MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end(); + SmallVector<MachineInstr *, 4> DeadInstructions; + bool Modified = false; + + while (Iter != End) { + Register VCCRValue, OppositeVCCRValue; + // The first loop looks for 2 unpredicated instructions: + // %A:vccr = (instr) ; A is stored in VCCRValue + // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue + for (; Iter != End; ++Iter) { + // We're only interested in unpredicated instructions that write to VCCR. + if (!IsWritingToVCCR(*Iter) || + getVPTInstrPredicate(*Iter) != ARMVCC::None) + continue; + Register Dst = Iter->getOperand(0).getReg(); + + // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've + // found what we were looking for. + if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT && + Iter->findRegisterUseOperandIdx(VCCRValue) != -1) { + // Move the VPNOT closer to its first user if needed, and ignore if it + // has no users. + if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue)) + continue; + + OppositeVCCRValue = Dst; + ++Iter; + break; + } + + // Else, just set VCCRValue. + VCCRValue = Dst; + } + + // If the first inner loop didn't find anything, stop here. + if (Iter == End) + break; + + assert(VCCRValue && OppositeVCCRValue && + "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop " + "stopped before the end of the block!"); + assert(VCCRValue != OppositeVCCRValue && + "VCCRValue should not be equal to OppositeVCCRValue!"); + + // LastVPNOTResult always contains the same value as OppositeVCCRValue. + Register LastVPNOTResult = OppositeVCCRValue; + + // This second loop tries to optimize the remaining instructions. + for (; Iter != End; ++Iter) { + bool IsInteresting = false; + + if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) { + IsInteresting = true; + + // - If the instruction is a VPNOT, it can be removed, and we can just + // replace its uses with LastVPNOTResult. + // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue. + if (Iter->getOpcode() == ARM::MVE_VPNOT) { + Register Result = Iter->getOperand(0).getReg(); + + MRI->replaceRegWith(Result, LastVPNOTResult); + DeadInstructions.push_back(&*Iter); + Modified = true; + + LLVM_DEBUG(dbgs() + << "Replacing all uses of '" << printReg(Result) + << "' with '" << printReg(LastVPNOTResult) << "'\n"); + } else { + MachineInstr &VPNOT = + ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult); + Modified = true; + + LastVPNOTResult = VPNOT.getOperand(0).getReg(); + std::swap(VCCRValue, OppositeVCCRValue); + + LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue) + << "' with '" << printReg(LastVPNOTResult) + << "' in instr: " << *Iter); + } + } else { + // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult + // instead as they contain the same value. + if (MachineOperand *MO = + Iter->findRegisterUseOperand(OppositeVCCRValue)) { + IsInteresting = true; + + // This is pointless if LastVPNOTResult == OppositeVCCRValue. + if (LastVPNOTResult != OppositeVCCRValue) { + LLVM_DEBUG(dbgs() << "Replacing usage of '" + << printReg(OppositeVCCRValue) << "' with '" + << printReg(LastVPNOTResult) << " for instr: "; + Iter->dump()); + MO->setReg(LastVPNOTResult); + Modified = true; + } + + MO->setIsKill(false); + } + + // If this is an unpredicated VPNOT on + // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it. + if (Iter->getOpcode() == ARM::MVE_VPNOT && + getVPTInstrPredicate(*Iter) == ARMVCC::None) { + Register VPNOTOperand = Iter->getOperand(1).getReg(); + if (VPNOTOperand == LastVPNOTResult || + VPNOTOperand == OppositeVCCRValue) { + IsInteresting = true; + + std::swap(VCCRValue, OppositeVCCRValue); + LastVPNOTResult = Iter->getOperand(0).getReg(); + } + } + } + + // If this instruction was not interesting, and it writes to VCCR, stop. + if (!IsInteresting && IsWritingToVCCR(*Iter)) + break; + } + } + + for (MachineInstr *DeadInstruction : DeadInstructions) + DeadInstruction->removeFromParent(); + + return Modified; +} + +// This optimisation replaces VCMPs with VPNOTs when they are equivalent. +bool MVEVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) { + SmallVector<MachineInstr *, 4> DeadInstructions; + + // The last VCMP that we have seen and that couldn't be replaced. + // This is reset when an instruction that writes to VCCR/VPR is found, or when + // a VCMP is replaced with a VPNOT. + // We'll only replace VCMPs with VPNOTs when this is not null, and when the + // current VCMP is the opposite of PrevVCMP. + MachineInstr *PrevVCMP = nullptr; + // If we find an instruction that kills the result of PrevVCMP, we save the + // operand here to remove the kill flag in case we need to use PrevVCMP's + // result. + MachineOperand *PrevVCMPResultKiller = nullptr; + + for (MachineInstr &Instr : MBB.instrs()) { + if (PrevVCMP) { + if (MachineOperand *MO = Instr.findRegisterUseOperand( + PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) { + // If we come accross the instr that kills PrevVCMP's result, record it + // so we can remove the kill flag later if we need to. + PrevVCMPResultKiller = MO; + } + } + + // Ignore predicated instructions. + if (getVPTInstrPredicate(Instr) != ARMVCC::None) + continue; + + // Only look at VCMPs + if (!IsVCMP(Instr.getOpcode())) { + // If the instruction writes to VCCR, forget the previous VCMP. + if (IsWritingToVCCR(Instr)) + PrevVCMP = nullptr; + continue; + } + + if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) { + PrevVCMP = &Instr; + continue; + } + + // The register containing the result of the VCMP that we're going to + // replace. + Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg(); + + // Build a VPNOT to replace the VCMP, reusing its operands. + MachineInstrBuilder MIBuilder = + BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT)) + .add(Instr.getOperand(0)) + .addReg(PrevVCMPResultReg); + addUnpredicatedMveVpredNOp(MIBuilder); + LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): "; + MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: "; + Instr.dump()); + + // If we found an instruction that uses, and kills PrevVCMP's result, + // remove the kill flag. + if (PrevVCMPResultKiller) + PrevVCMPResultKiller->setIsKill(false); + + // Finally, mark the old VCMP for removal and reset + // PrevVCMP/PrevVCMPResultKiller. + DeadInstructions.push_back(&Instr); + PrevVCMP = nullptr; + PrevVCMPResultKiller = nullptr; + } + + for (MachineInstr *DeadInstruction : DeadInstructions) + DeadInstruction->removeFromParent(); + + return !DeadInstructions.empty(); +} + +bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { + const ARMSubtarget &STI = + static_cast<const ARMSubtarget &>(Fn.getSubtarget()); + + if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) + return false; + + TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); + MRI = &Fn.getRegInfo(); + + LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n" + << "********** Function: " << Fn.getName() << '\n'); + + bool Modified = false; + for (MachineBasicBlock &MBB : Fn) { + Modified |= ReplaceVCMPsByVPNOTs(MBB); + Modified |= ReduceOldVCCRValueUses(MBB); + } + + LLVM_DEBUG(dbgs() << "**************************************\n"); + return Modified; +} + +/// createMVEVPTOptimisationsPass +FunctionPass *llvm::createMVEVPTOptimisationsPass() { + return new MVEVPTOptimisations(); +} |