diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp | 293 |
1 files changed, 293 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp new file mode 100644 index 000000000000..42db18332f1c --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -0,0 +1,293 @@ +//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass performs below peephole optimizations on MIR level. +// +// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri +// MOVi64imm + ANDXrr ==> ANDXri + ANDXri +// +// The mov pseudo instruction could be expanded to multiple mov instructions +// later. In this case, we could try to split the constant operand of mov +// instruction into two bitmask immediates. It makes two AND instructions +// intead of multiple `mov` + `and` instructions. +// +// 2. Remove redundant ORRWrs which is generated by zero-extend. +// +// %3:gpr32 = ORRWrs $wzr, %2, 0 +// %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32 +// +// If AArch64's 32-bit form of instruction defines the source operand of +// ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source +// operand are set to zero. +// +//===----------------------------------------------------------------------===// + +#include "AArch64ExpandImm.h" +#include "AArch64InstrInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-mi-peephole-opt" + +namespace { + +struct AArch64MIPeepholeOpt : public MachineFunctionPass { + static char ID; + + AArch64MIPeepholeOpt() : MachineFunctionPass(ID) { + initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry()); + } + + const AArch64InstrInfo *TII; + MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + + template <typename T> + bool visitAND(MachineInstr &MI, + SmallSetVector<MachineInstr *, 8> &ToBeRemoved); + bool visitORR(MachineInstr &MI, + SmallSetVector<MachineInstr *, 8> &ToBeRemoved); + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return "AArch64 MI Peephole Optimization pass"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +char AArch64MIPeepholeOpt::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt", + "AArch64 MI Peephole Optimization", false, false) + +template <typename T> +static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { + T UImm = static_cast<T>(Imm); + if (AArch64_AM::isLogicalImmediate(UImm, RegSize)) + return false; + + // If this immediate can be handled by one instruction, do not split it. + SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; + AArch64_IMM::expandMOVImm(UImm, RegSize, Insn); + if (Insn.size() == 1) + return false; + + // The bitmask immediate consists of consecutive ones. Let's say there is + // constant 0b00000000001000000000010000000000 which does not consist of + // consecutive ones. We can split it in to two bitmask immediate like + // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111. + // If we do AND with these two bitmask immediate, we can see original one. + unsigned LowestBitSet = countTrailingZeros(UImm); + unsigned HighestBitSet = Log2_64(UImm); + + // Create a mask which is filled with one from the position of lowest bit set + // to the position of highest bit set. + T NewImm1 = (static_cast<T>(2) << HighestBitSet) - + (static_cast<T>(1) << LowestBitSet); + // Create a mask which is filled with one outside the position of lowest bit + // set and the position of highest bit set. + T NewImm2 = UImm | ~NewImm1; + + // If the split value is not valid bitmask immediate, do not split this + // constant. + if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) + return false; + + Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize); + Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize); + return true; +} + +template <typename T> +bool AArch64MIPeepholeOpt::visitAND( + MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) { + // Try below transformation. + // + // MOVi32imm + ANDWrr ==> ANDWri + ANDWri + // MOVi64imm + ANDXrr ==> ANDXri + ANDXri + // + // The mov pseudo instruction could be expanded to multiple mov instructions + // later. Let's try to split the constant operand of mov instruction into two + // bitmask immediates. It makes only two AND instructions intead of multiple + // mov + and instructions. + + unsigned RegSize = sizeof(T) * 8; + assert((RegSize == 32 || RegSize == 64) && + "Invalid RegSize for AND bitmask peephole optimization"); + + // Check whether AND's MBB is in loop and the AND is loop invariant. + MachineBasicBlock *MBB = MI.getParent(); + MachineLoop *L = MLI->getLoopFor(MBB); + if (L && !L->isLoopInvariant(MI)) + return false; + + // Check whether AND's operand is MOV with immediate. + MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); + if (!MovMI) + return false; + + MachineInstr *SubregToRegMI = nullptr; + // If it is SUBREG_TO_REG, check its operand. + if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) { + SubregToRegMI = MovMI; + MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg()); + if (!MovMI) + return false; + } + + if (MovMI->getOpcode() != AArch64::MOVi32imm && + MovMI->getOpcode() != AArch64::MOVi64imm) + return false; + + // If the MOV has multiple uses, do not split the immediate because it causes + // more instructions. + if (!MRI->hasOneUse(MovMI->getOperand(0).getReg())) + return false; + + if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg())) + return false; + + // Split the bitmask immediate into two. + T UImm = static_cast<T>(MovMI->getOperand(1).getImm()); + // For the 32 bit form of instruction, the upper 32 bits of the destination + // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits + // of UImm to zero. + if (SubregToRegMI) + UImm &= 0xFFFFFFFF; + T Imm1Enc; + T Imm2Enc; + if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc)) + return false; + + // Create new AND MIs. + DebugLoc DL = MI.getDebugLoc(); + const TargetRegisterClass *ANDImmRC = + (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass; + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC); + Register NewDstReg = MRI->createVirtualRegister(ANDImmRC); + unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri; + + MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg)); + BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg) + .addReg(SrcReg) + .addImm(Imm1Enc); + + MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg)); + BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg) + .addReg(NewTmpReg) + .addImm(Imm2Enc); + + MRI->replaceRegWith(DstReg, NewDstReg); + // replaceRegWith changes MI's definition register. Keep it for SSA form until + // deleting MI. + MI.getOperand(0).setReg(DstReg); + + ToBeRemoved.insert(&MI); + if (SubregToRegMI) + ToBeRemoved.insert(SubregToRegMI); + ToBeRemoved.insert(MovMI); + + return true; +} + +bool AArch64MIPeepholeOpt::visitORR( + MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) { + // Check this ORR comes from below zero-extend pattern. + // + // def : Pat<(i64 (zext GPR32:$src)), + // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; + if (MI.getOperand(3).getImm() != 0) + return false; + + if (MI.getOperand(1).getReg() != AArch64::WZR) + return false; + + MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); + if (!SrcMI) + return false; + + // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC + // + // When you use the 32-bit form of an instruction, the upper 32 bits of the + // source registers are ignored and the upper 32 bits of the destination + // register are set to zero. + // + // If AArch64's 32-bit form of instruction defines the source operand of + // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is + // real AArch64 instruction and if it is not, do not process the opcode + // conservatively. + if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) + return false; + + Register DefReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); + MRI->replaceRegWith(DefReg, SrcReg); + MRI->clearKillFlags(SrcReg); + // replaceRegWith changes MI's definition register. Keep it for SSA form until + // deleting MI. + MI.getOperand(0).setReg(DefReg); + ToBeRemoved.insert(&MI); + + LLVM_DEBUG({ dbgs() << "Removed: " << MI << "\n"; }); + + return true; +} + +bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); + MLI = &getAnalysis<MachineLoopInfo>(); + MRI = &MF.getRegInfo(); + + if (!MRI->isSSA()) + return false; + + bool Changed = false; + SmallSetVector<MachineInstr *, 8> ToBeRemoved; + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + switch (MI.getOpcode()) { + default: + break; + case AArch64::ANDWrr: + Changed = visitAND<uint32_t>(MI, ToBeRemoved); + break; + case AArch64::ANDXrr: + Changed = visitAND<uint64_t>(MI, ToBeRemoved); + break; + case AArch64::ORRWrs: + Changed = visitORR(MI, ToBeRemoved); + } + } + } + + for (MachineInstr *MI : ToBeRemoved) + MI->eraseFromParent(); + + return Changed; +} + +FunctionPass *llvm::createAArch64MIPeepholeOptPass() { + return new AArch64MIPeepholeOpt(); +} |
