diff options
Diffstat (limited to 'lib/Target/PowerPC/PPCQPXLoadSplat.cpp')
-rw-r--r-- | lib/Target/PowerPC/PPCQPXLoadSplat.cpp | 166 |
1 files changed, 166 insertions, 0 deletions
diff --git a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp new file mode 100644 index 0000000000000..bfe20c12974b6 --- /dev/null +++ b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp @@ -0,0 +1,166 @@ +//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The QPX vector registers overlay the scalar floating-point registers, and +// any scalar floating-point loads splat their value across all vector lanes. +// Thus, if we have a scalar load followed by a splat, we can remove the splat +// (i.e. replace the load with a load-and-splat pseudo instruction). +// +// This pass must run after anything that might do store-to-load forwarding. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "ppc-qpx-load-splat" + +STATISTIC(NumSimplified, "Number of QPX load splats simplified"); + +namespace llvm { + void initializePPCQPXLoadSplatPass(PassRegistry&); +} + +namespace { + struct PPCQPXLoadSplat : public MachineFunctionPass { + static char ID; + PPCQPXLoadSplat() : MachineFunctionPass(ID) { + initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + + const char *getPassName() const override { + return "PowerPC QPX Load Splat Simplification"; + } + }; + char PPCQPXLoadSplat::ID = 0; +} + +INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat", + "PowerPC QPX Load Splat Simplification", + false, false) + +FunctionPass *llvm::createPPCQPXLoadSplatPass() { + return new PPCQPXLoadSplat(); +} + +bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + + bool MadeChange = false; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + + for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) { + MachineBasicBlock *MBB = &*MFI; + SmallVector<MachineInstr *, 4> Splats; + + for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) { + MachineInstr *MI = &*MBBI; + + if (MI->hasUnmodeledSideEffects() || MI->isCall()) { + Splats.clear(); + continue; + } + + // We're looking for a sequence like this: + // %F0<def> = LFD 0, %X3<kill>, %QF0<imp-def>; mem:LD8[%a](tbaa=!2) + // %QF1<def> = QVESPLATI %QF0<kill>, 0, %RM<imp-use> + + for (auto SI = Splats.begin(); SI != Splats.end();) { + MachineInstr *SMI = *SI; + unsigned SplatReg = SMI->getOperand(0).getReg(); + unsigned SrcReg = SMI->getOperand(1).getReg(); + + if (MI->modifiesRegister(SrcReg, TRI)) { + switch (MI->getOpcode()) { + default: + SI = Splats.erase(SI); + continue; + case PPC::LFS: + case PPC::LFD: + case PPC::LFSU: + case PPC::LFDU: + case PPC::LFSUX: + case PPC::LFDUX: + case PPC::LFSX: + case PPC::LFDX: + case PPC::LFIWAX: + case PPC::LFIWZX: + if (SplatReg != SrcReg) { + // We need to change the load to define the scalar subregister of + // the QPX splat source register. + unsigned SubRegIndex = + TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg()); + unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex); + + // Substitute both the explicit defined register, and also the + // implicit def of the containing QPX register. + MI->getOperand(0).setReg(SplatSubReg); + MI->substituteRegister(SrcReg, SplatReg, 0, *TRI); + } + + SI = Splats.erase(SI); + + // If SMI is directly after MI, then MBBI's base iterator is + // pointing at SMI. Adjust MBBI around the call to erase SMI to + // avoid invalidating MBBI. + ++MBBI; + SMI->eraseFromParent(); + --MBBI; + + ++NumSimplified; + MadeChange = true; + continue; + } + } + + // If this instruction defines the splat register, then we cannot move + // the previous definition above it. If it reads from the splat + // register, then it must already be alive from some previous + // definition, and if the splat register is different from the source + // register, then this definition must not be the load for which we're + // searching. + if (MI->modifiesRegister(SplatReg, TRI) || + (SrcReg != SplatReg && + MI->readsRegister(SplatReg, TRI))) { + SI = Splats.erase(SI); + continue; + } + + ++SI; + } + + if (MI->getOpcode() != PPC::QVESPLATI && + MI->getOpcode() != PPC::QVESPLATIs && + MI->getOpcode() != PPC::QVESPLATIb) + continue; + if (MI->getOperand(2).getImm() != 0) + continue; + + // If there are other uses of the scalar value after this, replacing + // those uses might be non-trivial. + if (!MI->getOperand(1).isKill()) + continue; + + Splats.push_back(MI); + } + } + + return MadeChange; +} |