summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 433
1 files changed, 346 insertions, 87 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index a0c4a25bb5b9..3156bb446963 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -26,16 +26,19 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
+#include <functional>
#include <iterator>
#include <limits>
@@ -51,6 +54,9 @@ STATISTIC(NumUnscaledPairCreated,
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
+DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
+ "Controls which pairs are considered for renaming");
+
// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
cl::init(20), cl::Hidden);
@@ -76,6 +82,11 @@ using LdStPairFlags = struct LdStPairFlags {
// to be extended, 0 means I, and 1 means the returned iterator.
int SExtIdx = -1;
+ // If not None, RenameReg can be used to rename the result register of the
+ // first store in a pair. Currently this only works when merging stores
+ // forward.
+ Optional<MCPhysReg> RenameReg = None;
+
LdStPairFlags() = default;
void setMergeForward(bool V = true) { MergeForward = V; }
@@ -83,6 +94,10 @@ using LdStPairFlags = struct LdStPairFlags {
void setSExtIdx(int V) { SExtIdx = V; }
int getSExtIdx() const { return SExtIdx; }
+
+ void setRenameReg(MCPhysReg R) { RenameReg = R; }
+ void clearRenameReg() { RenameReg = None; }
+ Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
struct AArch64LoadStoreOpt : public MachineFunctionPass {
@@ -99,6 +114,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Track which register units have been modified and used.
LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+ LiveRegUnits DefinedInBB;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
@@ -215,69 +231,6 @@ static bool isTagStore(const MachineInstr &MI) {
}
}
-// Scaling factor for unscaled load or store.
-static int getMemScale(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Opcode has unknown scale!");
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
- case AArch64::LDRSBWui:
- case AArch64::LDURSBWi:
- case AArch64::STRBBui:
- case AArch64::STURBBi:
- return 1;
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- case AArch64::LDRSHWui:
- case AArch64::LDURSHWi:
- case AArch64::STRHHui:
- case AArch64::STURHHi:
- return 2;
- case AArch64::LDRSui:
- case AArch64::LDURSi:
- case AArch64::LDRSWui:
- case AArch64::LDURSWi:
- case AArch64::LDRWui:
- case AArch64::LDURWi:
- case AArch64::STRSui:
- case AArch64::STURSi:
- case AArch64::STRWui:
- case AArch64::STURWi:
- case AArch64::LDPSi:
- case AArch64::LDPSWi:
- case AArch64::LDPWi:
- case AArch64::STPSi:
- case AArch64::STPWi:
- return 4;
- case AArch64::LDRDui:
- case AArch64::LDURDi:
- case AArch64::LDRXui:
- case AArch64::LDURXi:
- case AArch64::STRDui:
- case AArch64::STURDi:
- case AArch64::STRXui:
- case AArch64::STURXi:
- case AArch64::LDPDi:
- case AArch64::LDPXi:
- case AArch64::STPDi:
- case AArch64::STPXi:
- return 8;
- case AArch64::LDRQui:
- case AArch64::LDURQi:
- case AArch64::STRQui:
- case AArch64::STURQi:
- case AArch64::LDPQi:
- case AArch64::STPQi:
- case AArch64::STGOffset:
- case AArch64::STZGOffset:
- case AArch64::ST2GOffset:
- case AArch64::STZ2GOffset:
- case AArch64::STGPi:
- return 16;
- }
-}
-
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
bool *IsValidLdStrOpc = nullptr) {
if (IsValidLdStrOpc)
@@ -588,7 +541,7 @@ static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
// ST*G and all paired ldst have the same scale in pre/post-indexed variants
// as in the "unsigned offset" variant.
// All other pre/post indexed ldst instructions are unscaled.
- Scale = (IsTagStore || IsPaired) ? getMemScale(MI) : 1;
+ Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
if (IsPaired) {
MinOffset = -64;
@@ -599,8 +552,8 @@ static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
}
}
-static const MachineOperand &getLdStRegOp(const MachineInstr &MI,
- unsigned PairedRegOp = 0) {
+static MachineOperand &getLdStRegOp(MachineInstr &MI,
+ unsigned PairedRegOp = 0) {
assert(PairedRegOp < 2 && "Unexpected register operand idx.");
unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
return MI.getOperand(Idx);
@@ -620,8 +573,8 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
MachineInstr &StoreInst,
const AArch64InstrInfo *TII) {
assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
- int LoadSize = getMemScale(LoadInst);
- int StoreSize = getMemScale(StoreInst);
+ int LoadSize = TII->getMemScale(LoadInst);
+ int StoreSize = TII->getMemScale(StoreInst);
int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst)
? getLdStOffsetOp(StoreInst).getImm()
: getLdStOffsetOp(StoreInst).getImm() * StoreSize;
@@ -731,7 +684,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
unsigned Opc = I->getOpcode();
bool IsScaled = !TII->isUnscaledLdSt(Opc);
- int OffsetStride = IsScaled ? 1 : getMemScale(*I);
+ int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
bool MergeForward = Flags.getMergeForward();
// Insert our new paired instruction after whichever of the paired
@@ -783,6 +736,44 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
return NextI;
}
+// Apply Fn to all instructions between MI and the beginning of the block, until
+// a def for DefReg is reached. Returns true, iff Fn returns true for all
+// visited instructions. Stop after visiting Limit iterations.
+static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
+ const TargetRegisterInfo *TRI, unsigned Limit,
+ std::function<bool(MachineInstr &, bool)> &Fn) {
+ auto MBB = MI.getParent();
+ for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(),
+ E = MBB->rend();
+ I != E; I++) {
+ if (!Limit)
+ return false;
+ --Limit;
+
+ bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) {
+ return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
+ TRI->regsOverlap(MOP.getReg(), DefReg);
+ });
+ if (!Fn(*I, isDef))
+ return false;
+ if (isDef)
+ break;
+ }
+ return true;
+}
+
+static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
+ const TargetRegisterInfo *TRI) {
+
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI))
+ if (MOP.isReg() && MOP.isKill())
+ Units.removeReg(MOP.getReg());
+
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI))
+ if (MOP.isReg() && !MOP.isKill())
+ Units.addReg(MOP.getReg());
+}
+
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
@@ -800,9 +791,76 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
unsigned Opc =
SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
bool IsUnscaled = TII->isUnscaledLdSt(Opc);
- int OffsetStride = IsUnscaled ? getMemScale(*I) : 1;
+ int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
bool MergeForward = Flags.getMergeForward();
+
+ Optional<MCPhysReg> RenameReg = Flags.getRenameReg();
+ if (MergeForward && RenameReg) {
+ MCRegister RegToRename = getLdStRegOp(*I).getReg();
+ DefinedInBB.addReg(*RenameReg);
+
+ // Return the sub/super register for RenameReg, matching the size of
+ // OriginalReg.
+ auto GetMatchingSubReg = [this,
+ RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
+ for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
+ if (TRI->getMinimalPhysRegClass(OriginalReg) ==
+ TRI->getMinimalPhysRegClass(SubOrSuper))
+ return SubOrSuper;
+ llvm_unreachable("Should have found matching sub or super register!");
+ };
+
+ std::function<bool(MachineInstr &, bool)> UpdateMIs =
+ [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
+ if (IsDef) {
+ bool SeenDef = false;
+ for (auto &MOP : MI.operands()) {
+ // Rename the first explicit definition and all implicit
+ // definitions matching RegToRename.
+ if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
+ (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
+ TRI->regsOverlap(MOP.getReg(), RegToRename)) {
+ assert((MOP.isImplicit() ||
+ (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
+ "Need renamable operands");
+ MOP.setReg(GetMatchingSubReg(MOP.getReg()));
+ SeenDef = true;
+ }
+ }
+ } else {
+ for (auto &MOP : MI.operands()) {
+ if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
+ TRI->regsOverlap(MOP.getReg(), RegToRename)) {
+ assert((MOP.isImplicit() ||
+ (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
+ "Need renamable operands");
+ MOP.setReg(GetMatchingSubReg(MOP.getReg()));
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
+ return true;
+ };
+ forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);
+
+#if !defined(NDEBUG)
+ // Make sure the register used for renaming is not used between the paired
+ // instructions. That would trash the content before the new paired
+ // instruction.
+ for (auto &MI :
+ iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
+ std::next(I), std::next(Paired)))
+ assert(all_of(MI.operands(),
+ [this, &RenameReg](const MachineOperand &MOP) {
+ return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
+ !TRI->regsOverlap(MOP.getReg(), *RenameReg);
+ }) &&
+ "Rename register used between paired instruction, trashing the "
+ "content");
+#endif
+ }
+
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
@@ -818,11 +876,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
// We're trying to pair instructions that differ in how they are scaled. If
// I is scaled then scale the offset of Paired accordingly. Otherwise, do
// the opposite (i.e., make Paired's offset unscaled).
- int MemSize = getMemScale(*Paired);
+ int MemSize = TII->getMemScale(*Paired);
if (PairedIsUnscaled) {
// If the unscaled offset isn't a multiple of the MemSize, we can't
// pair the operations together.
- assert(!(PairedOffset % getMemScale(*Paired)) &&
+ assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
"Offset should be a multiple of the stride!");
PairedOffset /= MemSize;
} else {
@@ -847,9 +905,9 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
// Scale the immediate offset, if necessary.
if (TII->isUnscaledLdSt(RtMI->getOpcode())) {
- assert(!(OffsetImm % getMemScale(*RtMI)) &&
+ assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
"Unscaled offset cannot be scaled.");
- OffsetImm /= getMemScale(*RtMI);
+ OffsetImm /= TII->getMemScale(*RtMI);
}
// Construct the new instruction.
@@ -931,6 +989,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
}
LLVM_DEBUG(dbgs() << "\n");
+ if (MergeForward)
+ for (const MachineOperand &MOP : phys_regs_and_masks(*I))
+ if (MOP.isReg() && MOP.isKill())
+ DefinedInBB.addReg(MOP.getReg());
+
// Erase the old instructions.
I->eraseFromParent();
Paired->eraseFromParent();
@@ -944,8 +1007,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
MachineBasicBlock::iterator NextI = LoadI;
++NextI;
- int LoadSize = getMemScale(*LoadI);
- int StoreSize = getMemScale(*StoreI);
+ int LoadSize = TII->getMemScale(*LoadI);
+ int StoreSize = TII->getMemScale(*StoreI);
Register LdRt = getLdStRegOp(*LoadI).getReg();
const MachineOperand &StMO = getLdStRegOp(*StoreI);
Register StRt = getLdStRegOp(*StoreI).getReg();
@@ -1207,6 +1270,148 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
// FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
}
+static bool
+canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
+ SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+ const TargetRegisterInfo *TRI) {
+ if (!FirstMI.mayStore())
+ return false;
+
+ // Check if we can find an unused register which we can use to rename
+ // the register used by the first load/store.
+ auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
+ MachineFunction &MF = *FirstMI.getParent()->getParent();
+ if (!RegClass || !MF.getRegInfo().tracksLiveness())
+ return false;
+
+ auto RegToRename = getLdStRegOp(FirstMI).getReg();
+ // For now, we only rename if the store operand gets killed at the store.
+ if (!getLdStRegOp(FirstMI).isKill() &&
+ !any_of(FirstMI.operands(),
+ [TRI, RegToRename](const MachineOperand &MOP) {
+ return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
+ MOP.isImplicit() && MOP.isKill() &&
+ TRI->regsOverlap(RegToRename, MOP.getReg());
+ })) {
+ LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n");
+ return false;
+ }
+ auto canRenameMOP = [](const MachineOperand &MOP) {
+ return MOP.isImplicit() ||
+ (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
+ };
+
+ bool FoundDef = false;
+
+ // For each instruction between FirstMI and the previous def for RegToRename,
+ // we
+ // * check if we can rename RegToRename in this instruction
+ // * collect the registers used and required register classes for RegToRename.
+ std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
+ bool IsDef) {
+ LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
+ // Currently we do not try to rename across frame-setup instructions.
+ if (MI.getFlag(MachineInstr::FrameSetup)) {
+ LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions currently ("
+ << MI << ")\n");
+ return false;
+ }
+
+ UsedInBetween.accumulate(MI);
+
+ // For a definition, check that we can rename the definition and exit the
+ // loop.
+ FoundDef = IsDef;
+
+ // For defs, check if we can rename the first def of RegToRename.
+ if (FoundDef) {
+ for (auto &MOP : MI.operands()) {
+ if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
+ !TRI->regsOverlap(MOP.getReg(), RegToRename))
+ continue;
+ if (!canRenameMOP(MOP)) {
+ LLVM_DEBUG(dbgs()
+ << " Cannot rename " << MOP << " in " << MI << "\n");
+ return false;
+ }
+ RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
+ }
+ return true;
+ } else {
+ for (auto &MOP : MI.operands()) {
+ if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
+ !TRI->regsOverlap(MOP.getReg(), RegToRename))
+ continue;
+
+ if (!canRenameMOP(MOP)) {
+ LLVM_DEBUG(dbgs()
+ << " Cannot rename " << MOP << " in " << MI << "\n");
+ return false;
+ }
+ RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
+ }
+ }
+ return true;
+ };
+
+ if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
+ return false;
+
+ if (!FoundDef) {
+ LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
+ return false;
+ }
+ return true;
+}
+
+// Check if we can find a physical register for renaming. This register must:
+// * not be defined up to FirstMI (checked using DefinedInBB),
+// * not be used between FirstMI and the defining instruction of the register
+//   to rename (checked using UsedInBetween), and
+// * be available in all required register classes (checked using RequiredClasses).
+static Optional<MCPhysReg> tryToFindRegisterToRename(
+ MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB,
+ LiveRegUnits &UsedInBetween,
+ SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+ const TargetRegisterInfo *TRI) {
+ auto &MF = *FirstMI.getParent()->getParent();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Checks if any sub- or super-register of PR is callee saved.
+ auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
+ return any_of(TRI->sub_and_superregs_inclusive(PR),
+ [&MF, TRI](MCPhysReg SubOrSuper) {
+ return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
+ });
+ };
+
+ // Check if PR or one of its sub- or super-registers can be used for all
+ // required register classes.
+ auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
+ return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
+ return any_of(TRI->sub_and_superregs_inclusive(PR),
+ [C, TRI](MCPhysReg SubOrSuper) {
+ return C == TRI->getMinimalPhysRegClass(SubOrSuper);
+ });
+ });
+ };
+
+ auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
+ for (const MCPhysReg &PR : *RegClass) {
+ if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
+ !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
+ CanBeUsedForAllClasses(PR)) {
+ DefinedInBB.addReg(PR);
+ LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
+ << "\n");
+ return {PR};
+ }
+ }
+ LLVM_DEBUG(dbgs() << "No rename register found from "
+ << TRI->getRegClassName(RegClass) << "\n");
+ return None;
+}
+
/// Scan the instructions looking for a load/store that can be combined with the
/// current instruction into a wider equivalent or a load/store pair.
MachineBasicBlock::iterator
@@ -1215,6 +1420,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool FindNarrowMerge) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
+ MachineBasicBlock::iterator MBBIWithRenameReg;
MachineInstr &FirstMI = *I;
++MBBI;
@@ -1223,9 +1429,16 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
Register Reg = getLdStRegOp(FirstMI).getReg();
Register BaseReg = getLdStBaseOp(FirstMI).getReg();
int Offset = getLdStOffsetOp(FirstMI).getImm();
- int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
+ int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
+ Optional<bool> MaybeCanRename = None;
+ SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
+ LiveRegUnits UsedInBetween;
+ UsedInBetween.init(*TRI);
+
+ Flags.clearRenameReg();
+
// Track which register units have been modified and used between the first
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
@@ -1237,6 +1450,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
MachineInstr &MI = *MBBI;
+ UsedInBetween.accumulate(MI);
+
// Don't count transient instructions towards the search limit since there
// may be different numbers of them if e.g. debug information is present.
if (!MI.isTransient())
@@ -1259,7 +1474,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// We're trying to pair instructions that differ in how they are scaled.
// If FirstMI is scaled then scale the offset of MI accordingly.
// Otherwise, do the opposite (i.e., make MI's offset unscaled).
- int MemSize = getMemScale(MI);
+ int MemSize = TII->getMemScale(MI);
if (MIIsUnscaled) {
// If the unscaled offset isn't a multiple of the MemSize, we can't
// pair the operations together: bail and keep looking.
@@ -1329,7 +1544,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
!(MI.mayLoad() &&
!UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
!mayAlias(MI, MemInsns, AA)) {
+
Flags.setMergeForward(false);
+ Flags.clearRenameReg();
return MBBI;
}
@@ -1337,18 +1554,41 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// between the two instructions and none of the instructions between the
// first and the second alias with the first, we can combine the first
// into the second.
- if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg()) &&
- !(MayLoad &&
+ if (!(MayLoad &&
!UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
!mayAlias(FirstMI, MemInsns, AA)) {
- Flags.setMergeForward(true);
- return MBBI;
+
+ if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
+ Flags.setMergeForward(true);
+ Flags.clearRenameReg();
+ return MBBI;
+ }
+
+ if (DebugCounter::shouldExecute(RegRenamingCounter)) {
+ if (!MaybeCanRename)
+ MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
+ RequiredClasses, TRI)};
+
+ if (*MaybeCanRename) {
+ Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
+ FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
+ TRI);
+ if (MaybeRenameReg) {
+ Flags.setRenameReg(*MaybeRenameReg);
+ Flags.setMergeForward(true);
+ MBBIWithRenameReg = MBBI;
+ }
+ }
+ }
}
// Unable to combine these instructions due to interference in between.
// Keep looking.
}
}
+ if (Flags.getRenameReg())
+ return MBBIWithRenameReg;
+
// If the instruction wasn't a matching load or store. Stop searching if we
// encounter a call instruction that might modify memory.
if (MI.isCall())
@@ -1492,7 +1732,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
MachineBasicBlock::iterator MBBI = I;
Register BaseReg = getLdStBaseOp(MemMI).getReg();
- int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
+ int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
// Scan forward looking for post-index opportunities. Updating instructions
// can't be formed if the memory instruction doesn't have the offset we're
@@ -1663,7 +1903,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
// with Offset-1)
bool IsUnscaled = TII->isUnscaledLdSt(MI);
int Offset = getLdStOffsetOp(MI).getImm();
- int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
+ int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
// Allow one more for offset.
if (Offset > 0)
Offset -= OffsetStride;
@@ -1680,7 +1920,13 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
++NumUnscaledPairCreated;
// Keeping the iterator straight is a pain, so we let the merge routine tell
// us what the next instruction is after it's done mucking about.
+ auto Prev = std::prev(MBBI);
MBBI = mergePairedInsns(MBBI, Paired, Flags);
+ // Collect liveness info for instructions between Prev and the new position
+ // MBBI.
+ for (auto I = std::next(Prev); I != MBBI; I++)
+ updateDefinedRegisters(*I, DefinedInBB, TRI);
+
return true;
}
return false;
@@ -1723,7 +1969,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
// The immediate in the load/store is scaled by the size of the memory
// operation. The immediate in the add we're looking for,
// however, is not, so adjust here.
- int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
+ int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
// Look forward to try to find a pre-index instruction. For example,
// ldr x1, [x0, #64]
@@ -1742,6 +1988,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
bool EnableNarrowZeroStOpt) {
+
bool Modified = false;
// Four tranformations to do here:
// 1) Find loads that directly read from stores and promote them by
@@ -1786,8 +2033,17 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// ldr x1, [x2, #8]
// ; becomes
// ldp x0, x1, [x2]
+
+ if (MBB.getParent()->getRegInfo().tracksLiveness()) {
+ DefinedInBB.clear();
+ DefinedInBB.addLiveIns(MBB);
+ }
+
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
+ // Track currently live registers up to this point, to help with
+ // searching for a rename register on demand.
+ updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
Modified = true;
else
@@ -1825,11 +2081,14 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
// or store.
ModifiedRegUnits.init(*TRI);
UsedRegUnits.init(*TRI);
+ DefinedInBB.init(*TRI);
bool Modified = false;
bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
- for (auto &MBB : Fn)
- Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);
+ for (auto &MBB : Fn) {
+ auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
+ Modified |= M;
+ }
return Modified;
}