summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2021-07-29 20:15:26 +0000
committerDimitry Andric <dim@FreeBSD.org>2021-07-29 20:15:26 +0000
commit344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
treef0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
parentb60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp190
1 file changed, 160 insertions, 30 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index ad180cb2935e..bf042c83294a 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
@@ -247,28 +248,38 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
return std::numeric_limits<unsigned>::max();
case AArch64::STRDui:
case AArch64::STURDi:
+ case AArch64::STRDpre:
case AArch64::STRQui:
case AArch64::STURQi:
+ case AArch64::STRQpre:
case AArch64::STRBBui:
case AArch64::STURBBi:
case AArch64::STRHHui:
case AArch64::STURHHi:
case AArch64::STRWui:
+ case AArch64::STRWpre:
case AArch64::STURWi:
case AArch64::STRXui:
+ case AArch64::STRXpre:
case AArch64::STURXi:
case AArch64::LDRDui:
case AArch64::LDURDi:
+ case AArch64::LDRDpre:
case AArch64::LDRQui:
case AArch64::LDURQi:
+ case AArch64::LDRQpre:
case AArch64::LDRWui:
case AArch64::LDURWi:
+ case AArch64::LDRWpre:
case AArch64::LDRXui:
case AArch64::LDURXi:
+ case AArch64::LDRXpre:
case AArch64::STRSui:
case AArch64::STURSi:
+ case AArch64::STRSpre:
case AArch64::LDRSui:
case AArch64::LDURSi:
+ case AArch64::LDRSpre:
return Opc;
case AArch64::LDRSWui:
return AArch64::LDRWui;
@@ -303,33 +314,53 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::STRSui:
case AArch64::STURSi:
return AArch64::STPSi;
+ case AArch64::STRSpre:
+ return AArch64::STPSpre;
case AArch64::STRDui:
case AArch64::STURDi:
return AArch64::STPDi;
+ case AArch64::STRDpre:
+ return AArch64::STPDpre;
case AArch64::STRQui:
case AArch64::STURQi:
return AArch64::STPQi;
+ case AArch64::STRQpre:
+ return AArch64::STPQpre;
case AArch64::STRWui:
case AArch64::STURWi:
return AArch64::STPWi;
+ case AArch64::STRWpre:
+ return AArch64::STPWpre;
case AArch64::STRXui:
case AArch64::STURXi:
return AArch64::STPXi;
+ case AArch64::STRXpre:
+ return AArch64::STPXpre;
case AArch64::LDRSui:
case AArch64::LDURSi:
return AArch64::LDPSi;
+ case AArch64::LDRSpre:
+ return AArch64::LDPSpre;
case AArch64::LDRDui:
case AArch64::LDURDi:
return AArch64::LDPDi;
+ case AArch64::LDRDpre:
+ return AArch64::LDPDpre;
case AArch64::LDRQui:
case AArch64::LDURQi:
return AArch64::LDPQi;
+ case AArch64::LDRQpre:
+ return AArch64::LDPQpre;
case AArch64::LDRWui:
case AArch64::LDURWi:
return AArch64::LDPWi;
+ case AArch64::LDRWpre:
+ return AArch64::LDPWpre;
case AArch64::LDRXui:
case AArch64::LDURXi:
return AArch64::LDPXi;
+ case AArch64::LDRXpre:
+ return AArch64::LDPXpre;
case AArch64::LDRSWui:
case AArch64::LDURSWi:
return AArch64::LDPSWi;
@@ -538,6 +569,37 @@ static bool isPairedLdSt(const MachineInstr &MI) {
}
}
+static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
+
+ unsigned OpcA = FirstMI.getOpcode();
+ unsigned OpcB = MI.getOpcode();
+
+ switch (OpcA) {
+ default:
+ return false;
+ case AArch64::STRSpre:
+ return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
+ case AArch64::STRDpre:
+ return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
+ case AArch64::STRQpre:
+ return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
+ case AArch64::STRWpre:
+ return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
+ case AArch64::STRXpre:
+ return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
+ case AArch64::LDRSpre:
+ return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
+ case AArch64::LDRDpre:
+ return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
+ case AArch64::LDRQpre:
+ return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
+ case AArch64::LDRWpre:
+ return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
+ case AArch64::LDRXpre:
+ return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
+ }
+}
+
// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
int &MinOffset, int &MaxOffset) {
@@ -560,17 +622,20 @@ static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
static MachineOperand &getLdStRegOp(MachineInstr &MI,
unsigned PairedRegOp = 0) {
assert(PairedRegOp < 2 && "Unexpected register operand idx.");
- unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
+ bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
+ if (IsPreLdSt)
+ PairedRegOp += 1;
+ unsigned Idx = isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
return MI.getOperand(Idx);
}
static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
- unsigned Idx = isPairedLdSt(MI) ? 2 : 1;
+ unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2 : 1;
return MI.getOperand(Idx);
}
static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
- unsigned Idx = isPairedLdSt(MI) ? 3 : 2;
+ unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 : 2;
return MI.getOperand(Idx);
}
@@ -580,10 +645,10 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
int LoadSize = TII->getMemScale(LoadInst);
int StoreSize = TII->getMemScale(StoreInst);
- int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst)
+ int UnscaledStOffset = TII->hasUnscaledLdStOffset(StoreInst)
? getLdStOffsetOp(StoreInst).getImm()
: getLdStOffsetOp(StoreInst).getImm() * StoreSize;
- int UnscaledLdOffset = TII->isUnscaledLdSt(LoadInst)
+ int UnscaledLdOffset = TII->hasUnscaledLdStOffset(LoadInst)
? getLdStOffsetOp(LoadInst).getImm()
: getLdStOffsetOp(LoadInst).getImm() * LoadSize;
return (UnscaledStOffset <= UnscaledLdOffset) &&
@@ -688,7 +753,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
NextI = next_nodbg(NextI, E);
unsigned Opc = I->getOpcode();
- bool IsScaled = !TII->isUnscaledLdSt(Opc);
+ bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
bool MergeForward = Flags.getMergeForward();
@@ -794,7 +859,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
int SExtIdx = Flags.getSExtIdx();
unsigned Opc =
SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
- bool IsUnscaled = TII->isUnscaledLdSt(Opc);
+ bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
bool MergeForward = Flags.getMergeForward();
@@ -875,7 +940,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
int Offset = getLdStOffsetOp(*I).getImm();
int PairedOffset = getLdStOffsetOp(*Paired).getImm();
- bool PairedIsUnscaled = TII->isUnscaledLdSt(Paired->getOpcode());
+ bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
if (IsUnscaled != PairedIsUnscaled) {
// We're trying to pair instructions that differ in how they are scaled. If
// I is scaled then scale the offset of Paired accordingly. Otherwise, do
@@ -893,8 +958,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
}
// Which register is Rt and which is Rt2 depends on the offset order.
+ // However, for pre load/stores the Rt should be that of the pre
+ // load/store.
MachineInstr *RtMI, *Rt2MI;
- if (Offset == PairedOffset + OffsetStride) {
+ if (Offset == PairedOffset + OffsetStride &&
+ !AArch64InstrInfo::isPreLdSt(*I)) {
RtMI = &*Paired;
Rt2MI = &*I;
// Here we swapped the assumption made for SExtIdx.
@@ -908,7 +976,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
}
int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
// Scale the immediate offset, if necessary.
- if (TII->isUnscaledLdSt(RtMI->getOpcode())) {
+ if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
"Unscaled offset cannot be scaled.");
OffsetImm /= TII->getMemScale(*RtMI);
@@ -939,13 +1007,20 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MI.clearRegisterKills(Reg, TRI);
}
}
- MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
- .add(RegOp0)
- .add(RegOp1)
- .add(BaseRegOp)
- .addImm(OffsetImm)
- .cloneMergedMemRefs({&*I, &*Paired})
- .setMIFlags(I->mergeFlagsWith(*Paired));
+
+ unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
+ MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
+
+ // Adds the pre-index operand for pre-indexed ld/st pairs.
+ if (AArch64InstrInfo::isPreLdSt(*RtMI))
+ MIB.addReg(BaseRegOp.getReg(), RegState::Define);
+
+ MIB.add(RegOp0)
+ .add(RegOp1)
+ .add(BaseRegOp)
+ .addImm(OffsetImm)
+ .cloneMergedMemRefs({&*I, &*Paired})
+ .setMIFlags(I->mergeFlagsWith(*Paired));
(void)MIB;
@@ -1053,8 +1128,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
// performance and correctness are verified only in little-endian.
if (!Subtarget->isLittleEndian())
return NextI;
- bool IsUnscaled = TII->isUnscaledLdSt(*LoadI);
- assert(IsUnscaled == TII->isUnscaledLdSt(*StoreI) &&
+ bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
+ assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
"Unsupported ld/st match");
assert(LoadSize <= StoreSize && "Invalid load size");
int UnscaledLdOffset = IsUnscaled
@@ -1231,9 +1306,9 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
unsigned OpcA = FirstMI.getOpcode();
unsigned OpcB = MI.getOpcode();
- // Opcodes match: nothing more to check.
+ // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
if (OpcA == OpcB)
- return true;
+ return !AArch64InstrInfo::isPreLdSt(FirstMI);
// Try to match a sign-extended load/store with a zero-extended load/store.
bool IsValidLdStrOpc, PairIsValidLdStrOpc;
@@ -1256,8 +1331,14 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
return false;
+ // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
+ // LDR<S,D,Q,W,X>pre - LDR<S,D,Q,W,X>ui
+ // are candidate pairs that can be merged.
+ if (isPreLdStPairCandidate(FirstMI, MI))
+ return true;
+
// Try to match an unscaled load/store with a scaled load/store.
- return TII->isUnscaledLdSt(OpcA) != TII->isUnscaledLdSt(OpcB) &&
+ return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
// FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
@@ -1447,7 +1528,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
MBBI = next_nodbg(MBBI, E);
bool MayLoad = FirstMI.mayLoad();
- bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
+ bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
Register Reg = getLdStRegOp(FirstMI).getReg();
Register BaseReg = getLdStBaseOp(FirstMI).getReg();
int Offset = getLdStOffsetOp(FirstMI).getImm();
@@ -1495,7 +1576,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// a relocation.
Register MIBaseReg = getLdStBaseOp(MI).getReg();
int MIOffset = getLdStOffsetOp(MI).getImm();
- bool MIIsUnscaled = TII->isUnscaledLdSt(MI);
+ bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
if (IsUnscaled != MIIsUnscaled) {
// We're trying to pair instructions that differ in how they are scaled.
// If FirstMI is scaled then scale the offset of MI accordingly.
@@ -1516,8 +1597,41 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
}
}
- if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
- (Offset + OffsetStride == MIOffset))) {
+ bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
+
+ if (BaseReg == MIBaseReg) {
+ // If the offset of the second ld/st is not equal to the size of the
+ // destination register it can't be paired with a pre-index ld/st
+ // pair. Additionally if the base reg is used or modified the operations
+ // can't be paired: bail and keep looking.
+ if (IsPreLdSt) {
+ bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
+ bool IsBaseRegUsed =
+ !UsedRegUnits.available(getLdStBaseOp(MI).getReg());
+ bool IsBaseRegModified =
+ !ModifiedRegUnits.available(getLdStBaseOp(MI).getReg());
+ // If the stored value and the address of the second instruction is
+ // the same, it needs to be using the updated register and therefore
+ // it must not be folded.
+ bool IsMIRegTheSame =
+ getLdStRegOp(MI).getReg() == getLdStBaseOp(MI).getReg();
+ if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
+ IsMIRegTheSame) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
+ UsedRegUnits, TRI);
+ MemInsns.push_back(&MI);
+ continue;
+ }
+ } else {
+ if ((Offset != MIOffset + OffsetStride) &&
+ (Offset + OffsetStride != MIOffset)) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
+ UsedRegUnits, TRI);
+ MemInsns.push_back(&MI);
+ continue;
+ }
+ }
+
int MinOffset = Offset < MIOffset ? Offset : MIOffset;
if (FindNarrowMerge) {
// If the alignment requirements of the scaled wide load/store
@@ -1849,6 +1963,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr &MemMI = *I;
MachineBasicBlock::iterator MBBI = I;
+ MachineFunction &MF = *MemMI.getMF();
Register BaseReg = getLdStBaseOp(MemMI).getReg();
int Offset = getLdStOffsetOp(MemMI).getImm();
@@ -1876,11 +1991,16 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
return E;
}
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ unsigned RedZoneSize =
+ Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
+
// Track which register units have been modified and used between the first
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
UsedRegUnits.clear();
unsigned Count = 0;
+ bool MemAcessBeforeSPPreInc = false;
do {
MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
@@ -1891,8 +2011,13 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
++Count;
// If we found a match, return it.
- if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset))
+ if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
+ // Check that the update value is within our red zone limit (which may be
+ // zero).
+ if (MemAcessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
+ return E;
return MBBI;
+ }
// Update the status of what the instruction clobbered and used.
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
@@ -1902,6 +2027,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
if (!ModifiedRegUnits.available(BaseReg) ||
!UsedRegUnits.available(BaseReg))
return E;
+ // Keep track if we have a memory access before an SP pre-increment; in this
+ // case we need to validate later that the update amount respects the red
+ // zone.
+ if (BaseRegSP && MBBI->mayLoadOrStore())
+ MemAcessBeforeSPPreInc = true;
} while (MBBI != B && Count < Limit);
return E;
}
@@ -1968,7 +2098,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
// Early exit if the offset is not possible to match. (6 bits of positive
// range, plus allow an extra one in case we find a later insn that matches
// with Offset-1)
- bool IsUnscaled = TII->isUnscaledLdSt(MI);
+ bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
int Offset = getLdStOffsetOp(MI).getImm();
int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
// Allow one more for offset.
@@ -1983,7 +2113,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
if (Paired != E) {
++NumPairCreated;
- if (TII->isUnscaledLdSt(MI))
+ if (TII->hasUnscaledLdStOffset(MI))
++NumUnscaledPairCreated;
// Keeping the iterator straight is a pain, so we let the merge routine tell
// us what the next instruction is after it's done mucking about.
@@ -2018,7 +2148,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
}
// Don't know how to handle unscaled pre/post-index versions below, so bail.
- if (TII->isUnscaledLdSt(MI.getOpcode()))
+ if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
return false;
// Look back to try to find a pre-index instruction. For example,