author    Dimitry Andric <dim@FreeBSD.org>  2019-08-20 20:50:12 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2019-08-20 20:50:12 +0000
commit    e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch)
tree      599ab169a01f1c86eda9adc774edaedde2f2db5b /lib/Target/ARM/ARMLoadStoreOptimizer.cpp
parent    1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff)
Diffstat (limited to 'lib/Target/ARM/ARMLoadStoreOptimizer.cpp')
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 149
1 file changed, 86 insertions(+), 63 deletions(-)
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 6da7430a8e51..90a1ce238c3f 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1,9 +1,8 @@
//===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -174,12 +173,14 @@ namespace {
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
- ArrayRef<std::pair<unsigned, bool>> Regs);
+ ArrayRef<std::pair<unsigned, bool>> Regs,
+ ArrayRef<MachineInstr*> Instrs);
MachineInstr *CreateLoadStoreDouble(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
- ArrayRef<std::pair<unsigned, bool>> Regs) const;
+ ArrayRef<std::pair<unsigned, bool>> Regs,
+ ArrayRef<MachineInstr*> Instrs) const;
void FormCandidates(const MemOpQueue &MemOps);
MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
@@ -623,7 +624,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
- ArrayRef<std::pair<unsigned, bool>> Regs) {
+ ArrayRef<std::pair<unsigned, bool>> Regs,
+ ArrayRef<MachineInstr*> Instrs) {
unsigned NumRegs = Regs.size();
assert(NumRegs > 1);
@@ -815,6 +817,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
for (const std::pair<unsigned, bool> &R : Regs)
MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
+ MIB.cloneMergedMemRefs(Instrs);
+
return MIB.getInstr();
}
@@ -822,7 +826,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
- ArrayRef<std::pair<unsigned, bool>> Regs) const {
+ ArrayRef<std::pair<unsigned, bool>> Regs,
+ ArrayRef<MachineInstr*> Instrs) const {
bool IsLoad = isi32Load(Opcode);
assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
@@ -838,6 +843,7 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
.addReg(Regs[1].first, getKillRegState(Regs[1].second));
}
MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ MIB.cloneMergedMemRefs(Instrs);
return MIB.getInstr();
}
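
Note: the new Instrs parameter threads the candidate's original instructions into both builders so the merged LDM/STM or LDRD/STRD carries the union of their memory operands. A MachineInstr with an empty memoperand list is conservatively treated as possibly accessing anything, so keeping the memrefs preserves precision for later alias analysis and scheduling. A minimal sketch of the call pattern, names taken from the patch and surrounding setup elided:

    // Sketch: attach the union of the source instructions' memory
    // references to the merged instruction. If any source has an empty
    // memoperand list, cloneMergedMemRefs drops the whole list, which is
    // the conservative "may access anything" state.
    MachineInstrBuilder MIB =
        BuildMI(MBB, InsertBefore, DL, TII->get(Opcode))
            .addReg(Base, getKillRegState(BaseKill))
            .addImm(Pred)
            .addReg(PredReg);
    for (const std::pair<unsigned, bool> &R : Regs)
      MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
    MIB.cloneMergedMemRefs(Instrs); // Instrs = the instructions being merged
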
@@ -895,10 +901,11 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
MachineInstr *Merged = nullptr;
if (Cand.CanMergeToLSDouble)
Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
- Opcode, Pred, PredReg, DL, Regs);
+ Opcode, Pred, PredReg, DL, Regs,
+ Cand.Instrs);
if (!Merged && Cand.CanMergeToLSMulti)
Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
- Opcode, Pred, PredReg, DL, Regs);
+ Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
if (!Merged)
return nullptr;
@@ -1287,7 +1294,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// can still change to a writeback form as that will save us 2 bytes
// of code size. It can create WAW hazards though, so only do it if
// we're minimizing code size.
- if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
+ if (!STI->hasMinSize() || !BaseKill)
return false;
bool HighRegsUsed = false;
@@ -1436,14 +1443,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
.addReg(Base, getKillRegState(isLd ? BaseKill : false))
.addImm(Pred).addReg(PredReg)
.addReg(MO.getReg(), (isLd ? getDefRegState(true) :
- getKillRegState(MO.isKill())));
+ getKillRegState(MO.isKill())))
+ .cloneMemRefs(*MI);
} else if (isLd) {
if (isAM2) {
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
- .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg)
+ .cloneMemRefs(*MI);
} else {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
@@ -1451,7 +1460,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
.addReg(Base)
.addReg(0)
.addImm(Imm)
- .add(predOps(Pred, PredReg));
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
}
} else {
// t2LDR_PRE, t2LDR_POST
@@ -1459,7 +1469,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
.addReg(Base, RegState::Define)
.addReg(Base)
.addImm(Offset)
- .add(predOps(Pred, PredReg));
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
}
} else {
MachineOperand &MO = MI->getOperand(0);
@@ -1474,14 +1485,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
.addReg(Base)
.addReg(0)
.addImm(Imm)
- .add(predOps(Pred, PredReg));
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
} else {
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base)
.addImm(Offset)
- .add(predOps(Pred, PredReg));
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
}
}
MBB.erase(MBBI);
@@ -1541,7 +1554,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
// Transfer implicit operands.
for (const MachineOperand &MO : MI.implicit_operands())
MIB.add(MO);
- MIB.setMemRefs(MI.memoperands());
+ MIB.cloneMemRefs(MI);
MBB.erase(MBBI);
return true;
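
Note: the one-for-one rewrites above follow the same rule: every pre/post-indexed replacement now ends in .cloneMemRefs(*MI), and the older setMemRefs(MI.memoperands()) becomes cloneMemRefs(MI). Both attach the original instruction's memory operands to its replacement; cloneMemRefs is the idiomatic spelling when the source is another MachineInstr. A minimal sketch, assuming a simplified t2LDR_PRE rewrite:

    // Sketch: the replacement copies the memory operands of the instruction
    // it replaces, so later passes still know exactly what it accesses.
    // Memoperands live in the MachineFunction allocator and so remain
    // valid after the original instruction is erased.
    BuildMI(MBB, MBBI, DL, TII->get(ARM::t2LDR_PRE),
            MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define) // base-register writeback
        .addReg(Base)
        .addImm(Offset)
        .add(predOps(Pred, PredReg))
        .cloneMemRefs(*MI);
    MBB.erase(MBBI);
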
@@ -1581,7 +1594,9 @@ static bool isMemoryOp(const MachineInstr &MI) {
const MachineMemOperand &MMO = **MI.memoperands_begin();
// Don't touch volatile memory accesses - we may be changing their order.
- if (MMO.isVolatile())
+ // TODO: We could allow unordered and monotonic atomics here, but we need to
+ // make sure the resulting ldm/stm is correctly marked as atomic.
+ if (MMO.isVolatile() || MMO.isAtomic())
return false;
// Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
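
Note: merging both reorders accesses and fuses them into a single ldm/stm, which is only sound for plain accesses: volatile accesses must keep their order, and a merged ldm/stm built from atomic accesses would lose the ordering constraint unless it were re-marked atomic, which is exactly what the TODO defers. The same isVolatile()/isAtomic() pair is added to CanFormLdStDWord further down. A sketch of the guard as a free-standing helper (name hypothetical):

    // Sketch: only plain (non-volatile, non-atomic) accesses are mergeable.
    static bool isMergeableMMO(const MachineMemOperand &MMO) {
      // Volatile: reordering or fusing would change observable behavior.
      // Atomic: the merged ldm/stm would drop the ordering requirement.
      return !MMO.isVolatile() && !MMO.isAtomic();
    }
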
@@ -1607,19 +1622,26 @@ static void InsertLDR_STR(MachineBasicBlock &MBB,
bool isDef, unsigned NewOpc, unsigned Reg,
bool RegDeadKill, bool RegUndef, unsigned BaseReg,
bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
- unsigned PredReg, const TargetInstrInfo *TII) {
+ unsigned PredReg, const TargetInstrInfo *TII,
+ MachineInstr *MI) {
if (isDef) {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
.addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
.addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ // FIXME: This is overly conservative; the new instruction accesses 4
+ // bytes, not 8.
+ MIB.cloneMemRefs(*MI);
} else {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
.addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
.addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ // FIXME: This is overly conservative; the new instruction accesses 4
+ // bytes, not 8.
+ MIB.cloneMemRefs(*MI);
}
}
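
Note: the FIXME above flags that cloning the LDRD/STRD memoperand onto each half is conservative: the original MachineMemOperand describes an 8-byte access, while each split LDR/STR touches only 4 bytes. One way to tighten it, sketched under the assumption that the original instruction has exactly one memoperand (ByteOffset here is hypothetical, 0 or 4 for the two halves), is MachineFunction::getMachineMemOperand, which derives a new MMO at an offset and size within an existing one:

    // Sketch: build a narrowed 4-byte MMO instead of cloning the 8-byte one.
    if (MI->hasOneMemOperand()) {
      const MachineMemOperand *WideMMO = *MI->memoperands_begin();
      MachineMemOperand *NarrowMMO =
          MBB.getParent()->getMachineMemOperand(WideMMO, ByteOffset, 4);
      MIB.addMemOperand(NarrowMMO);
    }
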
@@ -1677,7 +1699,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
.addReg(BaseReg, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg)
.addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
- .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
+ .cloneMemRefs(*MI);
++NumLDRD2LDM;
} else {
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
@@ -1686,7 +1709,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
.addReg(EvenReg,
getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
.addReg(OddReg,
- getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
+ getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
+ .cloneMemRefs(*MI);
++NumSTRD2STM;
}
} else {
@@ -1704,9 +1728,10 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
assert(!TRI->regsOverlap(OddReg, BaseReg));
InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
- false, BaseReg, false, BaseUndef, Pred, PredReg, TII);
+ false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
- false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
+ false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
+ MI);
} else {
if (OddReg == EvenReg && EvenDeadKill) {
// If the two source operands are the same, the kill marker is
@@ -1719,9 +1744,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if (EvenReg == BaseReg)
EvenDeadKill = false;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
- EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII);
+ EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
+ MI);
InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
- OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
+ OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
+ MI);
}
if (isLd)
++NumLDRD2LDR;
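
Note: the two InsertLDR_STR call sites above encode an ordering constraint: when the even destination register of an LDRD overlaps the base register, loading it first would clobber the address used by the second load, so the odd half is emitted first. Illustrative sequence with hypothetical registers:

    @ Splitting ldrd r0, r1, [r0] in source order:
    @   ldr r0, [r0]        ; clobbers the base...
    @   ldr r1, [r0, #4]    ; ...so this reads from the wrong address
    @ Hence the pass emits the odd register first:
    @   ldr r1, [r0, #4]
    @   ldr r0, [r0]
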
@@ -2048,6 +2075,11 @@ char ARMPreAllocLoadStoreOpt::ID = 0;
INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+// Limit the number of instructions to be rescheduled.
+// FIXME: tune this limit, and/or come up with some better heuristics.
+static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
+ cl::init(8), cl::Hidden);
+
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
return false;
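
Note: the hard-coded reschedule limit of 8 (see the RescheduleOps hunk below) becomes a hidden cl::opt, so it can be tuned without rebuilding. The flag name is taken from the patch; the invocations below are the standard ways to reach hidden backend options:

    llc -arm-prera-ldst-opt-reorder-limit=16 input.ll
    clang -mllvm -arm-prera-ldst-opt-reorder-limit=16 ...
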
@@ -2140,7 +2172,8 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
// At the moment, we ignore the memoryoperand's value.
// If we want to use AliasAnalysis, we should check it accordingly.
if (!Op0->hasOneMemOperand() ||
- (*Op0->memoperands_begin())->isVolatile())
+ (*Op0->memoperands_begin())->isVolatile() ||
+ (*Op0->memoperands_begin())->isAtomic())
return false;
unsigned Align = (*Op0->memoperands_begin())->getAlignment();
@@ -2223,7 +2256,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
}
// Don't try to reschedule too many instructions.
- if (NumMove == 8) // FIXME: Tune this limit.
+ if (NumMove == InstReorderLimit)
break;
// Found a mergable instruction; save information about it.
@@ -2351,10 +2384,13 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
bool RetVal = false;
DenseMap<MachineInstr*, unsigned> MI2LocMap;
- DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2LdsMap;
- DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2StsMap;
- SmallVector<unsigned, 4> LdBases;
- SmallVector<unsigned, 4> StBases;
+ using MapIt = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator;
+ using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
+ using BaseVec = SmallVector<unsigned, 4>;
+ Base2InstMap Base2LdsMap;
+ Base2InstMap Base2StsMap;
+ BaseVec LdBases;
+ BaseVec StBases;
unsigned Loc = 0;
MachineBasicBlock::iterator MBBI = MBB->begin();
@@ -2381,41 +2417,28 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
bool isLd = isLoadSingle(Opc);
unsigned Base = MI.getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
-
bool StopHere = false;
- if (isLd) {
- DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
- Base2LdsMap.find(Base);
- if (BI != Base2LdsMap.end()) {
- for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
- if (Offset == getMemoryOpOffset(*BI->second[i])) {
- StopHere = true;
- break;
- }
- }
- if (!StopHere)
- BI->second.push_back(&MI);
- } else {
- Base2LdsMap[Base].push_back(&MI);
- LdBases.push_back(Base);
+ auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) {
+ MapIt BI = Base2Ops.find(Base);
+ if (BI == Base2Ops.end()) {
+ Base2Ops[Base].push_back(&MI);
+ Bases.push_back(Base);
+ return;
}
- } else {
- DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
- Base2StsMap.find(Base);
- if (BI != Base2StsMap.end()) {
- for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
- if (Offset == getMemoryOpOffset(*BI->second[i])) {
- StopHere = true;
- break;
- }
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(*BI->second[i])) {
+ StopHere = true;
+ break;
}
- if (!StopHere)
- BI->second.push_back(&MI);
- } else {
- Base2StsMap[Base].push_back(&MI);
- StBases.push_back(Base);
}
- }
+ if (!StopHere)
+ BI->second.push_back(&MI);
+ };
+
+ if (isLd)
+ FindBases(Base2LdsMap, LdBases);
+ else
+ FindBases(Base2StsMap, StBases);
if (StopHere) {
// Found a duplicate (a base+offset combination that's seen earlier).
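
Note: the final hunk folds two near-identical branches (loads vs. stores) into one FindBases lambda that captures by reference and is parameterized over the map and base list. A self-contained sketch of the pattern, with standard containers standing in for DenseMap/SmallVector and a simplified MemAccess type:

    #include <map>
    #include <vector>

    struct MemAccess { unsigned Base; int Offset; };

    // Returns true if an access with the same base+offset was seen before,
    // mirroring the StopHere logic above.
    static bool recordAccess(MemAccess &MI, bool IsLoad,
                             std::map<unsigned, std::vector<MemAccess *>> &Lds,
                             std::map<unsigned, std::vector<MemAccess *>> &Sts,
                             std::vector<unsigned> &LdBases,
                             std::vector<unsigned> &StBases) {
      bool StopHere = false;
      // One generic lambda replaces the duplicated load/store branches.
      auto FindBases = [&](auto &Base2Ops, auto &Bases) {
        auto BI = Base2Ops.find(MI.Base);
        if (BI == Base2Ops.end()) {
          Base2Ops[MI.Base].push_back(&MI); // first access with this base
          Bases.push_back(MI.Base);
          return;
        }
        for (MemAccess *Other : BI->second)
          if (Other->Offset == MI.Offset) { // duplicate base+offset
            StopHere = true;
            return;
          }
        BI->second.push_back(&MI);
      };
      if (IsLoad)
        FindBases(Lds, LdBases);
      else
        FindBases(Sts, StBases);
      return StopHere;
    }
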