aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-08-20 20:50:12 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-08-20 20:50:12 +0000
commite6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch)
tree599ab169a01f1c86eda9adc774edaedde2f2db5b /lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
parent1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff)
Diffstat (limited to 'lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r--lib/Target/AMDGPU/SILoadStoreOptimizer.cpp60
1 files changed, 35 insertions, 25 deletions
diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index be291b127301..ae8b967893a2 100644
--- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1,9 +1,8 @@
//===- SILoadStoreOptimizer.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -132,6 +131,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
bool GLC1;
bool SLC0;
bool SLC1;
+ bool DLC0;
+ bool DLC1;
bool UseST64;
SmallVector<MachineInstr *, 8> InstsToMove;
};
@@ -257,13 +258,11 @@ static void addDefsUsesToList(const MachineInstr &MI,
static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
MachineBasicBlock::iterator B,
- const SIInstrInfo *TII,
AliasAnalysis *AA) {
// RAW or WAR - cannot reorder
// WAW - cannot reorder
// RAR - safe to reorder
- return !(A->mayStore() || B->mayStore()) ||
- TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
+ return !(A->mayStore() || B->mayStore()) || !A->mayAlias(AA, *B, true);
}
// Add MI and its defs to the lists if MI reads one of the defs that are
@@ -282,6 +281,7 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
// registers are in SSA form.
if (Use.isReg() &&
((Use.readsReg() && RegDefs.count(Use.getReg())) ||
+ (Use.isDef() && RegDefs.count(Use.getReg())) ||
(Use.isDef() && TargetRegisterInfo::isPhysicalRegister(Use.getReg()) &&
PhysRegUses.count(Use.getReg())))) {
Insts.push_back(&MI);
@@ -295,13 +295,13 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
ArrayRef<MachineInstr *> InstsToMove,
- const SIInstrInfo *TII, AliasAnalysis *AA) {
+ AliasAnalysis *AA) {
assert(MemOp.mayLoadOrStore());
for (MachineInstr *InstToMove : InstsToMove) {
if (!InstToMove->mayLoadOrStore())
continue;
- if (!memAccessesCanBeReordered(MemOp, *InstToMove, TII, AA))
+ if (!memAccessesCanBeReordered(MemOp, *InstToMove, AA))
return false;
}
return true;
@@ -326,7 +326,7 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
return (EltOffset0 + CI.Width0 == EltOffset1 ||
EltOffset1 + CI.Width1 == EltOffset0) &&
- CI.GLC0 == CI.GLC1 &&
+ CI.GLC0 == CI.GLC1 && CI.DLC0 == CI.DLC1 &&
(CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
}
@@ -567,8 +567,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
}
if (MBBI->mayLoadOrStore() &&
- (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
- !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
+ (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
+ !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))) {
// We fail condition #1, but we may still be able to satisfy condition
// #2. Add this instruction to the move list and then we will check
// if condition #2 holds once we have selected the matching instruction.
@@ -640,6 +640,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
}
+ CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm();
+ CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm();
}
// Check both offsets fit in the reduced range.
@@ -647,7 +649,7 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
// move and make sure they are all safe to move down past the merged
// instruction.
if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
- if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
+ if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
return true;
}
@@ -656,8 +658,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
// it was safe to move I and also all the instruction in InstsToMove
// down past this instruction.
// check if we can move I across MBBI and if we can move all I's users
- if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
- !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
+ if (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
+ !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
break;
}
return false;
@@ -726,7 +728,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
.addReg(ImmReg)
- .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ .addReg(AddrReg->getReg(), 0, BaseSubReg)
+ .addImm(0); // clamp bit
BaseSubReg = 0;
}
@@ -819,7 +822,8 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
.addReg(ImmReg)
- .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ .addReg(AddrReg->getReg(), 0, BaseSubReg)
+ .addImm(0); // clamp bit
BaseSubReg = 0;
}
@@ -858,6 +862,7 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
.addImm(CI.GLC0) // glc
+ .addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -910,6 +915,7 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
.addImm(CI.GLC0) // glc
.addImm(CI.SLC0) // slc
.addImm(0) // tfe
+ .addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -1089,9 +1095,10 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(std::min(CI.Offset0, CI.Offset1)) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.SLC0) // slc
- .addImm(0) // tfe
+ .addImm(CI.GLC0) // glc
+ .addImm(CI.SLC0) // slc
+ .addImm(0) // tfe
+ .addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
moveInstsAfter(MIB, CI.InstsToMove);
@@ -1137,9 +1144,10 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
MachineOperand OffsetLo = createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
MachineOperand OffsetHi =
createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
- unsigned CarryReg = MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- unsigned DeadCarryReg =
- MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+ const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+ unsigned CarryReg = MRI->createVirtualRegister(CarryRC);
+ unsigned DeadCarryReg = MRI->createVirtualRegister(CarryRC);
unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -1147,7 +1155,8 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
.addReg(CarryReg, RegState::Define)
.addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
- .add(OffsetLo);
+ .add(OffsetLo)
+ .addImm(0); // clamp bit
(void)LoHalf;
LLVM_DEBUG(dbgs() << " "; LoHalf->dump(););
@@ -1156,7 +1165,8 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
.addReg(DeadCarryReg, RegState::Define | RegState::Dead)
.addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
.add(OffsetHi)
- .addReg(CarryReg, RegState::Kill);
+ .addReg(CarryReg, RegState::Kill)
+ .addImm(0); // clamp bit
(void)HiHalf;
LLVM_DEBUG(dbgs() << " "; HiHalf->dump(););