Diffstat (limited to 'lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r--  lib/Target/AMDGPU/SILoadStoreOptimizer.cpp  |  60
1 file changed, 35 insertions, 25 deletions
diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index be291b127301..ae8b967893a2 100644
--- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1,9 +1,8 @@
 //===- SILoadStoreOptimizer.cpp -------------------------------------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -132,6 +131,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
     bool GLC1;
     bool SLC0;
     bool SLC1;
+    bool DLC0;
+    bool DLC1;
     bool UseST64;
     SmallVector<MachineInstr *, 8> InstsToMove;
   };
@@ -257,13 +258,11 @@ static void addDefsUsesToList(const MachineInstr &MI,
 
 static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
                                       MachineBasicBlock::iterator B,
-                                      const SIInstrInfo *TII,
                                       AliasAnalysis *AA) {
   // RAW or WAR - cannot reorder
   // WAW - cannot reorder
   // RAR - safe to reorder
-  return !(A->mayStore() || B->mayStore()) ||
-         TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
+  return !(A->mayStore() || B->mayStore()) || !A->mayAlias(AA, *B, true);
 }
 
 // Add MI and its defs to the lists if MI reads one of the defs that are
@@ -282,6 +281,7 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
     // registers are in SSA form.
     if (Use.isReg() &&
         ((Use.readsReg() && RegDefs.count(Use.getReg())) ||
+         (Use.isDef() && RegDefs.count(Use.getReg())) ||
          (Use.isDef() && TargetRegisterInfo::isPhysicalRegister(Use.getReg()) &&
           PhysRegUses.count(Use.getReg())))) {
       Insts.push_back(&MI);
@@ -295,13 +295,13 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
 
 static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
                                     ArrayRef<MachineInstr *> InstsToMove,
-                                    const SIInstrInfo *TII, AliasAnalysis *AA) {
+                                    AliasAnalysis *AA) {
   assert(MemOp.mayLoadOrStore());
 
   for (MachineInstr *InstToMove : InstsToMove) {
     if (!InstToMove->mayLoadOrStore())
       continue;
-    if (!memAccessesCanBeReordered(MemOp, *InstToMove, TII, AA))
+    if (!memAccessesCanBeReordered(MemOp, *InstToMove, AA))
       return false;
   }
   return true;
@@ -326,7 +326,7 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
   if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
     return (EltOffset0 + CI.Width0 == EltOffset1 ||
             EltOffset1 + CI.Width1 == EltOffset0) &&
-           CI.GLC0 == CI.GLC1 &&
+           CI.GLC0 == CI.GLC1 && CI.DLC0 == CI.DLC1 &&
            (CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
   }
 
@@ -567,8 +567,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
     }
 
     if (MBBI->mayLoadOrStore() &&
-        (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
-         !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
+        (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
+         !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))) {
       // We fail condition #1, but we may still be able to satisfy condition
       // #2. Add this instruction to the move list and then we will check
       // if condition #2 holds once we have selected the matching instruction.
@@ -640,6 +640,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
          CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
          CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
        }
+       CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm();
+       CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm();
      }
 
      // Check both offsets fit in the reduced range.
@@ -647,7 +649,7 @@
      // move and make sure they are all safe to move down past the merged
      // instruction.
      if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
-       if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
+       if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
          return true;
    }
 
@@ -656,8 +658,8 @@
     // it was safe to move I and also all the instruction in InstsToMove
     // down past this instruction.
     // check if we can move I across MBBI and if we can move all I's users
-    if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
-        !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
+    if (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
+        !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
       break;
   }
   return false;
@@ -726,7 +728,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
 
     TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
         .addReg(ImmReg)
-        .addReg(AddrReg->getReg(), 0, BaseSubReg);
+        .addReg(AddrReg->getReg(), 0, BaseSubReg)
+        .addImm(0); // clamp bit
     BaseSubReg = 0;
   }
 
@@ -819,7 +822,8 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
 
     TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
         .addReg(ImmReg)
-        .addReg(AddrReg->getReg(), 0, BaseSubReg);
+        .addReg(AddrReg->getReg(), 0, BaseSubReg)
+        .addImm(0); // clamp bit
     BaseSubReg = 0;
   }
 
@@ -858,6 +862,7 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
       .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
      .addImm(MergedOffset) // offset
      .addImm(CI.GLC0)      // glc
+      .addImm(CI.DLC0)      // dlc
      .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
 
  std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -910,6 +915,7 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
        .addImm(CI.GLC0)      // glc
        .addImm(CI.SLC0)      // slc
        .addImm(0)            // tfe
+        .addImm(CI.DLC0)      // dlc
        .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
 
  std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -1089,9 +1095,10 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
   MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
       .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
       .addImm(std::min(CI.Offset0, CI.Offset1)) // offset
-      .addImm(CI.GLC0)                          // glc
-      .addImm(CI.SLC0)                          // slc
-      .addImm(0)                                // tfe
+      .addImm(CI.GLC0)      // glc
+      .addImm(CI.SLC0)      // slc
+      .addImm(0)            // tfe
+      .addImm(CI.DLC0)      // dlc
       .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
 
   moveInstsAfter(MIB, CI.InstsToMove);
@@ -1137,9 +1144,10 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
   MachineOperand OffsetLo = createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
   MachineOperand OffsetHi =
       createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
-  unsigned CarryReg = MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
-  unsigned DeadCarryReg =
-      MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+  const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+  unsigned CarryReg = MRI->createVirtualRegister(CarryRC);
+  unsigned DeadCarryReg = MRI->createVirtualRegister(CarryRC);
 
   unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -1147,7 +1155,8 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
     BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
       .addReg(CarryReg, RegState::Define)
       .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
-      .add(OffsetLo);
+      .add(OffsetLo)
+      .addImm(0); // clamp bit
   (void)LoHalf;
   LLVM_DEBUG(dbgs() << " "; LoHalf->dump(););
 
@@ -1156,7 +1165,8 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
       .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
      .addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
      .add(OffsetHi)
-      .addReg(CarryReg, RegState::Kill);
+      .addReg(CarryReg, RegState::Kill)
+      .addImm(0); // clamp bit
  (void)HiHalf;
  LLVM_DEBUG(dbgs() << " "; HiHalf->dump(););