Diffstat (limited to 'lib/Target/AMDGPU/SILowerI1Copies.cpp')
-rw-r--r--  lib/Target/AMDGPU/SILowerI1Copies.cpp  107
1 file changed, 74 insertions, 33 deletions
diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp
index eb038bb5d5fc..1c0f836f07e6 100644
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -1,15 +1,14 @@
//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers all occurrences of i1 values (with a vreg_1 register class)
-// to lane masks (64-bit scalar registers). The pass assumes machine SSA form
-// and a wave-level control flow graph.
+// to lane masks (32 / 64-bit scalar registers). The pass assumes machine SSA
+// form and a wave-level control flow graph.
//
// Before this pass, values that are semantically i1 and are defined and used
// within the same basic block are already represented as lane masks in scalar
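Conceptually, a lane mask packs one i1 value per lane of the wave into a single scalar register: bit N of the mask is the boolean seen by lane N, so a wave64 target needs a 64-bit SGPR pair while a wave32 target only needs a 32-bit SGPR. A minimal illustrative sketch of that mapping (plain C++, not code from the pass; WaveSize, laneBit and laneValue are made-up names):

#include <cstdint>

// One i1 per lane: bit N of the mask holds the value for lane N.
constexpr unsigned WaveSize = 64;   // 32 on wave32 subtargets

constexpr uint64_t laneBit(unsigned Lane) {
  return uint64_t(1) << Lane;
}

inline bool laneValue(uint64_t Mask, unsigned Lane) {
  return (Mask & laneBit(Lane)) != 0;
}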
@@ -51,6 +50,7 @@ public:
static char ID;
private:
+ bool IsWave32 = false;
MachineFunction *MF = nullptr;
MachineDominatorTree *DT = nullptr;
MachinePostDominatorTree *PDT = nullptr;
@@ -58,6 +58,14 @@ private:
const GCNSubtarget *ST = nullptr;
const SIInstrInfo *TII = nullptr;
+ unsigned ExecReg;
+ unsigned MovOp;
+ unsigned AndOp;
+ unsigned OrOp;
+ unsigned XorOp;
+ unsigned AndN2Op;
+ unsigned OrN2Op;
+
DenseSet<unsigned> ConstrainRegs;
public:
@@ -87,6 +95,11 @@ private:
MachineBasicBlock::iterator
getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+ bool isVreg1(unsigned Reg) const {
+ return TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
+ }
+
bool isLaneMaskReg(unsigned Reg) const {
return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
@@ -412,8 +425,10 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
}
static unsigned createLaneMaskReg(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
- return MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ return MRI.createVirtualRegister(ST.isWave32() ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::SReg_64RegClass);
}
static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
@@ -443,13 +458,32 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
ST = &MF->getSubtarget<GCNSubtarget>();
TII = ST->getInstrInfo();
+ IsWave32 = ST->isWave32();
+
+ if (IsWave32) {
+ ExecReg = AMDGPU::EXEC_LO;
+ MovOp = AMDGPU::S_MOV_B32;
+ AndOp = AMDGPU::S_AND_B32;
+ OrOp = AMDGPU::S_OR_B32;
+ XorOp = AMDGPU::S_XOR_B32;
+ AndN2Op = AMDGPU::S_ANDN2_B32;
+ OrN2Op = AMDGPU::S_ORN2_B32;
+ } else {
+ ExecReg = AMDGPU::EXEC;
+ MovOp = AMDGPU::S_MOV_B64;
+ AndOp = AMDGPU::S_AND_B64;
+ OrOp = AMDGPU::S_OR_B64;
+ XorOp = AMDGPU::S_XOR_B64;
+ AndN2Op = AMDGPU::S_ANDN2_B64;
+ OrN2Op = AMDGPU::S_ORN2_B64;
+ }
lowerCopiesFromI1();
lowerPhis();
lowerCopiesToI1();
for (unsigned Reg : ConstrainRegs)
- MRI->constrainRegClass(Reg, &AMDGPU::SReg_64_XEXECRegClass);
+ MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
ConstrainRegs.clear();
return true;
@@ -465,13 +499,10 @@ void SILowerI1Copies::lowerCopiesFromI1() {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- MRI->getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass)
+ if (!isVreg1(SrcReg))
continue;
- if (isLaneMaskReg(DstReg) ||
- (TargetRegisterInfo::isVirtualRegister(DstReg) &&
- MRI->getRegClass(DstReg) == &AMDGPU::VReg_1RegClass))
+ if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
continue;
// Copy into a 32-bit vector register.
@@ -484,6 +515,8 @@ void SILowerI1Copies::lowerCopiesFromI1() {
ConstrainRegs.insert(SrcReg);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
+ .addImm(0)
+ .addImm(0)
.addImm(-1)
.addReg(SrcReg);
DeadCopies.push_back(&MI);
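A copy out of an i1 value is materialized per lane with V_CNDMASK_B32_e64: source 0 is the immediate 0, source 1 is the immediate -1, and the lane-mask register selects between them, so every active lane whose mask bit is set receives all ones and every other active lane receives zero. (The two additional zero immediates added by this hunk are, presumably, source-modifier operands of the VOP3 encoding of the instruction.) The per-lane effect, as an illustrative C++ sketch rather than the backend API:

#include <cstdint>

// Per-lane semantics of V_CNDMASK_B32 dst, 0, -1, mask:
// dst = mask_bit ? -1 : 0 in every lane where EXEC is set.
inline uint32_t cndmaskLane(bool MaskBit) {
  return MaskBit ? 0xFFFFFFFFu : 0u;
}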
@@ -503,18 +536,22 @@ void SILowerI1Copies::lowerPhis() {
SmallVector<MachineBasicBlock *, 4> IncomingBlocks;
SmallVector<unsigned, 4> IncomingRegs;
SmallVector<unsigned, 4> IncomingUpdated;
+#ifndef NDEBUG
+ DenseSet<unsigned> PhiRegisters;
+#endif
for (MachineBasicBlock &MBB : *MF) {
LF.initialize(MBB);
for (MachineInstr &MI : MBB.phis()) {
unsigned DstReg = MI.getOperand(0).getReg();
- if (MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+ if (!isVreg1(DstReg))
continue;
LLVM_DEBUG(dbgs() << "Lower PHI: " << MI);
- MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+ MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::SReg_64RegClass);
// Collect incoming values.
for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
@@ -525,18 +562,22 @@ void SILowerI1Copies::lowerPhis() {
if (IncomingDef->getOpcode() == AMDGPU::COPY) {
IncomingReg = IncomingDef->getOperand(1).getReg();
- assert(isLaneMaskReg(IncomingReg));
+ assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
assert(!IncomingDef->getOperand(1).getSubReg());
} else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
continue;
} else {
- assert(IncomingDef->isPHI());
+ assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
}
IncomingBlocks.push_back(IncomingMBB);
IncomingRegs.push_back(IncomingReg);
}
+#ifndef NDEBUG
+ PhiRegisters.insert(DstReg);
+#endif
+
// Phis in a loop that are observed outside the loop receive a simple but
// conservatively correct treatment.
MachineBasicBlock *PostDomBound = &MBB;
@@ -629,8 +670,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
continue;
unsigned DstReg = MI.getOperand(0).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DstReg) ||
- MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+ if (!isVreg1(DstReg))
continue;
if (MRI->use_empty(DstReg)) {
@@ -640,7 +680,8 @@ void SILowerI1Copies::lowerCopiesToI1() {
LLVM_DEBUG(dbgs() << "Lower Other: " << MI);
- MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+ MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::SReg_64RegClass);
if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
continue;
@@ -649,7 +690,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
assert(!MI.getOperand(1).getSubReg());
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- !isLaneMaskReg(SrcReg)) {
+ (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
unsigned TmpReg = createLaneMaskReg(*MF);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
@@ -699,7 +740,7 @@ bool SILowerI1Copies::isConstantLaneMask(unsigned Reg, bool &Val) const {
return false;
}
- if (MI->getOpcode() != AMDGPU::S_MOV_B64)
+ if (MI->getOpcode() != MovOp)
return false;
if (!MI->getOperand(1).isImm())
@@ -774,10 +815,10 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
if (PrevVal == CurVal) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
} else if (CurVal) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(AMDGPU::EXEC);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(ExecReg);
} else {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), DstReg)
- .addReg(AMDGPU::EXEC)
+ BuildMI(MBB, I, DL, TII->get(XorOp), DstReg)
+ .addReg(ExecReg)
.addImm(-1);
}
return;
@@ -790,9 +831,9 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
PrevMaskedReg = PrevReg;
} else {
PrevMaskedReg = createLaneMaskReg(*MF);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ANDN2_B64), PrevMaskedReg)
+ BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
.addReg(PrevReg)
- .addReg(AMDGPU::EXEC);
+ .addReg(ExecReg);
}
}
if (!CurConstant) {
@@ -801,9 +842,9 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
CurMaskedReg = CurReg;
} else {
CurMaskedReg = createLaneMaskReg(*MF);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), CurMaskedReg)
+ BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
.addReg(CurReg)
- .addReg(AMDGPU::EXEC);
+ .addReg(ExecReg);
}
}
@@ -814,12 +855,12 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
.addReg(PrevMaskedReg);
} else if (PrevConstant && PrevVal) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ORN2_B64), DstReg)
+ BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg)
.addReg(CurMaskedReg)
- .addReg(AMDGPU::EXEC);
+ .addReg(ExecReg);
} else {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_OR_B64), DstReg)
+ BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
.addReg(PrevMaskedReg)
- .addReg(CurMaskedReg ? CurMaskedReg : (unsigned)AMDGPU::EXEC);
+ .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
}
}
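Taken together, the general case built here computes DstReg = (PrevReg & ~EXEC) | (CurReg & EXEC): lanes outside EXEC keep the previous lane-mask value and lanes inside EXEC take the current one, with constant inputs folded into the cheaper COPY, S_XOR, S_ORN2 and S_ANDN2 forms above. A worked sketch of that merge (illustrative C++, not the MachineIR builder calls; shown for a 64-bit mask, the wave32 case is identical with 32-bit values):

#include <cstdint>

// General case of buildMergeLaneMasks:
//   PrevMasked = Prev & ~Exec          (S_ANDN2_B32/B64)
//   CurMasked  = Cur  &  Exec          (S_AND_B32/B64)
//   Dst        = PrevMasked | CurMasked (S_OR_B32/B64)
inline uint64_t mergeLaneMasks(uint64_t Prev, uint64_t Cur, uint64_t Exec) {
  uint64_t PrevMasked = Prev & ~Exec; // keep inactive lanes
  uint64_t CurMasked  = Cur  &  Exec; // update active lanes
  return PrevMasked | CurMasked;
}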