about summary refs log tree commit diff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 44
1 file changed, 31 insertions(+), 13 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e50f5f28e030..f4ca27808a30 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2410,13 +2410,22 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// the encoding of $symbol starts 12 bytes after the start of the s_add_u32
// instruction.
+ int64_t Adjust = 0;
+ if (ST.hasGetPCZeroExtension()) {
+ // Fix up hardware that does not sign-extend the 48-bit PC value by
+ // inserting: s_sext_i32_i16 reghi, reghi
+ Bundler.append(
+ BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
+ Adjust += 4;
+ }
+
if (OpLo.isGlobal())
- OpLo.setOffset(OpLo.getOffset() + 4);
+ OpLo.setOffset(OpLo.getOffset() + Adjust + 4);
Bundler.append(
BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
if (OpHi.isGlobal())
- OpHi.setOffset(OpHi.getOffset() + 12);
+ OpHi.setOffset(OpHi.getOffset() + Adjust + 12);
Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
.addReg(RegHi)
.add(OpHi));
@@ -2480,6 +2489,19 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::S_MUL_I64_I32_PSEUDO:
MI.setDesc(get(AMDGPU::S_MUL_U64));
break;
+
+ case AMDGPU::S_GETPC_B64_pseudo:
+ MI.setDesc(get(AMDGPU::S_GETPC_B64));
+ if (ST.hasGetPCZeroExtension()) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+ // Fix up hardware that does not sign-extend the 48-bit PC value by
+ // inserting: s_sext_i32_i16 dsthi, dsthi
+ BuildMI(MBB, std::next(MI.getIterator()), DL, get(AMDGPU::S_SEXT_I32_I16),
+ DstHi)
+ .addReg(DstHi);
+ }
+ break;
}
return true;
}
@@ -5280,7 +5302,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
: AMDGPU::V_CEIL_F16_fake16_e64;
case AMDGPU::S_FLOOR_F16:
- return AMDGPU::V_FLOOR_F16_fake16_e64;
+ return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
+ : AMDGPU::V_FLOOR_F16_fake16_e64;
case AMDGPU::S_TRUNC_F16:
return AMDGPU::V_TRUNC_F16_fake16_e64;
case AMDGPU::S_RNDNE_F16:
@@ -8756,6 +8779,7 @@ SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
{
{MONoClobber, "amdgpu-noclobber"},
+ {MOLastUse, "amdgpu-last-use"},
};
return ArrayRef(TargetFlags);
@@ -8944,8 +8968,9 @@ bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset,
// Depending on the used address space and instructions, some immediate offsets
// are allowed and some are not.
-// In general, flat instruction offsets can only be non-negative, global and
-// scratch instruction offsets can also be negative.
+// Pre-GFX12, flat instruction offsets can only be non-negative, global and
+// scratch instruction offsets can also be negative. On GFX12, offsets can be
+// negative for all variants.
//
// There are several bugs related to these offsets:
// On gfx10.1, flat instructions that go into the global address space cannot
@@ -9076,8 +9101,7 @@ bool SIInstrInfo::isAsmOnlyOpcode(int MCOp) const {
}
int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
- if (SIInstrInfo::isSoftWaitcnt(Opcode))
- Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(Opcode);
+ Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(Opcode);
unsigned Gen = subtargetEncodingFamily(ST);
@@ -9113,12 +9137,6 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
- // TODO-GFX12: Remove this.
- // Hack to allow some GFX12 codegen tests to run before all the encodings are
- // implemented.
- if (MCOp == (uint16_t)-1 && Gen == SIEncodingFamily::GFX12)
- MCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX11);
-
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
return Opcode;