diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 44 |
1 files changed, 31 insertions, 13 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e50f5f28e030..f4ca27808a30 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2410,13 +2410,22 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { // the encoding of $symbol starts 12 bytes after the start of the s_add_u32 // instruction. + int64_t Adjust = 0; + if (ST.hasGetPCZeroExtension()) { + // Fix up hardware that does not sign-extend the 48-bit PC value by + // inserting: s_sext_i32_i16 reghi, reghi + Bundler.append( + BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi)); + Adjust += 4; + } + if (OpLo.isGlobal()) - OpLo.setOffset(OpLo.getOffset() + 4); + OpLo.setOffset(OpLo.getOffset() + Adjust + 4); Bundler.append( BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo)); if (OpHi.isGlobal()) - OpHi.setOffset(OpHi.getOffset() + 12); + OpHi.setOffset(OpHi.getOffset() + Adjust + 12); Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) .addReg(RegHi) .add(OpHi)); @@ -2480,6 +2489,19 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case AMDGPU::S_MUL_I64_I32_PSEUDO: MI.setDesc(get(AMDGPU::S_MUL_U64)); break; + + case AMDGPU::S_GETPC_B64_pseudo: + MI.setDesc(get(AMDGPU::S_GETPC_B64)); + if (ST.hasGetPCZeroExtension()) { + Register Dst = MI.getOperand(0).getReg(); + Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); + // Fix up hardware that does not sign-extend the 48-bit PC value by + // inserting: s_sext_i32_i16 dsthi, dsthi + BuildMI(MBB, std::next(MI.getIterator()), DL, get(AMDGPU::S_SEXT_I32_I16), + DstHi) + .addReg(DstHi); + } + break; } return true; } @@ -5280,7 +5302,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64 : AMDGPU::V_CEIL_F16_fake16_e64; case AMDGPU::S_FLOOR_F16: - return AMDGPU::V_FLOOR_F16_fake16_e64; + return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64 + : AMDGPU::V_FLOOR_F16_fake16_e64; case AMDGPU::S_TRUNC_F16: return AMDGPU::V_TRUNC_F16_fake16_e64; case AMDGPU::S_RNDNE_F16: @@ -8756,6 +8779,7 @@ SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const { static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = { {MONoClobber, "amdgpu-noclobber"}, + {MOLastUse, "amdgpu-last-use"}, }; return ArrayRef(TargetFlags); @@ -8944,8 +8968,9 @@ bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, // Depending on the used address space and instructions, some immediate offsets // are allowed and some are not. -// In general, flat instruction offsets can only be non-negative, global and -// scratch instruction offsets can also be negative. +// Pre-GFX12, flat instruction offsets can only be non-negative, global and +// scratch instruction offsets can also be negative. On GFX12, offsets can be +// negative for all variants. // // There are several bugs related to these offsets: // On gfx10.1, flat instructions that go into the global address space cannot @@ -9076,8 +9101,7 @@ bool SIInstrInfo::isAsmOnlyOpcode(int MCOp) const { } int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { - if (SIInstrInfo::isSoftWaitcnt(Opcode)) - Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(Opcode); + Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(Opcode); unsigned Gen = subtargetEncodingFamily(ST); @@ -9113,12 +9137,6 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); - // TODO-GFX12: Remove this. - // Hack to allow some GFX12 codegen tests to run before all the encodings are - // implemented. - if (MCOp == (uint16_t)-1 && Gen == SIEncodingFamily::GFX12) - MCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX11); - // -1 means that Opcode is already a native instruction. if (MCOp == -1) return Opcode; |
