diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2024-01-24 19:11:41 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2024-01-24 19:11:41 +0000 |
| commit | 4df029cc74e5ec124f14a5682e44999ce4f086df (patch) | |
| tree | fa2e8720472930df97920b4185215c910159f10d /llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | |
| parent | 950076cd18f3fa9d789b4add9d405898efff09a5 (diff) | |
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 28 |
1 file changed, 21 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index ad8dcda93c36..fdee74d58d26 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1917,7 +1917,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(); if (BaseOpcode->Atomic) CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization - if (CPol & ~(IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12)) + if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) | + AMDGPU::CPol::VOLATILE)) return false; int NumVAddrRegs = 0; @@ -3927,7 +3928,7 @@ AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const { } InstructionSelector::ComplexRendererFns -AMDGPUInstructionSelector::selectDotIUVOP3PMods(MachineOperand &Root) const { +AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const { // Literal i1 value set in intrinsic, represents SrcMods for the next operand. // Value is in Imm operand as i1 sign extended to int64_t. // 1(-1) promotes packed values to signed, 0 treats them as unsigned. @@ -4556,7 +4557,7 @@ bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const { // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative // values. - if (AMDGPU::isGFX12Plus(STI)) + if (STI.hasSignedScratchOffsets()) return true; Register LHS = AddrMI->getOperand(1).getReg(); @@ -4585,6 +4586,11 @@ bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const { if (isNoUnsignedWrap(AddrMI)) return true; + // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative + // values. 
+ if (STI.hasSignedScratchOffsets()) + return true; + Register LHS = AddrMI->getOperand(1).getReg(); Register RHS = AddrMI->getOperand(2).getReg(); return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS); @@ -4594,6 +4600,11 @@ bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const { // of: SGPR + VGPR + Imm. bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm( Register Addr) const { + // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative + // values. + if (STI.hasSignedScratchOffsets()) + return true; + MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI); Register Base = AddrMI->getOperand(1).getReg(); std::optional<DefinitionAndSourceRegister> BaseDef = @@ -5411,6 +5422,7 @@ bool AMDGPUInstructionSelector::selectNamedBarrierInst( I.eraseFromParent(); return true; } + bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); const DebugLoc &DL = I.getDebugLoc(); @@ -5496,11 +5508,13 @@ void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB, MIB.addImm(Swizzle); } -void AMDGPUInstructionSelector::renderSetGLC(MachineInstrBuilder &MIB, - const MachineInstr &MI, - int OpIdx) const { +void AMDGPUInstructionSelector::renderExtractCpolSetGLC( + MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { assert(OpIdx >= 0 && "expected to match an immediate operand"); - MIB.addImm(MI.getOperand(OpIdx).getImm() | AMDGPU::CPol::GLC); + const uint32_t Cpol = MI.getOperand(OpIdx).getImm() & + (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL + : AMDGPU::CPol::ALL_pregfx12); + MIB.addImm(Cpol | AMDGPU::CPol::GLC); } void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB, |
