diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 6 |
1 files changed, 6 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index 1e2cf3890d0a..3ccfd9dde269 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -311,6 +311,12 @@ Value *AMDGPUAtomicOptimizer::buildReduction(IRBuilder<> &B, if (ST->isWave32()) return V; + if (ST->hasPermLane64()) { + // Reduce across the upper and lower 32 lanes. + return buildNonAtomicBinOp( + B, Op, V, B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {}, V)); + } + // Pick an arbitrary lane from 0..31 and an arbitrary lane from 32..63 and // combine them with a scalar operation. Function *ReadLane = |
