aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp 6
1 files changed, 6 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 1e2cf3890d0a..3ccfd9dde269 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -311,6 +311,12 @@ Value *AMDGPUAtomicOptimizer::buildReduction(IRBuilder<> &B,
if (ST->isWave32())
return V;
+ if (ST->hasPermLane64()) {
+ // Reduce across the upper and lower 32 lanes.
+ return buildNonAtomicBinOp(
+ B, Op, V, B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {}, V));
+ }
+
// Pick an arbitrary lane from 0..31 and an arbitrary lane from 32..63 and
// combine them with a scalar operation.
Function *ReadLane =