Diffstat (limited to 'lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp')
 lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 743dc7a0d00b..5dd5b3691e0a 100644
--- a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPULowerKernelArguments.cpp ------------------------------------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -110,8 +109,9 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
       // modes on SI to know the high bits are 0 so pointer adds don't wrap. We
       // can't represent this with range metadata because it's only allowed for
       // integer types.
-      if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
-          ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+           PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
+          !ST.hasUsableDSOffset())
         continue;
 
       // FIXME: We can replace this with equivalent alias.scope/noalias
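The new guard folds the generation check into the subtarget and extends the AssertZext workaround from local (LDS) to region (GDS) pointers as well. For reference, `hasUsableDSOffset()` in AMDGPUSubtarget.h is roughly the following; this is a sketch from memory of the predicate at this revision, not a quote from the diff:

    // True if the offset field of DS instructions works as expected. On SI,
    // the offset uses a 16-bit adder and does not always wrap properly.
    bool hasUsableDSOffset() const {
      return getGeneration() >= SEA_ISLANDS;
    }

So the `continue` still fires exactly on SOUTHERN_ISLANDS, where DS offsets can't be trusted, but the intent now lives in one named predicate instead of an open-coded generation comparison.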
@@ -132,6 +132,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
                                       KernArgBaseAlign);
 
     Value *ArgPtr;
+    Type *AdjustedArgTy;
     if (DoShiftOpt) { // FIXME: Handle aggregate types
       // Since we don't have sub-dword scalar loads, avoid doing an extload by
       // loading earlier than the argument address, and extracting the relevant
@@ -139,30 +140,27 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
       //
       // Additionally widen any sub-dword load to i32 even if suitably aligned,
       // so that CSE between different argument loads works easily.
-
       ArgPtr = Builder.CreateConstInBoundsGEP1_64(
-        KernArgSegment,
-        AlignDownOffset,
-        Arg.getName() + ".kernarg.offset.align.down");
-      ArgPtr = Builder.CreateBitCast(ArgPtr,
-                                     Builder.getInt32Ty()->getPointerTo(AS),
-                                     ArgPtr->getName() + ".cast");
+          Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
+          Arg.getName() + ".kernarg.offset.align.down");
+      AdjustedArgTy = Builder.getInt32Ty();
     } else {
       ArgPtr = Builder.CreateConstInBoundsGEP1_64(
-        KernArgSegment,
-        EltOffset,
-        Arg.getName() + ".kernarg.offset");
-      ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->getPointerTo(AS),
-                                     ArgPtr->getName() + ".cast");
+          Builder.getInt8Ty(), KernArgSegment, EltOffset,
+          Arg.getName() + ".kernarg.offset");
+      AdjustedArgTy = ArgTy;
     }
 
     if (IsV3 && Size >= 32) {
       V4Ty = VectorType::get(VT->getVectorElementType(), 4);
       // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
-      ArgPtr = Builder.CreateBitCast(ArgPtr, V4Ty->getPointerTo(AS));
+      AdjustedArgTy = V4Ty;
     }
 
-    LoadInst *Load = Builder.CreateAlignedLoad(ArgPtr, AdjustedAlign);
+    ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
+                                   ArgPtr->getName() + ".cast");
+    LoadInst *Load =
+        Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
     Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));
 
     MDBuilder MDB(Ctx);
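Net effect of this hunk: the load type (`AdjustedArgTy`) is chosen first, and a single bitcast plus an explicitly typed `CreateAlignedLoad` are emitted at the end, instead of each branch casting the pointer itself. Below is a minimal, self-contained sketch of the align-down-and-extract idiom against the IRBuilder API of this revision (the unsigned-alignment `CreateAlignedLoad` overload, later replaced by `Align`); the function name, the plain `i8*` standing in for the addrspace(4) kernarg segment pointer, and the byte offsets are all illustrative, not taken from the pass:

    // build_kernarg_load.cpp -- illustrative sketch, not part of the pass.
    #include <cstdint>
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("kernarg-example", Ctx);
      IRBuilder<> Builder(Ctx);

      // Stand-in for a kernel taking an i16 argument; the i8* parameter
      // plays the role of the kernarg segment pointer (address spaces
      // omitted for brevity).
      FunctionType *FTy = FunctionType::get(Builder.getInt16Ty(),
                                            {Builder.getInt8PtrTy()}, false);
      Function *F =
          Function::Create(FTy, Function::ExternalLinkage, "load_i16_arg", &M);
      Builder.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

      Value *KernArgSegment = &*F->arg_begin();
      uint64_t EltOffset = 2;                    // illustrative: i16 at byte 2
      uint64_t AlignDownOffset = EltOffset & ~3; // round down to a dword
      uint64_t OffsetDiff = EltOffset - AlignDownOffset;

      // Load the whole dword containing the argument instead of emitting an
      // i16 extload at an unaligned offset.
      Value *ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
          "arg.kernarg.offset.align.down");
      ArgPtr = Builder.CreateBitCast(
          ArgPtr, Builder.getInt32Ty()->getPointerTo(), "arg.cast");
      LoadInst *Load = Builder.CreateAlignedLoad(Builder.getInt32Ty(), ArgPtr, 4);

      // Kernel arguments never change during execution, so mark the load
      // invariant, as the pass does.
      Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));

      // Shift the relevant 16 bits down and truncate to the argument type.
      Value *Shifted = Builder.CreateLShr(Load, OffsetDiff * 8);
      Builder.CreateRet(Builder.CreateTrunc(Shifted, Builder.getInt16Ty()));

      M.print(outs(), nullptr);
      return 0;
    }

Widening the sub-dword load to i32 is also what lets neighboring argument loads CSE, as the comment in the hunk notes; and because the v3-to-v4 widening now just overwrites `AdjustedArgTy`, it shares the same single cast-and-load tail instead of needing its own bitcast.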