aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp211
1 files changed, 89 insertions, 122 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 99b7ffb33884..5a4426ba8113 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -334,86 +334,49 @@ static FixedVectorType *arrayTypeToVecType(ArrayType *ArrayTy) {
ArrayTy->getNumElements());
}
-static Value *stripBitcasts(Value *V) {
- while (Instruction *I = dyn_cast<Instruction>(V)) {
- if (I->getOpcode() != Instruction::BitCast)
- break;
- V = I->getOperand(0);
- }
- return V;
-}
-
static Value *
calculateVectorIndex(Value *Ptr,
const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(stripBitcasts(Ptr));
+ auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
if (!GEP)
- return nullptr;
+ return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
auto I = GEPIdx.find(GEP);
- return I == GEPIdx.end() ? nullptr : I->second;
+ assert(I != GEPIdx.end() && "Must have entry for GEP!");
+ return I->second;
}
-static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
- // FIXME we only support simple cases
- if (GEP->getNumOperands() != 3)
+static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
+ Type *VecElemTy, const DataLayout &DL) {
+ // TODO: Extracting a "multiple of X" from a GEP might be a useful generic
+ // helper.
+ unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
+ MapVector<Value *, APInt> VarOffsets;
+ APInt ConstOffset(BW, 0);
+ if (GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
+ !GEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
return nullptr;
- ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
- if (!I0 || !I0->isZero())
+ unsigned VecElemSize = DL.getTypeAllocSize(VecElemTy);
+ if (VarOffsets.size() > 1)
return nullptr;
- return GEP->getOperand(2);
-}
-
-// Not an instruction handled below to turn into a vector.
-//
-// TODO: Check isTriviallyVectorizable for calls and handle other
-// instructions.
-static bool canVectorizeInst(Instruction *Inst, User *User,
- const DataLayout &DL) {
- switch (Inst->getOpcode()) {
- case Instruction::Load: {
- // Currently only handle the case where the Pointer Operand is a GEP.
- // Also we could not vectorize volatile or atomic loads.
- LoadInst *LI = cast<LoadInst>(Inst);
- if (isa<AllocaInst>(User) &&
- LI->getPointerOperandType() == User->getType() &&
- isa<VectorType>(LI->getType()))
- return true;
-
- Instruction *PtrInst = dyn_cast<Instruction>(LI->getPointerOperand());
- if (!PtrInst)
- return false;
-
- return (PtrInst->getOpcode() == Instruction::GetElementPtr ||
- PtrInst->getOpcode() == Instruction::BitCast) &&
- LI->isSimple();
+ if (VarOffsets.size() == 1) {
+ // Only handle cases where we don't need to insert extra arithmetic
+ // instructions.
+ const auto &VarOffset = VarOffsets.front();
+ if (!ConstOffset.isZero() || VarOffset.second != VecElemSize)
+ return nullptr;
+ return VarOffset.first;
}
- case Instruction::BitCast:
- return true;
- case Instruction::Store: {
- // Must be the stored pointer operand, not a stored value, plus
- // since it should be canonical form, the User should be a GEP.
- // Also we could not vectorize volatile or atomic stores.
- StoreInst *SI = cast<StoreInst>(Inst);
- if (isa<AllocaInst>(User) &&
- SI->getPointerOperandType() == User->getType() &&
- isa<VectorType>(SI->getValueOperand()->getType()))
- return true;
- Instruction *UserInst = dyn_cast<Instruction>(User);
- if (!UserInst)
- return false;
+ APInt Quot;
+ uint64_t Rem;
+ APInt::udivrem(ConstOffset, VecElemSize, Quot, Rem);
+ if (Rem != 0)
+ return nullptr;
- return (SI->getPointerOperand() == User) &&
- (UserInst->getOpcode() == Instruction::GetElementPtr ||
- UserInst->getOpcode() == Instruction::BitCast) &&
- SI->isSimple();
- }
- default:
- return false;
- }
+ return ConstantInt::get(GEP->getContext(), Quot);
}
static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
@@ -455,73 +418,87 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
}
std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
- std::vector<Value *> WorkList;
- SmallVector<User *, 8> Users(Alloca->users());
- SmallVector<User *, 8> UseUsers(Users.size(), Alloca);
+ SmallVector<Instruction *> WorkList;
+ SmallVector<Use *, 8> Uses;
+ for (Use &U : Alloca->uses())
+ Uses.push_back(&U);
+
Type *VecEltTy = VectorTy->getElementType();
- while (!Users.empty()) {
- User *AllocaUser = Users.pop_back_val();
- User *UseUser = UseUsers.pop_back_val();
- Instruction *Inst = dyn_cast<Instruction>(AllocaUser);
+ while (!Uses.empty()) {
+ Use *U = Uses.pop_back_val();
+ Instruction *Inst = dyn_cast<Instruction>(U->getUser());
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
- if (!GEP) {
- if (!canVectorizeInst(Inst, UseUser, DL))
+ if (Value *Ptr = getLoadStorePointerOperand(Inst)) {
+ // This is a store of the pointer, not to the pointer.
+ if (isa<StoreInst>(Inst) &&
+ U->getOperandNo() != StoreInst::getPointerOperandIndex())
return false;
- if (Inst->getOpcode() == Instruction::BitCast) {
- Type *FromTy = Inst->getOperand(0)->getType()->getPointerElementType();
- Type *ToTy = Inst->getType()->getPointerElementType();
- if (FromTy->isAggregateType() || ToTy->isAggregateType() ||
- DL.getTypeSizeInBits(FromTy) != DL.getTypeSizeInBits(ToTy))
- continue;
-
- for (User *CastUser : Inst->users()) {
- if (isAssumeLikeIntrinsic(cast<Instruction>(CastUser)))
- continue;
- Users.push_back(CastUser);
- UseUsers.push_back(Inst);
- }
+ Type *AccessTy = getLoadStoreType(Inst);
+ Ptr = Ptr->stripPointerCasts();
+ // Alloca already accessed as vector, leave alone.
+ if (Ptr == Alloca && DL.getTypeStoreSize(Alloca->getAllocatedType()) ==
+ DL.getTypeStoreSize(AccessTy))
continue;
- }
- WorkList.push_back(AllocaUser);
+ // Check that this is a simple access of a vector element.
+ bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
+ : cast<StoreInst>(Inst)->isSimple();
+ if (!IsSimple ||
+ !CastInst::isBitOrNoopPointerCastable(VecEltTy, AccessTy, DL))
+ return false;
+
+ WorkList.push_back(Inst);
continue;
}
- Value *Index = GEPToVectorIndex(GEP);
+ if (isa<BitCastInst>(Inst)) {
+ // Look through bitcasts.
+ for (Use &U : Inst->uses())
+ Uses.push_back(&U);
+ continue;
+ }
- // If we can't compute a vector index from this GEP, then we can't
- // promote this alloca to vector.
- if (!Index) {
- LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP
- << '\n');
- return false;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ // If we can't compute a vector index from this GEP, then we can't
+ // promote this alloca to vector.
+ Value *Index = GEPToVectorIndex(GEP, Alloca, VecEltTy, DL);
+ if (!Index) {
+ LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP
+ << '\n');
+ return false;
+ }
+
+ GEPVectorIdx[GEP] = Index;
+ for (Use &U : Inst->uses())
+ Uses.push_back(&U);
+ continue;
}
- GEPVectorIdx[GEP] = Index;
- Users.append(GEP->user_begin(), GEP->user_end());
- UseUsers.append(GEP->getNumUses(), GEP);
+ // Ignore assume-like intrinsics and comparisons used in assumes.
+ if (isAssumeLikeIntrinsic(Inst))
+ continue;
+
+ if (isa<ICmpInst>(Inst) && all_of(Inst->users(), [](User *U) {
+ return isAssumeLikeIntrinsic(cast<Instruction>(U));
+ }))
+ continue;
+
+ // Unknown user.
+ return false;
}
LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
<< *VectorTy << '\n');
- for (Value *V : WorkList) {
- Instruction *Inst = cast<Instruction>(V);
+ for (Instruction *Inst : WorkList) {
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
- if (Inst->getType() == AllocaTy || Inst->getType()->isVectorTy())
- break;
-
Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
- if (!Index)
- break;
-
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
+ Type *VecPtrTy = VectorTy->getPointerTo(Alloca->getAddressSpace());
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
@@ -533,16 +510,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(Inst);
- if (SI->getValueOperand()->getType() == AllocaTy ||
- SI->getValueOperand()->getType()->isVectorTy())
- break;
-
Value *Ptr = SI->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
- if (!Index)
- break;
-
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
+ Type *VecPtrTy = VectorTy->getPointerTo(Alloca->getAddressSpace());
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
Value *Elt = SI->getValueOperand();
@@ -808,10 +778,7 @@ bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
//
// FIXME: We should really do something to fix the addresses to a more optimal
// value instead
- llvm::sort(AllocatedSizes, [](std::pair<uint64_t, Align> LHS,
- std::pair<uint64_t, Align> RHS) {
- return LHS.second < RHS.second;
- });
+ llvm::sort(AllocatedSizes, llvm::less_second());
// Check how much local memory is being used by global objects
CurrentLocalMemUsage = 0;
@@ -917,7 +884,7 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
// usage order.
//
// FIXME: It is also possible that if we're allowed to use all of the memory
- // could could end up using more than the maximum due to alignment padding.
+ // could end up using more than the maximum due to alignment padding.
uint32_t NewSize = alignTo(CurrentLocalMemUsage, Alignment);
uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);