aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms')
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp15
6 files changed, 127 insertions, 40 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
index b9134ce26e80..84013a8909db 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -396,6 +396,18 @@ static bool getPotentialCopiesOfMemoryValue(
NullOnly = false;
};
+ auto AdjustWrittenValueType = [&](const AAPointerInfo::Access &Acc,
+ Value &V) {
+ Value *AdjV = AA::getWithType(V, *I.getType());
+ if (!AdjV) {
+ LLVM_DEBUG(dbgs() << "Underlying object written but stored value "
+ "cannot be converted to read type: "
+ << *Acc.getRemoteInst() << " : " << *I.getType()
+ << "\n";);
+ }
+ return AdjV;
+ };
+
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
if ((IsLoad && !Acc.isWriteOrAssumption()) || (!IsLoad && !Acc.isRead()))
return true;
@@ -417,7 +429,10 @@ static bool getPotentialCopiesOfMemoryValue(
if (IsLoad) {
assert(isa<LoadInst>(I) && "Expected load or store instruction only!");
if (!Acc.isWrittenValueUnknown()) {
- NewCopies.push_back(Acc.getWrittenValue());
+ Value *V = AdjustWrittenValueType(Acc, *Acc.getWrittenValue());
+ if (!V)
+ return false;
+ NewCopies.push_back(V);
NewCopyOrigins.push_back(Acc.getRemoteInst());
return true;
}
@@ -428,7 +443,10 @@ static bool getPotentialCopiesOfMemoryValue(
<< *Acc.getRemoteInst() << "\n";);
return false;
}
- NewCopies.push_back(SI->getValueOperand());
+ Value *V = AdjustWrittenValueType(Acc, *SI->getValueOperand());
+ if (!V)
+ return false;
+ NewCopies.push_back(V);
NewCopyOrigins.push_back(SI);
} else {
assert(isa<StoreInst>(I) && "Expected load or store instruction only!");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 001ef55ba472..42158e4e05dd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1043,12 +1043,14 @@ struct AAPointerInfoImpl
const auto &NoSyncAA = A.getAAFor<AANoSync>(
QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
- IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
+ IRPosition::function(Scope), &QueryingAA, DepClassTy::NONE);
bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync();
bool InstIsExecutedByInitialThreadOnly =
ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I);
bool InstIsExecutedInAlignedRegion =
ExecDomainAA && ExecDomainAA->isExecutedInAlignedRegion(A, I);
+ if (InstIsExecutedInAlignedRegion || InstIsExecutedByInitialThreadOnly)
+ A.recordDependence(*ExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
InformationCache &InfoCache = A.getInfoCache();
bool IsThreadLocalObj =
@@ -1063,14 +1065,24 @@ struct AAPointerInfoImpl
auto CanIgnoreThreadingForInst = [&](const Instruction &I) -> bool {
if (IsThreadLocalObj || AllInSameNoSyncFn)
return true;
- if (!ExecDomainAA)
+ const auto *FnExecDomainAA =
+ I.getFunction() == &Scope
+ ? ExecDomainAA
+ : A.lookupAAFor<AAExecutionDomain>(
+ IRPosition::function(*I.getFunction()), &QueryingAA,
+ DepClassTy::NONE);
+ if (!FnExecDomainAA)
return false;
if (InstIsExecutedInAlignedRegion ||
- ExecDomainAA->isExecutedInAlignedRegion(A, I))
+ FnExecDomainAA->isExecutedInAlignedRegion(A, I)) {
+ A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
+ }
if (InstIsExecutedByInitialThreadOnly &&
- ExecDomainAA->isExecutedByInitialThreadOnly(I))
+ FnExecDomainAA->isExecutedByInitialThreadOnly(I)) {
+ A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
+ }
return false;
};
@@ -4161,12 +4173,14 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
return true;
if (auto *LI = dyn_cast<LoadInst>(V)) {
if (llvm::all_of(LI->uses(), [&](const Use &U) {
- return InfoCache.isOnlyUsedByAssume(
- cast<Instruction>(*U.getUser())) ||
- A.isAssumedDead(U, this, nullptr, UsedAssumedInformation);
+ auto &UserI = cast<Instruction>(*U.getUser());
+ if (InfoCache.isOnlyUsedByAssume(UserI)) {
+ if (AssumeOnlyInst)
+ AssumeOnlyInst->insert(&UserI);
+ return true;
+ }
+ return A.isAssumedDead(U, this, nullptr, UsedAssumedInformation);
})) {
- if (AssumeOnlyInst)
- AssumeOnlyInst->insert(LI);
return true;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index bee154dab10f..eb499a1aa912 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -188,9 +188,9 @@ struct AAICVTracker;
struct OMPInformationCache : public InformationCache {
OMPInformationCache(Module &M, AnalysisGetter &AG,
BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
- KernelSet &Kernels)
+ KernelSet &Kernels, bool OpenMPPostLink)
: InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
- Kernels(Kernels) {
+ Kernels(Kernels), OpenMPPostLink(OpenMPPostLink) {
OMPBuilder.initialize();
initializeRuntimeFunctions(M);
@@ -448,6 +448,24 @@ struct OMPInformationCache : public InformationCache {
CI->setCallingConv(Fn->getCallingConv());
}
+ // Helper function to determine if it's legal to create a call to the runtime
+ // functions.
+ bool runtimeFnsAvailable(ArrayRef<RuntimeFunction> Fns) {
+ // We can always emit calls if we haven't yet linked in the runtime.
+ if (!OpenMPPostLink)
+ return true;
+
+ // Once the runtime has already been linked in we cannot emit calls to
+ // any undefined functions.
+ for (RuntimeFunction Fn : Fns) {
+ RuntimeFunctionInfo &RFI = RFIs[Fn];
+
+ if (RFI.Declaration && RFI.Declaration->isDeclaration())
+ return false;
+ }
+ return true;
+ }
+
/// Helper to initialize all runtime function information for those defined
/// in OpenMPKinds.def.
void initializeRuntimeFunctions(Module &M) {
@@ -523,6 +541,9 @@ struct OMPInformationCache : public InformationCache {
/// Collection of known OpenMP runtime functions..
DenseSet<const Function *> RTLFunctions;
+
+ /// Indicates if we have already linked in the OpenMP device library.
+ bool OpenMPPostLink = false;
};
template <typename Ty, bool InsertInvalidates = true>
@@ -1412,7 +1433,10 @@ private:
Changed |= WasSplit;
return WasSplit;
};
- RFI.foreachUse(SCC, SplitMemTransfers);
+ if (OMPInfoCache.runtimeFnsAvailable(
+ {OMPRTL___tgt_target_data_begin_mapper_issue,
+ OMPRTL___tgt_target_data_begin_mapper_wait}))
+ RFI.foreachUse(SCC, SplitMemTransfers);
return Changed;
}
@@ -2656,7 +2680,9 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
bool isExecutedInAlignedRegion(Attributor &A,
const Instruction &I) const override {
- if (!isValidState() || isa<CallBase>(I))
+ assert(I.getFunction() == getAnchorScope() &&
+ "Instruction is out of scope!");
+ if (!isValidState())
return false;
const Instruction *CurI;
@@ -2667,14 +2693,18 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
auto *CB = dyn_cast<CallBase>(CurI);
if (!CB)
continue;
+ if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
+ break;
+ }
const auto &It = CEDMap.find(CB);
if (It == CEDMap.end())
continue;
- if (!It->getSecond().IsReachedFromAlignedBarrierOnly)
+ if (!It->getSecond().IsReachingAlignedBarrierOnly)
return false;
+ break;
} while ((CurI = CurI->getNextNonDebugInstruction()));
- if (!CurI && !BEDMap.lookup(I.getParent()).IsReachedFromAlignedBarrierOnly)
+ if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly)
return false;
// Check backward until a call or the block beginning is reached.
@@ -2683,12 +2713,16 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
auto *CB = dyn_cast<CallBase>(CurI);
if (!CB)
continue;
+ if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
+ break;
+ }
const auto &It = CEDMap.find(CB);
if (It == CEDMap.end())
continue;
if (!AA::isNoSyncInst(A, *CB, *this)) {
- if (It->getSecond().IsReachedFromAlignedBarrierOnly)
+ if (It->getSecond().IsReachedFromAlignedBarrierOnly) {
break;
+ }
return false;
}
@@ -2984,7 +3018,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
if (EDAA.getState().isValidState()) {
const auto &CalleeED = EDAA.getFunctionExecutionDomain();
ED.IsReachedFromAlignedBarrierOnly =
- CalleeED.IsReachedFromAlignedBarrierOnly;
+ CallED.IsReachedFromAlignedBarrierOnly =
+ CalleeED.IsReachedFromAlignedBarrierOnly;
AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly;
if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly)
ED.EncounteredNonLocalSideEffect |=
@@ -2999,8 +3034,9 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
continue;
}
}
- ED.IsReachedFromAlignedBarrierOnly =
- IsNoSync && ED.IsReachedFromAlignedBarrierOnly;
+ if (!IsNoSync)
+ ED.IsReachedFromAlignedBarrierOnly =
+ CallED.IsReachedFromAlignedBarrierOnly = false;
AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly;
ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory();
if (!IsNoSync)
@@ -3914,6 +3950,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ // We cannot change to SPMD mode if the runtime functions aren't available.
+ if (!OMPInfoCache.runtimeFnsAvailable(
+ {OMPRTL___kmpc_get_hardware_thread_id_in_block,
+ OMPRTL___kmpc_barrier_simple_spmd}))
+ return false;
+
if (!SPMDCompatibilityTracker.isAssumed()) {
for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
if (!NonCompatibleI)
@@ -4021,6 +4063,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!ReachedKnownParallelRegions.isValidState())
return ChangeStatus::UNCHANGED;
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ if (!OMPInfoCache.runtimeFnsAvailable(
+ {OMPRTL___kmpc_get_hardware_num_threads_in_block,
+ OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic,
+ OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
+ return ChangeStatus::UNCHANGED;
+
const int InitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
@@ -4167,7 +4216,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
Module &M = *Kernel->getParent();
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
FunctionCallee BlockHwSizeFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
@@ -5343,7 +5391,10 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
BumpPtrAllocator Allocator;
CallGraphUpdater CGUpdater;
- OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels);
+ bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
+ LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
+ OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels,
+ PostLink);
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
@@ -5417,9 +5468,11 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, C, AM, UR);
+ bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
+ LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
SetVector<Function *> Functions(SCC.begin(), SCC.end());
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
- /*CGSCC*/ &Functions, Kernels);
+ /*CGSCC*/ &Functions, Kernels, PostLink);
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 1480a0ff9e2f..de3095852048 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3184,16 +3184,6 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
}
break;
}
- case Instruction::And: {
- const APInt *BOC;
- if (match(BOp1, m_APInt(BOC))) {
- // If we have ((X & C) == C), turn it into ((X & C) != 0).
- if (C == *BOC && C.isPowerOf2())
- return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
- BO, Constant::getNullValue(RHS->getType()));
- }
- break;
- }
case Instruction::UDiv:
if (C.isZero()) {
// (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
@@ -5653,6 +5643,12 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
}
}
}
+
+ // Op0 eq C_Pow2 -> Op0 ne 0 if Op0 is known to be C_Pow2 or zero.
+ if (Op1Known.isConstant() && Op1Known.getConstant().isPowerOf2() &&
+ (Op0Known & Op1Known) == Op0Known)
+ return new ICmpInst(CmpInst::getInversePredicate(Pred), Op0,
+ ConstantInt::getNullValue(Op1->getType()));
break;
}
case ICmpInst::ICMP_ULT: {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index 31cdd2ee56b9..b2ed95b05e04 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -2930,7 +2930,8 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
return;
unsigned BitWidth = DL.getPointerTypeSizeInBits(NewTy);
- if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+ if (BitWidth == OldLI.getType()->getScalarSizeInBits() &&
+ !getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
MDNode *NN = MDNode::get(OldLI.getContext(), std::nullopt);
NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index bb8544356c6d..0bd519a6d945 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5959,7 +5959,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
// Saves the list of values that are used in the loop but are defined outside
// the loop (not including non-instruction values such as arguments and
// constants).
- SmallPtrSet<Value *, 8> LoopInvariants;
+ SmallPtrSet<Instruction *, 8> LoopInvariants;
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -6085,11 +6085,16 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
for (auto *Inst : LoopInvariants) {
// FIXME: The target might use more than one register for the type
// even in the scalar case.
- unsigned Usage =
- VFs[i].isScalar() ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
+ bool IsScalar = all_of(Inst->users(), [&](User *U) {
+ auto *I = cast<Instruction>(U);
+ return TheLoop != LI->getLoopFor(I->getParent()) ||
+ isScalarAfterVectorization(I, VFs[i]);
+ });
+
+ ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[i];
unsigned ClassID =
- TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType());
- Invariant[ClassID] += Usage;
+ TTI.getRegisterClassForType(VF.isVector(), Inst->getType());
+ Invariant[ClassID] += GetRegUsage(Inst->getType(), VF);
}
LLVM_DEBUG({