Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms')
6 files changed, 127 insertions, 40 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
index b9134ce26e80..84013a8909db 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -396,6 +396,18 @@ static bool getPotentialCopiesOfMemoryValue(
     NullOnly = false;
   };
 
+  auto AdjustWrittenValueType = [&](const AAPointerInfo::Access &Acc,
+                                    Value &V) {
+    Value *AdjV = AA::getWithType(V, *I.getType());
+    if (!AdjV) {
+      LLVM_DEBUG(dbgs() << "Underlying object written but stored value "
+                           "cannot be converted to read type: "
+                        << *Acc.getRemoteInst() << " : " << *I.getType()
+                        << "\n";);
+    }
+    return AdjV;
+  };
+
   auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
     if ((IsLoad && !Acc.isWriteOrAssumption()) || (!IsLoad && !Acc.isRead()))
       return true;
@@ -417,7 +429,10 @@ static bool getPotentialCopiesOfMemoryValue(
     if (IsLoad) {
       assert(isa<LoadInst>(I) && "Expected load or store instruction only!");
       if (!Acc.isWrittenValueUnknown()) {
-        NewCopies.push_back(Acc.getWrittenValue());
+        Value *V = AdjustWrittenValueType(Acc, *Acc.getWrittenValue());
+        if (!V)
+          return false;
+        NewCopies.push_back(V);
         NewCopyOrigins.push_back(Acc.getRemoteInst());
         return true;
       }
@@ -428,7 +443,10 @@ static bool getPotentialCopiesOfMemoryValue(
                           << *Acc.getRemoteInst() << "\n";);
         return false;
       }
-      NewCopies.push_back(SI->getValueOperand());
+      Value *V = AdjustWrittenValueType(Acc, *SI->getValueOperand());
+      if (!V)
+        return false;
+      NewCopies.push_back(V);
       NewCopyOrigins.push_back(SI);
     } else {
       assert(isa<StoreInst>(I) && "Expected load or store instruction only!");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 001ef55ba472..42158e4e05dd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1043,12 +1043,14 @@ struct AAPointerInfoImpl
     const auto &NoSyncAA = A.getAAFor<AANoSync>(
         QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
     const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
-        IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
+        IRPosition::function(Scope), &QueryingAA, DepClassTy::NONE);
     bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync();
     bool InstIsExecutedByInitialThreadOnly =
         ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I);
     bool InstIsExecutedInAlignedRegion =
         ExecDomainAA && ExecDomainAA->isExecutedInAlignedRegion(A, I);
+    if (InstIsExecutedInAlignedRegion || InstIsExecutedByInitialThreadOnly)
+      A.recordDependence(*ExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
 
     InformationCache &InfoCache = A.getInfoCache();
     bool IsThreadLocalObj =
@@ -1063,14 +1065,24 @@ struct AAPointerInfoImpl
     auto CanIgnoreThreadingForInst = [&](const Instruction &I) -> bool {
       if (IsThreadLocalObj || AllInSameNoSyncFn)
         return true;
-      if (!ExecDomainAA)
+      const auto *FnExecDomainAA =
+          I.getFunction() == &Scope
+              ? ExecDomainAA
+              : A.lookupAAFor<AAExecutionDomain>(
+                    IRPosition::function(*I.getFunction()), &QueryingAA,
+                    DepClassTy::NONE);
+      if (!FnExecDomainAA)
         return false;
       if (InstIsExecutedInAlignedRegion ||
-          ExecDomainAA->isExecutedInAlignedRegion(A, I))
+          FnExecDomainAA->isExecutedInAlignedRegion(A, I)) {
+        A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
         return true;
+      }
       if (InstIsExecutedByInitialThreadOnly &&
-          ExecDomainAA->isExecutedByInitialThreadOnly(I))
+          FnExecDomainAA->isExecutedByInitialThreadOnly(I)) {
+        A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
         return true;
+      }
       return false;
     };
 
@@ -4161,12 +4173,14 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
       return true;
     if (auto *LI = dyn_cast<LoadInst>(V)) {
       if (llvm::all_of(LI->uses(), [&](const Use &U) {
-            return InfoCache.isOnlyUsedByAssume(
-                       cast<Instruction>(*U.getUser())) ||
-                   A.isAssumedDead(U, this, nullptr, UsedAssumedInformation);
+            auto &UserI = cast<Instruction>(*U.getUser());
+            if (InfoCache.isOnlyUsedByAssume(UserI)) {
+              if (AssumeOnlyInst)
+                AssumeOnlyInst->insert(&UserI);
+              return true;
+            }
+            return A.isAssumedDead(U, this, nullptr, UsedAssumedInformation);
           })) {
-        if (AssumeOnlyInst)
-          AssumeOnlyInst->insert(LI);
         return true;
       }
     }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index bee154dab10f..eb499a1aa912 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -188,9 +188,9 @@ struct AAICVTracker;
 struct OMPInformationCache : public InformationCache {
   OMPInformationCache(Module &M, AnalysisGetter &AG,
                       BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
-                      KernelSet &Kernels)
+                      KernelSet &Kernels, bool OpenMPPostLink)
       : InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
-        Kernels(Kernels) {
+        Kernels(Kernels), OpenMPPostLink(OpenMPPostLink) {
 
     OMPBuilder.initialize();
     initializeRuntimeFunctions(M);
@@ -448,6 +448,24 @@ struct OMPInformationCache : public InformationCache {
     CI->setCallingConv(Fn->getCallingConv());
   }
 
+  // Helper function to determine if it's legal to create a call to the runtime
+  // functions.
+  bool runtimeFnsAvailable(ArrayRef<RuntimeFunction> Fns) {
+    // We can always emit calls if we haven't yet linked in the runtime.
+    if (!OpenMPPostLink)
+      return true;
+
+    // Once the runtime has been already been linked in we cannot emit calls to
+    // any undefined functions.
+    for (RuntimeFunction Fn : Fns) {
+      RuntimeFunctionInfo &RFI = RFIs[Fn];
+
+      if (RFI.Declaration && RFI.Declaration->isDeclaration())
+        return false;
+    }
+    return true;
+  }
+
   /// Helper to initialize all runtime function information for those defined
   /// in OpenMPKinds.def.
   void initializeRuntimeFunctions(Module &M) {
@@ -523,6 +541,9 @@ struct OMPInformationCache : public InformationCache {
 
   /// Collection of known OpenMP runtime functions..
   DenseSet<const Function *> RTLFunctions;
+
+  /// Indicates if we have already linked in the OpenMP device library.
+  bool OpenMPPostLink = false;
 };
 
 template <typename Ty, bool InsertInvalidates = true>
@@ -1412,7 +1433,10 @@ private:
       Changed |= WasSplit;
       return WasSplit;
     };
-    RFI.foreachUse(SCC, SplitMemTransfers);
+    if (OMPInfoCache.runtimeFnsAvailable(
+            {OMPRTL___tgt_target_data_begin_mapper_issue,
+             OMPRTL___tgt_target_data_begin_mapper_wait}))
+      RFI.foreachUse(SCC, SplitMemTransfers);
 
     return Changed;
   }
@@ -2656,7 +2680,9 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
 
   bool isExecutedInAlignedRegion(Attributor &A,
                                  const Instruction &I) const override {
-    if (!isValidState() || isa<CallBase>(I))
+    assert(I.getFunction() == getAnchorScope() &&
+           "Instruction is out of scope!");
+    if (!isValidState())
       return false;
 
     const Instruction *CurI;
@@ -2667,14 +2693,18 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
       auto *CB = dyn_cast<CallBase>(CurI);
       if (!CB)
         continue;
+      if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
+        break;
+      }
       const auto &It = CEDMap.find(CB);
       if (It == CEDMap.end())
         continue;
-      if (!It->getSecond().IsReachedFromAlignedBarrierOnly)
+      if (!It->getSecond().IsReachingAlignedBarrierOnly)
         return false;
+      break;
     } while ((CurI = CurI->getNextNonDebugInstruction()));
 
-    if (!CurI && !BEDMap.lookup(I.getParent()).IsReachedFromAlignedBarrierOnly)
+    if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly)
       return false;
 
     // Check backward until a call or the block beginning is reached.
@@ -2683,12 +2713,16 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
       auto *CB = dyn_cast<CallBase>(CurI);
       if (!CB)
         continue;
+      if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
+        break;
+      }
       const auto &It = CEDMap.find(CB);
       if (It == CEDMap.end())
         continue;
       if (!AA::isNoSyncInst(A, *CB, *this)) {
-        if (It->getSecond().IsReachedFromAlignedBarrierOnly)
+        if (It->getSecond().IsReachedFromAlignedBarrierOnly) {
           break;
+        }
         return false;
       }
 
@@ -2984,7 +3018,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
         if (EDAA.getState().isValidState()) {
           const auto &CalleeED = EDAA.getFunctionExecutionDomain();
           ED.IsReachedFromAlignedBarrierOnly =
-              CalleeED.IsReachedFromAlignedBarrierOnly;
+              CallED.IsReachedFromAlignedBarrierOnly =
+                  CalleeED.IsReachedFromAlignedBarrierOnly;
           AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly;
           if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly)
             ED.EncounteredNonLocalSideEffect |=
@@ -2999,8 +3034,9 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
             continue;
           }
         }
-        ED.IsReachedFromAlignedBarrierOnly =
-            IsNoSync && ED.IsReachedFromAlignedBarrierOnly;
+        if (!IsNoSync)
+          ED.IsReachedFromAlignedBarrierOnly =
+              CallED.IsReachedFromAlignedBarrierOnly = false;
         AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly;
         ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory();
         if (!IsNoSync)
@@ -3914,6 +3950,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
   bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
 
+    // We cannot change to SPMD mode if the runtime functions aren't availible.
+    if (!OMPInfoCache.runtimeFnsAvailable(
+            {OMPRTL___kmpc_get_hardware_thread_id_in_block,
+             OMPRTL___kmpc_barrier_simple_spmd}))
+      return false;
+
     if (!SPMDCompatibilityTracker.isAssumed()) {
       for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
         if (!NonCompatibleI)
@@ -4021,6 +4063,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
     if (!ReachedKnownParallelRegions.isValidState())
       return ChangeStatus::UNCHANGED;
 
+    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+    if (!OMPInfoCache.runtimeFnsAvailable(
+            {OMPRTL___kmpc_get_hardware_num_threads_in_block,
+             OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic,
+             OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
+      return ChangeStatus::UNCHANGED;
+
     const int InitModeArgNo = 1;
     const int InitUseStateMachineArgNo = 2;
 
@@ -4167,7 +4216,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
     BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
 
     Module &M = *Kernel->getParent();
-    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
     FunctionCallee BlockHwSizeFn =
         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
             M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
@@ -5343,7 +5391,10 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
   BumpPtrAllocator Allocator;
   CallGraphUpdater CGUpdater;
 
-  OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels);
+  bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
+                  LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
+  OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels,
+                                PostLink);
 
   unsigned MaxFixpointIterations =
       (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
@@ -5417,9 +5468,11 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
   CallGraphUpdater CGUpdater;
   CGUpdater.initialize(CG, C, AM, UR);
 
+  bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
+                  LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
   SetVector<Function *> Functions(SCC.begin(), SCC.end());
   OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
-                                /*CGSCC*/ &Functions, Kernels);
+                                /*CGSCC*/ &Functions, Kernels, PostLink);
 
   unsigned MaxFixpointIterations =
       (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 1480a0ff9e2f..de3095852048 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3184,16 +3184,6 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
     }
     break;
   }
-  case Instruction::And: {
-    const APInt *BOC;
-    if (match(BOp1, m_APInt(BOC))) {
-      // If we have ((X & C) == C), turn it into ((X & C) != 0).
-      if (C == *BOC && C.isPowerOf2())
-        return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
-                            BO, Constant::getNullValue(RHS->getType()));
-    }
-    break;
-  }
   case Instruction::UDiv:
     if (C.isZero()) {
       // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
@@ -5653,6 +5643,12 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
         }
       }
     }
+
+    // Op0 eq C_Pow2 -> Op0 ne 0 if Op0 is known to be C_Pow2 or zero.
+    if (Op1Known.isConstant() && Op1Known.getConstant().isPowerOf2() &&
+        (Op0Known & Op1Known) == Op0Known)
+      return new ICmpInst(CmpInst::getInversePredicate(Pred), Op0,
+                          ConstantInt::getNullValue(Op1->getType()));
     break;
   }
   case ICmpInst::ICMP_ULT: {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index 31cdd2ee56b9..b2ed95b05e04 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -2930,7 +2930,8 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
     return;
 
   unsigned BitWidth = DL.getPointerTypeSizeInBits(NewTy);
-  if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+  if (BitWidth == OldLI.getType()->getScalarSizeInBits() &&
+      !getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
     MDNode *NN = MDNode::get(OldLI.getContext(), std::nullopt);
     NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
   }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index bb8544356c6d..0bd519a6d945 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5959,7 +5959,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
   // Saves the list of values that are used in the loop but are defined outside
   // the loop (not including non-instruction values such as arguments and
   // constants).
-  SmallPtrSet<Value *, 8> LoopInvariants;
+  SmallPtrSet<Instruction *, 8> LoopInvariants;
 
   for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
     for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -6085,11 +6085,16 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
     for (auto *Inst : LoopInvariants) {
       // FIXME: The target might use more than one register for the type
       // even in the scalar case.
-      unsigned Usage =
-          VFs[i].isScalar() ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
+      bool IsScalar = all_of(Inst->users(), [&](User *U) {
+        auto *I = cast<Instruction>(U);
+        return TheLoop != LI->getLoopFor(I->getParent()) ||
+               isScalarAfterVectorization(I, VFs[i]);
+      });
+
+      ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[i];
       unsigned ClassID =
-          TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType());
-      Invariant[ClassID] += Usage;
+          TTI.getRegisterClassForType(VF.isVector(), Inst->getType());
+      Invariant[ClassID] += GetRegUsage(Inst->getType(), VF);
     }
 
     LLVM_DEBUG({
