Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms')
12 files changed, 198 insertions, 522 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 34c8a380448e..503ce019dc84 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -29,7 +28,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" @@ -398,6 +396,54 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) { return true; } +/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids +/// pessimistic codegen that has to account for setting errno and can enable +/// vectorization. +static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI, + TargetLibraryInfo &TLI, AssumptionCache &AC, + DominatorTree &DT) { + // Match a call to sqrt mathlib function. + auto *Call = dyn_cast<CallInst>(&I); + if (!Call) + return false; + + Module *M = Call->getModule(); + LibFunc Func; + if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func)) + return false; + + if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl) + return false; + + // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created + // (because NNAN or the operand arg must not be less than -0.0) and (2) we + // would not end up lowering to a libcall anyway (which could change the value + // of errno), then: + // (1) errno won't be set. + // (2) it is safe to convert this to an intrinsic call. + Type *Ty = Call->getType(); + Value *Arg = Call->getArgOperand(0); + if (TTI.haveFastSqrt(Ty) && + (Call->hasNoNaNs() || + cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, &I, + &DT))) { + IRBuilder<> Builder(&I); + IRBuilderBase::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(Call->getFastMathFlags()); + + Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty); + Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt"); + I.replaceAllUsesWith(NewSqrt); + + // Explicitly erase the old call because a call with side effects is not + // trivially dead. + I.eraseFromParent(); + return true; + } + + return false; +} + // Check if this array of constants represents a cttz table. // Iterate over the elements from \p Table by trying to find/match all // the numbers from 0 to \p InputBits that should represent cttz results. @@ -869,159 +915,13 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) { return true; } -/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids -/// pessimistic codegen that has to account for setting errno and can enable -/// vectorization. 
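For context on the foldSqrt transform restored above: a libm sqrt() call may set errno, so the optimizer has to treat every call as having a side effect, which in turn blocks vectorization; rewriting the call to the llvm.sqrt intrinsic lifts that constraint once NaN creation can be ruled out. A minimal illustration of a loop this fold can unblock (not part of the patch; the function name is hypothetical):

    #include <cmath>

    // In[I] * In[I] + 1.0f is either NaN or at least 1.0f, so it can never
    // compare ordered-less-than -0.0: sqrt cannot create a new NaN or set
    // errno here. That makes the call eligible for the libcall-to-intrinsic
    // rewrite, leaving the loop body side-effect free and vectorizable.
    void normalize(float *Out, const float *In, int N) {
      for (int I = 0; I < N; ++I)
        Out[I] = std::sqrt(In[I] * In[I] + 1.0f);
    }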
-static bool foldSqrt(CallInst *Call, TargetTransformInfo &TTI, - TargetLibraryInfo &TLI, AssumptionCache &AC, - DominatorTree &DT) { - Module *M = Call->getModule(); - - // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created - // (because NNAN or the operand arg must not be less than -0.0) and (2) we - // would not end up lowering to a libcall anyway (which could change the value - // of errno), then: - // (1) errno won't be set. - // (2) it is safe to convert this to an intrinsic call. - Type *Ty = Call->getType(); - Value *Arg = Call->getArgOperand(0); - if (TTI.haveFastSqrt(Ty) && - (Call->hasNoNaNs() || - cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, Call, - &DT))) { - IRBuilder<> Builder(Call); - IRBuilderBase::FastMathFlagGuard Guard(Builder); - Builder.setFastMathFlags(Call->getFastMathFlags()); - - Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty); - Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt"); - Call->replaceAllUsesWith(NewSqrt); - - // Explicitly erase the old call because a call with side effects is not - // trivially dead. - Call->eraseFromParent(); - return true; - } - - return false; -} - -/// Try to expand strcmp(P, "x") calls. -static bool expandStrcmp(CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) { - Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); - - // Trivial cases are optimized during inst combine - if (Str1P == Str2P) - return false; - - StringRef Str1, Str2; - bool HasStr1 = getConstantStringInfo(Str1P, Str1); - bool HasStr2 = getConstantStringInfo(Str2P, Str2); - - Value *NonConstantP = nullptr; - StringRef ConstantStr; - - if (!HasStr1 && HasStr2 && Str2.size() == 1) { - NonConstantP = Str1P; - ConstantStr = Str2; - } else if (!HasStr2 && HasStr1 && Str1.size() == 1) { - NonConstantP = Str2P; - ConstantStr = Str1; - } else { - return false; - } - - // Check if strcmp result is only used in a comparison with zero - if (!isOnlyUsedInZeroComparison(CI)) - return false; - - // For strcmp(P, "x") do the following transformation: - // - // (before) - // dst = strcmp(P, "x") - // - // (after) - // v0 = P[0] - 'x' - // [if v0 == 0] - // v1 = P[1] - // dst = phi(v0, v1) - // - - IRBuilder<> B(CI->getParent()); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - - Type *RetType = CI->getType(); - - B.SetInsertPoint(CI); - BasicBlock *InitialBB = B.GetInsertBlock(); - Value *Str1FirstCharacterValue = - B.CreateZExt(B.CreateLoad(B.getInt8Ty(), NonConstantP), RetType); - Value *Str2FirstCharacterValue = - ConstantInt::get(RetType, static_cast<unsigned char>(ConstantStr[0])); - Value *FirstCharacterSub = - B.CreateNSWSub(Str1FirstCharacterValue, Str2FirstCharacterValue); - Value *IsFirstCharacterSubZero = - B.CreateICmpEQ(FirstCharacterSub, ConstantInt::get(RetType, 0)); - Instruction *IsFirstCharacterSubZeroBBTerminator = SplitBlockAndInsertIfThen( - IsFirstCharacterSubZero, CI, /*Unreachable*/ false, - /*BranchWeights*/ nullptr, &DTU); - - B.SetInsertPoint(IsFirstCharacterSubZeroBBTerminator); - B.GetInsertBlock()->setName("strcmp_expand_sub_is_zero"); - BasicBlock *IsFirstCharacterSubZeroBB = B.GetInsertBlock(); - Value *Str1SecondCharacterValue = B.CreateZExt( - B.CreateLoad(B.getInt8Ty(), B.CreateConstInBoundsGEP1_64( - B.getInt8Ty(), NonConstantP, 1)), - RetType); - - B.SetInsertPoint(CI); - B.GetInsertBlock()->setName("strcmp_expand_sub_join"); - - PHINode *Result = B.CreatePHI(RetType, 2); - Result->addIncoming(FirstCharacterSub, InitialBB); - 
Result->addIncoming(Str1SecondCharacterValue, IsFirstCharacterSubZeroBB); - - CI->replaceAllUsesWith(Result); - CI->eraseFromParent(); - - MadeCFGChange = true; - - return true; -} - -static bool foldLibraryCalls(Instruction &I, TargetTransformInfo &TTI, - TargetLibraryInfo &TLI, DominatorTree &DT, - AssumptionCache &AC, bool &MadeCFGChange) { - CallInst *CI = dyn_cast<CallInst>(&I); - if (!CI) - return false; - - LibFunc Func; - Module *M = I.getModule(); - if (!TLI.getLibFunc(*CI, Func) || !isLibFuncEmittable(M, &TLI, Func)) - return false; - - switch (Func) { - case LibFunc_sqrt: - case LibFunc_sqrtf: - case LibFunc_sqrtl: - return foldSqrt(CI, TTI, TLI, AC, DT); - case LibFunc_strcmp: - return expandStrcmp(CI, DT, MadeCFGChange); - default: - break; - } - - return false; -} - /// This is the entry point for folds that could be implemented in regular /// InstCombine, but they are separated because they are not expected to /// occur frequently and/or have more than a constant-length pattern match. static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA, - AssumptionCache &AC, bool &MadeCFGChange) { + AssumptionCache &AC) { bool MadeChange = false; for (BasicBlock &BB : F) { // Ignore unreachable basic blocks. @@ -1046,7 +946,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, // NOTE: This function introduces erasing of the instruction `I`, so it // needs to be called at the end of this sequence, otherwise we may make // bugs. - MadeChange |= foldLibraryCalls(I, TTI, TLI, DT, AC, MadeCFGChange); + MadeChange |= foldSqrt(I, TTI, TLI, AC, DT); } } @@ -1062,12 +962,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, /// handled in the callers of this function. static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, DominatorTree &DT, - AliasAnalysis &AA, bool &ChangedCFG) { + AliasAnalysis &AA) { bool MadeChange = false; const DataLayout &DL = F.getParent()->getDataLayout(); TruncInstCombine TIC(AC, TLI, DL, DT); MadeChange |= TIC.run(F); - MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, ChangedCFG); + MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC); return MadeChange; } @@ -1078,21 +978,12 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F, auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &TTI = AM.getResult<TargetIRAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); - - bool MadeCFGChange = false; - - if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) { + if (!runImpl(F, AC, TTI, TLI, DT, AA)) { // No changes, all analyses are preserved. return PreservedAnalyses::all(); } - // Mark all the analyses that instcombine updates as preserved. 
PreservedAnalyses PA; - - if (MadeCFGChange) - PA.preserve<DominatorTreeAnalysis>(); - else - PA.preserveSet<CFGAnalyses>(); - + PA.preserveSet<CFGAnalyses>(); return PA; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp index d78ab1c1ea28..d0606c15f3d5 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -194,12 +194,49 @@ bool Lowerer::hasEscapePath(const CoroBeginInst *CB, for (auto *DA : It->second) Visited.insert(DA->getParent()); + SmallPtrSet<const BasicBlock *, 32> EscapingBBs; + for (auto *U : CB->users()) { + // Uses from coroutine intrinsics are not a problem. + if (isa<CoroFreeInst, CoroSubFnInst, CoroSaveInst>(U)) + continue; + + // Conservatively treat every other use as a potential escape. + // + // Note that the major user of switch-ABI coroutines (C++) will store + // resume.fn, destroy.fn and the index to the coroutine frame immediately. + // So the parent of the coro.begin in C++ will always be escaping, and + // we can't get any performance benefit for C++ by improving the + // precision of the method. + // + // We still perform the check because we want switch-ABI LLVM coroutines + // to be as self-contained as possible, rather than a by-product of + // C++20 coroutines. + EscapingBBs.insert(cast<Instruction>(U)->getParent()); + } + + bool PotentiallyEscaped = false; + do { const auto *BB = Worklist.pop_back_val(); if (!Visited.insert(BB).second) continue; - if (TIs.count(BB)) - return true; + + // A path-insensitive marker to test whether the coro.begin escapes. + // It is intentionally path-insensitive, at some cost in precision, + // because we don't want the analysis to be too slow. + PotentiallyEscaped |= EscapingBBs.count(BB); + + if (TIs.count(BB)) { + if (!BB->getTerminator()->isExceptionalTerminator() || PotentiallyEscaped) + return true; + + // If the function exits via an exceptional terminator, the memory used + // by the coroutine frame is released automatically by stack unwinding. + // So we can consider that the coro.begin doesn't escape on such paths. + + continue; + } // Conservatively say that there is potentially a path. if (!--Limit) @@ -236,36 +273,36 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const { // memory location storing that value and not the virtual register. SmallPtrSet<BasicBlock *, 8> Terminators; - // First gather all of the non-exceptional terminators for the function. + // First gather all of the terminators for the function. // Consider the final coro.suspend as the real terminator when the current // function is a coroutine. - for (BasicBlock &B : *F) { - auto *TI = B.getTerminator(); - if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() && - !isa<UnreachableInst>(TI)) - Terminators.insert(&B); - } + for (BasicBlock &B : *F) { + auto *TI = B.getTerminator(); + + if (TI->getNumSuccessors() != 0 || isa<UnreachableInst>(TI)) + continue; + + Terminators.insert(&B); + } // Filter out the coro.destroys that lie along exceptional paths. SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins; for (const auto &It : DestroyAddr) { - // If there is any coro.destroy dominates all of the terminators for the - // coro.begin, we could know the corresponding coro.begin wouldn't escape.
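The escape walk added to hasEscapePath above is easier to read in isolation. A simplified, self-contained sketch of the same idea (stand-in types, not the LLVM classes; it omits the visit limit and the coro.destroy filtering that the real code layers on top):

    #include <set>
    #include <vector>

    struct Block {
      std::vector<Block *> Succs;
      bool HasEscapingUse = false;    // holds a non-intrinsic use of coro.begin
      bool IsExit = false;            // function-ending block
      bool IsExceptionalExit = false; // ends in an exceptional terminator
    };

    bool mayEscape(Block *Entry) {
      std::vector<Block *> Worklist{Entry};
      std::set<Block *> Visited;
      bool PotentiallyEscaped = false; // path-insensitive: once set, stays set
      while (!Worklist.empty()) {
        Block *B = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(B).second)
          continue;
        PotentiallyEscaped |= B->HasEscapingUse;
        if (B->IsExit) {
          // Normal exits count as escapes outright; exceptional exits count
          // only if an escaping use was already seen, because stack
          // unwinding releases the coroutine frame on its own.
          if (!B->IsExceptionalExit || PotentiallyEscaped)
            return true;
          continue;
        }
        Worklist.insert(Worklist.end(), B->Succs.begin(), B->Succs.end());
      }
      return false;
    }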
- for (Instruction *DA : It.second) { - if (llvm::all_of(Terminators, [&](auto *TI) { - return DT.dominates(DA, TI->getTerminator()); - })) { - ReferencedCoroBegins.insert(It.first); - break; - } - } - - // Whether there is any paths from coro.begin to Terminators which not pass - // through any of the coro.destroys. + // If every terminator is dominated by some coro.destroy, we know the + // corresponding coro.begin doesn't escape. + // + // Otherwise, hasEscapePath decides whether there is any path from the + // coro.begin to the Terminators that does not pass through any of the + // coro.destroys. // // hasEscapePath is relatively slow, so we avoid running it as much as // possible. - if (!ReferencedCoroBegins.count(It.first) && + if (llvm::all_of(Terminators, + [&](auto *TI) { + return llvm::any_of(It.second, [&](auto *DA) { + return DT.dominates(DA, TI->getTerminator()); + }); + }) || !hasEscapePath(It.first, Terminators)) ReferencedCoroBegins.insert(It.first); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 3d6c501e4596..ac5dbc7cfb2a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -78,11 +78,6 @@ static cl::opt<unsigned> MaxClones( "The maximum number of clones allowed for a single function " "specialization")); -static cl::opt<unsigned> MaxIncomingPhiValues( - "funcspec-max-incoming-phi-values", cl::init(4), cl::Hidden, cl::desc( - "The maximum number of incoming values a PHI node can have to be " - "considered during the specialization bonus estimation")); - static cl::opt<unsigned> MinFunctionSize( "funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc( "Don't specialize functions that have less than this number of " @@ -109,7 +104,6 @@ static cl::opt<bool> SpecializeLiteralConstant( // the combination of size and latency savings in comparison to the non // specialized version of the function. static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList, - DenseSet<BasicBlock *> &DeadBlocks, ConstMap &KnownConstants, SCCPSolver &Solver, BlockFrequencyInfo &BFI, TargetTransformInfo &TTI) { @@ -124,12 +118,6 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList, if (!Weight) continue; - // These blocks are considered dead as far as the InstCostVisitor is - // concerned. They haven't been proven dead yet by the Solver, but - // may become if we propagate the constant specialization arguments. - if (!DeadBlocks.insert(BB).second) - continue; - for (Instruction &I : *BB) { // Disregard SSA copies. if (auto *II = dyn_cast<IntrinsicInst>(&I)) @@ -164,19 +152,9 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) { return nullptr; } -Cost InstCostVisitor::getBonusFromPendingPHIs() { - Cost Bonus = 0; - while (!PendingPHIs.empty()) { - Instruction *Phi = PendingPHIs.pop_back_val(); - Bonus += getUserBonus(Phi); - } - return Bonus; -} - Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) { // Cache the iterator before visiting. - LastVisited = Use ?
KnownConstants.insert({Use, C}).first - : KnownConstants.end(); + LastVisited = KnownConstants.insert({Use, C}).first; if (auto *I = dyn_cast<SwitchInst>(User)) return estimateSwitchInst(*I); @@ -203,15 +181,13 @@ Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) { for (auto *U : User->users()) if (auto *UI = dyn_cast<Instruction>(U)) - if (UI != User && Solver.isBlockExecutable(UI->getParent())) + if (Solver.isBlockExecutable(UI->getParent())) Bonus += getUserBonus(UI, User, C); return Bonus; } Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - if (I.getCondition() != LastVisited->first) return 0; @@ -232,13 +208,10 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) { WorkList.push_back(BB); } - return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI, - TTI); + return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI); } Cost InstCostVisitor::estimateBranchInst(BranchInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - if (I.getCondition() != LastVisited->first) return 0; @@ -250,39 +223,10 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) { Succ->getUniquePredecessor() == I.getParent()) WorkList.push_back(Succ); - return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI, - TTI); -} - -Constant *InstCostVisitor::visitPHINode(PHINode &I) { - if (I.getNumIncomingValues() > MaxIncomingPhiValues) - return nullptr; - - bool Inserted = VisitedPHIs.insert(&I).second; - Constant *Const = nullptr; - - for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) { - Value *V = I.getIncomingValue(Idx); - if (auto *Inst = dyn_cast<Instruction>(V)) - if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx))) - continue; - Constant *C = findConstantFor(V, KnownConstants); - if (!C) { - if (Inserted) - PendingPHIs.push_back(&I); - return nullptr; - } - if (!Const) - Const = C; - else if (C != Const) - return nullptr; - } - return Const; + return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI); } Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - if (isGuaranteedNotToBeUndefOrPoison(LastVisited->second)) return LastVisited->second; return nullptr; @@ -309,8 +253,6 @@ Constant *InstCostVisitor::visitCallBase(CallBase &I) { } Constant *InstCostVisitor::visitLoadInst(LoadInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - if (isa<ConstantPointerNull>(LastVisited->second)) return nullptr; return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL); @@ -333,8 +275,6 @@ Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { } Constant *InstCostVisitor::visitSelectInst(SelectInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - if (I.getCondition() != LastVisited->first) return nullptr; @@ -350,8 +290,6 @@ Constant *InstCostVisitor::visitCastInst(CastInst &I) { } Constant *InstCostVisitor::visitCmpInst(CmpInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - bool Swap = I.getOperand(1) == LastVisited->first; Value *V = Swap ? 
I.getOperand(0) : I.getOperand(1); Constant *Other = findConstantFor(V, KnownConstants); @@ -365,14 +303,10 @@ Constant *InstCostVisitor::visitCmpInst(CmpInst &I) { } Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL); } Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - bool Swap = I.getOperand(1) == LastVisited->first; Value *V = Swap ? I.getOperand(0) : I.getOperand(1); Constant *Other = findConstantFor(V, KnownConstants); @@ -779,17 +713,13 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost, AllSpecs[Index].CallSites.push_back(&CS); } else { // Calculate the specialisation gain. - Cost Score = 0; + Cost Score = 0 - SpecCost; InstCostVisitor Visitor = getInstCostVisitorFor(F); for (ArgInfo &A : S.Args) Score += getSpecializationBonus(A.Formal, A.Actual, Visitor); - Score += Visitor.getBonusFromPendingPHIs(); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score = " - << Score << "\n"); // Discard unprofitable specialisations. - if (!ForceSpecialization && Score <= SpecCost) + if (!ForceSpecialization && Score <= 0) continue; // Create a new specialisation entry. diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index afd6e034f46d..767b7c7defbb 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -906,7 +906,7 @@ InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) { auto NewFoldedConst = [&](bool IsTrueArm, Value *V) { bool IsCastOpRHS = (CastOp == RHS); - bool IsZExt = isa<ZExtInst>(CastOp); + bool IsZExt = isa<ZExtOperator>(CastOp); Constant *C; if (IsTrueArm) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 3e3be536defc..597cec8e61c9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -1777,6 +1777,20 @@ void CHR::cloneScopeBlocks(CHRScope *Scope, BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); NewBlocks.push_back(NewBB); VMap[BB] = NewBB; + + // Unreachable predecessors will not be cloned and will not have an edge + // to the cloned block. As such, also remove them from any phi nodes. + // To avoid iterator invalidation, first collect the dead predecessors + // from the first phi node, and then perform the actual removal. 
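The ControlHeightReduction hunk above follows a common two-phase shape: collect what must be removed while the data structure is still being read, then mutate. Restated as a standalone helper, assuming the usual LLVM headers and using namespace llvm (a sketch, not the patch itself):

    // Every PHI in a block lists the same incoming blocks, so the dead
    // predecessors can be collected once, from the first PHI, before any
    // removal mutates the incoming lists. The removal loop then uses
    // make_early_inc_range because removeIncomingValue may erase a PHI once
    // its last incoming value is dropped.
    static void removeUnreachableIncomingValues(BasicBlock &BB,
                                                const DominatorTree &DT) {
      auto *FirstPN = dyn_cast<PHINode>(BB.begin());
      if (!FirstPN)
        return; // No PHIs, nothing to clean up.
      SmallVector<BasicBlock *> DeadPreds;
      for (BasicBlock *Pred : FirstPN->blocks())
        if (!DT.isReachableFromEntry(Pred))
          DeadPreds.push_back(Pred);
      for (PHINode &PN : make_early_inc_range(BB.phis()))
        for (BasicBlock *Pred : DeadPreds)
          PN.removeIncomingValue(Pred);
    }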
+ if (auto *FirstPN = dyn_cast<PHINode>(NewBB->begin())) { + SmallVector<BasicBlock *> DeadPreds; + for (BasicBlock *Pred : FirstPN->blocks()) + if (!DT.isReachableFromEntry(Pred)) + DeadPreds.push_back(Pred); + for (PHINode &PN : make_early_inc_range(NewBB->phis())) + for (BasicBlock *Pred : DeadPreds) + PN.removeIncomingValue(Pred); + } } // Place the cloned blocks right after the original blocks (right before the diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 21f0b1a92293..75adcabc0d34 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -898,7 +898,9 @@ bool GCOVProfiler::emitProfileNotes( if (Line == Loc.getLine()) continue; Line = Loc.getLine(); - if (SP != getDISubprogram(Loc.getScope())) + MDNode *Scope = Loc.getScope(); + // TODO: Handle blocks from another file due to #line, #include, etc. + if (isa<DILexicalBlockFile>(Scope) || SP != getDISubprogram(Scope)) continue; GCOVLines &Lines = Block.getFile(Filename); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 15628d32280d..2b88dd08d88b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -424,7 +424,7 @@ static Decomposition decompose(Value *V, return MergeResults(Op0, Op1, IsSigned); ConstantInt *CI; - if (match(V, m_NSWMul(m_Value(Op0), m_ConstantInt(CI)))) { + if (match(V, m_NSWMul(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI)) { auto Result = decompose(Op0, Preconditions, IsSigned, DL); Result.mul(CI->getSExtValue()); return Result; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 24390f1b54f6..5b8f1b00dc03 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1269,6 +1269,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { if (IsLoadCSE) { LoadInst *NLoadI = cast<LoadInst>(AvailableVal); combineMetadataForCSE(NLoadI, LoadI, false); + LVI->forgetValue(NLoadI); }; // If the returned value is the load itself, replace with poison. 
This can @@ -1461,6 +1462,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { for (LoadInst *PredLoadI : CSELoads) { combineMetadataForCSE(PredLoadI, LoadI, true); + LVI->forgetValue(PredLoadI); } LoadI->replaceAllUsesWith(PN); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 68642a01b37c..00937e0d734a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -69,7 +69,6 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); STATISTIC(NumCallSlot, "Number of call slot optimizations performed"); -STATISTIC(NumStackMove, "Number of stack-move optimizations performed"); namespace { @@ -731,23 +730,6 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI, return true; } - // If this is a load-store pair from a stack slot to a stack slot, we - // might be able to perform the stack-move optimization just as we do for - // memcpys from an alloca to an alloca. - if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) { - if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) { - if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca, - DL.getTypeStoreSize(T), BAA)) { - // Avoid invalidating the iterator. - BBI = SI->getNextNonDebugInstruction()->getIterator(); - eraseInstruction(SI); - eraseInstruction(LI); - ++NumMemCpyInstr; - return true; - } - } - } - return false; } @@ -1426,217 +1408,6 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, return true; } -// Attempts to optimize the pattern whereby memory is copied from an alloca to -// another alloca, where the two allocas don't have conflicting mod/ref. If -// successful, the two allocas can be merged into one and the transfer can be -// deleted. This pattern is generated frequently in Rust, due to the ubiquity of -// move operations in that language. -// -// Once we determine that the optimization is safe to perform, we replace all -// uses of the destination alloca with the source alloca. We also "shrink wrap" -// the lifetime markers of the single merged alloca to before the first use -// and after the last use. Note that the "shrink wrapping" procedure is a safe -// transformation only because we restrict the scope of this optimization to -// allocas that aren't captured. -bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, - AllocaInst *DestAlloca, - AllocaInst *SrcAlloca, uint64_t Size, - BatchAAResults &BAA) { - LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n" - << *Store << "\n"); - - // Make sure the two allocas are in the same address space. - if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) { - LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n"); - return false; - } - - // 1. Check that copy is full. Calculate the static size of the allocas to be - // merged, bail out if we can't. 
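For orientation while reading this removal: performStackMoveOptzn targeted full-object copies between two non-escaping stack slots of the same static size, merging the slots and deleting the copy. In source terms the shape is roughly the following (illustrative C++; names are hypothetical):

    #include <cstring>

    struct Big { char Bytes[256]; };

    int consume(const Big &B);

    int movePattern(const Big &Init) {
      Big Src = Init;                        // one alloca
      Big Dst;                               // another alloca, same size
      std::memcpy(&Dst, &Src, sizeof(Big));  // full-size slot-to-slot copy
      return consume(Dst);                   // Src is never used again
    }

Rust front ends emit this pattern frequently for moves, which is why the optimization was introduced; the hunks here remove it.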
- const DataLayout &DL = DestAlloca->getModule()->getDataLayout(); - std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL); - if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) { - LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n"); - return false; - } - std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL); - if (!DestSize || DestSize->isScalable() || - Size != DestSize->getFixedValue()) { - LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n"); - return false; - } - - // 2-1. Check that src and dest are static allocas, which are not affected by - // stacksave/stackrestore. - if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() || - SrcAlloca->getParent() != Load->getParent() || - SrcAlloca->getParent() != Store->getParent()) - return false; - - // 2-2. Check that src and dest are never captured, unescaped allocas. Also - // collect lifetime markers first/last users in order to shrink wrap the - // lifetimes, and instructions with noalias metadata to remove them. - - SmallVector<Instruction *, 4> LifetimeMarkers; - Instruction *FirstUser = nullptr, *LastUser = nullptr; - SmallSet<Instruction *, 4> NoAliasInstrs; - - // Recursively track the user and check whether modified alias exist. - auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool { - bool CanBeNull, CanBeFreed; - return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); - }; - - auto CaptureTrackingWithModRef = - [&](Instruction *AI, - function_ref<bool(Instruction *)> ModRefCallback) -> bool { - SmallVector<Instruction *, 8> Worklist; - Worklist.push_back(AI); - unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking(); - Worklist.reserve(MaxUsesToExplore); - SmallSet<const Use *, 20> Visited; - while (!Worklist.empty()) { - Instruction *I = Worklist.back(); - Worklist.pop_back(); - for (const Use &U : I->uses()) { - if (Visited.size() >= MaxUsesToExplore) { - LLVM_DEBUG( - dbgs() - << "Stack Move: Exceeded max uses to see ModRef, bailing\n"); - return false; - } - if (!Visited.insert(&U).second) - continue; - switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) { - case UseCaptureKind::MAY_CAPTURE: - return false; - case UseCaptureKind::PASSTHROUGH: - // Instructions cannot have non-instruction users. - Worklist.push_back(cast<Instruction>(U.getUser())); - continue; - case UseCaptureKind::NO_CAPTURE: { - auto *UI = cast<Instruction>(U.getUser()); - if (DestAlloca->getParent() != UI->getParent()) - return false; - if (!FirstUser || UI->comesBefore(FirstUser)) - FirstUser = UI; - if (!LastUser || LastUser->comesBefore(UI)) - LastUser = UI; - if (UI->isLifetimeStartOrEnd()) { - // We note the locations of these intrinsic calls so that we can - // delete them later if the optimization succeeds, this is safe - // since both llvm.lifetime.start and llvm.lifetime.end intrinsics - // conceptually fill all the bytes of the alloca with an undefined - // value. - int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue(); - if (Size < 0 || Size == DestSize) { - LifetimeMarkers.push_back(UI); - continue; - } - } - if (UI->hasMetadata(LLVMContext::MD_noalias)) - NoAliasInstrs.insert(UI); - if (!ModRefCallback(UI)) - return false; - } - } - } - } - return true; - }; - - // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics, - // from the alloca to the Store. 
- ModRefInfo DestModRef = ModRefInfo::NoModRef; - MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size)); - auto DestModRefCallback = [&](Instruction *UI) -> bool { - // We don't care about the store itself. - if (UI == Store) - return true; - ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc); - // FIXME: For multi-BB cases, we need to see reachability from it to - // store. - // Bailout if Dest may have any ModRef before Store. - if (UI->comesBefore(Store) && isModOrRefSet(Res)) - return false; - DestModRef |= BAA.getModRefInfo(UI, DestLoc); - - return true; - }; - - if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback)) - return false; - - // 3. Check that, from after the Load to the end of the BB, - // 3-1. if the dest has any Mod, src has no Ref, and - // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes. - MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size)); - - auto SrcModRefCallback = [&](Instruction *UI) -> bool { - // Any ModRef before Load doesn't matter, also Load and Store can be - // ignored. - if (UI->comesBefore(Load) || UI == Load || UI == Store) - return true; - ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc); - if ((isModSet(DestModRef) && isRefSet(Res)) || - (isRefSet(DestModRef) && isModSet(Res))) - return false; - - return true; - }; - - if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback)) - return false; - - // We can do the transformation. First, align the allocas appropriately. - SrcAlloca->setAlignment( - std::max(SrcAlloca->getAlign(), DestAlloca->getAlign())); - - // Merge the two allocas. - DestAlloca->replaceAllUsesWith(SrcAlloca); - eraseInstruction(DestAlloca); - - // Drop metadata on the source alloca. - SrcAlloca->dropUnknownNonDebugMetadata(); - - // Do "shrink wrap" the lifetimes, if the original lifetime intrinsics exists. - if (!LifetimeMarkers.empty()) { - LLVMContext &C = SrcAlloca->getContext(); - IRBuilder<> Builder(C); - - ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size); - // Create a new lifetime start marker before the first user of src or alloca - // users. - Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator()); - Builder.CreateLifetimeStart(SrcAlloca, AllocaSize); - - // Create a new lifetime end marker after the last user of src or alloca - // users. - // FIXME: If the last user is the terminator for the bb, we can insert - // lifetime.end marker to the immidiate post-dominator, but currently do - // nothing. - if (!LastUser->isTerminator()) { - Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator()); - Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize); - } - - // Remove all other lifetime markers. - for (Instruction *I : LifetimeMarkers) - eraseInstruction(I); - } - - // As this transformation can cause memory accesses that didn't previously - // alias to begin to alias one another, we remove !noalias metadata from any - // uses of either alloca. This is conservative, but more precision doesn't - // seem worthwhile right now. - for (Instruction *I : NoAliasInstrs) - I->setMetadata(LLVMContext::MD_noalias, nullptr); - - LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n"); - NumStackMove++; - return true; -} - /// Perform simplification of memcpy's. 
If we have memcpy A /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite /// B to be a memcpy from X to Z (or potentially a memmove, depending on @@ -1693,14 +1464,13 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( AnyClobber, MemoryLocation::getForSource(M), BAA); - // There are five possible optimizations we can do for memcpy: + // There are four possible optimizations we can do for memcpy: // a) memcpy-memcpy xform which exposes redundance for DSE. // b) call-memcpy xform for return slot optimization. // c) memcpy from freshly alloca'd space or space that has just started // its lifetime copies undefined data, and we can therefore eliminate // the memcpy in favor of the data that was already at the destination. // d) memcpy from a just-memset'd source can be turned into memset. - // e) elimination of memcpy via stack-move optimization. if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) { if (Instruction *MI = MD->getMemoryInst()) { if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) { @@ -1719,8 +1489,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { } } if (auto *MDep = dyn_cast<MemCpyInst>(MI)) - if (processMemCpyMemCpyDependence(M, MDep, BAA)) - return true; + return processMemCpyMemCpyDependence(M, MDep, BAA); if (auto *MDep = dyn_cast<MemSetInst>(MI)) { if (performMemCpyToMemSetOptzn(M, MDep, BAA)) { LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n"); @@ -1739,27 +1508,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { } } - // If the transfer is from a stack slot to a stack slot, then we may be able - // to perform the stack-move optimization. See the comments in - // performStackMoveOptzn() for more details. - auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest()); - if (!DestAlloca) - return false; - auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource()); - if (!SrcAlloca) - return false; - ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()); - if (Len == nullptr) - return false; - if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(), - BAA)) { - // Avoid invalidating the iterator. - BBI = M->getNextNonDebugInstruction()->getIterator(); - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - return false; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 4f1350e4ebb9..2031e70bee1d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -675,6 +675,12 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) { if (CI->isByValArgument(I)) { copyLocalTempOfByValueOperandIntoArguments(CI, I); + // When eliminating a tail call, we modify the values of the arguments. + // Therefore, if the byval parameter has a readonly attribute, we have to + // remove it. It is safe because, from the perspective of a caller, the + // byval parameter is always treated as "readonly," even if the readonly + // attribute is removed. 
+ F.removeParamAttr(I, Attribute::ReadOnly); ArgumentPHIs[I]->addIncoming(F.getArg(I), BB); } else ArgumentPHIs[I]->addIncoming(CI->getArgOperand(I), BB); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 5b0951252c07..3ad97613fe7a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -227,9 +227,21 @@ static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr, return ConstantInt::get(RetTy, Result); } +static bool isOnlyUsedInComparisonWithZero(Value *V) { + for (User *U : V->users()) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) + if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, const DataLayout &DL) { - if (!isOnlyUsedInZeroComparison(CI)) + if (!isOnlyUsedInComparisonWithZero(CI)) return false; if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL)) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d7e40e8ef978..b603bbe55dc9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3781,10 +3781,44 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) { // the incoming edges. VPBasicBlock *Header = State.Plan->getVectorLoopRegion()->getEntryBasicBlock(); + + // Gather all VPReductionPHIRecipes and sort them so that intermediate + // stores sunk outside of the loop keep the same order as they had in the + // original loop. + SmallVector<VPReductionPHIRecipe *> ReductionPHIList; for (VPRecipeBase &R : Header->phis()) { if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) - fixReduction(ReductionPhi, State); - else if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) + ReductionPHIList.emplace_back(ReductionPhi); + } + stable_sort(ReductionPHIList, [this](const VPReductionPHIRecipe *R1, + const VPReductionPHIRecipe *R2) { + auto *IS1 = R1->getRecurrenceDescriptor().IntermediateStore; + auto *IS2 = R2->getRecurrenceDescriptor().IntermediateStore; + + // If neither of the recipes has an intermediate store, keep the order the + // same. + if (!IS1 && !IS2) + return false; + + // If only one of the recipes has an intermediate store, then move it + // towards the beginning of the list. + if (IS1 && !IS2) + return true; + + if (!IS1 && IS2) + return false; + + // If both recipes have an intermediate store, then the recipe with the + // later store should be processed earlier. So it should go to the beginning + // of the list. + return DT->dominates(IS2, IS1); + }); + + for (VPReductionPHIRecipe *ReductionPhi : ReductionPHIList) + fixReduction(ReductionPhi, State); + + for (VPRecipeBase &R : Header->phis()) { + if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) fixFixedOrderRecurrence(FOR, State); } }
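The reordering added to fixCrossIterationPHIs above exists because a reduction may also store its running value to memory inside the loop (an "intermediate store"). When two such stores are sunk out of the loop and their destinations may alias, the sunk stores must recreate the original last-writer order, which is what the dominance-based stable_sort preserves. In source terms (illustrative C++; names are hypothetical):

    void twoSums(const int *A, int N, int *S1, int *S2) {
      int Sum1 = 0, Sum2 = 0;
      for (int I = 0; I < N; ++I) {
        Sum1 += A[I];
        *S1 = Sum1; // intermediate store of the first reduction
        Sum2 += 2 * A[I];
        *S2 = Sum2; // intermediate store of the second reduction
      }
      // If S1 == S2, memory must hold Sum2 after the loop, so the sunk
      // stores must be emitted with the *S2 store last.
    }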