| author | Dimitry Andric <dim@FreeBSD.org> | 2024-01-24 19:17:23 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-06 20:13:49 +0000 |
| commit | 7a6dacaca14b62ca4b74406814becb87a3fefac0 (patch) | |
| tree | 273a870ac27484bb1f5ee55e7ef0dc0d061f63e7 /contrib/llvm-project/llvm/lib/Transforms | |
| parent | 46c59ea9b61755455ff6bf9f3e7b834e1af634ea (diff) | |
| parent | 4df029cc74e5ec124f14a5682e44999ce4f086df (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms')
63 files changed, 1178 insertions, 759 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index d09ac1c099c1..49fa0f59d488 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -808,8 +808,8 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
     APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
     Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets(
         DL, Offset1, /* AllowNonInbounds */ true);
-    Load1Ptr = Builder.CreateGEP(Builder.getInt8Ty(), Load1Ptr,
-                                 Builder.getInt32(Offset1.getZExtValue()));
+    Load1Ptr = Builder.CreatePtrAdd(Load1Ptr,
+                                    Builder.getInt32(Offset1.getZExtValue()));
   }
   // Generate wider load.
   NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(),
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index d510b90d9dec..489106422e19 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -203,7 +203,7 @@ void Lowerer::lowerEarlyIntrinsics(Function &F) {
       if (CII->getInfo().isPreSplit()) {
         assert(F.isPresplitCoroutine() &&
                "The frontend uses Swtich-Resumed ABI should emit "
-               "\"coroutine.presplit\" attribute for the coroutine.");
+               "\"presplitcoroutine\" attribute for the coroutine.");
         setCannotDuplicate(CII);
         CII->setCoroutineSelf();
         CoroId = cast<CoroIdInst>(&I);
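A recurring change throughout this merge is replacing byte-offset GEPs over i8 with IRBuilder's CreatePtrAdd convenience, which emits the same instruction. A minimal sketch of the equivalence (the free-standing helper names here are illustrative, not from the patch):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Both helpers emit: getelementptr i8, ptr %p, <ty> %off  (a raw byte offset).
Value *byteOffsetOld(IRBuilder<> &B, Value *Ptr, Value *Off) {
  return B.CreateGEP(B.getInt8Ty(), Ptr, Off); // older spelling
}
Value *byteOffsetNew(IRBuilder<> &B, Value *Ptr, Value *Off) {
  return B.CreatePtrAdd(Ptr, Off);             // same IR, self-documenting
}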
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 89a1ad2243c8..e69c718f0ae3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -963,18 +963,15 @@ static void cacheDIVar(FrameDataInfo &FrameData,
     if (DIVarCache.contains(V))
       continue;
 
-    SmallVector<DbgDeclareInst *, 1> DDIs;
-    SmallVector<DPValue *, 1> DPVs;
-    findDbgDeclares(DDIs, V, &DPVs);
-    auto CacheIt = [&DIVarCache, V](auto &Container) {
+    auto CacheIt = [&DIVarCache, V](const auto &Container) {
       auto *I = llvm::find_if(Container, [](auto *DDI) {
         return DDI->getExpression()->getNumElements() == 0;
       });
       if (I != Container.end())
         DIVarCache.insert({V, (*I)->getVariable()});
     };
-    CacheIt(DDIs);
-    CacheIt(DPVs);
+    CacheIt(findDbgDeclares(V));
+    CacheIt(findDPVDeclares(V));
   }
 }
 
@@ -1125,9 +1122,8 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
   assert(PromiseAlloca &&
          "Coroutine with switch ABI should own Promise alloca");
 
-  SmallVector<DbgDeclareInst *, 1> DIs;
-  SmallVector<DPValue *, 1> DPVs;
-  findDbgDeclares(DIs, PromiseAlloca, &DPVs);
+  TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(PromiseAlloca);
+  TinyPtrVector<DPValue *> DPVs = findDPVDeclares(PromiseAlloca);
 
   DILocalVariable *PromiseDIVariable = nullptr;
   DILocation *DILoc = nullptr;
@@ -1293,8 +1289,8 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
 //   struct f.frame {
 //     ResumeFnTy ResumeFnAddr;
 //     ResumeFnTy DestroyFnAddr;
-//     int ResumeIndex;
 //     ... promise (if present) ...
+//     int ResumeIndex;
 //     ... spills ...
 //   };
 static StructType *buildFrameType(Function &F, coro::Shape &Shape,
@@ -1865,15 +1861,14 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
             FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
             SpillAlignment, E.first->getName() + Twine(".reload"));
 
-      SmallVector<DbgDeclareInst *, 1> DIs;
-      SmallVector<DPValue *, 1> DPVs;
-      findDbgDeclares(DIs, Def, &DPVs);
+      TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(Def);
+      TinyPtrVector<DPValue *> DPVs = findDPVDeclares(Def);
       // Try best to find dbg.declare. If the spill is a temp, there may not
       // be a direct dbg.declare. Walk up the load chain to find one from an
       // alias.
       if (F->getSubprogram()) {
         auto *CurDef = Def;
-        while (DIs.empty() && isa<LoadInst>(CurDef)) {
+        while (DIs.empty() && DPVs.empty() && isa<LoadInst>(CurDef)) {
           auto *LdInst = cast<LoadInst>(CurDef);
           // Only consider ptr to ptr same type load.
           if (LdInst->getPointerOperandType() != LdInst->getType())
@@ -1881,9 +1876,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
           CurDef = LdInst->getPointerOperand();
           if (!isa<AllocaInst, LoadInst>(CurDef))
             break;
-          DIs.clear();
-          DPVs.clear();
-          findDbgDeclares(DIs, CurDef, &DPVs);
+          DIs = findDbgDeclares(CurDef);
+          DPVs = findDPVDeclares(CurDef);
         }
       }
 
@@ -2022,8 +2016,8 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
       auto *FramePtr = GetFramePointer(Alloca);
       auto &Value = *Alias.second;
       auto ITy = IntegerType::get(C, Value.getBitWidth());
-      auto *AliasPtr = Builder.CreateGEP(Type::getInt8Ty(C), FramePtr,
-                                         ConstantInt::get(ITy, Value));
+      auto *AliasPtr =
+          Builder.CreatePtrAdd(FramePtr, ConstantInt::get(ITy, Value));
       Alias.first->replaceUsesWithIf(
           AliasPtr, [&](Use &U) { return DT.dominates(CB, U); });
     }
@@ -2972,7 +2966,7 @@ void coro::salvageDebugInfo(
   Function *F = DPV.getFunction();
   // Follow the pointer arithmetic all the way to the incoming
   // function argument and convert into a DIExpression.
-  bool SkipOutermostLoad = DPV.getType() == DPValue::LocationType::Declare;
+  bool SkipOutermostLoad = DPV.isDbgDeclare();
   Value *OriginalStorage = DPV.getVariableLocationOp(0);
 
   auto SalvagedInfo = ::salvageDebugInfoImpl(
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index fb3fa8d23daa..8058282c4225 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -100,7 +100,7 @@ static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                             Value *Ptr, Type *ResElemTy, int64_t Offset) {
   if (Offset != 0) {
     APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
-    Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(APOffset));
+    Ptr = IRB.CreatePtrAdd(Ptr, IRB.getInt(APOffset));
   }
   return Ptr;
 }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index cc5a4ee8c2bd..585364dd7aa2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -298,8 +298,8 @@ static Value *constructPointer(Value *Ptr, int64_t Offset,
                     << "-bytes\n");
 
   if (Offset)
-    Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt64(Offset),
-                        Ptr->getName() + ".b" + Twine(Offset));
+    Ptr = IRB.CreatePtrAdd(Ptr, IRB.getInt64(Offset),
+                           Ptr->getName() + ".b" + Twine(Offset));
   return Ptr;
 }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
index 48ef0772e800..6af3a45701bc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Bitcode/BitcodeWriterPass.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -16,6 +18,7 @@
 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
+#include <memory>
 #include <string>
 
 using namespace llvm;
@@ -30,9 +33,16 @@ PreservedAnalyses EmbedBitcodePass::run(Module &M, ModuleAnalysisManager &AM) {
     report_fatal_error(
         "EmbedBitcode pass currently only supports ELF object format",
         /*gen_crash_diag=*/false);
+
   std::string Data;
   raw_string_ostream OS(Data);
-  ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr).run(M, AM);
+  if (IsThinLTO)
+    ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr).run(M, AM);
+  else
+    BitcodeWriterPass(OS, /*ShouldPreserveUseListOrder=*/false, EmitLTOSummary)
+        .run(M, AM);
+
   embedBufferInModule(M, MemoryBufferRef(Data, "ModuleData"), ".llvm.lto");
+
   return PreservedAnalyses::all();
 }
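The EmbedBitcodePass change above gains a non-thin path: serialize the module's bitcode into an in-memory string, then stash it in a .llvm.lto section. A minimal standalone sketch of the same flow outside the pass manager, using the plain WriteBitcodeToFile API instead of the pass interface (helper name is illustrative; error handling omitted):

#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;

void embedSelf(Module &M) {
  std::string Data;
  raw_string_ostream OS(Data);
  WriteBitcodeToFile(M, OS);  // plain (non-thin) bitcode into Data
  embedBufferInModule(M, MemoryBufferRef(Data, "ModuleData"), ".llvm.lto");
}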
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
index a6e19df7c5f1..8e6d0e814372 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -154,8 +154,7 @@ struct OutlinableGroup {
 /// \param SourceBB - the BasicBlock to pull Instructions from.
 /// \param TargetBB - the BasicBlock to put Instruction into.
 static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
-  for (Instruction &I : llvm::make_early_inc_range(SourceBB))
-    I.moveBeforePreserving(TargetBB, TargetBB.end());
+  TargetBB.splice(TargetBB.end(), &SourceBB);
 }
 
 /// A function to sort the keys of \p Map, which must be a mapping of constant
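The IROutliner hunk above swaps a per-instruction move loop for BasicBlock::splice, which transfers the whole instruction list in one list operation. A short sketch (function name is illustrative):

#include "llvm/IR/BasicBlock.h"
using namespace llvm;

void moveAll(BasicBlock &From, BasicBlock &To) {
  // Splice every instruction of From onto the end of To; From is left empty.
  To.splice(To.end(), &From);
}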
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 0a6f69bc73d5..e10b3c56ae14 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include <sstream>
+#include <unordered_map>
 #include <vector>
 
 using namespace llvm;
 using namespace llvm::memprof;
@@ -577,7 +578,7 @@ class ModuleCallsiteContextGraph
 public:
   ModuleCallsiteContextGraph(
       Module &M,
-      function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
+      llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
 
 private:
   friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
@@ -605,7 +606,7 @@ private:
                              unsigned CloneNo) const;
 
   const Module &Mod;
-  function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
+  llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
 };
 
 /// Represents a call in the summary index graph, which can either be an
@@ -640,7 +641,7 @@ class IndexCallsiteContextGraph
 public:
   IndexCallsiteContextGraph(
       ModuleSummaryIndex &Index,
-      function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+      llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
           isPrevailing);
 
   ~IndexCallsiteContextGraph() {
@@ -686,7 +687,7 @@ private:
   std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
 
   const ModuleSummaryIndex &Index;
-  function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+  llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
       isPrevailing;
 
   // Saves/owns the callsite info structures synthesized for missing tail call
@@ -1523,7 +1524,8 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
 }
 
 ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
-    Module &M, function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
+    Module &M,
+    llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
     : Mod(M), OREGetter(OREGetter) {
   for (auto &F : M) {
     std::vector<CallInfo> CallsWithMetadata;
@@ -1582,7 +1584,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
 
 IndexCallsiteContextGraph::IndexCallsiteContextGraph(
     ModuleSummaryIndex &Index,
-    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+    llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
         isPrevailing)
     : Index(Index), isPrevailing(isPrevailing) {
   for (auto &I : Index) {
@@ -3622,7 +3624,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
 
 bool MemProfContextDisambiguation::processModule(
     Module &M,
-    function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
+    llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
 
   // If we have an import summary, then the cloning decisions were made during
   // the thin link on the index. Apply them and return.
@@ -3689,7 +3691,7 @@ PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
 
 void MemProfContextDisambiguation::run(
     ModuleSummaryIndex &Index,
-    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+    llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing) {
   // TODO: If/when other types of memprof cloning are enabled beyond just for
   // hot and cold, we will need to change this to individually control the
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 8f0b12d0cfed..090e5560483e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -186,9 +186,7 @@ void SampleProfileProber::computeCFGHash() {
   std::vector<uint8_t> Indexes;
   JamCRC JC;
   for (auto &BB : *F) {
-    auto *TI = BB.getTerminator();
-    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
-      auto *Succ = TI->getSuccessor(I);
+    for (BasicBlock *Succ : successors(&BB)) {
       auto Index = getBlockId(Succ);
       for (int J = 0; J < 4; J++)
         Indexes.push_back((uint8_t)(Index >> (J * 8)));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 85afc020dbf8..01aba47cdbff 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1769,7 +1769,7 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
       continue;
     auto *RetType = cast<IntegerType>(Call.CB.getType());
     IRBuilder<> B(&Call.CB);
-    Value *Addr = B.CreateGEP(Int8Ty, Call.VTable, Byte);
+    Value *Addr = B.CreatePtrAdd(Call.VTable, Byte);
     if (RetType->getBitWidth() == 1) {
       Value *Bits = B.CreateLoad(Int8Ty, Addr);
       Value *BitsAndBit = B.CreateAnd(Bits, Bit);
@@ -2066,14 +2066,14 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
     Value *LoadedValue = nullptr;
     if (TypeCheckedLoadFunc->getIntrinsicID() ==
         Intrinsic::type_checked_load_relative) {
-      Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
+      Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
       LoadedValue = LoadB.CreateLoad(Int32Ty, GEP);
       LoadedValue = LoadB.CreateSExt(LoadedValue, IntPtrTy);
       GEP = LoadB.CreatePtrToInt(GEP, IntPtrTy);
       LoadedValue = LoadB.CreateAdd(GEP, LoadedValue);
       LoadedValue = LoadB.CreateIntToPtr(LoadedValue, Int8PtrTy);
     } else {
-      Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
+      Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
       LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEP);
     }
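The SampleProfileProbe hunk above replaces a manual terminator/getSuccessor(i) loop with the successors() range adaptor from llvm/IR/CFG.h. A minimal sketch of the idiom (helper name is illustrative):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
using namespace llvm;

unsigned countSuccessors(BasicBlock &BB) {
  unsigned N = 0;
  for (BasicBlock *Succ : successors(&BB)) {
    (void)Succ;  // visit each CFG successor without touching the terminator
    ++N;
  }
  return N;
}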
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c7e6f32c5406..8a00b75a1f74 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1683,6 +1683,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
     }
   }
 
+  if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
+    return R;
+
   // TODO(jingyue): Consider willNotOverflowSignedAdd and
   // willNotOverflowUnsignedAdd to reduce the number of invocations of
   // computeKnownBits.
@@ -2445,6 +2448,9 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     }
   }
 
+  if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
+    return R;
+
   if (Instruction *R = foldSubOfMinMax(I, Builder))
     return R;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 0620752e3213..5fd944a859ef 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2809,6 +2809,10 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC,
         match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
       return X;
 
+    // (shl ShVal, X) | (lshr ShVal, ((-X) & (Width - 1)))
+    if (match(R, m_And(m_Neg(m_Specific(L)), m_SpecificInt(Mask))))
+      return L;
+
     // Similar to above, but the shift amount may be extended after masking,
     // so return the extended value as the parameter for the intrinsic.
     if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
@@ -3398,6 +3402,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (Instruction *R = foldBinOpShiftWithShift(I))
     return R;
 
+  if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
+    return R;
+
   Value *X, *Y;
   const APInt *CV;
   if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 64fbd5543a9e..a647be2d26c7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -282,10 +282,12 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
     Constant *FillVal = ConstantInt::get(ITy, Fill);
     StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
     S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
-    for (auto *DAI : at::getAssignmentMarkers(S)) {
-      if (llvm::is_contained(DAI->location_ops(), FillC))
-        DAI->replaceVariableLocationOp(FillC, FillVal);
-    }
+    auto replaceOpForAssignmentMarkers = [FillC, FillVal](auto *DbgAssign) {
+      if (llvm::is_contained(DbgAssign->location_ops(), FillC))
+        DbgAssign->replaceVariableLocationOp(FillC, FillVal);
+    };
+    for_each(at::getAssignmentMarkers(S), replaceOpForAssignmentMarkers);
+    for_each(at::getDPVAssignmentMarkers(S), replaceOpForAssignmentMarkers);
 
     S->setAlignment(Alignment);
     if (isa<AtomicMemSetInst>(MI))
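The new matchFunnelShift case in the InstCombineAndOrXor hunk above recognizes a rotate where the right-shift amount is derived from the left-shift amount by negate-and-mask. In plain C++ terms, this is the classic UB-free variable rotate that gets folded to the llvm.fshl intrinsic (a sketch, not the pass's code):

#include <cstdint>

uint32_t rotl32(uint32_t x, uint32_t n) {
  // ((-n) & 31) keeps the right shift in range and makes n == 0 well defined.
  return (x << (n & 31)) | (x >> ((-n) & 31));
}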
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 6629ca840a67..58f0763bb0c0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -103,6 +103,16 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
       }
     }
     break;
+  case Instruction::ShuffleVector: {
+    auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
+    auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
+    auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
+    Value *Op0 = EvaluateInDifferentType(I->getOperand(0), FixedTy, isSigned);
+    Value *Op1 = EvaluateInDifferentType(I->getOperand(1), FixedTy, isSigned);
+    Res = new ShuffleVectorInst(Op0, Op1,
+                                cast<ShuffleVectorInst>(I)->getShuffleMask());
+    break;
+  }
   default:
     // TODO: Can handle more cases here.
     llvm_unreachable("Unreachable!");
@@ -363,6 +373,9 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                  I->getOpcode() == Instruction::FPToSI);
     return Ty->getScalarSizeInBits() >= MinBitWidth;
   }
+  case Instruction::ShuffleVector:
+    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
   default:
     // TODO: Can handle more cases here.
     break;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7c1aff445524..8c0fd6622551 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1323,6 +1323,9 @@ Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
     return replaceInstUsesWith(Cmp, NewPhi);
   }
 
+  if (Instruction *R = tryFoldInstWithCtpopWithNot(&Cmp))
+    return R;
+
   return nullptr;
 }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 21c61bd99018..c24b6e3a5b33 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -505,6 +505,10 @@ public:
   Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS,
                                         Value *RHS);
 
+  // If `I` has operand `(ctpop (not x))`, fold `I` with `(sub nuw nsw
+  // BitWidth(x), (ctpop x))`.
+  Instruction *tryFoldInstWithCtpopWithNot(Instruction *I);
+
   // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
   //    -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
   // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index e7f983a00e30..6c3adf00c189 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -330,6 +330,19 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
       return BinaryOperator::CreateMul(X, X);
   }
 
+  {
+    Value *X, *Y;
+    // abs(X) * abs(Y) -> abs(X * Y)
+    if (I.hasNoSignedWrap() &&
+        match(Op0,
+              m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One()))) &&
+        match(Op1, m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(Y), m_One()))))
+      return replaceInstUsesWith(
+          I, Builder.CreateBinaryIntrinsic(Intrinsic::abs,
+                                           Builder.CreateNSWMul(X, Y),
+                                           Builder.getTrue()));
+  }
+
   // -X * C --> X * -C
   Value *X, *Y;
   Constant *Op1C;
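The new visitMul fold above rests on the identity |x|·|y| = |x·y|, which holds whenever the multiply does not overflow; that is what the nsw requirement licenses. A plain C++ illustration of the identity (assuming no signed overflow; whether a given frontend emits llvm.abs for labs is outside this patch):

#include <cstdlib>

long absTimesAbs(long x, long y) {
  // Under the no-overflow assumption this equals std::labs(x * y).
  return std::labs(x) * std::labs(y);
}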
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index b7958978c450..54490c46dfae 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -366,14 +366,14 @@ static Instruction *foldShiftOfShiftedBinOp(BinaryOperator &I,
 
   Type *Ty = I.getType();
 
-  // Find a matching one-use shift by constant. The fold is not valid if the sum
+  // Find a matching shift by constant. The fold is not valid if the sum
   // of the shift values equals or exceeds bitwidth.
-  // TODO: Remove the one-use check if the other logic operand (Y) is constant.
   Value *X, *Y;
-  auto matchFirstShift = [&](Value *V) {
-    APInt Threshold(Ty->getScalarSizeInBits(), Ty->getScalarSizeInBits());
-    return match(V,
-                 m_OneUse(m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0)))) &&
+  auto matchFirstShift = [&](Value *V, Value *W) {
+    unsigned Size = Ty->getScalarSizeInBits();
+    APInt Threshold(Size, Size);
+    return match(V, m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0))) &&
+           (V->hasOneUse() || match(W, m_ImmConstant())) &&
           match(ConstantExpr::getAdd(C0, C1),
                 m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
   };
@@ -382,9 +382,9 @@ static Instruction *foldShiftOfShiftedBinOp(BinaryOperator &I,
   // is not so we cannot reoder if we match operand(1) and need to keep the
   // operands in their original positions.
   bool FirstShiftIsOp1 = false;
-  if (matchFirstShift(BinInst->getOperand(0)))
+  if (matchFirstShift(BinInst->getOperand(0), BinInst->getOperand(1)))
     Y = BinInst->getOperand(1);
-  else if (matchFirstShift(BinInst->getOperand(1))) {
+  else if (matchFirstShift(BinInst->getOperand(1), BinInst->getOperand(0))) {
     Y = BinInst->getOperand(0);
     FirstShiftIsOp1 = BinInst->getOpcode() == Instruction::Sub;
   } else
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7f2018b3a199..249f4a7710e0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -740,6 +740,93 @@ static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
   return RetVal;
 }
 
+// If `I` has one Const operand and the other matches `(ctpop (not x))`,
+// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
+// This is only useful is the new subtract can fold so we only handle the
+// following cases:
+//    1) (add/sub/disjoint_or C, (ctpop (not x))
+//        -> (add/sub/disjoint_or C', (ctpop x))
+//    1) (cmp pred C, (ctpop (not x))
+//        -> (cmp pred C', (ctpop x))
+Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) {
+  unsigned Opc = I->getOpcode();
+  unsigned ConstIdx = 1;
+  switch (Opc) {
+  default:
+    return nullptr;
+    // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
+    // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
+    // is constant.
+  case Instruction::Sub:
+    ConstIdx = 0;
+    break;
+  case Instruction::ICmp:
+    // Signed predicates aren't correct in some edge cases like for i2 types, as
+    // well since (ctpop x) is known [0, log2(BitWidth(x))] almost all signed
+    // comparisons against it are simplfied to unsigned.
+    if (cast<ICmpInst>(I)->isSigned())
+      return nullptr;
+    break;
+  case Instruction::Or:
+    if (!match(I, m_DisjointOr(m_Value(), m_Value())))
+      return nullptr;
+    [[fallthrough]];
+  case Instruction::Add:
+    break;
+  }
+
+  Value *Op;
+  // Find ctpop.
+  if (!match(I->getOperand(1 - ConstIdx),
+             m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(Op)))))
+    return nullptr;
+
+  Constant *C;
+  // Check other operand is ImmConstant.
+  if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
+    return nullptr;
+
+  Type *Ty = Op->getType();
+  Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
+  // Need extra check for icmp. Note if this check is true, it generally means
+  // the icmp will simplify to true/false.
+  if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality() &&
+      !ConstantExpr::getICmp(ICmpInst::ICMP_UGT, C, BitWidthC)->isZeroValue())
+    return nullptr;
+
+  // Check we can invert `(not x)` for free.
+  bool Consumes = false;
+  if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes)
+    return nullptr;
+  Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
+  assert(NotOp != nullptr &&
+         "Desync between isFreeToInvert and getFreelyInverted");
+
+  Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
+
+  Value *R = nullptr;
+
+  // Do the transformation here to avoid potentially introducing an infinite
+  // loop.
+  switch (Opc) {
+  case Instruction::Sub:
+    R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
+    break;
+  case Instruction::Or:
+  case Instruction::Add:
+    R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
+    break;
+  case Instruction::ICmp:
+    R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
+                           CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
+    break;
+  default:
+    llvm_unreachable("Unhandled Opcode");
+  }
+  assert(R != nullptr);
+  return replaceInstUsesWith(*I, R);
+}
+
 // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
 //   IFF
 //    1) the logic_shifts match
@@ -4435,7 +4522,7 @@ bool InstCombinerImpl::run() {
         if (isa<PHINode>(I)) // PHI -> Non-PHI
           InsertPos = InstParent->getFirstInsertionPt();
         else // Non-PHI -> PHI
-          InsertPos = InstParent->getFirstNonPHI()->getIterator();
+          InsertPos = InstParent->getFirstNonPHIIt();
       }
 
       Result->insertInto(InstParent, InsertPos);
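The identity behind tryFoldInstWithCtpopWithNot above: each bit of x is set in exactly one of x and ~x, so popcount(~x) == BitWidth - popcount(x). A tiny C++20 check of the identity, plus the icmp consequence (values here are arbitrary examples):

#include <bit>
#include <cstdint>

constexpr uint32_t X = 0xF0;
static_assert(std::popcount(static_cast<uint32_t>(~X)) ==
              32 - std::popcount(X));

// Hence e.g. `popcount(~x) == C` can be rewritten as `popcount(x) == 32 - C`,
// which is what the icmp arm of the fold does via the swapped predicate.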
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 5e7e08eaa997..caab98c732ee 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1590,8 +1590,7 @@ void AddressSanitizer::instrumentMaskedLoadOrStore(
         InstrumentedAddress = IRB.CreateExtractElement(Addr, Index);
       } else if (Stride) {
         Index = IRB.CreateMul(Index, Stride);
-        Addr = IRB.CreateBitCast(Addr, PointerType::getUnqual(*C));
-        InstrumentedAddress = IRB.CreateGEP(Type::getInt8Ty(*C), Addr, {Index});
+        InstrumentedAddress = IRB.CreatePtrAdd(Addr, Index);
       } else {
         InstrumentedAddress = IRB.CreateGEP(VTy, Addr, {Zero, Index});
       }
@@ -2079,6 +2078,8 @@ bool ModuleAddressSanitizer::ShouldUseMachOGlobalsSection() const {
     return true;
   if (TargetTriple.isDriverKit())
     return true;
+  if (TargetTriple.isXROS())
+    return true;
 
   return false;
 }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index e2e5f21b376b..c322d0abd6bc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -44,8 +44,8 @@ addModuleFlags(Module &M,
   return true;
 }
 
-static bool runCGProfilePass(
-    Module &M, FunctionAnalysisManager &FAM) {
+static bool runCGProfilePass(Module &M, FunctionAnalysisManager &FAM,
+                             bool InLTO) {
   MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
   InstrProfSymtab Symtab;
   auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
@@ -59,7 +59,7 @@ static bool runCGProfilePass(
     Count = SaturatingAdd(Count, NewCount);
   };
   // Ignore error here. Indirect calls are ignored if this fails.
-  (void)(bool) Symtab.create(M);
+  (void)(bool)Symtab.create(M, InLTO);
   for (auto &F : M) {
     // Avoid extra cost of running passes for BFI when the function doesn't have
     // entry count.
@@ -101,7 +101,7 @@ static bool runCGProfilePass(
 PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
   FunctionAnalysisManager &FAM =
       MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
-  runCGProfilePass(M, FAM);
+  runCGProfilePass(M, FAM, InLTO);
 
   return PreservedAnalyses::all();
 }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 1ff0a34bae24..c7f6f2a43c17 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -535,7 +535,7 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
       SmallString<128> Filename = GCovFile->getString();
       sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
-      return std::string(Filename.str());
+      return std::string(Filename);
     }
   }
 
@@ -546,7 +546,7 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
   if (sys::fs::current_path(CurPath))
     return std::string(FName);
   sys::path::append(CurPath, FName);
-  return std::string(CurPath.str());
+  return std::string(CurPath);
 }
 
 bool GCOVProfiler::runOnModule(
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index f7f8fed643e9..efb621cde906 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -862,7 +862,7 @@ Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
   if (Mapping.Offset == 0)
     return IRB.CreateIntToPtr(Shadow, PtrTy);
   // (Mem >> Scale) + Offset
-  return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
+  return IRB.CreatePtrAdd(ShadowBase, Shadow);
 }
 
 int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 94af63da38c8..15bca538860d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4103,7 +4103,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       // do the usual thing: check argument shadow and mark all outputs as
       // clean. Note that any side effects of the inline asm that are not
       // immediately visible in its constraints are not handled.
-      if (ClHandleAsmConservative && MS.CompileKernel)
+      // For now, handle inline asm by default for KMSAN.
+      bool HandleAsm = ClHandleAsmConservative.getNumOccurrences()
+                           ? ClHandleAsmConservative
+                           : MS.CompileKernel;
+      if (HandleAsm)
         visitAsmInstruction(CB);
       else
         visitInstruction(CB);
@@ -4557,7 +4561,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       return;
     Value *SizeVal =
         IRB.CreateTypeSize(MS.IntptrTy, DL.getTypeStoreSize(ElemTy));
-    IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Operand, SizeVal});
+    if (MS.CompileKernel) {
+      IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Operand, SizeVal});
+    } else {
+      // ElemTy, derived from elementtype(), does not encode the alignment of
+      // the pointer. Conservatively assume that the shadow memory is unaligned.
+      auto [ShadowPtr, _] =
+          getShadowOriginPtrUserspace(Operand, IRB, IRB.getInt8Ty(), Align(1));
+      IRB.CreateAlignedStore(getCleanShadow(ElemTy), ShadowPtr, Align(1));
+    }
   }
 
   /// Get the number of output arguments returned by pointers.
@@ -5253,8 +5265,8 @@ struct VarArgAArch64Helper : public VarArgHelperBase {
                                Align(8), /*isStore*/ true)
             .first;
 
-    Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
-                                            GrRegSaveAreaShadowPtrOff);
+    Value *GrSrcPtr =
+        IRB.CreateInBoundsPtrAdd(VAArgTLSCopy, GrRegSaveAreaShadowPtrOff);
     Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
 
     IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
@@ -5269,10 +5281,9 @@ struct VarArgAArch64Helper : public VarArgHelperBase {
                                Align(8), /*isStore*/ true)
             .first;
 
-    Value *VrSrcPtr = IRB.CreateInBoundsGEP(
-        IRB.getInt8Ty(),
-        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
-                              IRB.getInt32(AArch64VrBegOffset)),
+    Value *VrSrcPtr = IRB.CreateInBoundsPtrAdd(
+        IRB.CreateInBoundsPtrAdd(VAArgTLSCopy,
+                                 IRB.getInt32(AArch64VrBegOffset)),
         VrRegSaveAreaShadowPtrOff);
     Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
 
@@ -5285,8 +5296,8 @@ struct VarArgAArch64Helper : public VarArgHelperBase {
                                Align(16), /*isStore*/ true)
             .first;
 
-    Value *StackSrcPtr = IRB.CreateInBoundsGEP(
-        IRB.getInt8Ty(), VAArgTLSCopy, IRB.getInt32(AArch64VAEndOffset));
+    Value *StackSrcPtr = IRB.CreateInBoundsPtrAdd(
+        VAArgTLSCopy, IRB.getInt32(AArch64VAEndOffset));
 
     IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr, Align(16),
                      VAArgOverflowSize);
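The HandleAsm change in the MemorySanitizer hunk above uses a standard cl::opt idiom: getNumOccurrences() distinguishes "flag left at its default" from "flag explicitly set", so an explicit -msan-handle-asm-conservative=0 still overrides the new KMSAN default. A sketch of the idiom (the flag name is the real one; the surrounding function is illustrative):

#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<bool> ClHandleAsmConservative(
    "msan-handle-asm-conservative",
    llvm::cl::desc("conservative handling of inline assembly"),
    llvm::cl::Hidden, llvm::cl::init(true));

bool shouldHandleAsm(bool CompileKernel) {
  // Explicit command-line setting wins; otherwise default on for KMSAN only.
  return ClHandleAsmConservative.getNumOccurrences()
             ? ClHandleAsmConservative
             : CompileKernel;
}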
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index ce570bdfd8b8..17c1c4423842 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -329,8 +329,8 @@ ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section,
   // Account for the fact that on windows-msvc __start_* symbols actually
   // point to a uint64_t before the start of the array.
-  auto GEP = IRB.CreateGEP(Int8Ty, SecStart,
-                           ConstantInt::get(IntptrTy, sizeof(uint64_t)));
+  auto GEP =
+      IRB.CreatePtrAdd(SecStart, ConstantInt::get(IntptrTy, sizeof(uint64_t)));
   return std::make_pair(GEP, SecEnd);
 }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 0fea6bcc4882..7af9c39f8236 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -242,7 +242,7 @@ static bool findDependencies(DependenceKind Flavor, const Value *Arg,
   } while (!Worklist.empty());
 
   // Determine whether the original StartBB post-dominates all of the blocks we
-  // visited. If not, insert a sentinal indicating that most optimizations are
+  // visited. If not, insert a sentinel indicating that most optimizations are
   // not safe.
   for (const BasicBlock *BB : Visited) {
     if (BB == StartBB)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
index 9af275a9f4e2..90b544c89226 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -549,6 +549,11 @@ ADCEChanged AggressiveDeadCodeElimination::removeDeadInstructions() {
     // like the rest of this loop does. Extending support to assignment tracking
     // is future work.
     for (DPValue &DPV : make_early_inc_range(I.getDbgValueRange())) {
+      // Avoid removing a DPV that is linked to instructions because it holds
+      // information about an existing store.
+      if (DPV.isDbgAssign())
+        if (!at::getAssignmentInsts(&DPV).empty())
+          continue;
       if (AliveScopes.count(DPV.getDebugLoc()->getScope()))
         continue;
       I.dropOneDbgValue(&DPV);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 9e40d94dd73c..49f8761a1392 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -576,9 +576,6 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
                                                ConstCandVecType::iterator &MaxCostItr) {
   unsigned NumUses = 0;
 
-  bool OptForSize = Entry->getParent()->hasOptSize() ||
-                    llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI,
-                                                PGSOQueryType::IRPass);
   if (!OptForSize || std::distance(S,E) > 100) {
     for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
       NumUses += ConstCand->Uses.size();
@@ -948,6 +945,10 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
   this->Ctx = &Fn.getContext();
   this->Entry = &Entry;
   this->PSI = PSI;
+  this->OptForSize = Entry.getParent()->hasOptSize() ||
+                     llvm::shouldOptimizeForSize(Entry.getParent(), PSI, BFI,
+                                                 PGSOQueryType::IRPass);
+
   // Collect all constant candidates.
   collectConstantCandidates(Fn);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 6fec54ac7922..8f09569d0d9c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1314,9 +1314,7 @@ static void generateReproducer(CmpInst *Cond, Module *M,
 
 static std::optional<bool> checkCondition(CmpInst::Predicate Pred, Value *A,
                                           Value *B, Instruction *CheckInst,
-                                          ConstraintInfo &Info, unsigned NumIn,
-                                          unsigned NumOut,
-                                          Instruction *ContextInst) {
+                                          ConstraintInfo &Info) {
   LLVM_DEBUG(dbgs() << "Checking " << *CheckInst << "\n");
 
   auto R = Info.getConstraintForSolving(Pred, A, B);
@@ -1385,9 +1383,9 @@ static bool checkAndReplaceCondition(
     return true;
   };
 
-  if (auto ImpliedCondition = checkCondition(
-          Cmp->getPredicate(), Cmp->getOperand(0), Cmp->getOperand(1), Cmp,
-          Info, NumIn, NumOut, ContextInst))
+  if (auto ImpliedCondition =
+          checkCondition(Cmp->getPredicate(), Cmp->getOperand(0),
+                         Cmp->getOperand(1), Cmp, Info))
     return ReplaceCmpWithConstant(Cmp, *ImpliedCondition);
   return false;
 }
@@ -1446,8 +1444,7 @@ static bool checkOrAndOpImpliedByOther(
   // Check if the second condition can be simplified now.
   if (auto ImpliedCondition =
           checkCondition(CmpToCheck->getPredicate(), CmpToCheck->getOperand(0),
-                         CmpToCheck->getOperand(1), CmpToCheck, Info, CB.NumIn,
-                         CB.NumOut, CB.getContextInst())) {
+                         CmpToCheck->getOperand(1), CmpToCheck, Info)) {
     if (IsOr && isa<SelectInst>(JoinOp)) {
       JoinOp->setOperand(
           OtherOpIdx == 0 ? 2 : 0,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index c5bf913cda30..85d4065286e4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -307,7 +307,7 @@ void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
 
 struct ClonedBlock {
   BasicBlock *BB;
-  uint64_t State; ///< \p State corresponds to the next value of a switch stmnt.
+  APInt State; ///< \p State corresponds to the next value of a switch stmnt.
 };
 
 typedef std::deque<BasicBlock *> PathType;
@@ -344,9 +344,9 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
 /// exit state, and the block that determines the next state.
 struct ThreadingPath {
   /// Exit value is DFA's exit state for the given path.
-  uint64_t getExitValue() const { return ExitVal; }
+  APInt getExitValue() const { return ExitVal; }
   void setExitValue(const ConstantInt *V) {
-    ExitVal = V->getZExtValue();
+    ExitVal = V->getValue();
     IsExitValSet = true;
   }
   bool isExitValueSet() const { return IsExitValSet; }
@@ -365,7 +365,7 @@ struct ThreadingPath {
 
 private:
   PathType Path;
-  uint64_t ExitVal;
+  APInt ExitVal;
   const BasicBlock *DBB = nullptr;
   bool IsExitValSet = false;
 };
@@ -744,7 +744,7 @@ private:
     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
       PathType PathBBs = TPath.getPath();
-      uint64_t NextState = TPath.getExitValue();
+      APInt NextState = TPath.getExitValue();
       const BasicBlock *Determinator = TPath.getDeterminatorBB();
 
       // Update Metrics for the Switch block, this is always cloned
@@ -901,7 +901,7 @@ private:
                  DuplicateBlockMap &DuplicateMap,
                  SmallSet<BasicBlock *, 16> &BlocksToClean,
                  DomTreeUpdater *DTU) {
-    uint64_t NextState = Path.getExitValue();
+    APInt NextState = Path.getExitValue();
     const BasicBlock *Determinator = Path.getDeterminatorBB();
     PathType PathBBs = Path.getPath();
 
@@ -910,8 +910,9 @@ private:
       PathBBs.pop_front();
 
    auto DetIt = llvm::find(PathBBs, Determinator);
-    auto Prev = std::prev(DetIt);
-    BasicBlock *PrevBB = *Prev;
+    // When there is only one BB in PathBBs, the determinator takes itself as a
+    // direct predecessor.
+    BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(DetIt);
     for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
       BasicBlock *BB = *BBIt;
       BlocksToClean.insert(BB);
@@ -993,13 +994,14 @@ private:
   /// This function also includes updating phi nodes in the successors of the
   /// BB, and remapping uses that were defined locally in the cloned BB.
   BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,
-                                             uint64_t NextState,
+                                             const APInt &NextState,
                                              DuplicateBlockMap &DuplicateMap,
                                              DefMap &NewDefs,
                                              DomTreeUpdater *DTU) {
     ValueToValueMapTy VMap;
     BasicBlock *NewBB = CloneBasicBlock(
-        BB, VMap, ".jt" + std::to_string(NextState), BB->getParent());
+        BB, VMap, ".jt" + std::to_string(NextState.getLimitedValue()),
+        BB->getParent());
     NewBB->moveAfter(BB);
     NumCloned++;
 
@@ -1034,7 +1036,7 @@ private:
   /// This means creating a new incoming value from NewBB with the new
   /// instruction wherever there is an incoming value from BB.
   void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,
-                           uint64_t NextState, ValueToValueMapTy &VMap,
+                           const APInt &NextState, ValueToValueMapTy &VMap,
                            DuplicateBlockMap &DuplicateMap) {
     std::vector<BasicBlock *> BlocksToUpdate;
 
@@ -1144,7 +1146,7 @@ private:
   void updateLastSuccessor(ThreadingPath &TPath,
                            DuplicateBlockMap &DuplicateMap,
                            DomTreeUpdater *DTU) {
-    uint64_t NextState = TPath.getExitValue();
+    APInt NextState = TPath.getExitValue();
     BasicBlock *BB = TPath.getPath().back();
     BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
 
@@ -1198,7 +1200,7 @@ private:
 
   /// Checks if BB was already cloned for a particular next state value. If it
   /// was then it returns this cloned block, and otherwise null.
-  BasicBlock *getClonedBB(BasicBlock *BB, uint64_t NextState,
+  BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState,
                           DuplicateBlockMap &DuplicateMap) {
     CloneList ClonedBBs = DuplicateMap[BB];
 
@@ -1212,10 +1214,10 @@ private:
 
   /// Helper to get the successor corresponding to a particular case value for
   /// a switch statement.
-  BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, uint64_t NextState) {
+  BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) {
     BasicBlock *NextCase = nullptr;
     for (auto Case : Switch->cases()) {
-      if (Case.getCaseValue()->getZExtValue() == NextState) {
+      if (Case.getCaseValue()->getValue() == NextState) {
        NextCase = Case.getCaseSuccessor();
        break;
      }
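The DFAJumpThreading change above switches the threading state from uint64_t to APInt: ConstantInt::getZExtValue() asserts when a case value does not fit in 64 bits (for example, a switch over i128), whereas APInt keeps full precision. A small sketch of the two operations the patch relies on (helper names are illustrative):

#include "llvm/ADT/APInt.h"
using namespace llvm;

bool sameCaseValue(const APInt &A, const APInt &B) {
  return A == B;              // exact comparison at full bit width
}

uint64_t blockNameSuffix(const APInt &V) {
  return V.getLimitedValue(); // clamps to UINT64_MAX instead of asserting
}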
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 203fcdfc87d9..250ad19902f0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -488,27 +488,27 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
   uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
   uint64_t DeadSliceOffsetInBits =
       OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
-  auto SetDeadFragExpr = [](DbgAssignIntrinsic *DAI,
+  auto SetDeadFragExpr = [](auto *Assign,
                             DIExpression::FragmentInfo DeadFragment) {
     // createFragmentExpression expects an offset relative to the existing
     // fragment offset if there is one.
     uint64_t RelativeOffset = DeadFragment.OffsetInBits -
-                              DAI->getExpression()
+                              Assign->getExpression()
                                   ->getFragmentInfo()
                                   .value_or(DIExpression::FragmentInfo(0, 0))
                                   .OffsetInBits;
     if (auto NewExpr = DIExpression::createFragmentExpression(
-            DAI->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
-      DAI->setExpression(*NewExpr);
+            Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
+      Assign->setExpression(*NewExpr);
       return;
     }
     // Failed to create a fragment expression for this so discard the value,
     // making this a kill location.
     auto *Expr = *DIExpression::createFragmentExpression(
-        DIExpression::get(DAI->getContext(), std::nullopt),
+        DIExpression::get(Assign->getContext(), std::nullopt),
         DeadFragment.OffsetInBits, DeadFragment.SizeInBits);
-    DAI->setExpression(Expr);
-    DAI->setKillLocation();
+    Assign->setExpression(Expr);
+    Assign->setKillLocation();
   };
 
   // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
@@ -526,32 +526,35 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
   // returned by getAssignmentMarkers so save a copy of the markers to iterate
   // over.
   auto LinkedRange = at::getAssignmentMarkers(Inst);
+  SmallVector<DPValue *> LinkedDPVAssigns = at::getDPVAssignmentMarkers(Inst);
   SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
                                            LinkedRange.end());
-  for (auto *DAI : Linked) {
+  auto InsertAssignForOverlap = [&](auto *Assign) {
     std::optional<DIExpression::FragmentInfo> NewFragment;
     if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
-                                        DeadSliceSizeInBits, DAI,
+                                        DeadSliceSizeInBits, Assign,
                                         NewFragment) ||
         !NewFragment) {
       // We couldn't calculate the intersecting fragment for some reason. Be
       // cautious and unlink the whole assignment from the store.
-      DAI->setKillAddress();
-      DAI->setAssignId(GetDeadLink());
-      continue;
+      Assign->setKillAddress();
+      Assign->setAssignId(GetDeadLink());
+      return;
     }
     // No intersect.
     if (NewFragment->SizeInBits == 0)
-      continue;
+      return;
 
     // Fragments overlap: insert a new dbg.assign for this dead part.
-    auto *NewAssign = cast<DbgAssignIntrinsic>(DAI->clone());
-    NewAssign->insertAfter(DAI);
+    auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
+    NewAssign->insertAfter(Assign);
     NewAssign->setAssignId(GetDeadLink());
     if (NewFragment)
       SetDeadFragExpr(NewAssign, *NewFragment);
     NewAssign->setKillAddress();
-  }
+  };
+  for_each(Linked, InsertAssignForOverlap);
+  for_each(LinkedDPVAssigns, InsertAssignForOverlap);
 }
 
 static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
index 9117378568b7..f3e40a5cb809 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2323,8 +2323,8 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
   SmallVector<std::pair<SmallSetVector<Value *, 8>, bool>, 0> Result;
   for (auto [Set, HasReadsOutsideSet] : Sets) {
     SmallSetVector<Value *, 8> PointerMustAliases;
-    for (const auto &ASI : *Set)
-      PointerMustAliases.insert(ASI.getValue());
+    for (const auto &MemLoc : *Set)
+      PointerMustAliases.insert(const_cast<Value *>(MemLoc.Ptr));
     Result.emplace_back(std::move(PointerMustAliases), HasReadsOutsideSet);
   }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index c041e3621a16..bfe9374cf2f8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -452,6 +452,13 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
 
   BasicBlock *ExitBlock = L->getUniqueExitBlock();
 
+  // We can't directly branch to an EH pad. Don't bother handling this edge
+  // case.
+  if (ExitBlock && ExitBlock->isEHPad()) {
+    LLVM_DEBUG(dbgs() << "Cannot delete loop exiting to EH pad.\n");
+    return LoopDeletionResult::Unmodified;
+  }
+
   if (ExitBlock && isLoopNeverExecuted(L)) {
     LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!\n");
     // We need to forget the loop before setting the incoming values of the exit
@@ -487,13 +494,6 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
     return LoopDeletionResult::Unmodified;
   }
 
-  // We can't directly branch to an EH pad. Don't bother handling this edge
-  // case.
-  if (ExitBlock && ExitBlock->isEHPad()) {
-    LLVM_DEBUG(dbgs() << "Cannot delete loop exiting to EH pad.\n");
-    return LoopDeletionResult::Unmodified;
-  }
-
   // Finally, we have to check that the loop really is dead.
   bool Changed = false;
   if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader, LI)) {
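The DSE shortenAssignment change above (like several hunks in this merge) funnels two unrelated marker types through one generic lambda: both the intrinsic-based dbg.assign markers (DbgAssignIntrinsic *) and the new non-instruction DPValue records expose the same member functions, so the lambda is duck-typed over `auto *`. A self-contained sketch of the pattern with illustrative stand-in types:

#include <algorithm>
#include <vector>

struct IntrinsicMarker { void setKillAddress() {} };
struct RecordMarker   { void setKillAddress() {} };

void killAll(std::vector<IntrinsicMarker *> &A,
             std::vector<RecordMarker *> &B) {
  // One generic lambda, instantiated once per pointer type.
  auto Kill = [](auto *Assign) { Assign->setKillAddress(); };
  std::for_each(A.begin(), A.end(), Kill);
  std::for_each(B.begin(), B.end(), Kill);
}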
struct DVIRecoveryRec { DVIRecoveryRec(DbgValueInst *DbgValue) - : DVI(DbgValue), Expr(DbgValue->getExpression()), + : DbgRef(DbgValue), Expr(DbgValue->getExpression()), HadLocationArgList(false) {} + DVIRecoveryRec(DPValue *DPV) + : DbgRef(DPV), Expr(DPV->getExpression()), HadLocationArgList(false) {} - DbgValueInst *DVI; + PointerUnion<DbgValueInst *, DPValue *> DbgRef; DIExpression *Expr; bool HadLocationArgList; SmallVector<WeakVH, 2> LocationOps; @@ -6401,17 +6403,19 @@ static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) { /// Overwrites DVI with the location and Ops as the DIExpression. This will /// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands, /// because a DIArglist is not created for the first argument of the dbg.value. -static void updateDVIWithLocation(DbgValueInst &DVI, Value *Location, +template <typename T> +static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl<uint64_t> &Ops) { - assert( - numLLVMArgOps(Ops) == 0 && - "Expected expression that does not contain any DW_OP_llvm_arg operands."); - DVI.setRawLocation(ValueAsMetadata::get(Location)); - DVI.setExpression(DIExpression::get(DVI.getContext(), Ops)); + assert(numLLVMArgOps(Ops) == 0 && "Expected expression that does not " + "contain any DW_OP_llvm_arg operands."); + DbgVal.setRawLocation(ValueAsMetadata::get(Location)); + DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops)); + DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops)); } /// Overwrite DVI with locations placed into a DIArglist. -static void updateDVIWithLocations(DbgValueInst &DVI, +template <typename T> +static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl<Value *> &Locations, SmallVectorImpl<uint64_t> &Ops) { assert(numLLVMArgOps(Ops) != 0 && @@ -6421,8 +6425,8 @@ static void updateDVIWithLocations(DbgValueInst &DVI, for (Value *V : Locations) MetadataLocs.push_back(ValueAsMetadata::get(V)); auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs); - DVI.setRawLocation(llvm::DIArgList::get(DVI.getContext(), ValArrayRef)); - DVI.setExpression(DIExpression::get(DVI.getContext(), Ops)); + DbgVal.setRawLocation(llvm::DIArgList::get(DbgVal.getContext(), ValArrayRef)); + DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops)); } /// Write the new expression and new location ops for the dbg.value. If possible @@ -6433,30 +6437,37 @@ static void updateDVIWithLocations(DbgValueInst &DVI, static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, SmallVectorImpl<Value *> &NewLocationOps, SmallVectorImpl<uint64_t> &NewExpr) { - unsigned NumLLVMArgs = numLLVMArgOps(NewExpr); - if (NumLLVMArgs == 0) { - // Location assumed to be on the stack. - updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], NewExpr); - } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) { - // There is only a single DW_OP_llvm_arg at the start of the expression, - // so it can be omitted along with DIArglist. - assert(NewExpr[1] == 0 && - "Lone LLVM_arg in a DIExpression should refer to location-op 0."); - llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2)); - updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], ShortenedOps); - } else { - // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary. - updateDVIWithLocations(*DVIRec.DVI, NewLocationOps, NewExpr); - } + auto UpdateDbgValueInstImpl = [&](auto *DbgVal) { + unsigned NumLLVMArgs = numLLVMArgOps(NewExpr); + if (NumLLVMArgs == 0) { + // Location assumed to be on the stack. 
+ updateDVIWithLocation(*DbgVal, NewLocationOps[0], NewExpr); + } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) { + // There is only a single DW_OP_llvm_arg at the start of the expression, + // so it can be omitted along with DIArglist. + assert(NewExpr[1] == 0 && + "Lone LLVM_arg in a DIExpression should refer to location-op 0."); + llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2)); + updateDVIWithLocation(*DbgVal, NewLocationOps[0], ShortenedOps); + } else { + // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary. + updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr); + } - // If the DIExpression was previously empty then add the stack terminator. - // Non-empty expressions have only had elements inserted into them and so the - // terminator should already be present e.g. stack_value or fragment. - DIExpression *SalvageExpr = DVIRec.DVI->getExpression(); - if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) { - SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value}); - DVIRec.DVI->setExpression(SalvageExpr); - } + // If the DIExpression was previously empty then add the stack terminator. + // Non-empty expressions have only had elements inserted into them and so + // the terminator should already be present e.g. stack_value or fragment. + DIExpression *SalvageExpr = DbgVal->getExpression(); + if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) { + SalvageExpr = + DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value}); + DbgVal->setExpression(SalvageExpr); + } + }; + if (isa<DbgValueInst *>(DVIRec.DbgRef)) + UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef)); + else + UpdateDbgValueInstImpl(cast<DPValue *>(DVIRec.DbgRef)); } /// Cached location ops may be erased during LSR, in which case a poison is @@ -6470,40 +6481,49 @@ static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) { /// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values. static void restorePreTransformState(DVIRecoveryRec &DVIRec) { - LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n" - << "scev-salvage: post-LSR: " << *DVIRec.DVI << '\n'); - assert(DVIRec.Expr && "Expected an expression"); - DVIRec.DVI->setExpression(DVIRec.Expr); - - // Even a single location-op may be inside a DIArgList and referenced with - // DW_OP_LLVM_arg, which is valid only with a DIArgList. - if (!DVIRec.HadLocationArgList) { - assert(DVIRec.LocationOps.size() == 1 && - "Unexpected number of location ops."); - // LSR's unsuccessful salvage attempt may have added DIArgList, which in - // this case was not present before, so force the location back to a single - // uncontained Value. - Value *CachedValue = - getValueOrPoison(DVIRec.LocationOps[0], DVIRec.DVI->getContext()); - DVIRec.DVI->setRawLocation(ValueAsMetadata::get(CachedValue)); - } else { - SmallVector<ValueAsMetadata *, 3> MetadataLocs; - for (WeakVH VH : DVIRec.LocationOps) { - Value *CachedValue = getValueOrPoison(VH, DVIRec.DVI->getContext()); - MetadataLocs.push_back(ValueAsMetadata::get(CachedValue)); + auto RestorePreTransformStateImpl = [&](auto *DbgVal) { + LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n" + << "scev-salvage: post-LSR: " << *DbgVal << '\n'); + assert(DVIRec.Expr && "Expected an expression"); + DbgVal->setExpression(DVIRec.Expr); + + // Even a single location-op may be inside a DIArgList and referenced with + // DW_OP_LLVM_arg, which is valid only with a DIArgList. 
+ if (!DVIRec.HadLocationArgList) { + assert(DVIRec.LocationOps.size() == 1 && + "Unexpected number of location ops."); + // LSR's unsuccessful salvage attempt may have added DIArgList, which in + // this case was not present before, so force the location back to a + // single uncontained Value. + Value *CachedValue = + getValueOrPoison(DVIRec.LocationOps[0], DbgVal->getContext()); + DbgVal->setRawLocation(ValueAsMetadata::get(CachedValue)); + } else { + SmallVector<ValueAsMetadata *, 3> MetadataLocs; + for (WeakVH VH : DVIRec.LocationOps) { + Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext()); + MetadataLocs.push_back(ValueAsMetadata::get(CachedValue)); + } + auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs); + DbgVal->setRawLocation( + llvm::DIArgList::get(DbgVal->getContext(), ValArrayRef)); } - auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs); - DVIRec.DVI->setRawLocation( - llvm::DIArgList::get(DVIRec.DVI->getContext(), ValArrayRef)); - } - LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DVIRec.DVI << '\n'); + LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n'); + }; + if (isa<DbgValueInst *>(DVIRec.DbgRef)) + RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef)); + else + RestorePreTransformStateImpl(cast<DPValue *>(DVIRec.DbgRef)); } static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr) { - if (!DVIRec.DVI->isKillLocation()) + + if (isa<DbgValueInst *>(DVIRec.DbgRef) + ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation() + : !cast<DPValue *>(DVIRec.DbgRef)->isKillLocation()) return false; // LSR may have caused several changes to the dbg.value in the failed salvage @@ -6596,16 +6616,20 @@ static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, } UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr); - LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DVI << "\n"); + if (isa<DbgValueInst *>(DVIRec.DbgRef)) + LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " + << *cast<DbgValueInst *>(DVIRec.DbgRef) << "\n"); + else + LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " + << *cast<DPValue *>(DVIRec.DbgRef) << "\n"); return true; } /// Obtain an expression for the iteration count, then attempt to salvage the /// dbg.value intrinsics. -static void -DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, - llvm::PHINode *LSRInductionVar, - SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) { +static void DbgRewriteSalvageableDVIs( + llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, + SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) { if (DVIToUpdate.empty()) return; @@ -6647,48 +6671,56 @@ static void DbgGatherSalvagableDVI( SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) { for (const auto &B : L->getBlocks()) { for (auto &I : *B) { - auto DVI = dyn_cast<DbgValueInst>(&I); - if (!DVI) - continue; - // Ensure that if any location op is undef that the dbg.vlue is not - // cached. - if (DVI->isKillLocation()) - continue; - - // Check that the location op SCEVs are suitable for translation to - // DIExpression. 
- const auto &HasTranslatableLocationOps = - [&](const DbgValueInst *DVI) -> bool { - for (const auto LocOp : DVI->location_ops()) { - if (!LocOp) - return false; - - if (!SE.isSCEVable(LocOp->getType())) - return false; - - const SCEV *S = SE.getSCEV(LocOp); - if (SE.containsUndefs(S)) - return false; + auto ProcessDbgValue = [&](auto *DbgVal) -> bool { + // Ensure that if any location op is undef that the dbg.vlue is not + // cached. + if (DbgVal->isKillLocation()) + return false; + + // Check that the location op SCEVs are suitable for translation to + // DIExpression. + const auto &HasTranslatableLocationOps = + [&](const auto *DbgValToTranslate) -> bool { + for (const auto LocOp : DbgValToTranslate->location_ops()) { + if (!LocOp) + return false; + + if (!SE.isSCEVable(LocOp->getType())) + return false; + + const SCEV *S = SE.getSCEV(LocOp); + if (SE.containsUndefs(S)) + return false; + } + return true; + }; + + if (!HasTranslatableLocationOps(DbgVal)) + return false; + + std::unique_ptr<DVIRecoveryRec> NewRec = + std::make_unique<DVIRecoveryRec>(DbgVal); + // Each location Op may need a SCEVDbgValueBuilder in order to recover + // it. Pre-allocating a vector will enable quick lookups of the builder + // later during the salvage. + NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps()); + for (const auto LocOp : DbgVal->location_ops()) { + NewRec->SCEVs.push_back(SE.getSCEV(LocOp)); + NewRec->LocationOps.push_back(LocOp); + NewRec->HadLocationArgList = DbgVal->hasArgList(); } + SalvageableDVISCEVs.push_back(std::move(NewRec)); return true; }; - - if (!HasTranslatableLocationOps(DVI)) - continue; - - std::unique_ptr<DVIRecoveryRec> NewRec = - std::make_unique<DVIRecoveryRec>(DVI); - // Each location Op may need a SCEVDbgValueBuilder in order to recover it. - // Pre-allocating a vector will enable quick lookups of the builder later - // during the salvage. - NewRec->RecoveryExprs.resize(DVI->getNumVariableLocationOps()); - for (const auto LocOp : DVI->location_ops()) { - NewRec->SCEVs.push_back(SE.getSCEV(LocOp)); - NewRec->LocationOps.push_back(LocOp); - NewRec->HadLocationArgList = DVI->hasArgList(); + for (auto &DPV : I.getDbgValueRange()) { + if (DPV.isDbgValue() || DPV.isDbgAssign()) + ProcessDbgValue(&DPV); } - SalvageableDVISCEVs.push_back(std::move(NewRec)); - DVIHandles.insert(DVI); + auto DVI = dyn_cast<DbgValueInst>(&I); + if (!DVI) + continue; + if (ProcessDbgValue(DVI)) + DVIHandles.insert(DVI); } } } @@ -6816,7 +6848,8 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, // iteration. The simplest case to consider is a candidate IV which is // narrower than the trip count (and thus original IV), but this can // also happen due to non-unit strides on the candidate IVs. 
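
Context for the guard added just below: also requiring SE.isKnownNonZero on the step rules out candidate IVs that may never advance. The rationale sketched here is inferred from the check itself; a toy, non-LLVM illustration:

#include <cstdio>

int main() {
  const int Trip = 4;
  const int CandStep = 0; // a step that is not known non-zero (here: zero)
  int Cand = 0;
  for (int IV = 0; IV != Trip; ++IV)
    Cand += CandStep;
  // With a zero step the candidate's final value equals its initial value,
  // so an exit test rewritten as "Cand != <final value>" would be false on
  // entry (or, for a stuck nonzero target, never become true) instead of
  // firing after exactly Trip iterations.
  std::printf("candidate moved from 0 to %d over %d iterations\n", Cand, Trip);
}
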
- if (!AddRec->hasNoSelfWrap()) + if (!AddRec->hasNoSelfWrap() || + !SE.isKnownNonZero(AddRec->getStepRecurrence(SE))) continue; const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE); @@ -6984,7 +7017,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, assert(Expander.isSafeToExpand(TermValueS) && "Terminating value was checked safe in canFoldTerminatingCondition"); - // Create new terminating value at loop header + // Create new terminating value at loop preheader Value *TermValue = Expander.expandCodeFor(TermValueS, ToHelpFold->getType(), LoopPreheader->getTerminator()); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 9d5e6693c0e5..f39c24484840 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -258,13 +258,13 @@ bool LoopVersioningLICM::legalLoopMemoryAccesses() { // With MustAlias its not worth adding runtime bound check. if (AS.isMustAlias()) return false; - Value *SomePtr = AS.begin()->getValue(); + const Value *SomePtr = AS.begin()->Ptr; bool TypeCheck = true; // Check for Mod & MayAlias HasMayAlias |= AS.isMayAlias(); HasMod |= AS.isMod(); - for (const auto &A : AS) { - Value *Ptr = A.getValue(); + for (const auto &MemLoc : AS) { + const Value *Ptr = MemLoc.Ptr; // Alias tracker should have pointers of same data type. // // FIXME: check no longer effective since opaque pointers? diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 9d058e0d2483..805bbe40bd7c 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1297,9 +1297,9 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize); Value *MemsetLen = Builder.CreateSelect( Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff); - Instruction *NewMemSet = Builder.CreateMemSet( - Builder.CreateGEP(Builder.getInt8Ty(), Dest, SrcSize), - MemSet->getOperand(1), MemsetLen, Alignment); + Instruction *NewMemSet = + Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize), + MemSet->getOperand(1), MemsetLen, Alignment); assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && "MemCpy must be a MemoryDef"); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index b98f823ab00b..45ce3bf3ceae 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -967,6 +967,44 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache, return BDVState(BaseValue, BDVState::Base, BaseValue); }; + // Even though we have identified a concrete base (or a conflict) for all live + // pointers at this point, there are cases where the base is of an + // incompatible type compared to the original instruction. We conservatively + // mark those as conflicts to ensure that corresponding BDVs will be generated + // in the next steps. 
+ + // this is a rather explicit check for all cases where we should mark the + // state as a conflict to force the latter stages of the algorithm to emit + // the BDVs. + // TODO: in many cases the instructions emited for the conflicting states + // will be identical to the I itself (if the I's operate on their BDVs + // themselves). We should exploit this, but can't do it here since it would + // break the invariant about the BDVs not being known to be a base. + // TODO: the code also does not handle constants at all - the algorithm relies + // on all constants having the same BDV and therefore constant-only insns + // will never be in conflict, but this check is ignored here. If the + // constant conflicts will be to BDVs themselves, they will be identical + // instructions and will get optimized away (as in the above TODO) + auto MarkConflict = [&](Instruction *I, Value *BaseValue) { + // II and EE mixes vector & scalar so is always a conflict + if (isa<InsertElementInst>(I) || isa<ExtractElementInst>(I)) + return true; + // Shuffle vector is always a conflict as it creates new vector from + // existing ones. + if (isa<ShuffleVectorInst>(I)) + return true; + // Any instructions where the computed base type differs from the + // instruction type. An example is where an extract instruction is used by a + // select. Here the select's BDV is a vector (because of extract's BDV), + // while the select itself is a scalar type. Note that the IE and EE + // instruction check is not fully subsumed by the vector<->scalar check at + // the end, this is due to the BDV algorithm being ignorant of BDV types at + // this junction. + if (!areBothVectorOrScalar(BaseValue, I)) + return true; + return false; + }; + bool Progress = true; while (Progress) { #ifndef NDEBUG @@ -993,6 +1031,14 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache, NewState.meet(OpState); }); + // if the instruction has known base, but should in fact be marked as + // conflict because of incompatible in/out types, we mark it as such + // ensuring that it will propagate through the fixpoint iteration + auto I = cast<Instruction>(BDV); + auto BV = NewState.getBaseValue(); + if (BV && MarkConflict(I, BV)) + NewState = BDVState(I, BDVState::Conflict); + BDVState OldState = Pair.second; if (OldState != NewState) { Progress = true; @@ -1010,46 +1056,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache, for (const auto &Pair : States) { LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n"); } -#endif - - // Even though we have identified a concrete base (or a conflict) for all live - // pointers at this point, there are cases where the base is of an - // incompatible type compared to the original instruction. We conservatively - // mark those as conflicts to ensure that corresponding BDVs will be generated - // in the next steps. - - // this is a rather explicit check for all cases where we should mark the - // state as a conflict to force the latter stages of the algorithm to emit - // the BDVs. - // TODO: in many cases the instructions emited for the conflicting states - // will be identical to the I itself (if the I's operate on their BDVs - // themselves). We should expoit this, but can't do it here since it would - // break the invariant about the BDVs not being known to be a base. 
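
The shape of this change: the conflict test now runs inside the fixpoint loop, so a value that reaches a known base but has an incompatible type is demoted to Conflict immediately, and the demotion feeds later meet steps. A compact model of that iteration shape (a sketch only, not the pass's real lattice or types):

#include <iostream>
#include <map>
#include <string>

enum class State { Unknown, Base, Conflict };

int main() {
  std::map<std::string, State> States{{"phi", State::Unknown},
                                      {"sel", State::Unknown}};
  // Stand-in for the vector<->scalar / shufflevector tests in MarkConflict.
  auto ForcesConflict = [](const std::string &Name) { return Name == "phi"; };
  bool Progress = true;
  while (Progress) {
    Progress = false;
    for (auto &[Name, S] : States) {
      State New = (S == State::Unknown) ? State::Base : S; // toy meet()
      if (New == State::Base && ForcesConflict(Name))
        New = State::Conflict; // the check now applied inside the fixpoint
      if (New != S) {
        S = New;
        Progress = true;
      }
    }
  }
  for (const auto &[Name, S] : States)
    std::cout << Name << " -> "
              << (S == State::Conflict ? "conflict" : "base") << '\n';
}
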
- // TODO: the code also does not handle constants at all - the algorithm relies - // on all constants having the same BDV and therefore constant-only insns - // will never be in conflict, but this check is ignored here. If the - // constant conflicts will be to BDVs themselves, they will be identical - // instructions and will get optimized away (as in the above TODO) - auto MarkConflict = [&](Instruction *I, Value *BaseValue) { - // II and EE mixes vector & scalar so is always a conflict - if (isa<InsertElementInst>(I) || isa<ExtractElementInst>(I)) - return true; - // Shuffle vector is always a conflict as it creates new vector from - // existing ones. - if (isa<ShuffleVectorInst>(I)) - return true; - // Any instructions where the computed base type differs from the - // instruction type. An example is where an extract instruction is used by a - // select. Here the select's BDV is a vector (because of extract's BDV), - // while the select itself is a scalar type. Note that the IE and EE - // instruction check is not fully subsumed by the vector<->scalar check at - // the end, this is due to the BDV algorithm being ignorant of BDV types at - // this junction. - if (!areBothVectorOrScalar(BaseValue, I)) - return true; - return false; - }; + // since we do the conflict marking as part of the fixpoint iteration this + // loop only asserts that invariants are met for (auto Pair : States) { Instruction *I = cast<Instruction>(Pair.first); BDVState State = Pair.second; @@ -1061,18 +1070,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache, (!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) && "why did it get added?"); assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); - - // since we only mark vec-scalar insns as conflicts in the pass, our work is - // done if the instruction already conflicts - if (State.isConflict()) - continue; - - if (MarkConflict(I, BaseValue)) - States[I] = BDVState(I, BDVState::Conflict); } - -#ifndef NDEBUG - VerifyStates(); #endif // Insert Phis for all conflicts diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp index 75cddfa16d6d..bdbaf4f55c96 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp @@ -319,6 +319,29 @@ static DebugVariable getAggregateVariable(DbgVariableIntrinsic *DVI) { return DebugVariable(DVI->getVariable(), std::nullopt, DVI->getDebugLoc().getInlinedAt()); } +static DebugVariable getAggregateVariable(DPValue *DPV) { + return DebugVariable(DPV->getVariable(), std::nullopt, + DPV->getDebugLoc().getInlinedAt()); +} + +static DPValue *createLinkedAssign(DPValue *, DIBuilder &DIB, + Instruction *LinkedInstr, Value *NewValue, + DILocalVariable *Variable, + DIExpression *Expression, Value *Address, + DIExpression *AddressExpression, + const DILocation *DI) { + (void)DIB; + return DPValue::createLinkedDPVAssign(LinkedInstr, NewValue, Variable, + Expression, Address, AddressExpression, + DI); +} +static DbgAssignIntrinsic *createLinkedAssign( + DbgAssignIntrinsic *, DIBuilder &DIB, Instruction *LinkedInstr, + Value *NewValue, DILocalVariable *Variable, DIExpression *Expression, + Value *Address, DIExpression *AddressExpression, const DILocation *DI) { + return DIB.insertDbgAssign(LinkedInstr, NewValue, Variable, Expression, + Address, AddressExpression, DI); +} /// Find linked dbg.assign and generate a new one with the correct /// FragmentInfo. 
Link Inst to the new dbg.assign. If Value is nullptr the @@ -340,8 +363,9 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL) { auto MarkerRange = at::getAssignmentMarkers(OldInst); + auto DPVAssignMarkerRange = at::getDPVAssignmentMarkers(OldInst); // Nothing to do if OldInst has no linked dbg.assign intrinsics. - if (MarkerRange.empty()) + if (MarkerRange.empty() && DPVAssignMarkerRange.empty()) return; LLVM_DEBUG(dbgs() << " migrateDebugInfo\n"); @@ -362,6 +386,9 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, for (auto *DAI : at::getAssignmentMarkers(OldAlloca)) BaseFragments[getAggregateVariable(DAI)] = DAI->getExpression()->getFragmentInfo(); + for (auto *DPV : at::getDPVAssignmentMarkers(OldAlloca)) + BaseFragments[getAggregateVariable(DPV)] = + DPV->getExpression()->getFragmentInfo(); // The new inst needs a DIAssignID unique metadata tag (if OldInst has // one). It shouldn't already have one: assert this assumption. @@ -371,7 +398,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false); assert(OldAlloca->isStaticAlloca()); - for (DbgAssignIntrinsic *DbgAssign : MarkerRange) { + auto MigrateDbgAssign = [&](auto DbgAssign) { LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign << "\n"); auto *Expr = DbgAssign->getExpression(); @@ -382,7 +409,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, { auto R = BaseFragments.find(getAggregateVariable(DbgAssign)); if (R == BaseFragments.end()) - continue; + return; BaseFragment = R->second; } std::optional<DIExpression::FragmentInfo> CurrentFragment = @@ -393,7 +420,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, BaseFragment, CurrentFragment, NewFragment); if (Result == Skip) - continue; + return; if (Result == UseFrag && !(NewFragment == CurrentFragment)) { if (CurrentFragment) { // Rewrite NewFragment to be relative to the existing one (this is @@ -425,9 +452,10 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, } ::Value *NewValue = Value ? Value : DbgAssign->getValue(); - auto *NewAssign = DIB.insertDbgAssign( - Inst, NewValue, DbgAssign->getVariable(), Expr, Dest, - DIExpression::get(Ctx, std::nullopt), DbgAssign->getDebugLoc()); + auto *NewAssign = createLinkedAssign( + DbgAssign, DIB, Inst, NewValue, DbgAssign->getVariable(), Expr, Dest, + DIExpression::get(Expr->getContext(), std::nullopt), + DbgAssign->getDebugLoc()); // If we've updated the value but the original dbg.assign has an arglist // then kill it now - we can't use the requested new value. 
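
The createLinkedAssign pair above differs only in an otherwise-unused first parameter, which lets overload resolution steer a generic caller (the MigrateDbgAssign lambda further down) to the matching construction path at compile time. The idiom in miniature, with invented names:

#include <iostream>

struct IntrinsicMarker {};
struct RecordMarker {};

// Overloads distinguished only by the unused first (tag) parameter.
static const char *createLinked(IntrinsicMarker *, int LinkedID) {
  (void)LinkedID;
  return "DIBuilder-style construction";
}
static const char *createLinked(RecordMarker *, int LinkedID) {
  (void)LinkedID;
  return "DPValue-style construction";
}

// Generic caller: the same body resolves to a different overload per type.
template <typename MarkerT> static void migrate(MarkerT *Marker) {
  std::cout << createLinked(Marker, /*LinkedID=*/42) << '\n';
}

int main() {
  IntrinsicMarker IM;
  RecordMarker RM;
  migrate(&IM);
  migrate(&RM);
}
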
@@ -461,9 +489,11 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, NewAssign->moveBefore(DbgAssign); NewAssign->setDebugLoc(DbgAssign->getDebugLoc()); - LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign - << "\n"); - } + LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n"); + }; + + for_each(MarkerRange, MigrateDbgAssign); + for_each(DPVAssignMarkerRange, MigrateDbgAssign); } namespace { @@ -1903,8 +1933,8 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix) { if (Offset != 0) - Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset), - NamePrefix + "sroa_idx"); + Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset), + NamePrefix + "sroa_idx"); return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy, NamePrefix + "sroa_cast"); } @@ -2108,8 +2138,9 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, /// Test whether a vector type is viable for promotion. /// -/// This implements the necessary checking for \c isVectorPromotionViable over -/// all slices of the alloca for the given VectorType. +/// This implements the necessary checking for \c checkVectorTypesForPromotion +/// (and thus isVectorPromotionViable) over all slices of the alloca for the +/// given VectorType. static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy, const DataLayout &DL) { uint64_t ElementSize = @@ -2134,6 +2165,98 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy, return true; } +/// Test whether any vector type in \p CandidateTys is viable for promotion. +/// +/// This implements the necessary checking for \c isVectorPromotionViable over +/// all slices of the alloca for the given VectorType. +static VectorType * +checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, + SmallVectorImpl<VectorType *> &CandidateTys, + bool HaveCommonEltTy, Type *CommonEltTy, + bool HaveVecPtrTy, bool HaveCommonVecPtrTy, + VectorType *CommonVecPtrTy) { + // If we didn't find a vector type, nothing to do here. + if (CandidateTys.empty()) + return nullptr; + + // Pointer-ness is sticky, if we had a vector-of-pointers candidate type, + // then we should choose it, not some other alternative. + // But, we can't perform a no-op pointer address space change via bitcast, + // so if we didn't have a common pointer element type, bail. + if (HaveVecPtrTy && !HaveCommonVecPtrTy) + return nullptr; + + // Try to pick the "best" element type out of the choices. + if (!HaveCommonEltTy && HaveVecPtrTy) { + // If there was a pointer element type, there's really only one choice. + CandidateTys.clear(); + CandidateTys.push_back(CommonVecPtrTy); + } else if (!HaveCommonEltTy && !HaveVecPtrTy) { + // Integer-ify vector types. + for (VectorType *&VTy : CandidateTys) { + if (!VTy->getElementType()->isIntegerTy()) + VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy( + VTy->getContext(), VTy->getScalarSizeInBits()))); + } + + // Rank the remaining candidate vector types. This is easy because we know + // they're all integer vectors. We sort by ascending number of elements. 
+ auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) { + (void)DL; + assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() == + DL.getTypeSizeInBits(LHSTy).getFixedValue() && + "Cannot have vector types of different sizes!"); + assert(RHSTy->getElementType()->isIntegerTy() && + "All non-integer types eliminated!"); + assert(LHSTy->getElementType()->isIntegerTy() && + "All non-integer types eliminated!"); + return cast<FixedVectorType>(RHSTy)->getNumElements() < + cast<FixedVectorType>(LHSTy)->getNumElements(); + }; + auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) { + (void)DL; + assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() == + DL.getTypeSizeInBits(LHSTy).getFixedValue() && + "Cannot have vector types of different sizes!"); + assert(RHSTy->getElementType()->isIntegerTy() && + "All non-integer types eliminated!"); + assert(LHSTy->getElementType()->isIntegerTy() && + "All non-integer types eliminated!"); + return cast<FixedVectorType>(RHSTy)->getNumElements() == + cast<FixedVectorType>(LHSTy)->getNumElements(); + }; + llvm::sort(CandidateTys, RankVectorTypesComp); + CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(), + RankVectorTypesEq), + CandidateTys.end()); + } else { +// The only way to have the same element type in every vector type is to +// have the same vector type. Check that and remove all but one. +#ifndef NDEBUG + for (VectorType *VTy : CandidateTys) { + assert(VTy->getElementType() == CommonEltTy && + "Unaccounted for element type!"); + assert(VTy == CandidateTys[0] && + "Different vector types with the same element type!"); + } +#endif + CandidateTys.resize(1); + } + + // FIXME: hack. Do we have a named constant for this? + // SDAG SDNode can't have more than 65535 operands. + llvm::erase_if(CandidateTys, [](VectorType *VTy) { + return cast<FixedVectorType>(VTy)->getNumElements() > + std::numeric_limits<unsigned short>::max(); + }); + + for (VectorType *VTy : CandidateTys) + if (checkVectorTypeForPromotion(P, VTy, DL)) + return VTy; + + return nullptr; +} + /// Test whether the given alloca partitioning and range of slices can be /// promoted to a vector. /// @@ -2181,6 +2304,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { } } }; + // Put load and store types into a set for de-duplication. for (const Slice &S : P) { Type *Ty; @@ -2195,6 +2319,12 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset()) CheckCandidateType(Ty); } + + if (auto *VTy = checkVectorTypesForPromotion( + P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, + HaveCommonVecPtrTy, CommonVecPtrTy)) + return VTy; + // Consider additional vector types where the element type size is a // multiple of load/store element size. for (Type *Ty : LoadStoreTys) { @@ -2204,6 +2334,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { // Make a copy of CandidateTys and iterate through it, because we might // append to CandidateTys in the loop. SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys; + CandidateTys.clear(); for (VectorType *&VTy : CandidateTysCopy) { unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue(); unsigned ElementSize = @@ -2216,86 +2347,9 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { } } - // If we didn't find a vector type, nothing to do here. 
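
RankVectorTypesComp and RankVectorTypesEq drive the standard sort-unique-erase deduplication; note that the equality predicate must induce the same equivalence classes as the comparator, or std::unique will miss duplicates. The same pattern, reduced to plain element counts:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  // Element counts standing in for the candidate VectorTypes.
  std::vector<unsigned> NumElts{8, 2, 4, 2, 8};
  auto Comp = [](unsigned L, unsigned R) { return L < R; };
  auto Eq = [](unsigned L, unsigned R) { return L == R; };
  std::sort(NumElts.begin(), NumElts.end(), Comp);
  NumElts.erase(std::unique(NumElts.begin(), NumElts.end(), Eq),
                NumElts.end());
  for (unsigned N : NumElts)
    std::cout << N << ' '; // prints: 2 4 8
  std::cout << '\n';
}
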
- if (CandidateTys.empty()) - return nullptr; - - // Pointer-ness is sticky, if we had a vector-of-pointers candidate type, - // then we should choose it, not some other alternative. - // But, we can't perform a no-op pointer address space change via bitcast, - // so if we didn't have a common pointer element type, bail. - if (HaveVecPtrTy && !HaveCommonVecPtrTy) - return nullptr; - - // Try to pick the "best" element type out of the choices. - if (!HaveCommonEltTy && HaveVecPtrTy) { - // If there was a pointer element type, there's really only one choice. - CandidateTys.clear(); - CandidateTys.push_back(CommonVecPtrTy); - } else if (!HaveCommonEltTy && !HaveVecPtrTy) { - // Integer-ify vector types. - for (VectorType *&VTy : CandidateTys) { - if (!VTy->getElementType()->isIntegerTy()) - VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy( - VTy->getContext(), VTy->getScalarSizeInBits()))); - } - - // Rank the remaining candidate vector types. This is easy because we know - // they're all integer vectors. We sort by ascending number of elements. - auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) { - (void)DL; - assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() == - DL.getTypeSizeInBits(LHSTy).getFixedValue() && - "Cannot have vector types of different sizes!"); - assert(RHSTy->getElementType()->isIntegerTy() && - "All non-integer types eliminated!"); - assert(LHSTy->getElementType()->isIntegerTy() && - "All non-integer types eliminated!"); - return cast<FixedVectorType>(RHSTy)->getNumElements() < - cast<FixedVectorType>(LHSTy)->getNumElements(); - }; - auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) { - (void)DL; - assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() == - DL.getTypeSizeInBits(LHSTy).getFixedValue() && - "Cannot have vector types of different sizes!"); - assert(RHSTy->getElementType()->isIntegerTy() && - "All non-integer types eliminated!"); - assert(LHSTy->getElementType()->isIntegerTy() && - "All non-integer types eliminated!"); - return cast<FixedVectorType>(RHSTy)->getNumElements() == - cast<FixedVectorType>(LHSTy)->getNumElements(); - }; - llvm::sort(CandidateTys, RankVectorTypesComp); - CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(), - RankVectorTypesEq), - CandidateTys.end()); - } else { -// The only way to have the same element type in every vector type is to -// have the same vector type. Check that and remove all but one. -#ifndef NDEBUG - for (VectorType *VTy : CandidateTys) { - assert(VTy->getElementType() == CommonEltTy && - "Unaccounted for element type!"); - assert(VTy == CandidateTys[0] && - "Different vector types with the same element type!"); - } -#endif - CandidateTys.resize(1); - } - - // FIXME: hack. Do we have a named constant for this? - // SDAG SDNode can't have more than 65535 operands. - llvm::erase_if(CandidateTys, [](VectorType *VTy) { - return cast<FixedVectorType>(VTy)->getNumElements() > - std::numeric_limits<unsigned short>::max(); - }); - - for (VectorType *VTy : CandidateTys) - if (checkVectorTypeForPromotion(P, VTy, DL)) - return VTy; - - return nullptr; + return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy, + CommonEltTy, HaveVecPtrTy, + HaveCommonVecPtrTy, CommonVecPtrTy); } /// Test whether a slice of an alloca is valid for integer widening. @@ -3108,6 +3162,7 @@ private: // emit dbg.assign intrinsics for mem intrinsics storing through non- // constant geps, or storing a variable number of bytes. 
assert(at::getAssignmentMarkers(&II).empty() && + at::getDPVAssignmentMarkers(&II).empty() && "AT: Unexpected link to non-const GEP"); deleteIfTriviallyDead(OldPtr); return false; @@ -3254,11 +3309,13 @@ private: Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); if (IsDest) { // Update the address component of linked dbg.assigns. - for (auto *DAI : at::getAssignmentMarkers(&II)) { - if (llvm::is_contained(DAI->location_ops(), II.getDest()) || - DAI->getAddress() == II.getDest()) - DAI->replaceVariableLocationOp(II.getDest(), AdjustedPtr); - } + auto UpdateAssignAddress = [&](auto *DbgAssign) { + if (llvm::is_contained(DbgAssign->location_ops(), II.getDest()) || + DbgAssign->getAddress() == II.getDest()) + DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr); + }; + for_each(at::getAssignmentMarkers(&II), UpdateAssignAddress); + for_each(at::getDPVAssignmentMarkers(&II), UpdateAssignAddress); II.setDest(AdjustedPtr); II.setDestAlignment(SliceAlign); } else { @@ -3842,6 +3899,7 @@ private: DL); } else { assert(at::getAssignmentMarkers(Store).empty() && + at::getDPVAssignmentMarkers(Store).empty() && "AT: unexpected debug.assign linked to store through " "unbounded GEP"); } @@ -4861,10 +4919,22 @@ static void insertNewDbgInst(DIBuilder &DIB, DPValue *Orig, AllocaInst *NewAddr, DIExpression *NewFragmentExpr, Instruction *BeforeInst) { (void)DIB; - DPValue *New = new DPValue(ValueAsMetadata::get(NewAddr), Orig->getVariable(), - NewFragmentExpr, Orig->getDebugLoc(), - DPValue::LocationType::Declare); - BeforeInst->getParent()->insertDPValueBefore(New, BeforeInst->getIterator()); + if (Orig->isDbgDeclare()) { + DPValue *DPV = DPValue::createDPVDeclare( + NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc()); + BeforeInst->getParent()->insertDPValueBefore(DPV, + BeforeInst->getIterator()); + return; + } + if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) { + NewAddr->setMetadata(LLVMContext::MD_DIAssignID, + DIAssignID::getDistinct(NewAddr->getContext())); + } + auto *NewAssign = DPValue::createLinkedDPVAssign( + NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr, + Orig->getAddressExpression(), Orig->getDebugLoc()); + LLVM_DEBUG(dbgs() << "Created new DPVAssign: " << *NewAssign << "\n"); + (void)NewAssign; } /// Walks the slices of an alloca and form partitions based on them, @@ -5021,9 +5091,6 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { // Remove any existing intrinsics on the new alloca describing // the variable fragment. - SmallVector<DbgDeclareInst *, 1> FragDbgDeclares; - SmallVector<DPValue *, 1> FragDPVs; - findDbgDeclares(FragDbgDeclares, Fragment.Alloca, &FragDPVs); auto RemoveOne = [DbgVariable](auto *OldDII) { auto SameVariableFragment = [](const auto *LHS, const auto *RHS) { return LHS->getVariable() == RHS->getVariable() && @@ -5033,8 +5100,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { if (SameVariableFragment(OldDII, DbgVariable)) OldDII->eraseFromParent(); }; - for_each(FragDbgDeclares, RemoveOne); - for_each(FragDPVs, RemoveOne); + for_each(findDbgDeclares(Fragment.Alloca), RemoveOne); + for_each(findDPVDeclares(Fragment.Alloca), RemoveOne); insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, FragmentExpr, &AI); } @@ -5042,12 +5109,10 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { // Migrate debug information from the old alloca to the new alloca(s) // and the individual partitions. 
- SmallVector<DbgDeclareInst *, 1> DbgDeclares; - SmallVector<DPValue *, 1> DPValues; - findDbgDeclares(DbgDeclares, &AI, &DPValues); - for_each(DbgDeclares, MigrateOne); - for_each(DPValues, MigrateOne); + for_each(findDbgDeclares(&AI), MigrateOne); + for_each(findDPVDeclares(&AI), MigrateOne); for_each(at::getAssignmentMarkers(&AI), MigrateOne); + for_each(at::getDPVAssignmentMarkers(&AI), MigrateOne); return Changed; } @@ -5169,12 +5234,9 @@ bool SROA::deleteDeadInstructions( // not be able to find it. if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { DeletedAllocas.insert(AI); - SmallVector<DbgDeclareInst *, 1> DbgDeclares; - SmallVector<DPValue *, 1> DPValues; - findDbgDeclares(DbgDeclares, AI, &DPValues); - for (DbgDeclareInst *OldDII : DbgDeclares) + for (DbgDeclareInst *OldDII : findDbgDeclares(AI)) OldDII->eraseFromParent(); - for (DPValue *OldDII : DPValues) + for (DPValue *OldDII : findDPVDeclares(AI)) OldDII->eraseFromParent(); } @@ -5271,8 +5333,9 @@ std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) { "Should not have modified the CFG when told to preserve it."); if (Changed && isAssignmentTrackingEnabled(*F.getParent())) { - for (auto &BB : F) + for (auto &BB : F) { RemoveRedundantDbgInstrs(&BB); + } } return {Changed, CFGChanged}; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index d2fed11445e4..17c466f38c9c 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -896,8 +896,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( } } // Create an ugly GEP with a single index for each index. - ResultPtr = - Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep"); + ResultPtr = Builder.CreatePtrAdd(ResultPtr, Idx, "uglygep"); if (FirstResult == nullptr) FirstResult = ResultPtr; } @@ -906,8 +905,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( // Create a GEP with the constant offset index. 
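
On the recurring CreateGEP(Builder.getInt8Ty(), ...) to CreatePtrAdd rewrites in these hunks: an i8-typed GEP indexes in raw bytes, so the wrapper expresses plain pointer-plus-byte-offset arithmetic. A plain C++ rendering of that semantics (illustrative only):

#include <iostream>

// An i8 GEP indexes in raw bytes, which is exactly pointer-plus-offset.
static void *ptradd(void *Base, long Bytes) {
  return static_cast<char *>(Base) + Bytes;
}

int main() {
  int Arr[4] = {0, 0, 0, 0};
  void *P = ptradd(Arr, 2 * sizeof(int)); // byte offset to Arr[2]
  std::cout << (P == &Arr[2]) << '\n';    // prints: 1
}
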
if (AccumulativeByteOffset != 0) { Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset); - ResultPtr = - Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep"); + ResultPtr = Builder.CreatePtrAdd(ResultPtr, Offset, "uglygep"); } else isSwapCandidate = false; @@ -1107,9 +1105,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { Type *PtrIdxTy = DL->getIndexType(GEP->getType()); IRBuilder<> Builder(GEP); - NewGEP = cast<Instruction>(Builder.CreateGEP( - Builder.getInt8Ty(), NewGEP, - {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, + NewGEP = cast<Instruction>(Builder.CreatePtrAdd( + NewGEP, ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true), GEP->getName(), GEPWasInBounds)); NewGEP->copyMetadata(*GEP); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 2cce6eb22341..75910d7b698a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -656,8 +656,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( case Candidate::GEP: { bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); // C = (char *)Basis + Bump - Reduced = - Builder.CreateGEP(Builder.getInt8Ty(), Basis.Ins, Bump, "", InBounds); + Reduced = Builder.CreatePtrAdd(Basis.Ins, Bump, "", InBounds); break; } default: diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 8b5a6d618412..ec0482ac2cde 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -405,10 +405,17 @@ static bool DPValuesRemoveRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) { // If the same variable fragment is described more than once it is enough // to keep the last one (i.e. the first found since we for reverse // iteration). - // FIXME: add assignment tracking support (see parallel implementation - // below). - if (!R.second) - ToBeRemoved.push_back(&DPV); + if (R.second) + continue; + + if (DPV.isDbgAssign()) { + // Don't delete dbg.assign intrinsics that are linked to instructions. + if (!at::getAssignmentInsts(&DPV).empty()) + continue; + // Unlinked dbg.assign intrinsics can be treated like dbg.values. + } + + ToBeRemoved.push_back(&DPV); continue; } // Sequence with consecutive dbg.value instrs ended. Clear the map to @@ -495,14 +502,25 @@ static bool DPValuesRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { DebugVariable Key(DPV.getVariable(), std::nullopt, DPV.getDebugLoc()->getInlinedAt()); auto VMI = VariableMap.find(Key); + // A dbg.assign with no linked instructions can be treated like a + // dbg.value (i.e. can be deleted). + bool IsDbgValueKind = + (!DPV.isDbgAssign() || at::getAssignmentInsts(&DPV).empty()); + // Update the map if we found a new value/expression describing the // variable, or if the variable wasn't mapped already. 
SmallVector<Value *, 4> Values(DPV.location_ops()); if (VMI == VariableMap.end() || VMI->second.first != Values || VMI->second.second != DPV.getExpression()) { - VariableMap[Key] = {Values, DPV.getExpression()}; + if (IsDbgValueKind) + VariableMap[Key] = {Values, DPV.getExpression()}; + else + VariableMap[Key] = {Values, nullptr}; continue; } + // Don't delete dbg.assign intrinsics that are linked to instructions. + if (!IsDbgValueKind) + continue; // Found an identical mapping. Remember the instruction for later removal. ToBeRemoved.push_back(&DPV); } @@ -514,6 +532,42 @@ static bool DPValuesRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { return !ToBeRemoved.empty(); } +static bool DPValuesRemoveUndefDbgAssignsFromEntryBlock(BasicBlock *BB) { + assert(BB->isEntryBlock() && "expected entry block"); + SmallVector<DPValue *, 8> ToBeRemoved; + DenseSet<DebugVariable> SeenDefForAggregate; + // Returns the DebugVariable for DVI with no fragment info. + auto GetAggregateVariable = [](const DPValue &DPV) { + return DebugVariable(DPV.getVariable(), std::nullopt, + DPV.getDebugLoc().getInlinedAt()); + }; + + // Remove undef dbg.assign intrinsics that are encountered before + // any non-undef intrinsics from the entry block. + for (auto &I : *BB) { + for (DPValue &DPV : I.getDbgValueRange()) { + if (!DPV.isDbgValue() && !DPV.isDbgAssign()) + continue; + bool IsDbgValueKind = + (DPV.isDbgValue() || at::getAssignmentInsts(&DPV).empty()); + DebugVariable Aggregate = GetAggregateVariable(DPV); + if (!SeenDefForAggregate.contains(Aggregate)) { + bool IsKill = DPV.isKillLocation() && IsDbgValueKind; + if (!IsKill) { + SeenDefForAggregate.insert(Aggregate); + } else if (DPV.isDbgAssign()) { + ToBeRemoved.push_back(&DPV); + } + } + } + } + + for (DPValue *DPV : ToBeRemoved) + DPV->eraseFromParent(); + + return !ToBeRemoved.empty(); +} + static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { if (BB->IsNewDbgInfoFormat) return DPValuesRemoveRedundantDbgInstrsUsingForwardScan(BB); @@ -536,7 +590,7 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { SmallVector<Value *, 4> Values(DVI->getValues()); if (VMI == VariableMap.end() || VMI->second.first != Values || VMI->second.second != DVI->getExpression()) { - // Use a sentinal value (nullptr) for the DIExpression when we see a + // Use a sentinel value (nullptr) for the DIExpression when we see a // linked dbg.assign so that the next debug intrinsic will never match // it (i.e. always treat linked dbg.assigns as if they're unique). if (IsDbgValueKind) @@ -578,7 +632,10 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { /// then (only) the instruction marked with (*) can be removed. /// Possible improvements: /// - Keep track of non-overlapping fragments. 
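
DPValuesRemoveUndefDbgAssignsFromEntryBlock above keeps the first real definition seen per aggregate variable and drops kill-location dbg.assigns that precede it. The SeenDefForAggregate bookkeeping, modelled standalone with invented field names:

#include <iostream>
#include <set>
#include <string>
#include <vector>

struct Marker {
  std::string Var; // aggregate variable (fragment info stripped)
  bool IsKill;     // kill/undef location
  bool IsAssign;   // dbg.assign rather than dbg.value
};

int main() {
  std::vector<Marker> EntryBlock{{"x", true, true},   // undef assign first
                                 {"x", false, true},  // first real def of x
                                 {"x", true, true}};  // after a def: kept
  std::set<std::string> SeenDefForAggregate;
  unsigned Removed = 0;
  for (const Marker &M : EntryBlock) {
    if (SeenDefForAggregate.count(M.Var))
      continue; // a def was already seen; later kills stay meaningful
    if (!M.IsKill)
      SeenDefForAggregate.insert(M.Var);
    else if (M.IsAssign)
      ++Removed; // undef assign before any def: delete it
  }
  std::cout << "removed " << Removed << " undef dbg.assign(s)\n"; // 1
}
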
-static bool remomveUndefDbgAssignsFromEntryBlock(BasicBlock *BB) { +static bool removeUndefDbgAssignsFromEntryBlock(BasicBlock *BB) { + if (BB->IsNewDbgInfoFormat) + return DPValuesRemoveUndefDbgAssignsFromEntryBlock(BB); + assert(BB->isEntryBlock() && "expected entry block"); SmallVector<DbgAssignIntrinsic *, 8> ToBeRemoved; DenseSet<DebugVariable> SeenDefForAggregate; @@ -629,7 +686,7 @@ bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) { MadeChanges |= removeRedundantDbgInstrsUsingBackwardScan(BB); if (BB->isEntryBlock() && isAssignmentTrackingEnabled(*BB->getParent()->getParent())) - MadeChanges |= remomveUndefDbgAssignsFromEntryBlock(BB); + MadeChanges |= removeUndefDbgAssignsFromEntryBlock(BB); MadeChanges |= removeRedundantDbgInstrsUsingForwardScan(BB); if (MadeChanges) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp index f5abed0dd517..278111883459 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1589,11 +1589,14 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, for (auto &DPV : I.getDbgValueRange()) { // Apply the two updates that dbg.values get: invalid operands, and // variable metadata fixup. - // FIXME: support dbg.assign form of DPValues. if (any_of(DPV.location_ops(), IsInvalidLocation)) { DPVsToDelete.push_back(&DPV); continue; } + if (DPV.isDbgAssign() && IsInvalidLocation(DPV.getAddress())) { + DPVsToDelete.push_back(&DPV); + continue; + } if (!DPV.getDebugLoc().getInlinedAt()) DPV.setVariable(GetUpdatedDIVariable(DPV.getVariable())); DPV.setDebugLoc(DebugLoc::replaceInlinedAtSubprogram(DPV.getDebugLoc(), @@ -1735,13 +1738,9 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, NumExitBlocks = ExitBlocks.size(); for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - OldTargets.push_back(OldTarget); - } + for (BasicBlock *OldTarget : successors(Block)) + if (!Blocks.contains(OldTarget)) + OldTargets.push_back(OldTarget); } // If we have to split PHI nodes of the entry or exit blocks, do so now. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp index ea3135630665..9bfac2ac9167 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/VFABIDemangler.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp index 39d5f6e53c1d..d4d4bf5ebdf3 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1789,13 +1789,15 @@ static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL, continue; // Find all local variables associated with the backing storage. 
- for (auto *DAI : at::getAssignmentMarkers(Base)) { + auto CollectAssignsForStorage = [&](auto *DbgAssign) { // Skip variables from inlined functions - they are not local variables. - if (DAI->getDebugLoc().getInlinedAt()) - continue; - LLVM_DEBUG(errs() << " > DEF : " << *DAI << "\n"); - EscapedLocals[Base].insert(at::VarRecord(DAI)); - } + if (DbgAssign->getDebugLoc().getInlinedAt()) + return; + LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n"); + EscapedLocals[Base].insert(at::VarRecord(DbgAssign)); + }; + for_each(at::getAssignmentMarkers(Base), CollectAssignsForStorage); + for_each(at::getDPVAssignmentMarkers(Base), CollectAssignsForStorage); } return EscapedLocals; } @@ -1827,6 +1829,10 @@ static void fixupAssignments(Function::iterator Start, Function::iterator End) { // attachment or use, replace it with a new version. for (auto BBI = Start; BBI != End; ++BBI) { for (Instruction &I : *BBI) { + for (DPValue &DPV : I.getDbgValueRange()) { + if (DPV.isDbgAssign()) + DPV.setAssignId(GetNewID(DPV.getAssignID())); + } if (auto *ID = I.getMetadata(LLVMContext::MD_DIAssignID)) I.setMetadata(LLVMContext::MD_DIAssignID, GetNewID(ID)); else if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I)) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp index b9cad764aaef..459e3d980592 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp @@ -1724,20 +1724,6 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, SI->getIterator()); } -namespace llvm { -// RemoveDIs: duplicate the getDebugValueLoc method using DPValues instead of -// dbg.value intrinsics. In llvm namespace so that it overloads the -// DbgVariableIntrinsic version. -static DebugLoc getDebugValueLoc(DPValue *DPV) { - // Original dbg.declare must have a location. - const DebugLoc &DeclareLoc = DPV->getDebugLoc(); - MDNode *Scope = DeclareLoc.getScope(); - DILocation *InlinedAt = DeclareLoc.getInlinedAt(); - // Produce an unknown location with the correct scope / inlinedAt fields. - return DILocation::get(DPV->getContext(), 0, 0, Scope, InlinedAt); -} -} // namespace llvm - /// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value /// that has an associated llvm.dbg.declare intrinsic. 
void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, @@ -1767,7 +1753,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, void llvm::ConvertDebugDeclareToDebugValue(DPValue *DPV, StoreInst *SI, DIBuilder &Builder) { - assert(DPV->isAddressOfVariable()); + assert(DPV->isAddressOfVariable() || DPV->isDbgAssign()); auto *DIVar = DPV->getVariable(); assert(DIVar && "Missing variable"); auto *DIExpr = DPV->getExpression(); @@ -2130,9 +2116,8 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB, bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, DIBuilder &Builder, uint8_t DIExprFlags, int Offset) { - SmallVector<DbgDeclareInst *, 1> DbgDeclares; - SmallVector<DPValue *, 1> DPValues; - findDbgDeclares(DbgDeclares, Address, &DPValues); + TinyPtrVector<DbgDeclareInst *> DbgDeclares = findDbgDeclares(Address); + TinyPtrVector<DPValue *> DPVDeclares = findDPVDeclares(Address); auto ReplaceOne = [&](auto *DII) { assert(DII->getVariable() && "Missing variable"); @@ -2143,9 +2128,9 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, }; for_each(DbgDeclares, ReplaceOne); - for_each(DPValues, ReplaceOne); + for_each(DPVDeclares, ReplaceOne); - return !DbgDeclares.empty() || !DPValues.empty(); + return !DbgDeclares.empty() || !DPVDeclares.empty(); } static void updateOneDbgValueForAlloca(const DebugLoc &Loc, @@ -2204,14 +2189,13 @@ void llvm::salvageDebugInfo(Instruction &I) { salvageDebugInfoForDbgValues(I, DbgUsers, DPUsers); } -/// Salvage the address component of \p DAI. -static void salvageDbgAssignAddress(DbgAssignIntrinsic *DAI) { - Instruction *I = dyn_cast<Instruction>(DAI->getAddress()); +template <typename T> static void salvageDbgAssignAddress(T *Assign) { + Instruction *I = dyn_cast<Instruction>(Assign->getAddress()); // Only instructions can be salvaged at the moment. if (!I) return; - assert(!DAI->getAddressExpression()->getFragmentInfo().has_value() && + assert(!Assign->getAddressExpression()->getFragmentInfo().has_value() && "address-expression shouldn't have fragment info"); // The address component of a dbg.assign cannot be variadic. @@ -2225,16 +2209,16 @@ static void salvageDbgAssignAddress(DbgAssignIntrinsic *DAI) { return; DIExpression *SalvagedExpr = DIExpression::appendOpsToArg( - DAI->getAddressExpression(), Ops, 0, /*StackValue=*/false); + Assign->getAddressExpression(), Ops, 0, /*StackValue=*/false); assert(!SalvagedExpr->getFragmentInfo().has_value() && "address-expression shouldn't have fragment info"); // Salvage succeeds if no additional values are required. if (AdditionalValues.empty()) { - DAI->setAddress(NewV); - DAI->setAddressExpression(SalvagedExpr); + Assign->setAddress(NewV); + Assign->setAddressExpression(SalvagedExpr); } else { - DAI->setKillAddress(); + Assign->setKillAddress(); } } @@ -2308,10 +2292,19 @@ void llvm::salvageDebugInfoForDbgValues( } // Duplicate of above block for DPValues. for (auto *DPV : DPUsers) { + if (DPV->isDbgAssign()) { + if (DPV->getAddress() == &I) { + salvageDbgAssignAddress(DPV); + Salvaged = true; + } + if (DPV->getValue() != &I) + continue; + } + // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they // are implicitly pointing out the value as a DWARF memory location // description. 
- bool StackValue = DPV->getType() == DPValue::LocationType::Value; + bool StackValue = DPV->getType() != DPValue::LocationType::Declare; auto DPVLocation = DPV->location_ops(); assert( is_contained(DPVLocation, &I) && @@ -2345,7 +2338,7 @@ void llvm::salvageDebugInfoForDbgValues( SalvagedExpr->getNumElements() <= MaxExpressionSize; if (AdditionalValues.empty() && IsValidSalvageExpr) { DPV->setExpression(SalvagedExpr); - } else if (DPV->getType() == DPValue::LocationType::Value && + } else if (DPV->getType() != DPValue::LocationType::Declare && IsValidSalvageExpr && DPV->getNumVariableLocationOps() + AdditionalValues.size() <= MaxDebugArgs) { @@ -2355,8 +2348,7 @@ void llvm::salvageDebugInfoForDbgValues( // currently only valid for stack value expressions. // Also do not salvage if the resulting DIArgList would contain an // unreasonably large number of values. - Value *Undef = UndefValue::get(I.getOperand(0)->getType()); - DPV->replaceVariableLocationOp(I.getOperand(0), Undef); + DPV->setKillLocation(); } LLVM_DEBUG(dbgs() << "SALVAGE: " << DPV << '\n'); Salvaged = true; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp index 47c6bcbaf26e..d671a9373bf0 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp @@ -321,9 +321,6 @@ void MemoryOpRemark::visitVariable(const Value *V, bool FoundDI = false; // Try to get an llvm.dbg.declare, which has a DILocalVariable giving us the // real debug info name and size of the variable. - SmallVector<DbgDeclareInst *, 1> DbgDeclares; - SmallVector<DPValue *, 1> DPValues; - findDbgDeclares(DbgDeclares, const_cast<Value *>(V), &DPValues); auto FindDI = [&](const auto *DVI) { if (DILocalVariable *DILV = DVI->getVariable()) { std::optional<uint64_t> DISize = getSizeInBytes(DILV->getSizeInBits()); @@ -334,8 +331,8 @@ void MemoryOpRemark::visitVariable(const Value *V, } } }; - for_each(DbgDeclares, FindDI); - for_each(DPValues, FindDI); + for_each(findDbgDeclares(const_cast<Value *>(V)), FindDI); + for_each(findDPVDeclares(const_cast<Value *>(V)), FindDI); if (FoundDI) { assert(!Result.empty()); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 7de0959ca57e..209a6a34a3c9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -329,35 +329,6 @@ std::string llvm::getUniqueModuleId(Module *M) { return ("." + Str).str(); } -void VFABI::setVectorVariantNames(CallInst *CI, - ArrayRef<std::string> VariantMappings) { - if (VariantMappings.empty()) - return; - - SmallString<256> Buffer; - llvm::raw_svector_ostream Out(Buffer); - for (const std::string &VariantMapping : VariantMappings) - Out << VariantMapping << ","; - // Get rid of the trailing ','. 
- assert(!Buffer.str().empty() && "Must have at least one char."); - Buffer.pop_back(); - - Module *M = CI->getModule(); -#ifndef NDEBUG - for (const std::string &VariantMapping : VariantMappings) { - LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n"); - std::optional<VFInfo> VI = - VFABI::tryDemangleForVFABI(VariantMapping, CI->getFunctionType()); - assert(VI && "Cannot add an invalid VFABI name."); - assert(M->getNamedValue(VI->VectorName) && - "Cannot add variant to attribute: " - "vector function declaration is missing."); - } -#endif - CI->addFnAttr( - Attribute::get(M->getContext(), MappingsAttrName, Buffer.str())); -} - void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName, Align Alignment) { // Embed the memory buffer into the module. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MoveAutoInit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MoveAutoInit.cpp index a977ad87b79f..9a5dba219cee 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MoveAutoInit.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MoveAutoInit.cpp @@ -164,6 +164,9 @@ static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) { if (TransitiveSuccessors.count(Pred)) continue; + if (!DT.isReachableFromEntry(Pred)) + continue; + DominatingPredecessor = DominatingPredecessor ? DT.findNearestCommonDominator(DominatingPredecessor, Pred) @@ -178,9 +181,10 @@ static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) { // CatchSwitchInst blocks can only have one instruction, so they are not // good candidates for insertion. - while (isa<CatchSwitchInst>(UsersDominator->getFirstInsertionPt())) { + while (isa<CatchSwitchInst>(UsersDominator->getFirstNonPHI())) { for (BasicBlock *Pred : predecessors(UsersDominator)) - UsersDominator = DT.findNearestCommonDominator(UsersDominator, Pred); + if (DT.isReachableFromEntry(Pred)) + UsersDominator = DT.findNearestCommonDominator(UsersDominator, Pred); } // We finally found a place where I can be moved while not introducing extra diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp index f41a14cdfbec..9655cb9cf6f4 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp @@ -54,7 +54,7 @@ public: Hasher.final(Hash); SmallString<32> Result; MD5::stringifyResult(Hash, Result); - TheHash = std::string(Result.str()); + TheHash = std::string(Result); return TheHash; } }; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 717b6d301c8c..88b05aab8db4 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -101,12 +101,30 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { namespace { +static DPValue *createDebugValue(DIBuilder &DIB, Value *NewValue, + DILocalVariable *Variable, + DIExpression *Expression, const DILocation *DI, + DPValue *InsertBefore) { + (void)DIB; + return DPValue::createDPValue(NewValue, Variable, Expression, DI, + *InsertBefore); +} +static DbgValueInst *createDebugValue(DIBuilder &DIB, Value *NewValue, + DILocalVariable *Variable, + DIExpression *Expression, + const DILocation *DI, + Instruction *InsertBefore) { + return 
static_cast<DbgValueInst *>(DIB.insertDbgValueIntrinsic( + NewValue, Variable, Expression, DI, InsertBefore)); +} + /// Helper for updating assignment tracking debug info when promoting allocas. class AssignmentTrackingInfo { /// DbgAssignIntrinsics linked to the alloca with at most one per variable /// fragment. (i.e. not be a comprehensive set if there are multiple /// dbg.assigns for one variable fragment). SmallVector<DbgVariableIntrinsic *> DbgAssigns; + SmallVector<DPValue *> DPVAssigns; public: void init(AllocaInst *AI) { @@ -115,16 +133,21 @@ public: if (Vars.insert(DebugVariable(DAI)).second) DbgAssigns.push_back(DAI); } + for (DPValue *DPV : at::getDPVAssignmentMarkers(AI)) { + if (Vars.insert(DebugVariable(DPV)).second) + DPVAssigns.push_back(DPV); + } } /// Update assignment tracking debug info given for the to-be-deleted store /// \p ToDelete that stores to this alloca. - void updateForDeletedStore( - StoreInst *ToDelete, DIBuilder &DIB, - SmallSet<DbgAssignIntrinsic *, 8> *DbgAssignsToDelete) const { + void + updateForDeletedStore(StoreInst *ToDelete, DIBuilder &DIB, + SmallSet<DbgAssignIntrinsic *, 8> *DbgAssignsToDelete, + SmallSet<DPValue *, 8> *DPVAssignsToDelete) const { // There's nothing to do if the alloca doesn't have any variables using // assignment tracking. - if (DbgAssigns.empty()) + if (DbgAssigns.empty() && DPVAssigns.empty()) return; // Insert a dbg.value where the linked dbg.assign is and remember to delete @@ -134,13 +157,17 @@ public: // dbg.assign for each variable fragment for the untracked store handling // (after this loop). SmallSet<DebugVariableAggregate, 2> VarHasDbgAssignForStore; - for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(ToDelete)) { - VarHasDbgAssignForStore.insert(DebugVariableAggregate(DAI)); - DbgAssignsToDelete->insert(DAI); - DIB.insertDbgValueIntrinsic(DAI->getValue(), DAI->getVariable(), - DAI->getExpression(), DAI->getDebugLoc(), - DAI); - } + auto InsertValueForAssign = [&](auto *DbgAssign, auto *&AssignList) { + VarHasDbgAssignForStore.insert(DebugVariableAggregate(DbgAssign)); + AssignList->insert(DbgAssign); + createDebugValue(DIB, DbgAssign->getValue(), DbgAssign->getVariable(), + DbgAssign->getExpression(), DbgAssign->getDebugLoc(), + DbgAssign); + }; + for (auto *Assign : at::getAssignmentMarkers(ToDelete)) + InsertValueForAssign(Assign, DbgAssignsToDelete); + for (auto *Assign : at::getDPVAssignmentMarkers(ToDelete)) + InsertValueForAssign(Assign, DPVAssignsToDelete); // It's possible for variables using assignment tracking to have no // dbg.assign linked to this store. These are variables in DbgAssigns that @@ -150,11 +177,13 @@ public: // cannot be represented using assignment tracking (non-const offset or // size) or one that is trackable but has had its DIAssignID attachment // dropped accidentally. - for (auto *DAI : DbgAssigns) { - if (VarHasDbgAssignForStore.contains(DebugVariableAggregate(DAI))) - continue; - ConvertDebugDeclareToDebugValue(DAI, ToDelete, DIB); - } + auto ConvertUnlinkedAssignToValue = [&](auto *Assign) { + if (VarHasDbgAssignForStore.contains(DebugVariableAggregate(Assign))) + return; + ConvertDebugDeclareToDebugValue(Assign, ToDelete, DIB); + }; + for_each(DbgAssigns, ConvertUnlinkedAssignToValue); + for_each(DPVAssigns, ConvertUnlinkedAssignToValue); } /// Update assignment tracking debug info given for the newly inserted PHI \p @@ -165,10 +194,15 @@ public: // debug-phi. 
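
Note that updateForDeletedStore never erases a marker mid-walk: it records victims in DbgAssignsToDelete and DPVAssignsToDelete, and cleanUpDbgAssigns erases them once no iteration over the lists is in flight. That collect-then-erase discipline in a self-contained form:

#include <iostream>
#include <list>
#include <set>

int main() {
  std::list<int> Markers{1, 2, 3, 4};
  std::set<const int *> ToDelete;
  // Phase one: walk and record, mirroring updateForDeletedStore.
  for (const int &M : Markers)
    if (M % 2 == 0)
      ToDelete.insert(&M);
  // Phase two: erase after iteration, mirroring cleanUpDbgAssigns.
  Markers.remove_if([&](const int &M) { return ToDelete.count(&M) != 0; });
  std::cout << Markers.size() << " marker(s) left\n"; // 2 marker(s) left
}
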
for (auto *DAI : DbgAssigns) ConvertDebugDeclareToDebugValue(DAI, NewPhi, DIB); + for (auto *DPV : DPVAssigns) + ConvertDebugDeclareToDebugValue(DPV, NewPhi, DIB); } - void clear() { DbgAssigns.clear(); } - bool empty() { return DbgAssigns.empty(); } + void clear() { + DbgAssigns.clear(); + DPVAssigns.clear(); + } + bool empty() { return DbgAssigns.empty() && DPVAssigns.empty(); } }; struct AllocaInfo { @@ -229,11 +263,15 @@ struct AllocaInfo { } } DbgUserVec AllDbgUsers; - findDbgUsers(AllDbgUsers, AI, &DPUsers); + SmallVector<DPValue *> AllDPUsers; + findDbgUsers(AllDbgUsers, AI, &AllDPUsers); std::copy_if(AllDbgUsers.begin(), AllDbgUsers.end(), std::back_inserter(DbgUsers), [](DbgVariableIntrinsic *DII) { return !isa<DbgAssignIntrinsic>(DII); }); + std::copy_if(AllDPUsers.begin(), AllDPUsers.end(), + std::back_inserter(DPUsers), + [](DPValue *DPV) { return !DPV->isDbgAssign(); }); AssignmentTracking.init(AI); } }; @@ -341,6 +379,7 @@ struct PromoteMem2Reg { /// A set of dbg.assigns to delete because they've been demoted to /// dbg.values. Call cleanUpDbgAssigns to delete them. SmallSet<DbgAssignIntrinsic *, 8> DbgAssignsToDelete; + SmallSet<DPValue *, 8> DPVAssignsToDelete; /// The set of basic blocks the renamer has already visited. SmallPtrSet<BasicBlock *, 16> Visited; @@ -390,6 +429,9 @@ private: for (auto *DAI : DbgAssignsToDelete) DAI->eraseFromParent(); DbgAssignsToDelete.clear(); + for (auto *DPV : DPVAssignsToDelete) + DPV->eraseFromParent(); + DPVAssignsToDelete.clear(); } }; @@ -462,10 +504,12 @@ static void removeIntrinsicUsers(AllocaInst *AI) { /// false there were some loads which were not dominated by the single store /// and thus must be phi-ed with undef. We fall back to the standard alloca /// promotion algorithm in that case. -static bool rewriteSingleStoreAlloca( - AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, const DataLayout &DL, - DominatorTree &DT, AssumptionCache *AC, - SmallSet<DbgAssignIntrinsic *, 8> *DbgAssignsToDelete) { +static bool +rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, + const DataLayout &DL, DominatorTree &DT, + AssumptionCache *AC, + SmallSet<DbgAssignIntrinsic *, 8> *DbgAssignsToDelete, + SmallSet<DPValue *, 8> *DPVAssignsToDelete) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); BasicBlock *StoreBB = OnlyStore->getParent(); @@ -525,8 +569,8 @@ static bool rewriteSingleStoreAlloca( DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); // Update assignment tracking info for the store we're going to delete. - Info.AssignmentTracking.updateForDeletedStore(Info.OnlyStore, DIB, - DbgAssignsToDelete); + Info.AssignmentTracking.updateForDeletedStore( + Info.OnlyStore, DIB, DbgAssignsToDelete, DPVAssignsToDelete); // Record debuginfo for the store and remove the declaration's // debuginfo. @@ -570,10 +614,12 @@ static bool rewriteSingleStoreAlloca( /// use(t); /// *A = 42; /// } -static bool promoteSingleBlockAlloca( - AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, - const DataLayout &DL, DominatorTree &DT, AssumptionCache *AC, - SmallSet<DbgAssignIntrinsic *, 8> *DbgAssignsToDelete) { +static bool +promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, + LargeBlockInfo &LBI, const DataLayout &DL, + DominatorTree &DT, AssumptionCache *AC, + SmallSet<DbgAssignIntrinsic *, 8> *DbgAssignsToDelete, + SmallSet<DPValue *, 8> *DPVAssignsToDelete) { // The trickiest case to handle is when we have large blocks. 
Because of this, // this code is optimized assuming that large blocks happen. This does not // significantly pessimize the small block case. This uses LargeBlockInfo to @@ -637,8 +683,8 @@ static bool promoteSingleBlockAlloca( while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->user_back()); // Update assignment tracking info for the store we're going to delete. - Info.AssignmentTracking.updateForDeletedStore(SI, DIB, DbgAssignsToDelete); - + Info.AssignmentTracking.updateForDeletedStore(SI, DIB, DbgAssignsToDelete, + DPVAssignsToDelete); // Record debuginfo for the store before removing it. auto DbgUpdateForStore = [&](auto &Container) { for (auto *DbgItem : Container) { @@ -710,7 +756,7 @@ void PromoteMem2Reg::run() { // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC, - &DbgAssignsToDelete)) { + &DbgAssignsToDelete, &DPVAssignsToDelete)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; @@ -722,7 +768,7 @@ void PromoteMem2Reg::run() { // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock && promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC, - &DbgAssignsToDelete)) { + &DbgAssignsToDelete, &DPVAssignsToDelete)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; @@ -1128,8 +1174,8 @@ NextIteration: // Record debuginfo for the store before removing it. IncomingLocs[AllocaNo] = SI->getDebugLoc(); - AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB, - &DbgAssignsToDelete); + AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB, &DbgAssignsToDelete, + &DPVAssignsToDelete); auto ConvertDbgDeclares = [&](auto &Container) { for (auto *DbgItem : Container) if (DbgItem->isAddressOfVariable()) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index cd3ac317cd23..a1d7f0f9ba0f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -169,12 +169,8 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { // during expansion. if (Op == Instruction::IntToPtr) { auto *PtrTy = cast<PointerType>(Ty); - if (DL.isNonIntegralPointerType(PtrTy)) { - assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 && - "alloc size of i8 must by 1 byte for the GEP to be correct"); - return Builder.CreateGEP( - Builder.getInt8Ty(), Constant::getNullValue(PtrTy), V, "scevgep"); - } + if (DL.isNonIntegralPointerType(PtrTy)) + return Builder.CreatePtrAdd(Constant::getNullValue(PtrTy), V, "scevgep"); } // Short-circuit unnecessary bitcasts. if (Op == Instruction::BitCast) { @@ -321,7 +317,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) { // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast<Constant>(V)) if (Constant *CRHS = dyn_cast<Constant>(Idx)) - return Builder.CreateGEP(Builder.getInt8Ty(), CLHS, CRHS); + return Builder.CreatePtrAdd(CLHS, CRHS); // Do a quick scan to see if we have this GEP nearby. If so, reuse it. unsigned ScanLimit = 6; @@ -358,7 +354,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) { } // Emit a GEP. 
- return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "scevgep"); + return Builder.CreatePtrAdd(V, Idx, "scevgep"); } /// PickMostRelevantLoop - Given two loops pick the one that's most relevant for @@ -2123,9 +2119,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, if (isa<PointerType>(ARTy)) { Value *NegMulV = Builder.CreateNeg(MulV); if (NeedPosCheck) - Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV); + Add = Builder.CreatePtrAdd(StartValue, MulV); if (NeedNegCheck) - Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV); + Sub = Builder.CreatePtrAdd(StartValue, NegMulV); } else { if (NeedPosCheck) Add = Builder.CreateAdd(StartValue, MulV); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7515e539e7fb..13eae549b2ce 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1770,6 +1770,11 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf( Locs.push_back(I1->getDebugLoc()); for (auto *OtherSuccTI : OtherSuccTIs) Locs.push_back(OtherSuccTI->getDebugLoc()); + // Also clone DPValues from the existing terminator, and all others (to + // duplicate existing hoisting behaviour). + NT->cloneDebugInfoFrom(I1); + for (Instruction *OtherSuccTI : OtherSuccTIs) + NT->cloneDebugInfoFrom(OtherSuccTI); NT->setDebugLoc(DILocation::getMergedLocations(Locs)); // PHIs created below will adopt NT's merged DebugLoc. @@ -3101,10 +3106,12 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, // %merge = select %cond, %two, %one // store %merge, %x.dest, !DIAssignID !2 // dbg.assign %merge, "x", ..., !2 - for (auto *DAI : at::getAssignmentMarkers(SpeculatedStore)) { - if (llvm::is_contained(DAI->location_ops(), OrigV)) - DAI->replaceVariableLocationOp(OrigV, S); - } + auto replaceVariable = [OrigV, S](auto *DbgAssign) { + if (llvm::is_contained(DbgAssign->location_ops(), OrigV)) + DbgAssign->replaceVariableLocationOp(OrigV, S); + }; + for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable); + for_each(at::getDPVAssignmentMarkers(SpeculatedStore), replaceVariable); } // Metadata can be dependent on the condition we are hoisting above. @@ -3133,7 +3140,9 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, // instructions, in the same way that dbg.value intrinsics are dropped at the // end of this block. 
for (auto &It : make_range(ThenBB->begin(), ThenBB->end())) - It.dropDbgValues(); + for (DPValue &DPV : make_early_inc_range(It.getDbgValueRange())) + if (!DPV.isDbgAssign()) + It.dropOneDbgValue(&DPV); BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(), std::prev(ThenBB->end())); @@ -5414,13 +5423,11 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { } static void createUnreachableSwitchDefault(SwitchInst *Switch, - DomTreeUpdater *DTU, - bool RemoveOrigDefaultBlock = true) { + DomTreeUpdater *DTU) { LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); auto *BB = Switch->getParent(); auto *OrigDefaultBlock = Switch->getDefaultDest(); - if (RemoveOrigDefaultBlock) - OrigDefaultBlock->removePredecessor(BB); + OrigDefaultBlock->removePredecessor(BB); BasicBlock *NewDefaultBlock = BasicBlock::Create( BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), OrigDefaultBlock); @@ -5429,8 +5436,7 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch, if (DTU) { SmallVector<DominatorTree::UpdateType, 2> Updates; Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); - if (RemoveOrigDefaultBlock && - !is_contained(successors(BB), OrigDefaultBlock)) + if (!is_contained(successors(BB), OrigDefaultBlock)) Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); DTU->applyUpdates(Updates); } @@ -5612,28 +5618,10 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, Known.getBitWidth() - (Known.Zero | Known.One).popcount(); assert(NumUnknownBits <= Known.getBitWidth()); if (HasDefault && DeadCases.empty() && - NumUnknownBits < 64 /* avoid overflow */) { - uint64_t AllNumCases = 1ULL << NumUnknownBits; - if (SI->getNumCases() == AllNumCases) { - createUnreachableSwitchDefault(SI, DTU); - return true; - } - // When only one case value is missing, replace default with that case. - // Eliminating the default branch will provide more opportunities for - // optimization, such as lookup tables. - if (SI->getNumCases() == AllNumCases - 1) { - assert(NumUnknownBits > 1 && "Should be canonicalized to a branch"); - uint64_t MissingCaseVal = 0; - for (const auto &Case : SI->cases()) - MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue(); - auto *MissingCase = - cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal)); - SwitchInstProfUpdateWrapper SIW(*SI); - SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0)); - createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false); - SIW.setSuccessorWeight(0, 0); - return true; - } + NumUnknownBits < 64 /* avoid overflow */ && + SI->getNumCases() == (1ULL << NumUnknownBits)) { + createUnreachableSwitchDefault(SI, DTU); + return true; } if (DeadCases.empty()) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 42e7c4006b42..0ed3324a27b6 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -1753,7 +1753,7 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri } // This narrow use can be widened by a sext if it's non-negative or its narrow - // def was widended by a sext. Same for zext. + // def was widened by a sext. Same for zext. 
+  // def was widened by a sext. Same for zext.
auto canWidenBySExt = [&]() { return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign; }; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index a7cd68e860e4..52eef9ab58a4 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -2495,13 +2495,17 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { // log(pow(x,y)) -> y*log(x) AttributeList NoAttrs; - if (ArgLb == PowLb || ArgID == Intrinsic::pow) { + if (ArgLb == PowLb || ArgID == Intrinsic::pow || ArgID == Intrinsic::powi) { Value *LogX = Log->doesNotAccessMemory() ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), Arg->getOperand(0), "log") : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, NoAttrs); - Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul"); + Value *Y = Arg->getArgOperand(1); + // Cast exponent to FP if integer. + if (ArgID == Intrinsic::powi) + Y = B.CreateSIToFP(Y, Ty, "cast"); + Value *MulY = B.CreateFMul(Y, LogX, "mul"); // Since pow() may have side effects, e.g. errno, // dead code elimination may not be trusted to remove it. substituteInParent(Arg, MulY); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp index 71d0f09e4771..380541ffdd49 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -544,6 +544,16 @@ void Mapper::remapDPValue(DPValue &V) { V.setVariable(cast<DILocalVariable>(MappedVar)); V.setDebugLoc(DebugLoc(cast<DILocation>(MappedDILoc))); + bool IgnoreMissingLocals = Flags & RF_IgnoreMissingLocals; + + if (V.isDbgAssign()) { + auto *NewAddr = mapValue(V.getAddress()); + if (!IgnoreMissingLocals && !NewAddr) + V.setKillAddress(); + else if (NewAddr) + V.setAddress(NewAddr); + } + // Find Value operands and remap those. SmallVector<Value *, 4> Vals, NewVals; for (Value *Val : V.location_ops()) @@ -555,8 +565,6 @@ void Mapper::remapDPValue(DPValue &V) { if (Vals == NewVals) return; - bool IgnoreMissingLocals = Flags & RF_IgnoreMissingLocals; - // Otherwise, do some replacement. 
if (!IgnoreMissingLocals && llvm::any_of(NewVals, [&](Value *V) { return V == nullptr; })) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index cff72ae263d8..a7ebf78e54ce 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -148,26 +148,27 @@ public: VPInstruction *createOverflowingOp(unsigned Opcode, std::initializer_list<VPValue *> Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, - DebugLoc DL, const Twine &Name = "") { + DebugLoc DL = {}, const Twine &Name = "") { return tryInsertInstruction( new VPInstruction(Opcode, Operands, WrapFlags, DL, Name)); } - VPValue *createNot(VPValue *Operand, DebugLoc DL, const Twine &Name = "") { + VPValue *createNot(VPValue *Operand, DebugLoc DL = {}, + const Twine &Name = "") { return createInstruction(VPInstruction::Not, {Operand}, DL, Name); } - VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL, + VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {}, const Twine &Name = "") { return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name); } - VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL, + VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {}, const Twine &Name = "") { return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name); } VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, - DebugLoc DL, const Twine &Name = "", + DebugLoc DL = {}, const Twine &Name = "", std::optional<FastMathFlags> FMFs = std::nullopt) { auto *Select = FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9743fa0e7402..6ca93e15719f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2346,9 +2346,8 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue, auto *Offset = CreateMul(Index, Step); return CreateAdd(StartValue, Offset); } - case InductionDescriptor::IK_PtrInduction: { - return B.CreateGEP(B.getInt8Ty(), StartValue, CreateMul(Index, Step)); - } + case InductionDescriptor::IK_PtrInduction: + return B.CreatePtrAdd(StartValue, CreateMul(Index, Step)); case InductionDescriptor::IK_FpInduction: { assert(!isa<VectorType>(Index->getType()) && "Vector indices not supported for FP inductions yet"); @@ -6950,10 +6949,25 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, Op2Info.Kind = TargetTransformInfo::OK_UniformValue; SmallVector<const Value *, 4> Operands(I->operand_values()); - return TTI.getArithmeticInstrCost( + auto InstrCost = TTI.getArithmeticInstrCost( I->getOpcode(), VectorTy, CostKind, {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, Op2Info, Operands, I); + + // Some targets can replace frem with vector library calls. 
+ InstructionCost VecCallCost = InstructionCost::getInvalid(); + if (I->getOpcode() == Instruction::FRem) { + LibFunc Func; + if (TLI->getLibFunc(I->getOpcode(), I->getType(), Func) && + TLI->isFunctionVectorizable(TLI->getName(Func), VF)) { + SmallVector<Type *, 4> OpTypes; + for (auto &Op : I->operands()) + OpTypes.push_back(Op->getType()); + VecCallCost = + TTI.getCallInstrCost(nullptr, VectorTy, OpTypes, CostKind); + } + } + return std::min(InstrCost, VecCallCost); } case Instruction::FNeg: { return TTI.getArithmeticInstrCost( @@ -8247,7 +8261,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, }, Range); if (ShouldUseVectorIntrinsic) - return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), ID); + return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), ID, + CI->getDebugLoc()); Function *Variant = nullptr; std::optional<unsigned> MaskPos; @@ -8300,7 +8315,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, } return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), - Intrinsic::not_intrinsic, Variant); + Intrinsic::not_intrinsic, CI->getDebugLoc(), + Variant); } return nullptr; @@ -8949,16 +8965,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( "AnyOf reductions are not allowed for in-loop reductions"); // Collect the chain of "link" recipes for the reduction starting at PhiR. - SetVector<VPRecipeBase *> Worklist; + SetVector<VPSingleDefRecipe *> Worklist; Worklist.insert(PhiR); for (unsigned I = 0; I != Worklist.size(); ++I) { - VPRecipeBase *Cur = Worklist[I]; - for (VPUser *U : Cur->getVPSingleValue()->users()) { - auto *UserRecipe = dyn_cast<VPRecipeBase>(U); - if (!UserRecipe) + VPSingleDefRecipe *Cur = Worklist[I]; + for (VPUser *U : Cur->users()) { + auto *UserRecipe = dyn_cast<VPSingleDefRecipe>(U); + if (!UserRecipe) { + assert(isa<VPLiveOut>(U) && + "U must either be a VPSingleDef or VPLiveOut"); continue; - assert(UserRecipe->getNumDefinedValues() == 1 && - "recipes must define exactly one result value"); + } Worklist.insert(UserRecipe); } } @@ -8968,10 +8985,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // (PreviousLink) to tell which of the two operands of a Link will remain // scalar and which will be reduced. For minmax by select(cmp), Link will be // the select instructions. - VPRecipeBase *PreviousLink = PhiR; // Aka Worklist[0]. - for (VPRecipeBase *CurrentLink : Worklist.getArrayRef().drop_front()) { - VPValue *PreviousLinkV = PreviousLink->getVPSingleValue(); - + VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0]. + for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef().drop_front()) { Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr(); // Index of the first operand which holds a non-mask vector operand. @@ -8986,7 +9001,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( "Expected instruction to be a call to the llvm.fmuladd intrinsic"); assert(((MinVF.isScalar() && isa<VPReplicateRecipe>(CurrentLink)) || isa<VPWidenCallRecipe>(CurrentLink)) && - CurrentLink->getOperand(2) == PreviousLinkV && + CurrentLink->getOperand(2) == PreviousLink && "expected a call where the previous link is the added operand"); // If the instruction is a call to the llvm.fmuladd intrinsic then we @@ -9017,15 +9032,15 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // Note that for non-commutable operands (cmp-selects), the semantics of // the cmp-select are captured in the recurrence kind. 
unsigned VecOpId = - CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLinkV + CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink ? IndexOfFirstOperand + 1 : IndexOfFirstOperand; VecOp = CurrentLink->getOperand(VecOpId); - assert(VecOp != PreviousLinkV && + assert(VecOp != PreviousLink && CurrentLink->getOperand(CurrentLink->getNumOperands() - 1 - (VecOpId - IndexOfFirstOperand)) == - PreviousLinkV && - "PreviousLinkV must be the operand other than VecOp"); + PreviousLink && + "PreviousLink must be the operand other than VecOp"); } BasicBlock *BB = CurrentLinkI->getParent(); @@ -9037,19 +9052,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( } VPReductionRecipe *RedRecipe = new VPReductionRecipe( - RdxDesc, CurrentLinkI, PreviousLinkV, VecOp, CondOp); + RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp); // Append the recipe to the end of the VPBasicBlock because we need to // ensure that it comes after all of it's inputs, including CondOp. // Note that this transformation may leave over dead recipes (including // CurrentLink), which will be cleaned by a later VPlan transform. LinkVPBB->appendRecipe(RedRecipe); - CurrentLink->getVPSingleValue()->replaceAllUsesWith(RedRecipe); + CurrentLink->replaceAllUsesWith(RedRecipe); PreviousLink = RedRecipe; } } - Builder.setInsertPoint(&*LatchVPBB->begin()); - for (VPRecipeBase &R : - Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { + Builder.setInsertPoint(&*LatchVPBB->begin()); + for (VPRecipeBase &R : + Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R); if (!PhiR) continue; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 055fbb00871f..601d2454c1e1 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7379,6 +7379,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { continue; if (Idx >= static_cast<int>(CommonVF)) Idx = E1Mask[Idx - CommonVF] + VF; + else + Idx = E1Mask[Idx]; } CommonVF = VF; } @@ -12986,8 +12988,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { for (ScheduleData *BundleMember = Picked; BundleMember; BundleMember = BundleMember->NextInBundle) { Instruction *PickedInst = BundleMember->Inst; - if (PickedInst->getNextNode() != LastScheduledInst) - PickedInst->moveBefore(LastScheduledInst); + if (PickedInst->getNextNonDebugInstruction() != LastScheduledInst) + PickedInst->moveAfter(LastScheduledInst->getPrevNode()); LastScheduledInst = PickedInst; } @@ -13181,7 +13183,7 @@ void BoUpSLP::computeMinimumValueSizes() { // We only attempt to truncate integer expressions. auto &TreeRoot = VectorizableTree[0]->Scalars; auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType()); - if (!TreeRootIT) + if (!TreeRootIT || VectorizableTree.front()->State == TreeEntry::NeedToGather) return; // Ensure the roots of the vectorizable tree don't form a cycle. @@ -14792,8 +14794,17 @@ public: LocalExternallyUsedValues[RdxVal]; // Update LocalExternallyUsedValues for the scalar, replaced by // extractelement instructions. 
+ DenseMap<Value *, Value *> ReplacementToExternal; + for (const std::pair<Value *, Value *> &Pair : ReplacedExternals) + ReplacementToExternal.try_emplace(Pair.second, Pair.first); for (const std::pair<Value *, Value *> &Pair : ReplacedExternals) { - auto *It = ExternallyUsedValues.find(Pair.first); + Value *Ext = Pair.first; + auto RIt = ReplacementToExternal.find(Ext); + while (RIt != ReplacementToExternal.end()) { + Ext = RIt->second; + RIt = ReplacementToExternal.find(Ext); + } + auto *It = ExternallyUsedValues.find(Ext); if (It == ExternallyUsedValues.end()) continue; LocalExternallyUsedValues[Pair.second].append(It->second); @@ -15214,6 +15225,19 @@ private: assert(IsSupportedHorRdxIdentityOp && "The optimization of matched scalar identity horizontal reductions " "must be supported."); + auto *VTy = cast<FixedVectorType>(VectorizedValue->getType()); + if (VTy->getElementType() != VL.front()->getType()) { + VectorizedValue = Builder.CreateIntCast( + VectorizedValue, + FixedVectorType::get(VL.front()->getType(), VTy->getNumElements()), + any_of(VL, [&](Value *R) { + KnownBits Known = computeKnownBits( + R, cast<Instruction>(ReductionOps.front().front()) + ->getModule() + ->getDataLayout()); + return !Known.isNonNegative(); + })); + } switch (RdxKind) { case RecurKind::Add: { // root = mul prev_root, <1, 1, n, 1> @@ -16217,10 +16241,13 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { SetVector<Value *> Candidates(GEPList.begin(), GEPList.end()); // Some of the candidates may have already been vectorized after we - // initially collected them. If so, they are marked as deleted, so remove - // them from the set of candidates. - Candidates.remove_if( - [&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); }); + // initially collected them or their index is optimized to constant value. + // If so, they are marked as deleted, so remove them from the set of + // candidates. + Candidates.remove_if([&R](Value *I) { + return R.isDeleted(cast<Instruction>(I)) || + isa<Constant>(cast<GetElementPtrInst>(I)->idx_begin()->get()); + }); // Remove from the set of candidates all pairs of getelementptrs with // constant differences. Such getelementptrs are likely not good diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp index b6e56c47c227..3eeb1a6948f2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1136,29 +1136,18 @@ void VPlanIngredient::print(raw_ostream &O) const { template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); void VPValue::replaceAllUsesWith(VPValue *New) { - if (this == New) - return; - for (unsigned J = 0; J < getNumUsers();) { - VPUser *User = Users[J]; - bool RemovedUser = false; - for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) - if (User->getOperand(I) == this) { - User->setOperand(I, New); - RemovedUser = true; - } - // If a user got removed after updating the current user, the next user to - // update will be moved to the current position, so we only need to - // increment the index if the number of users did not change. 
- if (!RemovedUser) - J++; - } + replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; }); } void VPValue::replaceUsesWithIf( VPValue *New, llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) { + // Note that this early exit is required for correctness; the implementation + // below relies on the number of users for this VPValue to decrease, which + // isn't the case if this == New. if (this == New) return; + for (unsigned J = 0; J < getNumUsers();) { VPUser *User = Users[J]; bool RemovedUser = false; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h index 4b4f4911eb64..0c6214868d84 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h @@ -12,10 +12,12 @@ /// VPBlockBase, together implementing a Hierarchical CFG; /// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained /// within VPBasicBlocks; -/// 3. VPInstruction, a concrete Recipe and VPUser modeling a single planned +/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that +/// also inherit from VPValue. +/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned /// instruction; -/// 4. The VPlan class holding a candidate for vectorization; -/// 5. The VPlanPrinter class providing a way to print a plan in dot format; +/// 5. The VPlan class holding a candidate for vectorization; +/// 6. The VPlanPrinter class providing a way to print a plan in dot format; /// These are documented in docs/VectorizationPlan.rst. // //===----------------------------------------------------------------------===// @@ -700,8 +702,8 @@ public: /// VPRecipeBase is a base class modeling a sequence of one or more output IR /// instructions. VPRecipeBase owns the VPValues it defines through VPDef /// and is responsible for deleting its defined values. Single-value -/// VPRecipeBases that also inherit from VPValue must make sure to inherit from -/// VPRecipeBase before VPValue. +/// recipes must inherit from VPSingleDef instead of inheriting from both +/// VPRecipeBase and VPValue separately. class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>, public VPDef, public VPUser { @@ -762,15 +764,6 @@ public: /// \returns an iterator pointing to the element after the erased one iplist<VPRecipeBase>::iterator eraseFromParent(); - /// Returns the underlying instruction, if the recipe is a VPValue or nullptr - /// otherwise. - Instruction *getUnderlyingInstr() { - return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()); - } - const Instruction *getUnderlyingInstr() const { - return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()); - } - /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPDef *D) { // All VPDefs are also VPRecipeBases. @@ -819,10 +812,80 @@ public: } \ static inline bool classof(const VPRecipeBase *R) { \ return R->getVPDefID() == VPDefID; \ + } \ + static inline bool classof(const VPSingleDefRecipe *R) { \ + return R->getVPDefID() == VPDefID; \ } +/// VPSingleDef is a base class for recipes for modeling a sequence of one or +/// more output IR that define a single result VPValue. +/// Note that VPRecipeBase must be inherited from before VPValue. 
+class VPSingleDefRecipe : public VPRecipeBase, public VPValue { +public: + template <typename IterT> + VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {}) + : VPRecipeBase(SC, Operands, DL), VPValue(this) {} + + VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands, + DebugLoc DL = {}) + : VPRecipeBase(SC, Operands, DL), VPValue(this) {} + + template <typename IterT> + VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, + DebugLoc DL = {}) + : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {} + + static inline bool classof(const VPRecipeBase *R) { + switch (R->getVPDefID()) { + case VPRecipeBase::VPDerivedIVSC: + case VPRecipeBase::VPExpandSCEVSC: + case VPRecipeBase::VPInstructionSC: + case VPRecipeBase::VPReductionSC: + case VPRecipeBase::VPReplicateSC: + case VPRecipeBase::VPScalarIVStepsSC: + case VPRecipeBase::VPVectorPointerSC: + case VPRecipeBase::VPWidenCallSC: + case VPRecipeBase::VPWidenCanonicalIVSC: + case VPRecipeBase::VPWidenCastSC: + case VPRecipeBase::VPWidenGEPSC: + case VPRecipeBase::VPWidenSC: + case VPRecipeBase::VPWidenSelectSC: + case VPRecipeBase::VPBlendSC: + case VPRecipeBase::VPPredInstPHISC: + case VPRecipeBase::VPCanonicalIVPHISC: + case VPRecipeBase::VPActiveLaneMaskPHISC: + case VPRecipeBase::VPFirstOrderRecurrencePHISC: + case VPRecipeBase::VPWidenPHISC: + case VPRecipeBase::VPWidenIntOrFpInductionSC: + case VPRecipeBase::VPWidenPointerInductionSC: + case VPRecipeBase::VPReductionPHISC: + return true; + case VPRecipeBase::VPInterleaveSC: + case VPRecipeBase::VPBranchOnMaskSC: + case VPRecipeBase::VPWidenMemoryInstructionSC: + // TODO: Widened stores don't define a value, but widened loads do. Split + // the recipes to be able to make widened loads VPSingleDefRecipes. + return false; + } + llvm_unreachable("Unhandled VPDefID"); + } + + static inline bool classof(const VPUser *U) { + auto *R = dyn_cast<VPRecipeBase>(U); + return R && classof(R); + } + + /// Returns the underlying instruction. + Instruction *getUnderlyingInstr() { + return cast<Instruction>(getUnderlyingValue()); + } + const Instruction *getUnderlyingInstr() const { + return cast<Instruction>(getUnderlyingValue()); + } +}; + /// Class to record LLVM IR flag for a recipe along with it. 
-class VPRecipeWithIRFlags : public VPRecipeBase { +class VPRecipeWithIRFlags : public VPSingleDefRecipe { enum class OperationType : unsigned char { Cmp, OverflowingBinOp, @@ -886,14 +949,14 @@ private: public: template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {}) - : VPRecipeBase(SC, Operands, DL) { + : VPSingleDefRecipe(SC, Operands, DL) { OpType = OperationType::Other; AllFlags = 0; } template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I) - : VPRecipeWithIRFlags(SC, Operands, I.getDebugLoc()) { + : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) { if (auto *Op = dyn_cast<CmpInst>(&I)) { OpType = OperationType::Cmp; CmpPredicate = Op->getPredicate(); @@ -915,32 +978,35 @@ public: } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) { OpType = OperationType::FPMathOp; FMFs = Op->getFastMathFlags(); + } else { + OpType = OperationType::Other; + AllFlags = 0; } } template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL = {}) - : VPRecipeBase(SC, Operands, DL), OpType(OperationType::Cmp), + : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp), CmpPredicate(Pred) {} template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL = {}) - : VPRecipeBase(SC, Operands, DL), OpType(OperationType::OverflowingBinOp), - WrapFlags(WrapFlags) {} + : VPSingleDefRecipe(SC, Operands, DL), + OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {} template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL = {}) - : VPRecipeBase(SC, Operands, DL), OpType(OperationType::FPMathOp), + : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp), FMFs(FMFs) {} protected: template <typename IterT> VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPFlagsTy GEPFlags, DebugLoc DL = {}) - : VPRecipeBase(SC, Operands, DL), OpType(OperationType::GEPOp), + : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {} public: @@ -1056,7 +1122,7 @@ public: /// While as any Recipe it may generate a sequence of IR instructions when /// executed, these instructions would always form a single-def expression as /// the VPInstruction is also a single def-use vertex. -class VPInstruction : public VPRecipeWithIRFlags, public VPValue { +class VPInstruction : public VPRecipeWithIRFlags { friend class VPlanSlp; public: @@ -1103,7 +1169,7 @@ public: VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "") : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL), - VPValue(this), Opcode(Opcode), Name(Name.str()) {} + Opcode(Opcode), Name(Name.str()) {} VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, DebugLoc DL = {}, const Twine &Name = "") @@ -1115,7 +1181,7 @@ public: VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "") : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL), - VPValue(this), Opcode(Opcode), Name(Name.str()) {} + Opcode(Opcode), Name(Name.str()) {} VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = ""); @@ -1205,13 +1271,13 @@ public: /// VPWidenRecipe is a recipe for producing a copy of vector type its /// ingredient. 
This recipe covers most of the traditional vectorization cases /// where each ingredient transforms into a vectorized version of itself. -class VPWidenRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPWidenRecipe : public VPRecipeWithIRFlags { unsigned Opcode; public: template <typename IterT> VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands) - : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPValue(this, &I), + : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), Opcode(I.getOpcode()) {} ~VPWidenRecipe() override = default; @@ -1231,7 +1297,7 @@ public: }; /// VPWidenCastRecipe is a recipe to create vector cast instructions. -class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPWidenCastRecipe : public VPRecipeWithIRFlags { /// Cast instruction opcode. Instruction::CastOps Opcode; @@ -1241,8 +1307,8 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPValue { public: VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI) - : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPValue(this, &UI), - Opcode(Opcode), ResultTy(ResultTy) { + : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode), + ResultTy(ResultTy) { assert(UI.getOpcode() == Opcode && "opcode of underlying cast doesn't match"); assert(UI.getType() == ResultTy && @@ -1250,8 +1316,8 @@ public: } VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) - : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), VPValue(this, nullptr), - Opcode(Opcode), ResultTy(ResultTy) {} + : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode), + ResultTy(ResultTy) {} ~VPWidenCastRecipe() override = default; @@ -1273,7 +1339,7 @@ public: }; /// A recipe for widening Call instructions. -class VPWidenCallRecipe : public VPRecipeBase, public VPValue { +class VPWidenCallRecipe : public VPSingleDefRecipe { /// ID of the vector intrinsic to call when widening the call. If set the /// Intrinsic::not_intrinsic, a library call will be used instead. Intrinsic::ID VectorIntrinsicID; @@ -1286,9 +1352,9 @@ class VPWidenCallRecipe : public VPRecipeBase, public VPValue { public: template <typename IterT> VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments, - Intrinsic::ID VectorIntrinsicID, + Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {}, Function *Variant = nullptr) - : VPRecipeBase(VPDef::VPWidenCallSC, CallArguments), VPValue(this, &I), + : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, &I, DL), VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {} ~VPWidenCallRecipe() override = default; @@ -1306,11 +1372,11 @@ public: }; /// A recipe for widening select instructions. -struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue { +struct VPWidenSelectRecipe : public VPSingleDefRecipe { template <typename IterT> VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands) - : VPRecipeBase(VPDef::VPWidenSelectSC, Operands, I.getDebugLoc()), - VPValue(this, &I) {} + : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I, + I.getDebugLoc()) {} ~VPWidenSelectRecipe() override = default; @@ -1335,7 +1401,7 @@ struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue { }; /// A recipe for handling GEP instructions. 
-class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPWidenGEPRecipe : public VPRecipeWithIRFlags { bool isPointerLoopInvariant() const { return getOperand(0)->isDefinedOutsideVectorRegions(); } @@ -1353,8 +1419,7 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue { public: template <typename IterT> VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands) - : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP), - VPValue(this, GEP) {} + : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {} ~VPWidenGEPRecipe() override = default; @@ -1373,7 +1438,7 @@ public: /// A recipe to compute the pointers for widened memory accesses of IndexTy for /// all parts. If IsReverse is true, compute pointers for accessing the input in /// reverse order per part. -class VPVectorPointerRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPVectorPointerRecipe : public VPRecipeWithIRFlags { Type *IndexedTy; bool IsReverse; @@ -1382,7 +1447,7 @@ public: bool IsInBounds, DebugLoc DL) : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr), GEPFlagsTy(IsInBounds), DL), - VPValue(this), IndexedTy(IndexedTy), IsReverse(IsReverse) {} + IndexedTy(IndexedTy), IsReverse(IsReverse) {} VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) @@ -1424,11 +1489,11 @@ public: /// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a /// pointer induction. Produces either a vector PHI per-part or scalar values /// per-lane based on the canonical induction. -class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue { +class VPHeaderPHIRecipe : public VPSingleDefRecipe { protected: VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start = nullptr, DebugLoc DL = {}) - : VPRecipeBase(VPDefID, {}, DL), VPValue(this, UnderlyingInstr) { + : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) { if (Start) addOperand(Start); } @@ -1709,14 +1774,13 @@ public: /// A recipe for vectorizing a phi-node as a sequence of mask-based select /// instructions. -class VPBlendRecipe : public VPRecipeBase, public VPValue { +class VPBlendRecipe : public VPSingleDefRecipe { public: /// The blend operation is a User of the incoming values and of their /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value /// might be incoming with a full mask for which there is no VPValue. VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands) - : VPRecipeBase(VPDef::VPBlendSC, Operands, Phi->getDebugLoc()), - VPValue(this, Phi) { + : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) { assert(Operands.size() > 0 && ((Operands.size() == 1) || (Operands.size() % 2 == 0)) && "Expected either a single incoming value or a positive even number " @@ -1843,14 +1907,15 @@ public: /// A recipe to represent inloop reduction operations, performing a reduction on /// a vector operand into a scalar value, and adding the result to a chain. /// The Operands are {ChainOp, VecOp, [Condition]}. -class VPReductionRecipe : public VPRecipeBase, public VPValue { +class VPReductionRecipe : public VPSingleDefRecipe { /// The recurrence decriptor for the reduction in question. 
const RecurrenceDescriptor &RdxDesc; public: VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp) - : VPRecipeBase(VPDef::VPReductionSC, {ChainOp, VecOp}), VPValue(this, I), + : VPSingleDefRecipe(VPDef::VPReductionSC, + ArrayRef<VPValue *>({ChainOp, VecOp}), I), RdxDesc(R) { if (CondOp) addOperand(CondOp); @@ -1883,7 +1948,7 @@ public: /// copies of the original scalar type, one per lane, instead of producing a /// single copy of widened type for all lanes. If the instruction is known to be /// uniform only one copy, per lane zero, will be generated. -class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPReplicateRecipe : public VPRecipeWithIRFlags { /// Indicator if only a single replica per lane is needed. bool IsUniform; @@ -1895,7 +1960,7 @@ public: VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands, bool IsUniform, VPValue *Mask = nullptr) : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I), - VPValue(this, I), IsUniform(IsUniform), IsPredicated(Mask) { + IsUniform(IsUniform), IsPredicated(Mask) { if (Mask) addOperand(Mask); } @@ -1993,12 +2058,12 @@ public: /// order to merge values that are set under such a branch and feed their uses. /// The phi nodes can be scalar or vector depending on the users of the value. /// This recipe works in concert with VPBranchOnMaskRecipe. -class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue { +class VPPredInstPHIRecipe : public VPSingleDefRecipe { public: /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi /// nodes after merging back from a Branch-on-Mask. VPPredInstPHIRecipe(VPValue *PredV) - : VPRecipeBase(VPDef::VPPredInstPHISC, PredV), VPValue(this) {} + : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {} ~VPPredInstPHIRecipe() override = default; VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC) @@ -2119,14 +2184,13 @@ public: }; /// Recipe to expand a SCEV expression. -class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue { +class VPExpandSCEVRecipe : public VPSingleDefRecipe { const SCEV *Expr; ScalarEvolution &SE; public: VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE) - : VPRecipeBase(VPDef::VPExpandSCEVSC, {}), VPValue(this), Expr(Expr), - SE(SE) {} + : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {} ~VPExpandSCEVRecipe() override = default; @@ -2225,11 +2289,10 @@ public: }; /// A Recipe for widening the canonical induction variable of the vector loop. -class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue { +class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe { public: VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV) - : VPRecipeBase(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}), - VPValue(this) {} + : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {} ~VPWidenCanonicalIVRecipe() override = default; @@ -2256,7 +2319,7 @@ public: /// A recipe for converting the canonical IV value to the corresponding value of /// an IV with different start and step values, using Start + CanonicalIV * /// Step. -class VPDerivedIVRecipe : public VPRecipeBase, public VPValue { +class VPDerivedIVRecipe : public VPSingleDefRecipe { /// If not nullptr, the result of the induction will get truncated to /// TruncResultTy. 
Type *TruncResultTy; @@ -2271,8 +2334,8 @@ public: VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, Type *TruncResultTy) - : VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), - VPValue(this), TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()), + : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), + TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()), FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) { } @@ -2309,7 +2372,7 @@ public: /// A recipe for handling phi nodes of integer and floating-point inductions, /// producing their scalar values. -class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, public VPValue { +class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags { Instruction::BinaryOps InductionOpcode; public: @@ -2317,7 +2380,7 @@ public: Instruction::BinaryOps Opcode, FastMathFlags FMFs) : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC, ArrayRef<VPValue *>({IV, Step}), FMFs), - VPValue(this), InductionOpcode(Opcode) {} + InductionOpcode(Opcode) {} VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index f950d4740e41..94456bf858d9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -283,7 +283,7 @@ VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) { void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB) { VPIRBuilder.setInsertPoint(VPBB); - for (Instruction &InstRef : *BB) { + for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) { Instruction *Inst = &InstRef; // There shouldn't be any VPValue for Inst at this point. Otherwise, we diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1f844bce2310..bbeb5da2cfec 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -258,7 +258,7 @@ VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, const Twine &Name) : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}), Pred, DL), - VPValue(this), Opcode(Opcode), Name(Name.str()) { + Opcode(Opcode), Name(Name.str()) { assert(Opcode == Instruction::ICmp && "only ICmp predicates supported at the moment"); } @@ -267,7 +267,7 @@ VPInstruction::VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, FastMathFlags FMFs, DebugLoc DL, const Twine &Name) : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL), - VPValue(this), Opcode(Opcode), Name(Name.str()) { + Opcode(Opcode), Name(Name.str()) { // Make sure the VPInstruction is a floating-point operation. 
assert(isFPMathOp() && "this op can't take fast-math flags"); } @@ -580,7 +580,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { auto &CI = *cast<CallInst>(getUnderlyingInstr()); assert(!isa<DbgInfoIntrinsic>(CI) && "DbgInfoIntrinsic should have been dropped during VPlan construction"); - State.setDebugLocFrom(CI.getDebugLoc()); + State.setDebugLocFrom(getDebugLoc()); bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic; FunctionType *VFTy = nullptr; @@ -1712,16 +1712,20 @@ void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, #endif void VPReductionPHIRecipe::execute(VPTransformState &State) { - PHINode *PN = cast<PHINode>(getUnderlyingValue()); auto &Builder = State.Builder; + // Reductions do not have to start at zero. They can start with + // any loop invariant values. + VPValue *StartVPV = getStartValue(); + Value *StartV = StartVPV->getLiveInIRValue(); + // In order to support recurrences we need to be able to vectorize Phi nodes. // Phi nodes have cycles, so we need to vectorize them in two stages. This is // stage #1: We create a new vector PHI node with no incoming edges. We'll use // this value when we vectorize all of the instructions that use the PHI. bool ScalarPHI = State.VF.isScalar() || IsInLoop; - Type *VecTy = - ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF); + Type *VecTy = ScalarPHI ? StartV->getType() + : VectorType::get(StartV->getType(), State.VF); BasicBlock *HeaderBB = State.CFG.PrevBB; assert(State.CurrentVectorLoop->getHeader() == HeaderBB && @@ -1735,11 +1739,6 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) { BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); - // Reductions do not have to start at zero. They can start with - // any loop invariant values. - VPValue *StartVPV = getStartValue(); - Value *StartV = StartVPV->getLiveInIRValue(); - Value *Iden = nullptr; RecurKind RK = RdxDesc.getRecurrenceKind(); if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 5c430620a2dc..8e6b48cdb2c8 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -74,9 +74,9 @@ void VPlanTransforms::VPInstructionsToVPRecipes( } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands()); } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) { - NewRecipe = - new VPWidenCallRecipe(*CI, drop_end(Ingredient.operands()), - getVectorIntrinsicIDForCall(CI, &TLI)); + NewRecipe = new VPWidenCallRecipe( + *CI, drop_end(Ingredient.operands()), + getVectorIntrinsicIDForCall(CI, &TLI), CI->getDebugLoc()); } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands()); } else if (auto *CI = dyn_cast<CastInst>(Inst)) { @@ -103,7 +103,7 @@ static bool sinkScalarOperands(VPlan &Plan) { bool Changed = false; // First, collect the operands of all recipes in replicate blocks as seeds for // sinking. 
- SetVector<std::pair<VPBasicBlock *, VPRecipeBase *>> WorkList; + SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList; for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) { VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock(); if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2) @@ -113,7 +113,8 @@ static bool sinkScalarOperands(VPlan &Plan) { continue; for (auto &Recipe : *VPBB) { for (VPValue *Op : Recipe.operands()) - if (auto *Def = Op->getDefiningRecipe()) + if (auto *Def = + dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe())) WorkList.insert(std::make_pair(VPBB, Def)); } } @@ -122,7 +123,7 @@ static bool sinkScalarOperands(VPlan &Plan) { // Try to sink each replicate or scalar IV steps recipe in the worklist. for (unsigned I = 0; I != WorkList.size(); ++I) { VPBasicBlock *SinkTo; - VPRecipeBase *SinkCandidate; + VPSingleDefRecipe *SinkCandidate; std::tie(SinkTo, SinkCandidate) = WorkList[I]; if (SinkCandidate->getParent() == SinkTo || SinkCandidate->mayHaveSideEffects() || @@ -146,12 +147,11 @@ static bool sinkScalarOperands(VPlan &Plan) { return false; if (UI->getParent() == SinkTo) return true; - NeedsDuplicating = - UI->onlyFirstLaneUsed(SinkCandidate->getVPSingleValue()); + NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate); // We only know how to duplicate VPRecipeRecipes for now. return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate); }; - if (!all_of(SinkCandidate->getVPSingleValue()->users(), CanSinkWithUser)) + if (!all_of(SinkCandidate->users(), CanSinkWithUser)) continue; if (NeedsDuplicating) { @@ -163,14 +163,14 @@ static bool sinkScalarOperands(VPlan &Plan) { // TODO: add ".cloned" suffix to name of Clone's VPValue. Clone->insertBefore(SinkCandidate); - SinkCandidate->getVPSingleValue()->replaceUsesWithIf( - Clone, [SinkTo](VPUser &U, unsigned) { - return cast<VPRecipeBase>(&U)->getParent() != SinkTo; - }); + SinkCandidate->replaceUsesWithIf(Clone, [SinkTo](VPUser &U, unsigned) { + return cast<VPRecipeBase>(&U)->getParent() != SinkTo; + }); } SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi()); for (VPValue *Op : SinkCandidate->operands()) - if (auto *Def = Op->getDefiningRecipe()) + if (auto *Def = + dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe())) WorkList.insert(std::make_pair(SinkTo, Def)); Changed = true; } @@ -412,16 +412,15 @@ void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { auto &Casts = IV->getInductionDescriptor().getCastInsts(); VPValue *FindMyCast = IV; for (Instruction *IRCast : reverse(Casts)) { - VPRecipeBase *FoundUserCast = nullptr; + VPSingleDefRecipe *FoundUserCast = nullptr; for (auto *U : FindMyCast->users()) { - auto *UserCast = cast<VPRecipeBase>(U); - if (UserCast->getNumDefinedValues() == 1 && - UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) { + auto *UserCast = dyn_cast<VPSingleDefRecipe>(U); + if (UserCast && UserCast->getUnderlyingValue() == IRCast) { FoundUserCast = UserCast; break; } } - FindMyCast = FoundUserCast->getVPSingleValue(); + FindMyCast = FoundUserCast; } FindMyCast->replaceAllUsesWith(IV); } @@ -895,7 +894,10 @@ void VPlanTransforms::truncateToMinimalBitwidths( vp_depth_first_deep(Plan.getVectorLoopRegion()))) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe, - VPWidenSelectRecipe>(&R)) + VPWidenSelectRecipe, VPWidenMemoryInstructionRecipe>(&R)) + continue; + if (isa<VPWidenMemoryInstructionRecipe>(&R) && + 
cast<VPWidenMemoryInstructionRecipe>(&R)->isStore())
+        continue;
       VPValue *ResultVPV = R.getVPSingleValue();
@@ -948,6 +950,23 @@ void VPlanTransforms::truncateToMinimalBitwidths(
       auto *NewResTy = IntegerType::get(Ctx, NewResSizeInBits);
 
+      // Any wrapping introduced by shrinking this operation shouldn't be
+      // considered undefined behavior. So, we can't unconditionally copy
+      // arithmetic wrapping flags to VPW.
+      if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
+        VPW->dropPoisonGeneratingFlags();
+
+      // Extend result to original width.
+      auto *Ext = new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
+      Ext->insertAfter(&R);
+      ResultVPV->replaceAllUsesWith(Ext);
+      Ext->setOperand(0, ResultVPV);
+
+      if (isa<VPWidenMemoryInstructionRecipe>(&R)) {
+        assert(!cast<VPWidenMemoryInstructionRecipe>(&R)->isStore() && "stores cannot be narrowed");
+        continue;
+      }
+
       // Shrink operands by introducing truncates as needed.
       unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0;
       for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
@@ -979,17 +998,6 @@ void VPlanTransforms::truncateToMinimalBitwidths(
       }
     }
 
-    // Any wrapping introduced by shrinking this operation shouldn't be
-    // considered undefined behavior. So, we can't unconditionally copy
-    // arithmetic wrapping flags to VPW.
-    if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
-      VPW->dropPoisonGeneratingFlags();
-
-    // Extend result to original width.
-    auto *Ext = new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
-    Ext->insertAfter(&R);
-    ResultVPV->replaceAllUsesWith(Ext);
-    Ext->setOperand(0, ResultVPV);
   }
 }
 
@@ -1130,7 +1138,7 @@ void VPlanTransforms::addActiveLaneMask(
          "Must have widened canonical IV when tail folding!");
   auto *WideCanonicalIV =
       cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
-  VPRecipeBase *LaneMask;
+  VPSingleDefRecipe *LaneMask;
   if (UseActiveLaneMaskForControlFlow) {
     LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
         Plan, DataAndControlFlowWithoutRuntimeCheck);
@@ -1155,7 +1163,7 @@ void VPlanTransforms::addActiveLaneMask(
   assert(CompareToReplace->getOperand(0) == WideCanonicalIV &&
          "WidenCanonicalIV must be the first operand of the compare");
-  CompareToReplace->replaceAllUsesWith(LaneMask->getVPSingleValue());
+  CompareToReplace->replaceAllUsesWith(LaneMask);
   CompareToReplace->eraseFromParent();
 }
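A recurring shape in the PromoteMemoryToRegister.cpp, SimplifyCFG.cpp, and ValueMapper.cpp hunks above: debug-info users of a value may now be either dbg.* intrinsic instructions or instruction-attached DPValue records, so each update is written once as a generic lambda and applied to both lists. Condensed from the SpeculativelyExecuteBB hunk:

// One body serves both DbgAssignIntrinsic * and DPValue *, which expose the
// same location_ops()/replaceVariableLocationOp() interface.
auto replaceVariable = [OrigV, S](auto *DbgAssign) {
  if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
    DbgAssign->replaceVariableLocationOp(OrigV, S);
};
for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);    // intrinsic form
for_each(at::getDPVAssignmentMarkers(SpeculatedStore), replaceVariable); // DPValue form

The same idiom carries the PromoteMemoryToRegister.cpp changes: InsertValueForAssign and ConvertUnlinkedAssignToValue in updateForDeletedStore are generic lambdas run over the intrinsic list and the DPValue list in turn.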
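The ScalarEvolutionExpander.cpp and LoopVectorize.cpp hunks repeatedly replace an explicit i8 GEP with IRBuilder::CreatePtrAdd. A minimal sketch of why the two spellings are interchangeable, assuming the LLVM-18-era IRBuilder in which CreatePtrAdd is a thin wrapper that emits the same byte-addressed GEP (the helper name below is hypothetical):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Both statements produce the identical "getelementptr i8, ptr %Ptr, iN %Off"
// instruction; the diff merely migrates to the shorter, intent-revealing form.
static Value *emitByteOffset(IRBuilderBase &B, Value *Ptr, Value *Off) {
  // Old spelling, as removed above:
  //   return B.CreateGEP(B.getInt8Ty(), Ptr, Off, "scevgep");
  // New spelling, as added above:
  return B.CreatePtrAdd(Ptr, Off, "scevgep");
}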
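The SimplifyLibCalls.cpp hunk extends the log(pow(x, y)) -> y * log(x) rewrite to llvm.powi. The identity log(x^n) = n * log(x) is unchanged, but powi takes an integer exponent, so it has to be converted to floating point before it can feed the fmul. Condensed from the hunk:

Value *Y = Arg->getArgOperand(1);    // exponent; an integer for llvm.powi
if (ArgID == Intrinsic::powi)
  Y = B.CreateSIToFP(Y, Ty, "cast"); // integer exponent -> FP
Value *MulY = B.CreateFMul(Y, LogX, "mul");
// pow()/powi may have side effects such as setting errno, so the call is
// replaced explicitly rather than left for dead-code elimination.
substituteInParent(Arg, MulY);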
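In the LoopVectorize.cpp cost-model hunk, an frem whose type has a vectorized libm equivalent (e.g. a vector fmod) is now costed as the cheaper of the widened instruction and the vector library call. A sketch of the control flow, condensed from the hunk; note that InstructionCost::getInvalid() compares greater than any valid cost, which is what makes the std::min at the end safe:

InstructionCost VecCallCost = InstructionCost::getInvalid();
if (I->getOpcode() == Instruction::FRem) {
  LibFunc Func;
  // Does TLI know a vectorized variant of fmod/fmodf for this VF?
  if (TLI->getLibFunc(I->getOpcode(), I->getType(), Func) &&
      TLI->isFunctionVectorizable(TLI->getName(Func), VF)) {
    SmallVector<Type *, 4> OpTypes;
    for (auto &Op : I->operands())
      OpTypes.push_back(Op->getType());
    // Cost of calling the vector library routine instead of widening frem.
    VecCallCost = TTI.getCallInstrCost(nullptr, VectorTy, OpTypes, CostKind);
  }
}
return std::min(InstrCost, VecCallCost); // invalid == "no vector call available"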
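Finally, the VPlan.cpp hunk reduces replaceAllUsesWith to replaceUsesWithIf with an always-true predicate, and the comment it adds explains why the `this == New` early exit is load-bearing. Spelling the termination argument out against the loop in question (condensed from the hunk; ShouldReplace is the predicate parameter):

if (this == New) // required for termination, see below
  return;
for (unsigned J = 0; J < getNumUsers();) {
  VPUser *User = Users[J];
  bool RemovedUser = false;
  for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
    if (User->getOperand(I) == this && ShouldReplace(*User, I)) {
      User->setOperand(I, New); // may drop User from Users
      RemovedUser = true;
    }
  // A removed user shifts the next user into slot J, so J advances only when
  // nothing changed. With New == this, setOperand() never removes a use:
  // Users never shrinks, J never moves past a matching user, and the loop
  // would spin forever.
  if (!RemovedUser)
    J++;
}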