Diffstat (limited to 'contrib/llvm-project/llvm/lib/Analysis')
42 files changed, 1205 insertions, 925 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
index 49199060786c..a8132e5abf54 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -242,7 +242,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
 
   if (onlyReadsMemory(MRB))
     Result = clearMod(Result);
-  else if (doesNotReadMemory(MRB))
+  else if (onlyWritesMemory(MRB))
     Result = clearRef(Result);
 
   if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) {
@@ -320,7 +320,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
   // from Call1 reading memory written by Call2.
   if (onlyReadsMemory(Call1B))
     Result = clearMod(Result);
-  else if (doesNotReadMemory(Call1B))
+  else if (onlyWritesMemory(Call1B))
     Result = clearRef(Result);
 
   // If Call2 only access memory through arguments, accumulate the mod/ref
@@ -988,6 +988,29 @@ bool llvm::isIdentifiedFunctionLocal(const Value *V) {
   return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasOrByValArgument(V);
 }
 
+bool llvm::isNotVisibleOnUnwind(const Value *Object,
+                                bool &RequiresNoCaptureBeforeUnwind) {
+  RequiresNoCaptureBeforeUnwind = false;
+
+  // Alloca goes out of scope on unwind.
+  if (isa<AllocaInst>(Object))
+    return true;
+
+  // Byval goes out of scope on unwind.
+  if (auto *A = dyn_cast<Argument>(Object))
+    return A->hasByValAttr();
+
+  // A noalias return is not accessible from any other code. If the pointer
+  // does not escape prior to the unwind, then the caller cannot access the
+  // memory either.
+  if (isNoAliasCall(Object)) {
+    RequiresNoCaptureBeforeUnwind = true;
+    return true;
+  }
+
+  return false;
+}
+
 void llvm::getAAResultsAnalysisUsage(AnalysisUsage &AU) {
   // This function needs to be in sync with llvm::createLegacyPMAAResults -- if
   // more alias analyses are added to llvm::createLegacyPMAAResults, they need
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index 0c097b2fa302..1577f1eb70b1 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -142,13 +142,13 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
   for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
        I1 != E; ++I1) {
     auto I1Size = LocationSize::afterPointer();
-    Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
+    Type *I1ElTy = (*I1)->getType()->getPointerElementType();
     if (I1ElTy->isSized())
       I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy));
 
     for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
       auto I2Size = LocationSize::afterPointer();
-      Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
+      Type *I2ElTy = (*I2)->getType()->getPointerElementType();
       if (I2ElTy->isSized())
         I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy));
 
@@ -233,7 +233,7 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
   for (CallBase *Call : Calls) {
     for (auto Pointer : Pointers) {
       auto Size = LocationSize::afterPointer();
-      Type *ElTy = cast<PointerType>(Pointer->getType())->getElementType();
+      Type *ElTy = Pointer->getType()->getPointerElementType();
       if (ElTy->isSized())
         Size = LocationSize::precise(DL.getTypeStoreSize(ElTy));
 
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 5f1bf2001d47..b4c985962837 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -779,7 +779,7 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const CallBase *Call) {
   // than that.
   if (Call->onlyReadsMemory())
     Min = FMRB_OnlyReadsMemory;
-  else if (Call->doesNotReadMemory())
+  else if (Call->onlyWritesMemory())
    Min = FMRB_OnlyWritesMemory;
 
   if (Call->onlyAccessesArgMemory())
@@ -812,7 +812,7 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
   // If the function declares it only reads memory, go with that.
   if (F->onlyReadsMemory())
     Min = FMRB_OnlyReadsMemory;
-  else if (F->doesNotReadMemory())
+  else if (F->onlyWritesMemory())
     Min = FMRB_OnlyWritesMemory;
 
   if (F->onlyAccessesArgMemory())
@@ -972,7 +972,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
        continue;
      }
      // Operand aliases 'Object' but call only writes into it.
-      if (Call->doesNotReadMemory(OperandNo)) {
+      if (Call->onlyWritesMemory(OperandNo)) {
        Result = setMod(Result);
        continue;
      }
@@ -1020,9 +1020,9 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
        getBestAAResults().alias(MemoryLocation::getForDest(Inst), Loc, AAQI);
    // It's also possible for Loc to alias both src and dest, or neither.
    ModRefInfo rv = ModRefInfo::NoModRef;
-    if (SrcAA != AliasResult::NoAlias)
+    if (SrcAA != AliasResult::NoAlias || Call->hasReadingOperandBundles())
      rv = setRef(rv);
-    if (DestAA != AliasResult::NoAlias)
+    if (DestAA != AliasResult::NoAlias || Call->hasClobberingOperandBundles())
      rv = setMod(rv);
    return rv;
  }
@@ -1248,8 +1248,8 @@ AliasResult BasicAAResult::aliasGEP(
    else
      GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
 
-    ConstantRange CR =
-        computeConstantRange(Index.Val.V, true, &AC, Index.CxtI);
+    ConstantRange CR = computeConstantRange(Index.Val.V, /* ForSigned */ false,
+                                            true, &AC, Index.CxtI);
    KnownBits Known = computeKnownBits(Index.Val.V, DL, 0, &AC, Index.CxtI, DT);
    CR = CR.intersectWith(
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 856d7e90acb2..ffb80134749a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -42,6 +42,7 @@
 #include <cassert>
 #include <cstdint>
 #include <iterator>
+#include <map>
 #include <utility>
 
 using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
index 9467bb3c9b2d..090dccc53b6e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -63,7 +63,7 @@ using namespace llvm::cflaa;
 
 CFLSteensAAResult::CFLSteensAAResult(
     std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
-    : AAResultBase(), GetTLI(std::move(GetTLI)) {}
+    : GetTLI(std::move(GetTLI)) {}
 CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
     : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {}
 CFLSteensAAResult::~CFLSteensAAResult() = default;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp
index f2e5eab72bf2..930cb13c0cb3 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -61,7 +61,7 @@ class CGPassManager : public ModulePass, public PMDataManager {
 public:
   static char ID;
 
-  explicit CGPassManager() : ModulePass(ID), PMDataManager() {}
+  explicit CGPassManager() : ModulePass(ID) {}
 
   /// Execute all of the passes scheduled for execution. Keep track of
   /// whether any of the passes modifies the module, and if so, return true.
diff --git a/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
index 9b45f455be08..ba8462e659d5 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp
@@ -75,7 +75,7 @@ bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) {
 namespace {
   struct SimpleCaptureTracker : public CaptureTracker {
     explicit SimpleCaptureTracker(bool ReturnCaptures)
-      : ReturnCaptures(ReturnCaptures), Captured(false) {}
+      : ReturnCaptures(ReturnCaptures) {}
 
     void tooManyUses() override { Captured = true; }
 
@@ -89,7 +89,7 @@ namespace {
 
    bool ReturnCaptures;
 
-    bool Captured;
+    bool Captured = false;
  };
 
  /// Only find pointer captures which happen before the given instruction. Uses
@@ -101,7 +101,7 @@ namespace {
    CapturesBefore(bool ReturnCaptures, const Instruction *I,
                   const DominatorTree *DT, bool IncludeI, const LoopInfo *LI)
        : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures),
-          IncludeI(IncludeI), Captured(false), LI(LI) {}
+          IncludeI(IncludeI), LI(LI) {}
 
    void tooManyUses() override { Captured = true; }
 
@@ -139,7 +139,7 @@ namespace {
    bool ReturnCaptures;
    bool IncludeI;
 
-    bool Captured;
+    bool Captured = false;
 
    const LoopInfo *LI;
  };
@@ -155,7 +155,7 @@ namespace {
  struct EarliestCaptures : public CaptureTracker {
 
    EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT)
-        : DT(DT), ReturnCaptures(ReturnCaptures), Captured(false), F(F) {}
+        : DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}
 
    void tooManyUses() override {
      Captured = true;
@@ -199,7 +199,7 @@ namespace {
 
    bool ReturnCaptures;
 
-    bool Captured;
+    bool Captured = false;
 
    Function &F;
  };
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
index 922b38e92785..7cf69f613c66 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
@@ -106,11 +106,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
          "Invalid constantexpr bitcast!");
 
   // Catch the obvious splat cases.
- if (C->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy()) - return Constant::getNullValue(DestTy); - if (C->isAllOnesValue() && - (DestTy->isIntegerTy() || DestTy->isFloatingPointTy() || - DestTy->isVectorTy()) && - !DestTy->isX86_AMXTy() && !DestTy->isX86_MMXTy() && - !DestTy->isPtrOrPtrVectorTy()) - // Get ones when the input is trivial, but - // only for supported types inside getAllOnesValue. - return Constant::getAllOnesValue(DestTy); + if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy)) + return Res; // If the type sizes are the same and a cast is legal, just directly // cast the constant. @@ -410,6 +399,12 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero()); C = ElemC; } else { + // For non-byte-sized vector elements, the first element is not + // necessarily located at the vector base address. + if (auto *VT = dyn_cast<VectorType>(SrcTy)) + if (!DL.typeSizeEqualsStoreSize(VT->getElementType())) + return nullptr; + C = C->getAggregateElement(0u); } } while (C); @@ -558,23 +553,16 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy, // If this isn't an integer load we can't fold it directly. if (!IntType) { - // If this is a float/double load, we can try folding it as an int32/64 load - // and then bitcast the result. This can be useful for union cases. Note + // If this is a non-integer load, we can try folding it as an int load and + // then bitcast the result. This can be useful for union cases. Note // that address spaces don't matter here since we're not going to result in // an actual new load. - Type *MapTy; - if (LoadTy->isHalfTy()) - MapTy = Type::getInt16Ty(C->getContext()); - else if (LoadTy->isFloatTy()) - MapTy = Type::getInt32Ty(C->getContext()); - else if (LoadTy->isDoubleTy()) - MapTy = Type::getInt64Ty(C->getContext()); - else if (LoadTy->isVectorTy()) { - MapTy = PointerType::getIntNTy( - C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedSize()); - } else + if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() && + !LoadTy->isVectorTy()) return nullptr; + Type *MapTy = Type::getIntNTy( + C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedSize()); if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) { if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && !LoadTy->isX86_AMXTy()) @@ -680,9 +668,21 @@ Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty, if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL)) return Result; + // Explicitly check for out-of-bounds access, so we return undef even if the + // constant is a uniform value. + TypeSize Size = DL.getTypeAllocSize(C->getType()); + if (!Size.isScalable() && Offset.sge(Size.getFixedSize())) + return UndefValue::get(Ty); + + // Try an offset-independent fold of a uniform value. + if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty)) + return Result; + // Try hard to fold loads from bitcasted strange and non-type-safe things. if (Offset.getMinSignedBits() <= 64) - return FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL); + if (Constant *Result = + FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL)) + return Result; return nullptr; } @@ -704,15 +704,13 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, Offset, DL)) return Result; - // If this load comes from anywhere in a constant global, and if the global - // is all undef or zero, we know what it loads. 
+ // If this load comes from anywhere in a uniform constant global, the value + // is always the same, regardless of the loaded offset. if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C))) { if (GV->isConstant() && GV->hasDefinitiveInitializer()) { - if (GV->getInitializer()->isNullValue() && !Ty->isX86_MMXTy() && - !Ty->isX86_AMXTy()) - return Constant::getNullValue(Ty); - if (isa<UndefValue>(GV->getInitializer())) - return UndefValue::get(Ty); + if (Constant *Res = + ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty)) + return Res; } } @@ -725,6 +723,19 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, return ConstantFoldLoadFromConstPtr(C, Ty, Offset, DL); } +Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty) { + if (isa<PoisonValue>(C)) + return PoisonValue::get(Ty); + if (isa<UndefValue>(C)) + return UndefValue::get(Ty); + if (C->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy()) + return Constant::getNullValue(Ty); + if (C->isAllOnesValue() && + (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy())) + return Constant::getAllOnesValue(Ty); + return nullptr; +} + namespace { /// One of Op0/Op1 is a constant expression. @@ -930,7 +941,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, if (auto *GV = dyn_cast<GlobalValue>(Ptr)) SrcElemTy = GV->getValueType(); else if (!PTy->isOpaque()) - SrcElemTy = PTy->getElementType(); + SrcElemTy = PTy->getNonOpaquePointerElementType(); else SrcElemTy = Type::getInt8Ty(Ptr->getContext()); @@ -1171,10 +1182,11 @@ Constant *llvm::ConstantFoldInstOperands(Instruction *I, return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI); } -Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, +Constant *llvm::ConstantFoldCompareInstOperands(unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL, const TargetLibraryInfo *TLI) { + CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate; // fold: icmp (inttoptr x), null -> icmp x, 0 // fold: icmp null, (inttoptr x) -> icmp 0, x // fold: icmp (ptrtoint x), 0 -> icmp x, null @@ -1248,10 +1260,30 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL); } + + // Convert pointer comparison (base+offset1) pred (base+offset2) into + // offset1 pred offset2, for the case where the offset is inbounds. This + // only works for equality and unsigned comparison, as inbounds permits + // crossing the sign boundary. However, the offset comparison itself is + // signed. + if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) { + unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType()); + APInt Offset0(IndexWidth, 0); + Value *Stripped0 = + Ops0->stripAndAccumulateInBoundsConstantOffsets(DL, Offset0); + APInt Offset1(IndexWidth, 0); + Value *Stripped1 = + Ops1->stripAndAccumulateInBoundsConstantOffsets(DL, Offset1); + if (Stripped0 == Stripped1) + return ConstantExpr::getCompare( + ICmpInst::getSignedPredicate(Predicate), + ConstantInt::get(CE0->getContext(), Offset0), + ConstantInt::get(CE0->getContext(), Offset1)); + } } else if (isa<ConstantExpr>(Ops1)) { // If RHS is a constant expression, but the left side isn't, swap the // operands and try again. 
- Predicate = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)Predicate); + Predicate = ICmpInst::getSwappedPredicate(Predicate); return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI); } @@ -1347,23 +1379,6 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C, } } -Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, - ConstantExpr *CE, - Type *Ty, - const DataLayout &DL) { - if (!CE->getOperand(1)->isNullValue()) - return nullptr; // Do not allow stepping over the value! - - // Loop over all of the operands, tracking down which value we are - // addressing. - for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) { - C = C->getAggregateElement(CE->getOperand(i)); - if (!C) - return nullptr; - } - return ConstantFoldLoadThroughBitcast(C, Ty, DL); -} - //===----------------------------------------------------------------------===// // Constant Folding for Calls // @@ -2463,36 +2478,21 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, !getConstIntOrUndef(Operands[1], C1)) return nullptr; - unsigned BitWidth = Ty->getScalarSizeInBits(); switch (IntrinsicID) { default: break; case Intrinsic::smax: - if (!C0 && !C1) - return UndefValue::get(Ty); - if (!C0 || !C1) - return ConstantInt::get(Ty, APInt::getSignedMaxValue(BitWidth)); - return ConstantInt::get(Ty, C0->sgt(*C1) ? *C0 : *C1); - case Intrinsic::smin: - if (!C0 && !C1) - return UndefValue::get(Ty); - if (!C0 || !C1) - return ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)); - return ConstantInt::get(Ty, C0->slt(*C1) ? *C0 : *C1); - case Intrinsic::umax: - if (!C0 && !C1) - return UndefValue::get(Ty); - if (!C0 || !C1) - return ConstantInt::get(Ty, APInt::getMaxValue(BitWidth)); - return ConstantInt::get(Ty, C0->ugt(*C1) ? *C0 : *C1); - case Intrinsic::umin: if (!C0 && !C1) return UndefValue::get(Ty); if (!C0 || !C1) - return ConstantInt::get(Ty, APInt::getMinValue(BitWidth)); - return ConstantInt::get(Ty, C0->ult(*C1) ? *C0 : *C1); + return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty); + return ConstantInt::get( + Ty, ICmpInst::compare(*C0, *C1, + MinMaxIntrinsic::getPredicate(IntrinsicID)) + ? *C0 + : *C1); case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: @@ -2572,9 +2572,9 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, case Intrinsic::ctlz: assert(C1 && "Must be constant int"); - // cttz(0, 1) and ctlz(0, 1) are undef. + // cttz(0, 1) and ctlz(0, 1) are poison. 
if (C1->isOne() && (!C0 || C0->isZero())) - return UndefValue::get(Ty); + return PoisonValue::get(Ty); if (!C0) return Constant::getNullValue(Ty); if (IntrinsicID == Intrinsic::cttz) @@ -2583,13 +2583,15 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, return ConstantInt::get(Ty, C0->countLeadingZeros()); case Intrinsic::abs: - // Undef or minimum val operand with poison min --> undef assert(C1 && "Must be constant int"); + assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1"); + + // Undef or minimum val operand with poison min --> undef if (C1->isOne() && (!C0 || C0->isMinSignedValue())) return UndefValue::get(Ty); // Undef operand with no poison min --> 0 (sign bit must be clear) - if (C1->isZero() && !C0) + if (!C0) return Constant::getNullValue(Ty); return ConstantInt::get(Ty, C0->abs()); diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp index 9739c6af5769..773f71ada0ee 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp @@ -142,7 +142,7 @@ bool ConstraintSystem::mayHaveSolution() { return HasSolution; } -bool ConstraintSystem::isConditionImplied(SmallVector<int64_t, 8> R) { +bool ConstraintSystem::isConditionImplied(SmallVector<int64_t, 8> R) const { // If all variable coefficients are 0, we have 'C >= 0'. If the constant is >= // 0, R is always true, regardless of the system. if (all_of(makeArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; })) diff --git a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp index f407ec0d017a..326bacad01fe 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp @@ -50,7 +50,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) { + CostModelAnalysis() : FunctionPass(ID) { initializeCostModelAnalysisPass( *PassRegistry::getPassRegistry()); } @@ -69,9 +69,9 @@ namespace { void print(raw_ostream &OS, const Module*) const override; /// The function that we analyze. - Function *F; + Function *F = nullptr; /// Target information. 
- const TargetTransformInfo *TTI; + const TargetTransformInfo *TTI = nullptr; }; } // End of anonymous namespace diff --git a/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp index da5de75a038c..7e1357959a3f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp @@ -106,7 +106,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) { //===--------------------------------------------------------------------===// SimpleDDGNode::SimpleDDGNode(Instruction &I) - : DDGNode(NodeKind::SingleInstruction), InstList() { + : DDGNode(NodeKind::SingleInstruction) { assert(InstList.empty() && "Expected empty list."); InstList.push_back(&I); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp index 31b2dafa29b4..4a792fce51d1 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp @@ -11,8 +11,10 @@ // //===----------------------------------------------------------------------===// #include "llvm/Config/config.h" +#include "llvm/Support/Casting.h" #if defined(LLVM_HAVE_TF_API) +#include "llvm/ADT/BitVector.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" #include "llvm/Analysis/MLInlineAdvisor.h" @@ -111,7 +113,7 @@ private: StringRef LogFileName; const ModelUnderTrainingRunner *const MUTR; std::unique_ptr<Logger> L; - std::vector<bool> Effects; + BitVector Effects; /// There's at least one output. We'll set this to a different value if MUTR /// is avaliable. size_t OutputCount = 1; @@ -150,7 +152,7 @@ public: DevelopmentModeMLInlineAdvisor( Module &M, ModuleAnalysisManager &MAM, std::unique_ptr<MLModelRunner> ModelRunner, - std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference, + std::function<bool(CallBase &)> GetDefaultAdvice, std::unique_ptr<TrainingLogger> Logger); size_t getTotalSizeEstimate(); @@ -341,10 +343,11 @@ void TrainingLogger::print() { DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( Module &M, ModuleAnalysisManager &MAM, std::unique_ptr<MLModelRunner> ModelRunner, - std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference, + std::function<bool(CallBase &)> GetDefaultAdvice, std::unique_ptr<TrainingLogger> Logger) : MLInlineAdvisor(M, MAM, std::move(ModelRunner)), - GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference), + GetDefaultAdvice(GetDefaultAdvice), + IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())), Logger(std::move(Logger)), InitialNativeSize(isLogging() ? 
getTotalSizeEstimate() : 0), CurrentNativeSize(InitialNativeSize) { @@ -410,8 +413,6 @@ size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { for (auto &F : M) { if (F.isDeclaration()) continue; - if (isFunctionDeleted(&F)) - continue; Ret += *getNativeSizeEstimate(F); } return Ret; @@ -422,30 +423,20 @@ std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( std::function<bool(CallBase &)> GetDefaultAdvice) { auto &Ctx = M.getContext(); std::unique_ptr<MLModelRunner> Runner; - ModelUnderTrainingRunner *MUTRPtr = nullptr; - bool IsDoingInference = false; if (TFModelUnderTrainingPath.empty()) Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures())); - else { - std::unique_ptr<ModelUnderTrainingRunner> MUTR; - if (auto MaybeOutputSpecs = loadOutputSpecs( - Ctx, DecisionName, TFModelUnderTrainingPath, TFOutputSpecOverride)) - MUTR = std::make_unique<ModelUnderTrainingRunner>( - Ctx, TFModelUnderTrainingPath, getInputFeatures(), *MaybeOutputSpecs); - if (!MUTR || !MUTR->isValid()) { - Ctx.emitError("Could not load the policy model from the provided path"); - return nullptr; - } - IsDoingInference = true; - MUTRPtr = MUTR.get(); - Runner = std::move(MUTR); - } + else + Runner = ModelUnderTrainingRunner::createAndEnsureValid( + Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(), + TFOutputSpecOverride); + if (!Runner) + return nullptr; std::unique_ptr<TrainingLogger> Logger; if (!TrainingLog.empty()) - Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr); + Logger = std::make_unique<TrainingLogger>( + TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get())); return std::make_unique<DevelopmentModeMLInlineAdvisor>( - M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference, - std::move(Logger)); + M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger)); } #endif // defined(LLVM_HAVE_TF_API) diff --git a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp index 7426d0c07592..39e80c2ad51c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -130,7 +130,7 @@ bool DivergenceAnalysisImpl::inRegion(const Instruction &I) const { } bool DivergenceAnalysisImpl::inRegion(const BasicBlock &BB) const { - return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB); + return RegionLoop ? 
RegionLoop->contains(&BB) : (BB.getParent() == &F); } void DivergenceAnalysisImpl::pushUsers(const Value &V) { @@ -348,7 +348,7 @@ DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI, const TargetTransformInfo &TTI, bool KnownReducible) - : F(F), ContainsIrreducible(false) { + : F(F) { if (!KnownReducible) { using RPOTraversal = ReversePostOrderTraversal<const Function *>; RPOTraversal FuncRPOT(&F); diff --git a/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp index ebbe0d3e2c5f..6088de53028d 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp @@ -80,6 +80,19 @@ struct DOTGraphTraits<PostDominatorTree*> }; } +PreservedAnalyses DomTreePrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + WriteDOTGraphToFile(F, &AM.getResult<DominatorTreeAnalysis>(F), "dom", false); + return PreservedAnalyses::all(); +} + +PreservedAnalyses DomTreeOnlyPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + WriteDOTGraphToFile(F, &AM.getResult<DominatorTreeAnalysis>(F), "domonly", + true); + return PreservedAnalyses::all(); +} + void DominatorTree::viewGraph(const Twine &Name, const Twine &Title) { #ifndef NDEBUG ViewGraph(this, Name, false, Title); diff --git a/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp index 14e6965f1259..a8806fe5a480 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp @@ -37,7 +37,7 @@ INITIALIZE_PASS_END(DominanceFrontierWrapperPass, "domfrontier", "Dominance Frontier Construction", true, true) DominanceFrontierWrapperPass::DominanceFrontierWrapperPass() - : FunctionPass(ID), DF() { + : FunctionPass(ID) { initializeDominanceFrontierWrapperPassPass(*PassRegistry::getPassRegistry()); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp index d00a7c944f10..6869530148c5 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp @@ -102,7 +102,7 @@ class GlobalsAAResult::FunctionInfo { "Insufficient low bits to store our flag and ModRef info."); public: - FunctionInfo() : Info() {} + FunctionInfo() {} ~FunctionInfo() { delete Info.getPointer(); } @@ -401,14 +401,14 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, /// AnalyzeIndirectGlobalMemory - We found an non-address-taken global variable /// which holds a pointer type. See if the global always points to non-aliased -/// heap memory: that is, all initializers of the globals are allocations, and -/// those allocations have no use other than initialization of the global. +/// heap memory: that is, all initializers of the globals store a value known +/// to be obtained via a noalias return function call which have no other use. /// Further, all loads out of GV must directly use the memory, not store the /// pointer somewhere. If this is true, we consider the memory pointed to by /// GV to be owned by GV and can disambiguate other pointers from it. bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { // Keep track of values related to the allocation of the memory, f.e. the - // value produced by the malloc call and any casts. + // value produced by the noalias call and any casts. 
std::vector<Value *> AllocRelatedValues; // If the initializer is a valid pointer, bail. @@ -438,7 +438,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { // Check the value being stored. Value *Ptr = getUnderlyingObject(SI->getOperand(0)); - if (!isAllocLikeFn(Ptr, &GetTLI(*SI->getFunction()))) + if (!isNoAliasCall(Ptr)) return false; // Too hard to analyze. // Analyze all uses of the allocation. If any of them are used in a @@ -963,7 +963,7 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, GlobalsAAResult::GlobalsAAResult( const DataLayout &DL, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) - : AAResultBase(), DL(DL), GetTLI(std::move(GetTLI)) {} + : DL(DL), GetTLI(std::move(GetTLI)) {} GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL), GetTLI(std::move(Arg.GetTLI)), diff --git a/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index 2ec6cbeabda2..d2f0c57f6dab 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -23,12 +23,24 @@ using namespace llvm; using namespace IRSimilarity; +namespace llvm { cl::opt<bool> DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes.")); +cl::opt<bool> + DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), + cl::ReallyHidden, + cl::desc("disable outlining indirect calls.")); + +cl::opt<bool> + MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, + cl::desc("only allow matching call instructions if the " + "name and type signature match.")); +} // namespace llvm + IRInstructionData::IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDList) : Inst(&I), Legal(Legality), IDL(&IDList) { @@ -57,10 +69,16 @@ void IRInstructionData::initializeInstruction() { OperVals.push_back(OI.get()); } + + // We capture the incoming BasicBlocks as values as well as the incoming + // Values in order to check for structural similarity. 
+ if (PHINode *PN = dyn_cast<PHINode>(Inst)) + for (BasicBlock *BB : PN->blocks()) + OperVals.push_back(BB); } IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) - : Inst(nullptr), Legal(false), IDL(&IDList) {} + : IDL(&IDList) {} void IRInstructionData::setBranchSuccessors( DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { @@ -86,6 +104,43 @@ void IRInstructionData::setBranchSuccessors( } } +void IRInstructionData::setCalleeName(bool MatchByName) { + CallInst *CI = dyn_cast<CallInst>(Inst); + assert(CI && "Instruction must be call"); + + CalleeName = ""; + if (!CI->isIndirectCall() && MatchByName) + CalleeName = CI->getCalledFunction()->getName().str(); +} + +void IRInstructionData::setPHIPredecessors( + DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { + assert(isa<PHINode>(Inst) && "Instruction must be phi node"); + + PHINode *PN = cast<PHINode>(Inst); + DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; + + BBNumIt = BasicBlockToInteger.find(PN->getParent()); + assert(BBNumIt != BasicBlockToInteger.end() && + "Could not find location for BasicBlock!"); + + int CurrentBlockNumber = static_cast<int>(BBNumIt->second); + + // Convert the incoming blocks of the PHINode to an integer value, based on + // the relative distances between the current block and the incoming block. + for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) { + BasicBlock *Incoming = PN->getIncomingBlock(Idx); + BBNumIt = BasicBlockToInteger.find(Incoming); + assert(BBNumIt != BasicBlockToInteger.end() && + "Could not find number for BasicBlock!"); + int OtherBlockNumber = static_cast<int>(BBNumIt->second); + + int Relative = OtherBlockNumber - CurrentBlockNumber; + RelativeBlockLocations.push_back(Relative); + RelativeBlockLocations.push_back(Relative); + } +} + CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { switch (CI->getPredicate()) { case CmpInst::FCMP_OGT: @@ -112,10 +167,13 @@ CmpInst::Predicate IRInstructionData::getPredicate() const { return cast<CmpInst>(Inst)->getPredicate(); } -static StringRef getCalledFunctionName(CallInst &CI) { - assert(CI.getCalledFunction() != nullptr && "Called Function is nullptr?"); +StringRef IRInstructionData::getCalleeName() const { + assert(isa<CallInst>(Inst) && + "Can only get a name from a call instruction"); - return CI.getCalledFunction()->getName(); + assert(CalleeName.hasValue() && "CalleeName has not been set"); + + return *CalleeName; } bool IRSimilarity::isClose(const IRInstructionData &A, @@ -170,13 +228,11 @@ bool IRSimilarity::isClose(const IRInstructionData &A, }); } - // If the instructions are functions, we make sure that the function name is - // the same. We already know that the types are since is isSameOperationAs is - // true. + // If the instructions are functions calls, we make sure that the function + // name is the same. We already know that the types are since is + // isSameOperationAs is true. 
if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { - CallInst *CIA = cast<CallInst>(A.Inst); - CallInst *CIB = cast<CallInst>(B.Inst); - if (getCalledFunctionName(*CIA).compare(getCalledFunctionName(*CIB)) != 0) + if (A.getCalleeName().str().compare(B.getCalleeName().str()) != 0) return false; } @@ -244,6 +300,12 @@ unsigned IRInstructionMapper::mapToLegalUnsigned( if (isa<BranchInst>(*It)) ID->setBranchSuccessors(BasicBlockToInteger); + if (isa<CallInst>(*It)) + ID->setCalleeName(EnableMatchCallsByName); + + if (isa<PHINode>(*It)) + ID->setPHIPredecessors(BasicBlockToInteger); + // Add to the instruction list bool WasInserted; DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator @@ -1075,6 +1137,8 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( std::vector<IRInstructionData *> InstrList; std::vector<unsigned> IntegerMapping; Mapper.InstClassifier.EnableBranches = this->EnableBranches; + Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; + Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; populateMapper(Modules, InstrList, IntegerMapping); findCandidates(InstrList, IntegerMapping); @@ -1085,6 +1149,8 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { resetSimilarityCandidates(); Mapper.InstClassifier.EnableBranches = this->EnableBranches; + Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; + Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; std::vector<IRInstructionData *> InstrList; std::vector<unsigned> IntegerMapping; @@ -1105,7 +1171,8 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() } bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { - IRSI.reset(new IRSimilarityIdentifier(!DisableBranches)); + IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, + MatchCallsByName)); return false; } @@ -1123,7 +1190,8 @@ AnalysisKey IRSimilarityAnalysis::Key; IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, ModuleAnalysisManager &) { - auto IRSI = IRSimilarityIdentifier(!DisableBranches); + auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, + MatchCallsByName); IRSI.findSimilarity(M); return IRSI; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp index f5fa6748d053..44b1d94ebdc8 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp @@ -161,19 +161,22 @@ static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit, /// Collect cast instructions that can be ignored in the vectorizer's cost /// model, given a reduction exit value and the minimal type in which the -/// reduction can be represented. -static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, - Type *RecurrenceType, - SmallPtrSetImpl<Instruction *> &Casts) { +// reduction can be represented. Also search casts to the recurrence type +// to find the minimum width used by the recurrence. 
+static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, + Type *RecurrenceType, + SmallPtrSetImpl<Instruction *> &Casts, + unsigned &MinWidthCastToRecurTy) { SmallVector<Instruction *, 8> Worklist; SmallPtrSet<Instruction *, 8> Visited; Worklist.push_back(Exit); + MinWidthCastToRecurTy = -1U; while (!Worklist.empty()) { Instruction *Val = Worklist.pop_back_val(); Visited.insert(Val); - if (auto *Cast = dyn_cast<CastInst>(Val)) + if (auto *Cast = dyn_cast<CastInst>(Val)) { if (Cast->getSrcTy() == RecurrenceType) { // If the source type of a cast instruction is equal to the recurrence // type, it will be eliminated, and should be ignored in the vectorizer @@ -181,7 +184,16 @@ static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, Casts.insert(Cast); continue; } - + if (Cast->getDestTy() == RecurrenceType) { + // The minimum width used by the recurrence is found by checking for + // casts on its operands. The minimum width is used by the vectorizer + // when finding the widest type for in-loop reductions without any + // loads/stores. + MinWidthCastToRecurTy = std::min<unsigned>( + MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits()); + continue; + } + } // Add all operands to the work list if they are loop-varying values that // we haven't yet visited. for (Value *O : cast<User>(Val)->operands()) @@ -265,6 +277,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // Data used for determining if the recurrence has been type-promoted. Type *RecurrenceType = Phi->getType(); SmallPtrSet<Instruction *, 4> CastInsts; + unsigned MinWidthCastToRecurrenceType; Instruction *Start = Phi; bool IsSigned = false; @@ -296,6 +309,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // flags from all the reduction operations. FastMathFlags FMF = FastMathFlags::getFast(); + // The first instruction in the use-def chain of the Phi node that requires + // exact floating point operations. + Instruction *ExactFPMathInst = nullptr; + // A value in the reduction can be used: // - By the reduction: // - Reduction operation: @@ -339,6 +356,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, if (Cur != Start) { ReduxDesc = isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF); + ExactFPMathInst = ExactFPMathInst == nullptr + ? ReduxDesc.getExactFPMathInst() + : ExactFPMathInst; if (!ReduxDesc.isRecurrence()) return false; // FIXME: FMF is allowed on phi, but propagation is not handled correctly. @@ -467,8 +487,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) return false; - const bool IsOrdered = checkOrderedReduction( - Kind, ReduxDesc.getExactFPMathInst(), ExitInstruction, Phi); + const bool IsOrdered = + checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi); if (Start != Phi) { // If the starting value is not the same as the phi node, we speculatively @@ -500,21 +520,24 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, computeRecurrenceType(ExitInstruction, DB, AC, DT); if (ComputedType != RecurrenceType) return false; - - // The recurrence expression will be represented in a narrower type. If - // there are any cast instructions that will be unnecessary, collect them - // in CastInsts. Note that the 'and' instruction was already included in - // this list. 
- // - // TODO: A better way to represent this may be to tag in some way all the - // instructions that are a part of the reduction. The vectorizer cost - // model could then apply the recurrence type to these instructions, - // without needing a white list of instructions to ignore. - // This may also be useful for the inloop reductions, if it can be - // kept simple enough. - collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); } + // Collect cast instructions and the minimum width used by the recurrence. + // If the starting value is not the same as the phi node and the computed + // recurrence type is equal to the recurrence type, the recurrence expression + // will be represented in a narrower or wider type. If there are any cast + // instructions that will be unnecessary, collect them in CastsFromRecurTy. + // Note that the 'and' instruction was already included in this list. + // + // TODO: A better way to represent this may be to tag in some way all the + // instructions that are a part of the reduction. The vectorizer cost + // model could then apply the recurrence type to these instructions, + // without needing a white list of instructions to ignore. + // This may also be useful for the inloop reductions, if it can be + // kept simple enough. + collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts, + MinWidthCastToRecurrenceType); + // We found a reduction var if we have reached the original phi node and we // only have a single instruction with out-of-loop users. @@ -522,9 +545,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, // is saved as part of the RecurrenceDescriptor. // Save the description of this reduction variable. - RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, - ReduxDesc.getExactFPMathInst(), RecurrenceType, - IsSigned, IsOrdered, CastInsts); + RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst, + RecurrenceType, IsSigned, IsOrdered, CastInsts, + MinWidthCastToRecurrenceType); RedDes = RD; return true; @@ -1397,8 +1420,9 @@ bool InductionDescriptor::isInductionPHI( // Always use i8 element type for opaque pointer inductions. PointerType *PtrTy = cast<PointerType>(PhiTy); - Type *ElementType = PtrTy->isOpaque() ? Type::getInt8Ty(PtrTy->getContext()) - : PtrTy->getElementType(); + Type *ElementType = PtrTy->isOpaque() + ? Type::getInt8Ty(PtrTy->getContext()) + : PtrTy->getNonOpaquePointerElementType(); if (!ElementType->isSized()) return false; diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp index d7b202f83189..0f3929f45506 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp @@ -254,7 +254,7 @@ IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { IVUsers::IVUsers(Loop *L, AssumptionCache *AC, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE) - : L(L), AC(AC), LI(LI), DT(DT), SE(SE), IVUses() { + : L(L), AC(AC), LI(LI), DT(DT), SE(SE) { // Collect ephemeral values so that AddUsersIfInteresting skips them. 
EphValues.clear(); CodeMetrics::collectEphemeralValues(L, AC, EphValues); diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp index 140c88eb8b0d..f6e3dd354ff8 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InlineAdvisor.cpp @@ -21,11 +21,15 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "inline" +#ifdef LLVM_HAVE_TF_AOT_INLINERSIZEMODEL +#define LLVM_HAVE_TF_AOT +#endif // This weirdly named statistic tracks the number of times that, when attempting // to inline a function A into B, we analyze the callers of B in order to see @@ -160,18 +164,6 @@ InlineAdvice::InlineAdvice(InlineAdvisor *Advisor, CallBase &CB, DLoc(CB.getDebugLoc()), Block(CB.getParent()), ORE(ORE), IsInliningRecommended(IsInliningRecommended) {} -void InlineAdvisor::markFunctionAsDeleted(Function *F) { - assert((!DeletedFunctions.count(F)) && - "Cannot put cause a function to become dead twice!"); - DeletedFunctions.insert(F); -} - -void InlineAdvisor::freeDeletedFunctions() { - for (auto *F : DeletedFunctions) - delete F; - DeletedFunctions.clear(); -} - void InlineAdvice::recordInlineStatsIfNeeded() { if (Advisor->ImportedFunctionsStats) Advisor->ImportedFunctionsStats->recordInline(*Caller, *Callee); @@ -186,7 +178,6 @@ void InlineAdvice::recordInlining() { void InlineAdvice::recordInliningWithCalleeDeleted() { markRecorded(); recordInlineStatsIfNeeded(); - Advisor->markFunctionAsDeleted(Callee); recordInliningWithCalleeDeletedImpl(); } @@ -523,8 +514,6 @@ InlineAdvisor::~InlineAdvisor() { ImportedFunctionsStats->dump(InlinerFunctionImportStats == InlinerFunctionImportStatsOpts::Verbose); } - - freeDeletedFunctions(); } std::unique_ptr<InlineAdvice> InlineAdvisor::getMandatoryAdvice(CallBase &CB, @@ -569,3 +558,13 @@ std::unique_ptr<InlineAdvice> InlineAdvisor::getAdvice(CallBase &CB, OptimizationRemarkEmitter &InlineAdvisor::getCallerORE(CallBase &CB) { return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller()); } + +PreservedAnalyses +InlineAdvisorAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &MAM) { + const auto *IA = MAM.getCachedResult<InlineAdvisorAnalysis>(M); + if (!IA) + OS << "No Inline Advisor\n"; + else + IA->getAdvisor()->print(OS); + return PreservedAnalyses::all(); +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp index ff31e81aad08..d5411d916c77 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp @@ -361,10 +361,10 @@ protected: /// Model the elimination of repeated loads that is expected to happen /// whenever we simplify away the stores that would otherwise cause them to be /// loads. - bool EnableLoadElimination; + bool EnableLoadElimination = true; /// Whether we allow inlining for recursive call. 
- bool AllowRecursiveCall; + bool AllowRecursiveCall = false; SmallPtrSet<Value *, 16> LoadAddrSet; @@ -455,8 +455,7 @@ public: OptimizationRemarkEmitter *ORE = nullptr) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), - CandidateCall(Call), EnableLoadElimination(true), - AllowRecursiveCall(false) {} + CandidateCall(Call) {} InlineResult analyze(); @@ -2898,15 +2897,6 @@ Optional<InlineResult> llvm::getAttributeBasedInliningDecision( if (Call.isNoInline()) return InlineResult::failure("noinline call site attribute"); - // Don't inline functions if one does not have any stack protector attribute - // but the other does. - if (Caller->hasStackProtectorFnAttr() && !Callee->hasStackProtectorFnAttr()) - return InlineResult::failure( - "stack protected caller but callee requested no stack protector"); - if (Callee->hasStackProtectorFnAttr() && !Caller->hasStackProtectorFnAttr()) - return InlineResult::failure( - "stack protected callee but caller requested no stack protector"); - return None; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp index 4831b22b1d46..b71b39334ace 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OverflowInstAnalysis.h" @@ -70,7 +71,7 @@ static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyCastInst(unsigned, Value *, Type *, const SimplifyQuery &, unsigned); -static Value *SimplifyGEPInst(Type *, ArrayRef<Value *>, bool, +static Value *SimplifyGEPInst(Type *, Value *, ArrayRef<Value *>, bool, const SimplifyQuery &, unsigned); static Value *SimplifySelectInst(Value *, Value *, Value *, const SimplifyQuery &, unsigned); @@ -620,6 +621,10 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q)) return C; + // X + poison -> poison + if (isa<PoisonValue>(Op1)) + return Op1; + // X + undef -> undef if (Q.isUndefValue(Op1)) return Op1; @@ -1074,6 +1079,16 @@ static bool isDivZero(Value *X, Value *Y, const SimplifyQuery &Q, } // IsSigned == false. + + // Is the unsigned dividend known to be less than a constant divisor? + // TODO: Convert this (and above) to range analysis + // ("computeConstantRangeIncludingKnownBits")? + const APInt *C; + if (match(Y, m_APInt(C)) && + computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, Q.DT).getMaxValue().ult(*C)) + return true; + + // Try again for any divisor: // Is the dividend unsigned less than the divisor? 
return isICmpTrue(ICmpInst::ICMP_ULT, X, Y, Q, MaxRecurse); } @@ -2254,14 +2269,21 @@ static Value *simplifyOrLogic(Value *X, Value *Y) { match(Y, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) return NotA; - // ~(A ^ B) | (A & B) --> ~(A & B) - // ~(A ^ B) | (B & A) --> ~(A & B) + // ~(A ^ B) | (A & B) --> ~(A ^ B) + // ~(A ^ B) | (B & A) --> ~(A ^ B) Value *NotAB; if (match(X, m_CombineAnd(m_NotForbidUndef(m_Xor(m_Value(A), m_Value(B))), m_Value(NotAB))) && match(Y, m_c_And(m_Specific(A), m_Specific(B)))) return NotAB; + // ~(A & B) | (A ^ B) --> ~(A & B) + // ~(A & B) | (B ^ A) --> ~(A & B) + if (match(X, m_CombineAnd(m_NotForbidUndef(m_And(m_Value(A), m_Value(B))), + m_Value(NotAB))) && + match(Y, m_c_Xor(m_Specific(A), m_Specific(B)))) + return NotAB; + return nullptr; } @@ -2685,7 +2707,9 @@ computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // Fold comparisons for non-escaping pointer even if the allocation call // cannot be elided. We cannot fold malloc comparison to null. Also, the - // dynamic allocation call could be either of the operands. + // dynamic allocation call could be either of the operands. Note that + // the other operand can not be based on the alloc - if it were, then + // the cmp itself would be a capture. Value *MI = nullptr; if (isAllocLikeFn(LHS, TLI) && llvm::isKnownNonZero(RHS, DL, 0, nullptr, CxtI, DT)) @@ -2890,7 +2914,8 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, if (RHS_CR.isFullSet()) return ConstantInt::getTrue(ITy); - ConstantRange LHS_CR = computeConstantRange(LHS, IIQ.UseInstrInfo); + ConstantRange LHS_CR = + computeConstantRange(LHS, CmpInst::isSigned(Pred), IIQ.UseInstrInfo); if (!LHS_CR.isFullSet()) { if (RHS_CR.contains(LHS_CR)) return ConstantInt::getTrue(ITy); @@ -4057,9 +4082,9 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, NewOps[1], Q, MaxRecurse - 1)); if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) - return PreventSelfSimplify(SimplifyGEPInst(GEP->getSourceElementType(), - NewOps, GEP->isInBounds(), Q, - MaxRecurse - 1)); + return PreventSelfSimplify(SimplifyGEPInst( + GEP->getSourceElementType(), NewOps[0], makeArrayRef(NewOps).slice(1), + GEP->isInBounds(), Q, MaxRecurse - 1)); if (isa<SelectInst>(I)) return PreventSelfSimplify( @@ -4417,45 +4442,52 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, /// Given operands for an GetElementPtrInst, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds, +static Value *SimplifyGEPInst(Type *SrcTy, Value *Ptr, + ArrayRef<Value *> Indices, bool InBounds, const SimplifyQuery &Q, unsigned) { // The type of the GEP pointer operand. unsigned AS = - cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace(); + cast<PointerType>(Ptr->getType()->getScalarType())->getAddressSpace(); // getelementptr P -> P. - if (Ops.size() == 1) - return Ops[0]; + if (Indices.empty()) + return Ptr; // Compute the (pointer) type returned by the GEP instruction. - Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1)); + Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Indices); Type *GEPTy = PointerType::get(LastType, AS); - for (Value *Op : Ops) { - // If one of the operands is a vector, the result type is a vector of - // pointers. All vector operands must have the same number of elements. 
- if (VectorType *VT = dyn_cast<VectorType>(Op->getType())) { - GEPTy = VectorType::get(GEPTy, VT->getElementCount()); - break; + if (VectorType *VT = dyn_cast<VectorType>(Ptr->getType())) + GEPTy = VectorType::get(GEPTy, VT->getElementCount()); + else { + for (Value *Op : Indices) { + // If one of the operands is a vector, the result type is a vector of + // pointers. All vector operands must have the same number of elements. + if (VectorType *VT = dyn_cast<VectorType>(Op->getType())) { + GEPTy = VectorType::get(GEPTy, VT->getElementCount()); + break; + } } } // getelementptr poison, idx -> poison // getelementptr baseptr, poison -> poison - if (any_of(Ops, [](const auto *V) { return isa<PoisonValue>(V); })) + if (isa<PoisonValue>(Ptr) || + any_of(Indices, [](const auto *V) { return isa<PoisonValue>(V); })) return PoisonValue::get(GEPTy); - if (Q.isUndefValue(Ops[0])) - return UndefValue::get(GEPTy); + if (Q.isUndefValue(Ptr)) + // If inbounds, we can choose an out-of-bounds pointer as a base pointer. + return InBounds ? PoisonValue::get(GEPTy) : UndefValue::get(GEPTy); bool IsScalableVec = - isa<ScalableVectorType>(SrcTy) || any_of(Ops, [](const Value *V) { + isa<ScalableVectorType>(SrcTy) || any_of(Indices, [](const Value *V) { return isa<ScalableVectorType>(V->getType()); }); - if (Ops.size() == 2) { + if (Indices.size() == 1) { // getelementptr P, 0 -> P. - if (match(Ops[1], m_Zero()) && Ops[0]->getType() == GEPTy) - return Ops[0]; + if (match(Indices[0], m_Zero()) && Ptr->getType() == GEPTy) + return Ptr; Type *Ty = SrcTy; if (!IsScalableVec && Ty->isSized()) { @@ -4463,37 +4495,37 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds, uint64_t C; uint64_t TyAllocSize = Q.DL.getTypeAllocSize(Ty); // getelementptr P, N -> P if P points to a type of zero size. - if (TyAllocSize == 0 && Ops[0]->getType() == GEPTy) - return Ops[0]; + if (TyAllocSize == 0 && Ptr->getType() == GEPTy) + return Ptr; // The following transforms are only safe if the ptrtoint cast // doesn't truncate the pointers. - if (Ops[1]->getType()->getScalarSizeInBits() == + if (Indices[0]->getType()->getScalarSizeInBits() == Q.DL.getPointerSizeInBits(AS)) { - auto CanSimplify = [GEPTy, &P, V = Ops[0]]() -> bool { + auto CanSimplify = [GEPTy, &P, Ptr]() -> bool { return P->getType() == GEPTy && - getUnderlyingObject(P) == getUnderlyingObject(V); + getUnderlyingObject(P) == getUnderlyingObject(Ptr); }; // getelementptr V, (sub P, V) -> P if P points to a type of size 1. if (TyAllocSize == 1 && - match(Ops[1], m_Sub(m_PtrToInt(m_Value(P)), - m_PtrToInt(m_Specific(Ops[0])))) && + match(Indices[0], + m_Sub(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Specific(Ptr)))) && CanSimplify()) return P; // getelementptr V, (ashr (sub P, V), C) -> P if P points to a type of // size 1 << C. - if (match(Ops[1], m_AShr(m_Sub(m_PtrToInt(m_Value(P)), - m_PtrToInt(m_Specific(Ops[0]))), - m_ConstantInt(C))) && + if (match(Indices[0], m_AShr(m_Sub(m_PtrToInt(m_Value(P)), + m_PtrToInt(m_Specific(Ptr))), + m_ConstantInt(C))) && TyAllocSize == 1ULL << C && CanSimplify()) return P; // getelementptr V, (sdiv (sub P, V), C) -> P if P points to a type of // size C. 
- if (match(Ops[1], m_SDiv(m_Sub(m_PtrToInt(m_Value(P)), - m_PtrToInt(m_Specific(Ops[0]))), - m_SpecificInt(TyAllocSize))) && + if (match(Indices[0], m_SDiv(m_Sub(m_PtrToInt(m_Value(P)), + m_PtrToInt(m_Specific(Ptr))), + m_SpecificInt(TyAllocSize))) && CanSimplify()) return P; } @@ -4501,29 +4533,28 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds, } if (!IsScalableVec && Q.DL.getTypeAllocSize(LastType) == 1 && - all_of(Ops.slice(1).drop_back(1), + all_of(Indices.drop_back(1), [](Value *Idx) { return match(Idx, m_Zero()); })) { unsigned IdxWidth = - Q.DL.getIndexSizeInBits(Ops[0]->getType()->getPointerAddressSpace()); - if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == IdxWidth) { + Q.DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()); + if (Q.DL.getTypeSizeInBits(Indices.back()->getType()) == IdxWidth) { APInt BasePtrOffset(IdxWidth, 0); Value *StrippedBasePtr = - Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL, - BasePtrOffset); + Ptr->stripAndAccumulateInBoundsConstantOffsets(Q.DL, BasePtrOffset); // Avoid creating inttoptr of zero here: While LLVMs treatment of // inttoptr is generally conservative, this particular case is folded to // a null pointer, which will have incorrect provenance. // gep (gep V, C), (sub 0, V) -> C - if (match(Ops.back(), + if (match(Indices.back(), m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr)))) && !BasePtrOffset.isZero()) { auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset); return ConstantExpr::getIntToPtr(CI, GEPTy); } // gep (gep V, C), (xor V, -1) -> C-1 - if (match(Ops.back(), + if (match(Indices.back(), m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes())) && !BasePtrOffset.isOne()) { auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1); @@ -4533,17 +4564,18 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds, } // Check to see if this is constant foldable. - if (!all_of(Ops, [](Value *V) { return isa<Constant>(V); })) + if (!isa<Constant>(Ptr) || + !all_of(Indices, [](Value *V) { return isa<Constant>(V); })) return nullptr; - auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]), - Ops.slice(1), InBounds); + auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ptr), Indices, + InBounds); return ConstantFoldConstant(CE, Q.DL); } -Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds, - const SimplifyQuery &Q) { - return ::SimplifyGEPInst(SrcTy, Ops, InBounds, Q, RecursionLimit); +Value *llvm::SimplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef<Value *> Indices, + bool InBounds, const SimplifyQuery &Q) { + return ::SimplifyGEPInst(SrcTy, Ptr, Indices, InBounds, Q, RecursionLimit); } /// Given operands for an InsertValueInst, see if we can fold the result. 
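// Standalone sketch of the pointer arithmetic behind the SimplifyGEPInst folds
// above, using plain C++ arrays instead of LLVM IR. The buffers and offsets are
// arbitrary illustration values; only the standard library is assumed.
#include <cassert>
#include <cstddef>

int main() {
  char Buf[16] = {};
  char *V = Buf + 2; // plays the role of the GEP base pointer
  char *P = Buf + 9; // another pointer into the same object

  // getelementptr P, 0 -> P: a zero index is the identity.
  assert(V + 0 == V);

  // getelementptr V, (sub P, V) -> P when the element size is 1 byte:
  // adding the byte distance back onto V reproduces P exactly.
  ptrdiff_t Diff = P - V;
  assert(V + Diff == P);

  // With an element size of 2^C bytes, the byte distance is shifted right by C
  // first (the ashr variant above); scaling by whole elements still lands on P.
  short SBuf[8] = {};
  short *SV = SBuf + 1, *SP = SBuf + 5;
  ptrdiff_t Elems = SP - SV; // byte distance divided by sizeof(short)
  assert(SV + Elems == SP);
  return 0;
}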
@@ -5603,26 +5635,6 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, return nullptr; } -static APInt getMaxMinLimit(Intrinsic::ID IID, unsigned BitWidth) { - switch (IID) { - case Intrinsic::smax: return APInt::getSignedMaxValue(BitWidth); - case Intrinsic::smin: return APInt::getSignedMinValue(BitWidth); - case Intrinsic::umax: return APInt::getMaxValue(BitWidth); - case Intrinsic::umin: return APInt::getMinValue(BitWidth); - default: llvm_unreachable("Unexpected intrinsic"); - } -} - -static ICmpInst::Predicate getMaxMinPredicate(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::smax: return ICmpInst::ICMP_SGE; - case Intrinsic::smin: return ICmpInst::ICMP_SLE; - case Intrinsic::umax: return ICmpInst::ICMP_UGE; - case Intrinsic::umin: return ICmpInst::ICMP_ULE; - default: llvm_unreachable("Unexpected intrinsic"); - } -} - /// Given a min/max intrinsic, see if it can be removed based on having an /// operand that is another min/max intrinsic with shared operand(s). The caller /// is expected to swap the operand arguments to handle commutation. @@ -5690,19 +5702,21 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, // Assume undef is the limit value. if (Q.isUndefValue(Op1)) - return ConstantInt::get(ReturnType, getMaxMinLimit(IID, BitWidth)); + return ConstantInt::get( + ReturnType, MinMaxIntrinsic::getSaturationPoint(IID, BitWidth)); const APInt *C; if (match(Op1, m_APIntAllowUndef(C))) { // Clamp to limit value. For example: // umax(i8 %x, i8 255) --> 255 - if (*C == getMaxMinLimit(IID, BitWidth)) + if (*C == MinMaxIntrinsic::getSaturationPoint(IID, BitWidth)) return ConstantInt::get(ReturnType, *C); // If the constant op is the opposite of the limit value, the other must // be larger/smaller or equal. For example: // umin(i8 %x, i8 255) --> %x - if (*C == getMaxMinLimit(getInverseMinMaxIntrinsic(IID), BitWidth)) + if (*C == MinMaxIntrinsic::getSaturationPoint( + getInverseMinMaxIntrinsic(IID), BitWidth)) return Op0; // Remove nested call if constant operands allow it. 
Example: @@ -5713,10 +5727,9 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, Value *M00 = MinMax0->getOperand(0), *M01 = MinMax0->getOperand(1); const APInt *InnerC; if ((match(M00, m_APInt(InnerC)) || match(M01, m_APInt(InnerC))) && - ((IID == Intrinsic::smax && InnerC->sge(*C)) || - (IID == Intrinsic::smin && InnerC->sle(*C)) || - (IID == Intrinsic::umax && InnerC->uge(*C)) || - (IID == Intrinsic::umin && InnerC->ule(*C)))) + ICmpInst::compare(*InnerC, *C, + ICmpInst::getNonStrictPredicate( + MinMaxIntrinsic::getPredicate(IID)))) return Op0; } } @@ -5726,7 +5739,8 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, if (Value *V = foldMinMaxSharedOp(IID, Op1, Op0)) return V; - ICmpInst::Predicate Pred = getMaxMinPredicate(IID); + ICmpInst::Predicate Pred = + ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID)); if (isICmpTrue(Pred, Op0, Op1, Q.getWithoutUndef(), RecursionLimit)) return Op0; if (isICmpTrue(Pred, Op1, Op0, Q.getWithoutUndef(), RecursionLimit)) @@ -6277,8 +6291,9 @@ static Value *simplifyInstructionWithOperands(Instruction *I, break; case Instruction::GetElementPtr: { auto *GEPI = cast<GetElementPtrInst>(I); - Result = SimplifyGEPInst(GEPI->getSourceElementType(), NewOps, - GEPI->isInBounds(), Q); + Result = + SimplifyGEPInst(GEPI->getSourceElementType(), NewOps[0], + makeArrayRef(NewOps).slice(1), GEPI->isInBounds(), Q); break; } case Instruction::InsertValue: { @@ -6460,3 +6475,5 @@ const SimplifyQuery getBestSimplifyQuery(AnalysisManager<T, TArgs...> &AM, template const SimplifyQuery getBestSimplifyQuery(AnalysisManager<Function> &, Function &); } + +void InstSimplifyFolder::anchor() {} diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp index 0007c54b16d0..e8e9593d7030 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp @@ -1503,7 +1503,7 @@ void LazyCallGraph::removeEdge(Node &SourceN, Node &TargetN) { void LazyCallGraph::removeDeadFunction(Function &F) { // FIXME: This is unnecessarily restrictive. We should be able to remove // functions which recursively call themselves. - assert(F.use_empty() && + assert(F.hasZeroLiveUses() && "This routine should only be called on trivially dead functions!"); // We shouldn't remove library functions as they are never really dead while @@ -1522,13 +1522,6 @@ void LazyCallGraph::removeDeadFunction(Function &F) { // Remove this from the entry edges if present. EntryEdges.removeEdgeInternal(N); - if (SCCMap.empty()) { - // No SCCs have been formed, so removing this is fine and there is nothing - // else necessary at this point but clearing out the node. - N.clear(); - return; - } - // Cannot remove a function which has yet to be visited in the DFS walk, so // if we have a node at all then we must have an SCC and RefSCC. 
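// Standalone sketch (plain C++, not the LLVM min/max intrinsics) of the
// saturation-point reasoning used by the simplifyBinaryIntrinsic folds in the
// InstructionSimplify hunks above: umax saturates at the type maximum, umin at
// the type minimum, so a constant operand equal to the saturation point decides
// the result, and one equal to the inverse operation's saturation point is a
// no-op. Checked here for 8-bit unsigned values.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t UMaxSat = 0xFF; // saturation point of umax on i8
  const uint8_t UMinSat = 0x00; // saturation point of umin on i8
  for (unsigned xi = 0; xi < 256; ++xi) {
    uint8_t X = static_cast<uint8_t>(xi);
    assert(std::max(X, UMaxSat) == UMaxSat); // umax(X, 255) --> 255
    assert(std::min(X, UMaxSat) == X);       // umin(X, 255) --> X
    assert(std::min(X, UMinSat) == UMinSat); // umin(X, 0)   --> 0
    assert(std::max(X, UMinSat) == X);       // umax(X, 0)   --> X
  }
  return 0;
}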
auto CI = SCCMap.find(&N); @@ -1544,15 +1537,9 @@ void LazyCallGraph::removeDeadFunction(Function &F) { assert(C.size() == 1 && "Dead functions must be in a singular SCC"); assert(RC.size() == 1 && "Dead functions must be in a singular RefSCC"); - auto RCIndexI = RefSCCIndices.find(&RC); - int RCIndex = RCIndexI->second; - PostOrderRefSCCs.erase(PostOrderRefSCCs.begin() + RCIndex); - RefSCCIndices.erase(RCIndexI); - for (int i = RCIndex, Size = PostOrderRefSCCs.size(); i < Size; ++i) - RefSCCIndices[PostOrderRefSCCs[i]] = i; - // Finally clear out all the data structures from the node down through the - // components. + // components. postorder_ref_scc_iterator will skip empty RefSCCs, so no need + // to adjust LazyCallGraph data structures. N.clear(); N.G = nullptr; N.F = nullptr; diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp index 5b5d48bf6fe5..e311b40ab25c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp @@ -395,7 +395,8 @@ class LazyValueInfoImpl { /// if it exists in the module. Function *GuardDecl; - Optional<ValueLatticeElement> getBlockValue(Value *Val, BasicBlock *BB); + Optional<ValueLatticeElement> getBlockValue(Value *Val, BasicBlock *BB, + Instruction *CxtI); Optional<ValueLatticeElement> getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, Instruction *CxtI = nullptr); @@ -533,15 +534,17 @@ void LazyValueInfoImpl::solve() { } } -Optional<ValueLatticeElement> LazyValueInfoImpl::getBlockValue(Value *Val, - BasicBlock *BB) { +Optional<ValueLatticeElement> LazyValueInfoImpl::getBlockValue( + Value *Val, BasicBlock *BB, Instruction *CxtI) { // If already a constant, there is nothing to compute. if (Constant *VC = dyn_cast<Constant>(Val)) return ValueLatticeElement::get(VC); if (Optional<ValueLatticeElement> OptLatticeVal = - TheCache.getCachedValueInfo(Val, BB)) + TheCache.getCachedValueInfo(Val, BB)) { + intersectAssumeOrGuardBlockValueConstantRange(Val, *OptLatticeVal, CxtI); return OptLatticeVal; + } // We have hit a cycle, assume overdefined. 
if (!pushBlockValue({ BB, Val })) @@ -792,31 +795,41 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( } } +static ConstantRange getConstantRangeOrFull(const ValueLatticeElement &Val, + Type *Ty, const DataLayout &DL) { + if (Val.isConstantRange()) + return Val.getConstantRange(); + return ConstantRange::getFull(DL.getTypeSizeInBits(Ty)); +} + Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueSelect( SelectInst *SI, BasicBlock *BB) { // Recurse on our inputs if needed Optional<ValueLatticeElement> OptTrueVal = - getBlockValue(SI->getTrueValue(), BB); + getBlockValue(SI->getTrueValue(), BB, SI); if (!OptTrueVal) return None; ValueLatticeElement &TrueVal = *OptTrueVal; Optional<ValueLatticeElement> OptFalseVal = - getBlockValue(SI->getFalseValue(), BB); + getBlockValue(SI->getFalseValue(), BB, SI); if (!OptFalseVal) return None; ValueLatticeElement &FalseVal = *OptFalseVal; - if (TrueVal.isConstantRange() && FalseVal.isConstantRange()) { - const ConstantRange &TrueCR = TrueVal.getConstantRange(); - const ConstantRange &FalseCR = FalseVal.getConstantRange(); + if (TrueVal.isConstantRange() || FalseVal.isConstantRange()) { + const ConstantRange &TrueCR = + getConstantRangeOrFull(TrueVal, SI->getType(), DL); + const ConstantRange &FalseCR = + getConstantRangeOrFull(FalseVal, SI->getType(), DL); Value *LHS = nullptr; Value *RHS = nullptr; SelectPatternResult SPR = matchSelectPattern(SI, LHS, RHS); // Is this a min specifically of our two inputs? (Avoid the risk of // ValueTracking getting smarter looking back past our immediate inputs.) if (SelectPatternResult::isMinOrMax(SPR.Flavor) && - LHS == SI->getTrueValue() && RHS == SI->getFalseValue()) { + ((LHS == SI->getTrueValue() && RHS == SI->getFalseValue()) || + (RHS == SI->getTrueValue() && LHS == SI->getFalseValue()))) { ConstantRange ResultCR = [&]() { switch (SPR.Flavor) { default: @@ -873,17 +886,10 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueSelect( Optional<ConstantRange> LazyValueInfoImpl::getRangeFor(Value *V, Instruction *CxtI, BasicBlock *BB) { - Optional<ValueLatticeElement> OptVal = getBlockValue(V, BB); + Optional<ValueLatticeElement> OptVal = getBlockValue(V, BB, CxtI); if (!OptVal) return None; - - ValueLatticeElement &Val = *OptVal; - intersectAssumeOrGuardBlockValueConstantRange(V, Val, CxtI); - if (Val.isConstantRange()) - return Val.getConstantRange(); - - const unsigned OperandBitWidth = DL.getTypeSizeInBits(V->getType()); - return ConstantRange::getFull(OperandBitWidth); + return getConstantRangeOrFull(*OptVal, V->getType(), DL); } Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueCast( @@ -1017,7 +1023,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueExtractValue( if (Value *V = SimplifyExtractValueInst( EVI->getAggregateOperand(), EVI->getIndices(), EVI->getModule()->getDataLayout())) - return getBlockValue(V, BB); + return getBlockValue(V, BB, EVI); LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined (unknown extractvalue).\n"); @@ -1126,14 +1132,16 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, } // If (X urem Modulus) >= C, then X >= C. + // If trunc X >= C, then X >= C. // TODO: An upper bound could be computed as well. - if (match(LHS, m_URem(m_Specific(Val), m_Value())) && + if (match(LHS, m_CombineOr(m_URem(m_Specific(Val), m_Value()), + m_Trunc(m_Specific(Val)))) && match(RHS, m_APInt(C))) { // Use the icmp region so we don't have to deal with different predicates. 
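// Standalone sketch of the lower-bound fact used by the LazyValueInfo change
// above: both (X urem M) and a zero-extended truncation of X are unsigned
// values no larger than X itself, so any lower bound proved for them also holds
// for X. The modulus and step below are arbitrary illustration values.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 100000; X += 7) {
    uint32_t Rem = X % 97;                  // X urem Modulus
    uint32_t Low = static_cast<uint8_t>(X); // zext(trunc X to i8)
    assert(Rem <= X && Low <= X);
    // Hence: Rem >= C implies X >= C, and Low >= C implies X >= C.
  }
  return 0;
}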
ConstantRange CR = ConstantRange::makeExactICmpRegion(EdgePred, *C); if (!CR.isEmptySet()) return ValueLatticeElement::getRange(ConstantRange::getNonEmpty( - CR.getUnsignedMin(), APInt(BitWidth, 0))); + CR.getUnsignedMin().zextOrSelf(BitWidth), APInt(BitWidth, 0))); } return ValueLatticeElement::getOverdefined(); @@ -1430,14 +1438,12 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::getEdgeValue( // Can't get any more precise here return LocalResult; - Optional<ValueLatticeElement> OptInBlock = getBlockValue(Val, BBFrom); + Optional<ValueLatticeElement> OptInBlock = + getBlockValue(Val, BBFrom, BBFrom->getTerminator()); if (!OptInBlock) return None; ValueLatticeElement &InBlock = *OptInBlock; - // Try to intersect ranges of the BB and the constraint on the edge. - intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, - BBFrom->getTerminator()); // We can use the context instruction (generically the ultimate instruction // the calling pass is trying to simplify) here, even though the result of // this function is generally cached when called from the solve* functions @@ -1457,15 +1463,14 @@ ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, << BB->getName() << "'\n"); assert(BlockValueStack.empty() && BlockValueSet.empty()); - Optional<ValueLatticeElement> OptResult = getBlockValue(V, BB); + Optional<ValueLatticeElement> OptResult = getBlockValue(V, BB, CxtI); if (!OptResult) { solve(); - OptResult = getBlockValue(V, BB); + OptResult = getBlockValue(V, BB, CxtI); assert(OptResult && "Value not available after solving"); } - ValueLatticeElement Result = *OptResult; - intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI); + ValueLatticeElement Result = *OptResult; LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 6444518dc70c..2ab78d2b7ee2 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -519,8 +519,7 @@ public: AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, PredicatedScalarEvolution &PSE) - : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), - IsRTCheckAnalysisNeeded(false), PSE(PSE) {} + : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {} /// Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -620,7 +619,7 @@ private: /// memcheck analysis without dependency checking /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is /// cleared while this remains set if we have potentially dependent accesses. - bool IsRTCheckAnalysisNeeded; + bool IsRTCheckAnalysisNeeded = false; /// The SCEV predicate containing all the SCEV-related assumptions. 
PredicatedScalarEvolution &PSE; @@ -1055,7 +1054,6 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, bool ShouldCheckWrap) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); - assert(!AccessTy->isAggregateType() && "Bad stride - Not a pointer to a scalar type"); if (isa<ScalableVectorType>(AccessTy)) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy @@ -2245,10 +2243,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI) : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)), PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)), - DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L), - NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false), - HasConvergentOp(false), - HasDependenceInvolvingLoopInvariantAddress(false) { + DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) { if (canAnalyzeLoop()) analyzeLoop(AA, LI, TLI, DT); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp index 7b895d8a5dc2..ba014bd08c98 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -477,9 +477,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) { CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, - AAResults &AA, DependenceInfo &DI, - Optional<unsigned> TRT) - : Loops(Loops), TripCounts(), LoopCosts(), + AAResults &AA, DependenceInfo &DI, Optional<unsigned> TRT) + : Loops(Loops), TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT), LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) { assert(!Loops.empty() && "Expecting a non-empty loop vector."); diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp index b35fb2a190f6..dd6958716127 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp @@ -695,11 +695,10 @@ class UnloopUpdater { // Flag the presence of an irreducible backedge whose destination is a block // directly contained by the original unloop. - bool FoundIB; + bool FoundIB = false; public: - UnloopUpdater(Loop *UL, LoopInfo *LInfo) - : Unloop(*UL), LI(LInfo), DFS(UL), FoundIB(false) {} + UnloopUpdater(Loop *UL, LoopInfo *LInfo) : Unloop(*UL), LI(LInfo), DFS(UL) {} void updateBlockParents(); diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp index 9e470e998e67..b720bab454e9 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp @@ -69,8 +69,7 @@ char PrintLoopPassWrapper::ID = 0; char LPPassManager::ID = 0; -LPPassManager::LPPassManager() - : FunctionPass(ID), PMDataManager() { +LPPassManager::LPPassManager() : FunctionPass(ID) { LI = nullptr; CurrentLoop = nullptr; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp b/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp index f5a65cd2b689..0480c1cd2842 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -11,35 +11,34 @@ // 'release' mode) or a runtime-loaded model (the 'development' case). 
// //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" -#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) - -#include <limits> -#include <unordered_map> -#include <unordered_set> - +#include "llvm/Analysis/MLInlineAdvisor.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/MLInlineAdvisor.h" +#include "llvm/Analysis/InlineModelFeatureMaps.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ReleaseModeModelRunner.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Config/config.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Path.h" +#include <limits> +#include <unordered_map> +#include <unordered_set> + using namespace llvm; -#ifdef LLVM_HAVE_TF_AOT -#include "llvm/Analysis/ReleaseModeModelRunner.h" +#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL) // codegen-ed file #include "InlinerSizeModel.h" // NOLINT -#include "llvm/Analysis/InlineModelFeatureMaps.h" std::unique_ptr<InlineAdvisor> llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { @@ -90,7 +89,8 @@ MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM, std::unique_ptr<MLModelRunner> Runner) : InlineAdvisor( M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), - ModelRunner(std::move(Runner)), CG(new CallGraph(M)), + ModelRunner(std::move(Runner)), + CG(MAM.getResult<LazyCallGraphAnalysis>(M)), InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) { assert(ModelRunner); @@ -100,7 +100,8 @@ MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM, // critical in behavioral cloning - i.e. training a model to mimic the manual // heuristic's decisions - and, thus, equally important for training for // improvement. - for (auto I = scc_begin(CG.get()); !I.isAtEnd(); ++I) { + CallGraph CGraph(M); + for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) { const std::vector<CallGraphNode *> &CGNodes = *I; unsigned Level = 0; for (auto *CGNode : CGNodes) { @@ -110,7 +111,7 @@ MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM, for (auto &I : instructions(F)) { if (auto *CS = getInlinableCS(I)) { auto *Called = CS->getCalledFunction(); - auto Pos = FunctionLevels.find(Called); + auto Pos = FunctionLevels.find(&CG.get(*Called)); // In bottom up traversal, an inlinable callee is either in the // same SCC, or to a function in a visited SCC. So not finding its // level means we haven't visited it yet, meaning it's in this SCC. @@ -123,24 +124,73 @@ MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM, for (auto *CGNode : CGNodes) { Function *F = CGNode->getFunction(); if (F && !F->isDeclaration()) - FunctionLevels[F] = Level; + FunctionLevels[&CG.get(*F)] = Level; } } + for (auto KVP : FunctionLevels) { + AllNodes.insert(KVP.first); + EdgeCount += getLocalCalls(KVP.first->getFunction()); + } + NodeCount = AllNodes.size(); +} + +unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const { + return CG.lookup(F) ? 
FunctionLevels.at(CG.lookup(F)) : 0; } void MLInlineAdvisor::onPassEntry() { // Function passes executed between InlinerPass runs may have changed the // module-wide features. - if (!Invalid) - return; - NodeCount = 0; - EdgeCount = 0; - for (auto &F : M) - if (!F.isDeclaration()) { - ++NodeCount; - EdgeCount += getLocalCalls(F); + // The cgscc pass manager rules are such that: + // - if a pass leads to merging SCCs, then the pipeline is restarted on the + // merged SCC + // - if a pass leads to splitting the SCC, then we continue with one of the + // splits + // This means that the NodesInLastSCC is a superset (not strict) of the nodes + // that subsequent passes would have processed + // - in addition, if new Nodes were created by a pass (e.g. CoroSplit), + // they'd be adjacent to Nodes in the last SCC. So we just need to check the + // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't + // care about the nature of the Edge (call or ref). + NodeCount -= static_cast<int64_t>(NodesInLastSCC.size()); + while (!NodesInLastSCC.empty()) { + const auto *N = NodesInLastSCC.front(); + NodesInLastSCC.pop_front(); + // The Function wrapped by N could have been deleted since we last saw it. + if (N->isDead()) { + assert(!N->getFunction().isDeclaration()); + continue; + } + ++NodeCount; + EdgeCount += getLocalCalls(N->getFunction()); + for (const auto &E : *(*N)) { + const auto *AdjNode = &E.getNode(); + assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration()); + auto I = AllNodes.insert(AdjNode); + if (I.second) + NodesInLastSCC.push_back(AdjNode); } - Invalid = false; + } + + EdgeCount -= EdgesOfLastSeenNodes; + EdgesOfLastSeenNodes = 0; +} + +void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) { + if (!LastSCC) + return; + // Keep track of the nodes and edges we last saw. Then, in onPassEntry, + // we update the node count and edge count from the subset of these nodes that + // survived. 
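// Standalone sketch of the worklist bookkeeping described in the onPassEntry
// comment above: starting from the nodes of the last visited SCC, walk their
// neighbors and count only nodes not seen before. The Graph type and integer
// node ids are toy stand-ins, not the LazyCallGraph API; the containers loosely
// mirror the AllNodes set and NodesInLastSCC deque.
#include <cstddef>
#include <deque>
#include <set>
#include <vector>

using Graph = std::vector<std::vector<int>>; // adjacency list by node id

// Counts previously unseen nodes reachable from the seed worklist, inserting
// them into Seen as they are discovered.
static std::size_t countNewNodes(const Graph &G, std::set<int> &Seen,
                                 std::deque<int> Worklist) {
  std::size_t NewNodes = 0;
  while (!Worklist.empty()) {
    int N = Worklist.front();
    Worklist.pop_front();
    for (int Adj : G[N])
      if (Seen.insert(Adj).second) { // first time this neighbor is seen
        ++NewNodes;
        Worklist.push_back(Adj);
      }
  }
  return NewNodes;
}

int main() {
  Graph G = {{1, 2}, {2}, {3}, {}}; // 0->1,2  1->2  2->3
  std::set<int> Seen = {0, 1};      // nodes already known from the last pass
  std::deque<int> LastSCC = {0, 1}; // seed the worklist with the last SCC
  return countNewNodes(G, Seen, LastSCC) == 2 ? 0 : 1; // discovers nodes 2 and 3
}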
+ assert(NodesInLastSCC.empty()); + assert(NodeCount >= LastSCC->size()); + EdgesOfLastSeenNodes = 0; + for (const auto &N : *LastSCC) { + assert(!N.isDead()); + EdgesOfLastSeenNodes += getLocalCalls(N.getFunction()); + NodesInLastSCC.push_back(&N); + } + assert(EdgeCount >= EdgesOfLastSeenNodes); } int64_t MLInlineAdvisor::getLocalCalls(Function &F) { @@ -192,7 +242,7 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, int64_t MLInlineAdvisor::getModuleIRSize() const { int64_t Ret = 0; - for (auto &F : CG->getModule()) + for (auto &F : M) if (!F.isDeclaration()) Ret += getIRSize(F); return Ret; @@ -263,7 +313,7 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) { *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) = CalleeBefore.BasicBlockCount; *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) = - FunctionLevels[&Caller]; + getInitialFunctionLevel(Caller); *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount; *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams; *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount; @@ -361,4 +411,3 @@ void MLInlineAdvice::recordUnattemptedInliningImpl() { return R; }); } -#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp index ffdd7a2cfd4b..208f93aa1ac6 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -51,12 +51,13 @@ using namespace llvm; enum AllocType : uint8_t { OpNewLike = 1<<0, // allocates; never returns null - MallocLike = 1<<1 | OpNewLike, // allocates; may return null + MallocLike = 1<<1, // allocates; may return null AlignedAllocLike = 1<<2, // allocates with alignment; may return null CallocLike = 1<<3, // allocates + bzero ReallocLike = 1<<4, // reallocates StrDupLike = 1<<5, - MallocOrCallocLike = MallocLike | CallocLike | AlignedAllocLike, + MallocOrOpNewLike = MallocLike | OpNewLike, + MallocOrCallocLike = MallocLike | OpNewLike | CallocLike | AlignedAllocLike, AllocLike = MallocOrCallocLike | StrDupLike, AnyAlloc = AllocLike | ReallocLike }; @@ -66,64 +67,59 @@ struct AllocFnsTy { unsigned NumParams; // First and Second size parameters (or -1 if unused) int FstParam, SndParam; + // Alignment parameter for aligned_alloc and aligned new + int AlignParam; }; // FIXME: certain users need more information. E.g., SimplifyLibCalls needs to // know which functions are nounwind, noalias, nocapture parameters, etc. 
static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = { - {LibFunc_malloc, {MallocLike, 1, 0, -1}}, - {LibFunc_vec_malloc, {MallocLike, 1, 0, -1}}, - {LibFunc_valloc, {MallocLike, 1, 0, -1}}, - {LibFunc_Znwj, {OpNewLike, 1, 0, -1}}, // new(unsigned int) - {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow) - {LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new(unsigned int, align_val_t) - {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, // new(unsigned int, align_val_t, nothrow) - {MallocLike, 3, 0, -1}}, - {LibFunc_Znwm, {OpNewLike, 1, 0, -1}}, // new(unsigned long) - {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned long, nothrow) - {LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new(unsigned long, align_val_t) - {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, // new(unsigned long, align_val_t, nothrow) - {MallocLike, 3, 0, -1}}, - {LibFunc_Znaj, {OpNewLike, 1, 0, -1}}, // new[](unsigned int) - {LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow) - {LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new[](unsigned int, align_val_t) - {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, // new[](unsigned int, align_val_t, nothrow) - {MallocLike, 3, 0, -1}}, - {LibFunc_Znam, {OpNewLike, 1, 0, -1}}, // new[](unsigned long) - {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned long, nothrow) - {LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1}}, // new[](unsigned long, align_val_t) - {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, // new[](unsigned long, align_val_t, nothrow) - {MallocLike, 3, 0, -1}}, - {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1}}, // new(unsigned int) - {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow) - {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1}}, // new(unsigned long long) - {LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned long long, nothrow) - {LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1}}, // new[](unsigned int) - {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow) - {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1}}, // new[](unsigned long long) - {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned long long, nothrow) - {LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1}}, - {LibFunc_memalign, {AlignedAllocLike, 2, 1, -1}}, - {LibFunc_calloc, {CallocLike, 2, 0, 1}}, - {LibFunc_vec_calloc, {CallocLike, 2, 0, 1}}, - {LibFunc_realloc, {ReallocLike, 2, 1, -1}}, - {LibFunc_vec_realloc, {ReallocLike, 2, 1, -1}}, - {LibFunc_reallocf, {ReallocLike, 2, 1, -1}}, - {LibFunc_strdup, {StrDupLike, 1, -1, -1}}, - {LibFunc_strndup, {StrDupLike, 2, 1, -1}}, - {LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1}}, - // TODO: Handle "int posix_memalign(void **, size_t, size_t)" + {LibFunc_malloc, {MallocLike, 1, 0, -1, -1}}, + {LibFunc_vec_malloc, {MallocLike, 1, 0, -1, -1}}, + {LibFunc_valloc, {MallocLike, 1, 0, -1, -1}}, + {LibFunc_Znwj, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned int) + {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new(unsigned int, nothrow) + {LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new(unsigned int, align_val_t) + {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new(unsigned int, align_val_t, nothrow) + {LibFunc_Znwm, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned long) + {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // 
new(unsigned long, nothrow) + {LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new(unsigned long, align_val_t) + {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new(unsigned long, align_val_t, nothrow) + {LibFunc_Znaj, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned int) + {LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned int, nothrow) + {LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new[](unsigned int, align_val_t) + {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new[](unsigned int, align_val_t, nothrow) + {LibFunc_Znam, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned long) + {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned long, nothrow) + {LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new[](unsigned long, align_val_t) + {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new[](unsigned long, align_val_t, nothrow) + {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned int) + {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1, -1}}, // new(unsigned int, nothrow) + {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned long long) + {LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1, -1}}, // new(unsigned long long, nothrow) + {LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned int) + {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned int, nothrow) + {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned long long) + {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned long long, nothrow) + {LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1, 0}}, + {LibFunc_memalign, {AlignedAllocLike, 2, 1, -1, 0}}, + {LibFunc_calloc, {CallocLike, 2, 0, 1, -1}}, + {LibFunc_vec_calloc, {CallocLike, 2, 0, 1, -1}}, + {LibFunc_realloc, {ReallocLike, 2, 1, -1, -1}}, + {LibFunc_vec_realloc, {ReallocLike, 2, 1, -1, -1}}, + {LibFunc_reallocf, {ReallocLike, 2, 1, -1, -1}}, + {LibFunc_strdup, {StrDupLike, 1, -1, -1, -1}}, + {LibFunc_strndup, {StrDupLike, 2, 1, -1, -1}}, + {LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1, -1}}, + // TODO: Handle "int posix_memalign(void **, size_t, size_t)" }; -static const Function *getCalledFunction(const Value *V, bool LookThroughBitCast, +static const Function *getCalledFunction(const Value *V, bool &IsNoBuiltin) { // Don't care about intrinsics in this case. 
if (isa<IntrinsicInst>(V)) return nullptr; - if (LookThroughBitCast) - V = V->stripPointerCasts(); - const auto *CB = dyn_cast<CallBase>(V); if (!CB) return nullptr; @@ -175,11 +171,9 @@ getAllocationDataForFunction(const Function *Callee, AllocType AllocTy, } static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy, - const TargetLibraryInfo *TLI, - bool LookThroughBitCast = false) { + const TargetLibraryInfo *TLI) { bool IsNoBuiltinCall; - if (const Function *Callee = - getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall)) + if (const Function *Callee = getCalledFunction(V, IsNoBuiltinCall)) if (!IsNoBuiltinCall) return getAllocationDataForFunction(Callee, AllocTy, TLI); return None; @@ -187,11 +181,9 @@ static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy, static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy, - function_ref<const TargetLibraryInfo &(Function &)> GetTLI, - bool LookThroughBitCast = false) { + function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { bool IsNoBuiltinCall; - if (const Function *Callee = - getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall)) + if (const Function *Callee = getCalledFunction(V, IsNoBuiltinCall)) if (!IsNoBuiltinCall) return getAllocationDataForFunction( Callee, AllocTy, &GetTLI(const_cast<Function &>(*Callee))); @@ -202,7 +194,7 @@ static Optional<AllocFnsTy> getAllocationSize(const Value *V, const TargetLibraryInfo *TLI) { bool IsNoBuiltinCall; const Function *Callee = - getCalledFunction(V, /*LookThroughBitCast=*/false, IsNoBuiltinCall); + getCalledFunction(V, IsNoBuiltinCall); if (!Callee) return None; @@ -226,92 +218,57 @@ static Optional<AllocFnsTy> getAllocationSize(const Value *V, Result.NumParams = Callee->getNumOperands(); Result.FstParam = Args.first; Result.SndParam = Args.second.getValueOr(-1); + // Allocsize has no way to specify an alignment argument + Result.AlignParam = -1; return Result; } -static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { - const auto *CB = - dyn_cast<CallBase>(LookThroughBitCast ? V->stripPointerCasts() : V); - return CB && CB->hasRetAttr(Attribute::NoAlias); -} - /// Tests if a value is a call or invoke to a library function that /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup /// like). -bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast).hasValue(); +bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI) { + return getAllocationData(V, AnyAlloc, TLI).hasValue(); } bool llvm::isAllocationFn( - const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, - bool LookThroughBitCast) { - return getAllocationData(V, AnyAlloc, GetTLI, LookThroughBitCast).hasValue(); -} - -/// Tests if a value is a call or invoke to a function that returns a -/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). 
-bool llvm::isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - // it's safe to consider realloc as noalias since accessing the original - // pointer is undefined behavior - return isAllocationFn(V, TLI, LookThroughBitCast) || - hasNoAliasAttr(V, LookThroughBitCast); + const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { + return getAllocationData(V, AnyAlloc, GetTLI).hasValue(); } /// Tests if a value is a call or invoke to a library function that /// allocates uninitialized memory (such as malloc). -bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, MallocLike, TLI, LookThroughBitCast).hasValue(); -} -bool llvm::isMallocLikeFn( - const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, - bool LookThroughBitCast) { - return getAllocationData(V, MallocLike, GetTLI, LookThroughBitCast) - .hasValue(); +static bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { + return getAllocationData(V, MallocOrOpNewLike, TLI).hasValue(); } /// Tests if a value is a call or invoke to a library function that /// allocates uninitialized memory with alignment (such as aligned_alloc). -bool llvm::isAlignedAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, AlignedAllocLike, TLI, LookThroughBitCast) - .hasValue(); -} -bool llvm::isAlignedAllocLikeFn( - const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, - bool LookThroughBitCast) { - return getAllocationData(V, AlignedAllocLike, GetTLI, LookThroughBitCast) +static bool isAlignedAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { + return getAllocationData(V, AlignedAllocLike, TLI) .hasValue(); } /// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). -bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, CallocLike, TLI, LookThroughBitCast).hasValue(); +static bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { + return getAllocationData(V, CallocLike, TLI).hasValue(); } /// Tests if a value is a call or invoke to a library function that /// allocates memory similar to malloc or calloc. -bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, MallocOrCallocLike, TLI, - LookThroughBitCast).hasValue(); +bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { + return getAllocationData(V, MallocOrCallocLike, TLI).hasValue(); } /// Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). -bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, AllocLike, TLI, LookThroughBitCast).hasValue(); +bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { + return getAllocationData(V, AllocLike, TLI).hasValue(); } /// Tests if a value is a call or invoke to a library function that /// reallocates memory (e.g., realloc). 
-bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast).hasValue(); +bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { + return getAllocationData(V, ReallocLike, TLI).hasValue(); } /// Tests if a functions is a call or invoke to a library function that @@ -320,113 +277,122 @@ bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) { return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue(); } -/// Tests if a value is a call or invoke to a library function that -/// allocates memory and throws if an allocation failed (e.g., new). -bool llvm::isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast).hasValue(); -} +bool llvm::isAllocRemovable(const CallBase *CB, const TargetLibraryInfo *TLI) { + assert(isAllocationFn(CB, TLI)); -/// Tests if a value is a call or invoke to a library function that -/// allocates memory (strdup, strndup). -bool llvm::isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, StrDupLike, TLI, LookThroughBitCast).hasValue(); -} + // Note: Removability is highly dependent on the source language. For + // example, recent C++ requires direct calls to the global allocation + // [basic.stc.dynamic.allocation] to be observable unless part of a new + // expression [expr.new paragraph 13]. -/// extractMallocCall - Returns the corresponding CallInst if the instruction -/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we -/// ignore InvokeInst here. -const CallInst *llvm::extractMallocCall( - const Value *I, - function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { - return isMallocLikeFn(I, GetTLI) ? dyn_cast<CallInst>(I) : nullptr; + // Historically we've treated the C family allocation routines as removable + return isAllocLikeFn(CB, TLI); } -static Value *computeArraySize(const CallInst *CI, const DataLayout &DL, - const TargetLibraryInfo *TLI, - bool LookThroughSExt = false) { - if (!CI) - return nullptr; +Value *llvm::getAllocAlignment(const CallBase *V, + const TargetLibraryInfo *TLI) { + assert(isAllocationFn(V, TLI)); - // The size of the malloc's result type must be known to determine array size. - Type *T = getMallocAllocatedType(CI, TLI); - if (!T || !T->isSized()) + const Optional<AllocFnsTy> FnData = getAllocationData(V, AnyAlloc, TLI); + if (!FnData.hasValue() || FnData->AlignParam < 0) { return nullptr; + } + return V->getOperand(FnData->AlignParam); +} - unsigned ElementSize = DL.getTypeAllocSize(T); - if (StructType *ST = dyn_cast<StructType>(T)) - ElementSize = DL.getStructLayout(ST)->getSizeInBytes(); +/// When we're compiling N-bit code, and the user uses parameters that are +/// greater than N bits (e.g. uint64_t on a 32-bit build), we can run into +/// trouble with APInt size issues. This function handles resizing + overflow +/// checks for us. Check and zext or trunc \p I depending on IntTyBits and +/// I's value. +static bool CheckedZextOrTrunc(APInt &I, unsigned IntTyBits) { + // More bits than we can handle. Checking the bit width isn't necessary, but + // it's faster than checking active bits, and should give `false` in the + // vast majority of cases. 
+ if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits) + return false; + if (I.getBitWidth() != IntTyBits) + I = I.zextOrTrunc(IntTyBits); + return true; +} - // If malloc call's arg can be determined to be a multiple of ElementSize, - // return the multiple. Otherwise, return NULL. - Value *MallocArg = CI->getArgOperand(0); - Value *Multiple = nullptr; - if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt)) - return Multiple; +Optional<APInt> +llvm::getAllocSize(const CallBase *CB, + const TargetLibraryInfo *TLI, + std::function<const Value*(const Value*)> Mapper) { + // Note: This handles both explicitly listed allocation functions and + // allocsize. The code structure could stand to be cleaned up a bit. + Optional<AllocFnsTy> FnData = getAllocationSize(CB, TLI); + if (!FnData) + return None; - return nullptr; -} + // Get the index type for this address space, results and intermediate + // computations are performed at that width. + auto &DL = CB->getModule()->getDataLayout(); + const unsigned IntTyBits = DL.getIndexTypeSizeInBits(CB->getType()); + + // Handle strdup-like functions separately. + if (FnData->AllocTy == StrDupLike) { + APInt Size(IntTyBits, GetStringLength(Mapper(CB->getArgOperand(0)))); + if (!Size) + return None; -/// getMallocType - Returns the PointerType resulting from the malloc call. -/// The PointerType depends on the number of bitcast uses of the malloc call: -/// 0: PointerType is the calls' return type. -/// 1: PointerType is the bitcast's result type. -/// >1: Unique PointerType cannot be determined, return NULL. -PointerType *llvm::getMallocType(const CallInst *CI, - const TargetLibraryInfo *TLI) { - assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); - - PointerType *MallocType = nullptr; - unsigned NumOfBitCastUses = 0; - - // Determine if CallInst has a bitcast use. - for (const User *U : CI->users()) - if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - MallocType = cast<PointerType>(BCI->getDestTy()); - NumOfBitCastUses++; + // Strndup limits strlen. + if (FnData->FstParam > 0) { + const ConstantInt *Arg = + dyn_cast<ConstantInt>(Mapper(CB->getArgOperand(FnData->FstParam))); + if (!Arg) + return None; + + APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits); + if (Size.ugt(MaxSize)) + Size = MaxSize + 1; } + return Size; + } - // Malloc call has 1 bitcast use, so type is the bitcast's destination type. - if (NumOfBitCastUses == 1) - return MallocType; + const ConstantInt *Arg = + dyn_cast<ConstantInt>(Mapper(CB->getArgOperand(FnData->FstParam))); + if (!Arg) + return None; - // Malloc call was not bitcast, so type is the malloc function's return type. - if (NumOfBitCastUses == 0) - return cast<PointerType>(CI->getType()); + APInt Size = Arg->getValue(); + if (!CheckedZextOrTrunc(Size, IntTyBits)) + return None; - // Type could not be determined. - return nullptr; -} + // Size is determined by just 1 parameter. + if (FnData->SndParam < 0) + return Size; -/// getMallocAllocatedType - Returns the Type allocated by malloc call. -/// The Type depends on the number of bitcast uses of the malloc call: -/// 0: PointerType is the malloc calls' return type. -/// 1: PointerType is the bitcast's result type. -/// >1: Unique PointerType cannot be determined, return NULL. -Type *llvm::getMallocAllocatedType(const CallInst *CI, - const TargetLibraryInfo *TLI) { - PointerType *PT = getMallocType(CI, TLI); - return PT ? 
PT->getElementType() : nullptr; -} + Arg = dyn_cast<ConstantInt>(Mapper(CB->getArgOperand(FnData->SndParam))); + if (!Arg) + return None; + + APInt NumElems = Arg->getValue(); + if (!CheckedZextOrTrunc(NumElems, IntTyBits)) + return None; -/// getMallocArraySize - Returns the array size of a malloc call. If the -/// argument passed to malloc is a multiple of the size of the malloced type, -/// then return that multiple. For non-array mallocs, the multiple is -/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be -/// determined. -Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout &DL, - const TargetLibraryInfo *TLI, - bool LookThroughSExt) { - assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call"); - return computeArraySize(CI, DL, TLI, LookThroughSExt); + bool Overflow; + Size = Size.umul_ov(NumElems, Overflow); + if (Overflow) + return None; + return Size; } -/// extractCallocCall - Returns the corresponding CallInst if the instruction -/// is a calloc call. -const CallInst *llvm::extractCallocCall(const Value *I, - const TargetLibraryInfo *TLI) { - return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : nullptr; +Constant *llvm::getInitialValueOfAllocation(const CallBase *Alloc, + const TargetLibraryInfo *TLI, + Type *Ty) { + assert(isAllocationFn(Alloc, TLI)); + + // malloc and aligned_alloc are uninitialized (undef) + if (isMallocLikeFn(Alloc, TLI) || isAlignedAllocLikeFn(Alloc, TLI)) + return UndefValue::get(Ty); + + // calloc zero initializes + if (isCallocLikeFn(Alloc, TLI)) + return Constant::getNullValue(Ty); + + return nullptr; } /// isLibFreeFunction - Returns true if the function is a builtin free() @@ -485,8 +451,7 @@ bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) { /// isFreeCall - Returns non-null if the value is a call to the builtin free() const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { bool IsNoBuiltinCall; - const Function *Callee = - getCalledFunction(I, /*LookThroughBitCast=*/false, IsNoBuiltinCall); + const Function *Callee = getCalledFunction(I, IsNoBuiltinCall); if (Callee == nullptr || IsNoBuiltinCall) return nullptr; @@ -644,20 +609,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { return unknown(); } -/// When we're compiling N-bit code, and the user uses parameters that are -/// greater than N bits (e.g. uint64_t on a 32-bit build), we can run into -/// trouble with APInt size issues. This function handles resizing + overflow -/// checks for us. Check and zext or trunc \p I depending on IntTyBits and -/// I's value. bool ObjectSizeOffsetVisitor::CheckedZextOrTrunc(APInt &I) { - // More bits than we can handle. Checking the bit width isn't necessary, but - // it's faster than checking active bits, and should give `false` in the - // vast majority of cases. - if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits) - return false; - if (I.getBitWidth() != IntTyBits) - I = I.zextOrTrunc(IntTyBits); - return true; + return ::CheckedZextOrTrunc(I, IntTyBits); } SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { @@ -698,61 +651,10 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { } SizeOffsetType ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) { - Optional<AllocFnsTy> FnData = getAllocationSize(&CB, TLI); - if (!FnData) - return unknown(); - - // Handle strdup-like functions separately. 
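// Standalone sketch of the overflow-checked size computation performed by
// getAllocSize above for calloc-style allocators: once both operands have been
// brought to a common width (the CheckedZextOrTrunc step), the element size is
// multiplied by the element count and failure is reported instead of a wrapped
// result. Plain 64-bit integers stand in for APInt at the index-type width.
#include <cassert>
#include <cstdint>
#include <optional>

static std::optional<uint64_t> checkedAllocSize(uint64_t ElemSize,
                                                uint64_t NumElems) {
  if (ElemSize != 0 && NumElems > UINT64_MAX / ElemSize)
    return std::nullopt; // product would overflow, like umul_ov reporting true
  return ElemSize * NumElems;
}

int main() {
  assert(checkedAllocSize(8, 100) == std::optional<uint64_t>(800));
  assert(!checkedAllocSize(UINT64_MAX / 2 + 1, 2)); // overflow -> no size
  return 0;
}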
- if (FnData->AllocTy == StrDupLike) { - APInt Size(IntTyBits, GetStringLength(CB.getArgOperand(0))); - if (!Size) - return unknown(); - - // Strndup limits strlen. - if (FnData->FstParam > 0) { - ConstantInt *Arg = - dyn_cast<ConstantInt>(CB.getArgOperand(FnData->FstParam)); - if (!Arg) - return unknown(); - - APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits); - if (Size.ugt(MaxSize)) - Size = MaxSize + 1; - } - return std::make_pair(Size, Zero); - } - - ConstantInt *Arg = dyn_cast<ConstantInt>(CB.getArgOperand(FnData->FstParam)); - if (!Arg) - return unknown(); - - APInt Size = Arg->getValue(); - if (!CheckedZextOrTrunc(Size)) - return unknown(); - - // Size is determined by just 1 parameter. - if (FnData->SndParam < 0) - return std::make_pair(Size, Zero); - - Arg = dyn_cast<ConstantInt>(CB.getArgOperand(FnData->SndParam)); - if (!Arg) - return unknown(); - - APInt NumElems = Arg->getValue(); - if (!CheckedZextOrTrunc(NumElems)) - return unknown(); - - bool Overflow; - Size = Size.umul_ov(NumElems, Overflow); - return Overflow ? unknown() : std::make_pair(Size, Zero); - - // TODO: handle more standard functions (+ wchar cousins): - // - strdup / strndup - // - strcpy / strncpy - // - strcat / strncat - // - memcpy / memmove - // - strcat / strncat - // - memset + auto Mapper = [](const Value *V) { return V; }; + if (Optional<APInt> Size = getAllocSize(&CB, TLI, Mapper)) + return std::make_pair(*Size, Zero); + return unknown(); } SizeOffsetType @@ -976,7 +878,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallBase(CallBase &CB) { // Handle strdup-like functions separately. if (FnData->AllocTy == StrDupLike) { - // TODO + // TODO: implement evaluation of strdup/strndup return unknown(); } @@ -989,14 +891,6 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallBase(CallBase &CB) { SecondArg = Builder.CreateZExtOrTrunc(SecondArg, IntTy); Value *Size = Builder.CreateMul(FirstArg, SecondArg); return std::make_pair(Size, Zero); - - // TODO: handle more standard functions (+ wchar cousins): - // - strdup / strndup - // - strcpy / strncpy - // - strcat / strncat - // - memcpy / memmove - // - strcat / strncat - // - memset } SizeOffsetEvalType diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index da6bb4c49cba..36df462c7a66 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -594,7 +594,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // turn into undef. Note that we can bypass the allocation itself when // looking for a clobber in many cases; that's an alias property and is // handled by BasicAA. 
- if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) { + if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) { const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr); if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp index ac20e20f0c0d..57f431ec21f5 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp @@ -1265,8 +1265,8 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { } MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) - : AA(nullptr), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), - SkipWalker(nullptr), NextID(0) { + : DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), + SkipWalker(nullptr) { // Build MemorySSA using a batch alias analysis. This reuses the internal // state that AA collects during an alias()/getModRefInfo() call. This is // safe because there are no CFG changes while building MemorySSA and can diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp index 941458f648bc..fab51d6a7aaf 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp @@ -22,12 +22,13 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner( LLVMContext &Ctx, const std::string &ModelPath, const std::vector<TensorSpec> &InputSpecs, const std::vector<LoggedFeatureSpec> &OutputSpecs) - : MLModelRunner(Ctx), OutputSpecs(OutputSpecs) { + : MLModelRunner(Ctx, MLModelRunner::Kind::Development), + OutputSpecs(OutputSpecs) { Evaluator = std::make_unique<TFModelEvaluator>( ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; }, OutputSpecs.size()); if (!Evaluator || !Evaluator->isValid()) { - Ctx.emitError("Failed to create inliner saved model evaluator"); + Ctx.emitError("Failed to create saved model evaluator"); Evaluator.reset(); return; } @@ -46,4 +47,21 @@ void *ModelUnderTrainingRunner::getTensorUntyped(size_t Index) { return Evaluator->getUntypedInput(Index); } +std::unique_ptr<ModelUnderTrainingRunner> +ModelUnderTrainingRunner::createAndEnsureValid( + LLVMContext &Ctx, const std::string &ModelPath, StringRef DecisionName, + const std::vector<TensorSpec> &InputSpecs, + StringRef OutputSpecsPathOverride) { + std::unique_ptr<ModelUnderTrainingRunner> MUTR; + if (auto MaybeOutputSpecs = loadOutputSpecs(Ctx, DecisionName, ModelPath, + OutputSpecsPathOverride)) + MUTR.reset(new ModelUnderTrainingRunner(Ctx, ModelPath, InputSpecs, + *MaybeOutputSpecs)); + if (MUTR && MUTR->isValid()) + return MUTR; + + Ctx.emitError("Could not load the policy model from the provided path"); + return nullptr; +} + #endif // defined(LLVM_HAVE_TF_API) diff --git a/contrib/llvm-project/llvm/lib/Analysis/NoInferenceModelRunner.cpp b/contrib/llvm-project/llvm/lib/Analysis/NoInferenceModelRunner.cpp index 02ece6aa3900..7178120ebe4f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/NoInferenceModelRunner.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/NoInferenceModelRunner.cpp @@ -20,7 +20,7 @@ using namespace llvm; NoInferenceModelRunner::NoInferenceModelRunner( LLVMContext &Ctx, const std::vector<TensorSpec> &Inputs) - : MLModelRunner(Ctx) { + : MLModelRunner(Ctx, MLModelRunner::Kind::NoOp) { ValuesBuffer.reserve(Inputs.size()); 
   for (const auto &TS : Inputs)
     ValuesBuffer.push_back(std::make_unique<char[]>(TS.getElementCount() *
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm-project/llvm/lib/Analysis/ObjCARCInstKind.cpp
index f74a9f7f104f..d177ee056a93 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -32,8 +32,8 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
     return OS << "ARCInstKind::Retain";
   case ARCInstKind::RetainRV:
     return OS << "ARCInstKind::RetainRV";
-  case ARCInstKind::ClaimRV:
-    return OS << "ARCInstKind::ClaimRV";
+  case ARCInstKind::UnsafeClaimRV:
+    return OS << "ARCInstKind::UnsafeClaimRV";
   case ARCInstKind::RetainBlock:
     return OS << "ARCInstKind::RetainBlock";
   case ARCInstKind::Release:
@@ -127,7 +127,7 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
   case Intrinsic::objc_clang_arc_use:
     return ARCInstKind::IntrinsicUser;
   case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
-    return ARCInstKind::ClaimRV;
+    return ARCInstKind::UnsafeClaimRV;
   case Intrinsic::objc_retainedObject:
     return ARCInstKind::NoopCast;
   case Intrinsic::objc_unretainedObject:
@@ -334,7 +334,7 @@ bool llvm::objcarc::IsUser(ARCInstKind Class) {
   case ARCInstKind::StoreStrong:
   case ARCInstKind::Call:
   case ARCInstKind::None:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
     return false;
   }
   llvm_unreachable("covered switch isn't covered?");
@@ -370,7 +370,7 @@ bool llvm::objcarc::IsRetain(ARCInstKind Class) {
   case ARCInstKind::Call:
   case ARCInstKind::User:
   case ARCInstKind::None:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
     return false;
   }
   llvm_unreachable("covered switch isn't covered?");
@@ -384,7 +384,7 @@ bool llvm::objcarc::IsAutorelease(ARCInstKind Class) {
     return true;
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::RetainBlock:
   case ARCInstKind::Release:
   case ARCInstKind::AutoreleasepoolPush:
@@ -416,7 +416,7 @@ bool llvm::objcarc::IsForwarding(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
   case ARCInstKind::NoopCast:
@@ -451,7 +451,7 @@ bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
@@ -486,7 +486,7 @@ bool llvm::objcarc::IsNoopOnGlobal(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
@@ -522,7 +522,7 @@ bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::AutoreleaseRV:
     return true;
   case ARCInstKind::Release:
@@ -563,7 +563,7 @@ bool llvm::objcarc::IsNeverTail(ARCInstKind Class) {
     return true;
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::AutoreleaseRV:
   case ARCInstKind::Release:
   case ARCInstKind::RetainBlock:
@@ -598,7 +598,7 @@ bool llvm::objcarc::IsNoThrow(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
@@ -643,7 +643,7 @@ bool llvm::objcarc::CanInterruptRV(ARCInstKind Class) {
     return true;
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::AutoreleasepoolPush:
   case ARCInstKind::RetainBlock:
@@ -696,7 +696,7 @@ bool llvm::objcarc::CanDecrementRefCount(ARCInstKind Kind) {
   case ARCInstKind::StoreStrong:
   case ARCInstKind::CallOrUser:
   case ARCInstKind::Call:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
     return true;
   }
diff --git a/contrib/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp
index 4c80f6743411..02d084937ccb 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp
@@ -226,7 +226,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
       return GEP;
 
     // Simplify the GEP to handle 'gep x, 0' -> x etc.
-    if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(), GEPOps,
+    if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(), GEPOps[0],
+                                   ArrayRef<Value *>(GEPOps).slice(1),
                                    GEP->isInBounds(), {DL, TLI, DT, AC})) {
       for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
         RemoveInstInputs(GEPOps[i], InstInputs);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp
index c20ecff5f912..10c8569096c6 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp
@@ -30,8 +30,7 @@ using namespace llvm;
 
 char RGPassManager::ID = 0;
 
-RGPassManager::RGPassManager()
-  : FunctionPass(ID), PMDataManager() {
+RGPassManager::RGPassManager() : FunctionPass(ID) {
   RI = nullptr;
   CurrentRegion = nullptr;
 }
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/contrib/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
index f83d8b0fd230..294bc38c17ad 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
@@ -28,8 +28,7 @@ ReplayInlineAdvisor::ReplayInlineAdvisor(
     std::unique_ptr<InlineAdvisor> OriginalAdvisor,
     const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks)
     : InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)),
-      HasReplayRemarks(false), ReplaySettings(ReplaySettings),
-      EmitRemarks(EmitRemarks) {
+      ReplaySettings(ReplaySettings), EmitRemarks(EmitRemarks) {
   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ReplaySettings.ReplayFile);
   std::error_code EC = BufferOrErr.getError();
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
index 0c3f32295ae1..07aac1523b47 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -301,7 +301,8 @@ void SCEV::print(raw_ostream &OS) const {
   case scUMaxExpr:
   case scSMaxExpr:
   case scUMinExpr:
-  case scSMinExpr: {
+  case scSMinExpr:
+  case scSequentialUMinExpr: {
     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
     const char *OpStr = nullptr;
     switch (NAry->getSCEVType()) {
@@ -315,6 +316,9 @@ void SCEV::print(raw_ostream &OS) const {
     case scSMinExpr:
       OpStr = " smin ";
       break;
+    case scSequentialUMinExpr:
+      OpStr = " umin_seq ";
+      break;
     default:
       llvm_unreachable("There are no other nary expression types.");
     }
@@ -392,6 +396,8 @@ Type *SCEV::getType() const {
   case scUMinExpr:
   case scSMinExpr:
     return cast<SCEVMinMaxExpr>(this)->getType();
+  case scSequentialUMinExpr:
+    return cast<SCEVSequentialMinMaxExpr>(this)->getType();
   case scAddExpr:
     return cast<SCEVAddExpr>(this)->getType();
   case scUDivExpr:
@@ -774,7 +780,8 @@ CompareSCEVComplexity(EquivalenceClasses<const SCEV *> &EqCacheSCEV,
   case scSMaxExpr:
   case scUMaxExpr:
   case scSMinExpr:
-  case scUMinExpr: {
+  case scUMinExpr:
+  case scSequentialUMinExpr: {
     const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
     const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
 
@@ -2110,6 +2117,22 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
   return S;
 }
 
+const SCEV *ScalarEvolution::getCastExpr(SCEVTypes Kind, const SCEV *Op,
+                                         Type *Ty) {
+  switch (Kind) {
+  case scTruncate:
+    return getTruncateExpr(Op, Ty);
+  case scZeroExtend:
+    return getZeroExtendExpr(Op, Ty);
+  case scSignExtend:
+    return getSignExtendExpr(Op, Ty);
+  case scPtrToInt:
+    return getPtrToIntExpr(Op, Ty);
+  default:
+    llvm_unreachable("Not a SCEV cast expression!");
+  }
+}
+
 /// getAnyExtendExpr - Return a SCEV for the given operand extended with
 /// unspecified bits out to the given type.
 const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
@@ -3463,7 +3486,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
   return S;
 }
 
-static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
   APInt A = C1->getAPInt().abs();
   APInt B = C2->getAPInt().abs();
   uint32_t ABW = A.getBitWidth();
@@ -3721,6 +3744,7 @@ const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) {
 
 const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
                                            SmallVectorImpl<const SCEV *> &Ops) {
+  assert(SCEVMinMaxExpr::isMinMaxType(Kind) && "Not a SCEVMinMaxExpr!");
   assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
   if (Ops.size() == 1) return Ops[0];
 #ifndef NDEBUG
@@ -3857,6 +3881,209 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
   return S;
 }
 
+namespace {
+
+class SCEVSequentialMinMaxDeduplicatingVisitor final
+    : public SCEVVisitor<SCEVSequentialMinMaxDeduplicatingVisitor,
+                         Optional<const SCEV *>> {
+  using RetVal = Optional<const SCEV *>;
+  using Base = SCEVVisitor<SCEVSequentialMinMaxDeduplicatingVisitor, RetVal>;
+
+  ScalarEvolution &SE;
+  const SCEVTypes RootKind; // Must be a sequential min/max expression.
+  const SCEVTypes NonSequentialRootKind; // Non-sequential variant of RootKind.
+  SmallPtrSet<const SCEV *, 16> SeenOps;
+
+  bool canRecurseInto(SCEVTypes Kind) const {
+    // We can only recurse into the SCEV expression of the same effective type
+    // as the type of our root SCEV expression.
+    return RootKind == Kind || NonSequentialRootKind == Kind;
+  };
+
+  RetVal visitAnyMinMaxExpr(const SCEV *S) {
+    assert((isa<SCEVMinMaxExpr>(S) || isa<SCEVSequentialMinMaxExpr>(S)) &&
+           "Only for min/max expressions.");
+    SCEVTypes Kind = S->getSCEVType();
+
+    if (!canRecurseInto(Kind))
+      return S;
+
+    auto *NAry = cast<SCEVNAryExpr>(S);
+    SmallVector<const SCEV *> NewOps;
+    bool Changed =
+        visit(Kind, makeArrayRef(NAry->op_begin(), NAry->op_end()), NewOps);
+
+    if (!Changed)
+      return S;
+    if (NewOps.empty())
+      return None;
+
+    return isa<SCEVSequentialMinMaxExpr>(S)
+               ? SE.getSequentialMinMaxExpr(Kind, NewOps)
+               : SE.getMinMaxExpr(Kind, NewOps);
+  }
+
+  RetVal visit(const SCEV *S) {
+    // Has the whole operand been seen already?
+    if (!SeenOps.insert(S).second)
+      return None;
+    return Base::visit(S);
+  }
+
+public:
+  SCEVSequentialMinMaxDeduplicatingVisitor(ScalarEvolution &SE,
+                                           SCEVTypes RootKind)
+      : SE(SE), RootKind(RootKind),
+        NonSequentialRootKind(
+            SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(
+                RootKind)) {}
+
+  bool /*Changed*/ visit(SCEVTypes Kind, ArrayRef<const SCEV *> OrigOps,
+                         SmallVectorImpl<const SCEV *> &NewOps) {
+    bool Changed = false;
+    SmallVector<const SCEV *> Ops;
+    Ops.reserve(OrigOps.size());
+
+    for (const SCEV *Op : OrigOps) {
+      RetVal NewOp = visit(Op);
+      if (NewOp != Op)
+        Changed = true;
+      if (NewOp)
+        Ops.emplace_back(*NewOp);
+    }
+
+    if (Changed)
+      NewOps = std::move(Ops);
+    return Changed;
+  }
+
+  RetVal visitConstant(const SCEVConstant *Constant) { return Constant; }
+
+  RetVal visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) { return Expr; }
+
+  RetVal visitTruncateExpr(const SCEVTruncateExpr *Expr) { return Expr; }
+
+  RetVal visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { return Expr; }
+
+  RetVal visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { return Expr; }
+
+  RetVal visitAddExpr(const SCEVAddExpr *Expr) { return Expr; }
+
+  RetVal visitMulExpr(const SCEVMulExpr *Expr) { return Expr; }
+
+  RetVal visitUDivExpr(const SCEVUDivExpr *Expr) { return Expr; }
+
+  RetVal visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; }
+
+  RetVal visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+    return visitAnyMinMaxExpr(Expr);
+  }
+
+  RetVal visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+    return visitAnyMinMaxExpr(Expr);
+  }
+
+  RetVal visitSMinExpr(const SCEVSMinExpr *Expr) {
+    return visitAnyMinMaxExpr(Expr);
+  }
+
+  RetVal visitUMinExpr(const SCEVUMinExpr *Expr) {
+    return visitAnyMinMaxExpr(Expr);
+  }
+
+  RetVal visitSequentialUMinExpr(const SCEVSequentialUMinExpr *Expr) {
+    return visitAnyMinMaxExpr(Expr);
+  }
+
+  RetVal visitUnknown(const SCEVUnknown *Expr) { return Expr; }
+
+  RetVal visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { return Expr; }
+};
+
+} // namespace
+
+const SCEV *
+ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
+                                         SmallVectorImpl<const SCEV *> &Ops) {
+  assert(SCEVSequentialMinMaxExpr::isSequentialMinMaxType(Kind) &&
+         "Not a SCEVSequentialMinMaxExpr!");
+  assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
+  if (Ops.size() == 1)
+    return Ops[0];
+  if (Ops.size() == 2 &&
+      any_of(Ops, [](const SCEV *Op) { return isa<SCEVConstant>(Op); }))
+    return getMinMaxExpr(
+        SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(Kind),
+        Ops);
+#ifndef NDEBUG
+  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+  for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
+    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+           "Operand types don't match!");
+    assert(Ops[0]->getType()->isPointerTy() ==
+               Ops[i]->getType()->isPointerTy() &&
+           "min/max should be consistently pointerish");
+  }
+#endif
+
+  // Note that SCEVSequentialMinMaxExpr is *NOT* commutative,
+  // so we can *NOT* do any kind of sorting of the expressions!
+
+  // Check if we have created the same expression before.
+  if (const SCEV *S = findExistingSCEVInCache(Kind, Ops))
+    return S;
+
+  // FIXME: there are *some* simplifications that we can do here.
+
+  // Keep only the first instance of an operand.
+  {
+    SCEVSequentialMinMaxDeduplicatingVisitor Deduplicator(*this, Kind);
+    bool Changed = Deduplicator.visit(Kind, Ops, Ops);
+    if (Changed)
+      return getSequentialMinMaxExpr(Kind, Ops);
+  }
+
+  // Check to see if one of the operands is of the same kind. If so, expand its
+  // operands onto our operand list, and recurse to simplify.
+  {
+    unsigned Idx = 0;
+    bool DeletedAny = false;
+    while (Idx < Ops.size()) {
+      if (Ops[Idx]->getSCEVType() != Kind) {
+        ++Idx;
+        continue;
+      }
+      const auto *SMME = cast<SCEVSequentialMinMaxExpr>(Ops[Idx]);
+      Ops.erase(Ops.begin() + Idx);
+      Ops.insert(Ops.begin() + Idx, SMME->op_begin(), SMME->op_end());
+      DeletedAny = true;
+    }
+
+    if (DeletedAny)
+      return getSequentialMinMaxExpr(Kind, Ops);
+  }
+
+  // Okay, it looks like we really DO need an expr. Check to see if we
+  // already have one, otherwise create a new one.
+  FoldingSetNodeID ID;
+  ID.AddInteger(Kind);
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+    ID.AddPointer(Ops[i]);
+  void *IP = nullptr;
+  const SCEV *ExistingSCEV = UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
+  if (ExistingSCEV)
+    return ExistingSCEV;
+
+  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+  SCEV *S = new (SCEVAllocator)
+      SCEVSequentialMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
+
+  UniqueSCEVs.InsertNode(S, IP);
+  registerUser(S, Ops);
+  return S;
+}
+
 const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) {
   SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
   return getSMaxExpr(Ops);
@@ -3885,14 +4112,16 @@ const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
   return getMinMaxExpr(scSMinExpr, Ops);
 }
 
-const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
-                                         const SCEV *RHS) {
+const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, const SCEV *RHS,
+                                         bool Sequential) {
   SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
-  return getUMinExpr(Ops);
+  return getUMinExpr(Ops, Sequential);
 }
 
-const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
-  return getMinMaxExpr(scUMinExpr, Ops);
+const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops,
+                                         bool Sequential) {
+  return Sequential ? getSequentialMinMaxExpr(scSequentialUMinExpr, Ops)
+                    : getMinMaxExpr(scUMinExpr, Ops);
 }
 
 const SCEV *
@@ -4375,13 +4604,15 @@ const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
 }
 
 const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
-                                                        const SCEV *RHS) {
+                                                        const SCEV *RHS,
+                                                        bool Sequential) {
   SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
-  return getUMinFromMismatchedTypes(Ops);
+  return getUMinFromMismatchedTypes(Ops, Sequential);
 }
 
-const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(
-    SmallVectorImpl<const SCEV *> &Ops) {
+const SCEV *
+ScalarEvolution::getUMinFromMismatchedTypes(SmallVectorImpl<const SCEV *> &Ops,
+                                            bool Sequential) {
   assert(!Ops.empty() && "At least one operand must be!");
 
   // Trivial case.
   if (Ops.size() == 1)
@@ -4402,7 +4633,7 @@ const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(
     PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType));
 
   // Generate umin.
-  return getUMinExpr(PromotedOps);
+  return getUMinExpr(PromotedOps, Sequential);
 }
 
 const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
@@ -5513,6 +5744,7 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
   case scSMaxExpr:
   case scUMinExpr:
   case scSMinExpr:
+  case scSequentialUMinExpr:
     // These expressions are available if their operand(s) is/are.
     return true;
 
@@ -6060,35 +6292,31 @@ ScalarEvolution::getRangeRef(const SCEV *S,
                     ConservativeResult.intersectWith(X, RangeType));
   }
 
-  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
-    ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint);
-    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
-      X = X.smax(getRangeRef(SMax->getOperand(i), SignHint));
-    return setRange(SMax, SignHint,
-                    ConservativeResult.intersectWith(X, RangeType));
-  }
-
-  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
-    ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint);
-    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
-      X = X.umax(getRangeRef(UMax->getOperand(i), SignHint));
-    return setRange(UMax, SignHint,
-                    ConservativeResult.intersectWith(X, RangeType));
-  }
-
-  if (const SCEVSMinExpr *SMin = dyn_cast<SCEVSMinExpr>(S)) {
-    ConstantRange X = getRangeRef(SMin->getOperand(0), SignHint);
-    for (unsigned i = 1, e = SMin->getNumOperands(); i != e; ++i)
-      X = X.smin(getRangeRef(SMin->getOperand(i), SignHint));
-    return setRange(SMin, SignHint,
-                    ConservativeResult.intersectWith(X, RangeType));
-  }
+  if (isa<SCEVMinMaxExpr>(S) || isa<SCEVSequentialMinMaxExpr>(S)) {
+    Intrinsic::ID ID;
+    switch (S->getSCEVType()) {
+    case scUMaxExpr:
+      ID = Intrinsic::umax;
+      break;
+    case scSMaxExpr:
+      ID = Intrinsic::smax;
+      break;
+    case scUMinExpr:
+    case scSequentialUMinExpr:
+      ID = Intrinsic::umin;
+      break;
+    case scSMinExpr:
+      ID = Intrinsic::smin;
+      break;
+    default:
+      llvm_unreachable("Unknown SCEVMinMaxExpr/SCEVSequentialMinMaxExpr.");
+    }
 
-  if (const SCEVUMinExpr *UMin = dyn_cast<SCEVUMinExpr>(S)) {
-    ConstantRange X = getRangeRef(UMin->getOperand(0), SignHint);
-    for (unsigned i = 1, e = UMin->getNumOperands(); i != e; ++i)
-      X = X.umin(getRangeRef(UMin->getOperand(i), SignHint));
-    return setRange(UMin, SignHint,
+    const auto *NAry = cast<SCEVNAryExpr>(S);
+    ConstantRange X = getRangeRef(NAry->getOperand(0), SignHint);
+    for (unsigned i = 1, e = NAry->getNumOperands(); i != e; ++i)
+      X = X.intrinsic(ID, {X, getRangeRef(NAry->getOperand(i), SignHint)});
+    return setRange(S, SignHint,
                     ConservativeResult.intersectWith(X, RangeType));
   }
 
@@ -7368,11 +7596,6 @@ const SCEV *ScalarEvolution::getConstantMaxTripCountFromArray(const Loop *L) {
     auto *ArrSize = dyn_cast<ConstantInt>(AllocateInst->getArraySize());
     if (!Ty || !ArrSize || !ArrSize->isOne())
       continue;
-    // Also make sure step was increased the same with sizeof allocated
-    // element type.
-    const PointerType *GEPT = dyn_cast<PointerType>(GEP->getType());
-    if (Ty->getElementType() != GEPT->getElementType())
-      continue;
 
     // FIXME: Since gep indices are silently zext to the indexing type,
     // we will have a narrow gep index which wraps around rather than
@@ -8093,6 +8316,29 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
       return getZero(CI->getType());
   }
 
+  // If we're exiting based on the overflow flag of an x.with.overflow intrinsic
+  // with a constant step, we can form an equivalent icmp predicate and figure
+  // out how many iterations will be taken before we exit.
+  const WithOverflowInst *WO;
+  const APInt *C;
+  if (match(ExitCond, m_ExtractValue<1>(m_WithOverflowInst(WO))) &&
+      match(WO->getRHS(), m_APInt(C))) {
+    ConstantRange NWR =
+        ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C,
+                                             WO->getNoWrapKind());
+    CmpInst::Predicate Pred;
+    APInt NewRHSC, Offset;
+    NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
+    if (!ExitIfTrue)
+      Pred = ICmpInst::getInversePredicate(Pred);
+    auto *LHS = getSCEV(WO->getLHS());
+    if (Offset != 0)
+      LHS = getAddExpr(LHS, getConstant(Offset));
+    auto EL = computeExitLimitFromICmp(L, Pred, LHS, getConstant(NewRHSC),
+                                       ControlsExit, AllowPredicates);
+    if (EL.hasAnyInfo()) return EL;
+  }
+
   // If it's not an integer or pointer comparison then compute it the hard way.
   return computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
 }
 
@@ -8134,26 +8380,11 @@ ScalarEvolution::computeExitLimitFromCondFromBinOp(
   if (EitherMayExit) {
     // Both conditions must be same for the loop to continue executing.
     // Choose the less conservative count.
-    // If ExitCond is a short-circuit form (select), using
-    // umin(EL0.ExactNotTaken, EL1.ExactNotTaken) is unsafe in general.
-    // To see the detailed examples, please see
-    // test/Analysis/ScalarEvolution/exit-count-select.ll
-    bool PoisonSafe = isa<BinaryOperator>(ExitCond);
-    if (!PoisonSafe)
-      // Even if ExitCond is select, we can safely derive BECount using both
-      // EL0 and EL1 in these cases:
-      // (1) EL0.ExactNotTaken is non-zero
-      // (2) EL1.ExactNotTaken is non-poison
-      // (3) EL0.ExactNotTaken is zero (BECount should be simply zero and
-      //     it cannot be umin(0, ..))
-      // The PoisonSafe assignment below is simplified and the assertion after
-      // BECount calculation fully guarantees the condition (3).
-      PoisonSafe = isa<SCEVConstant>(EL0.ExactNotTaken) ||
-                   isa<SCEVConstant>(EL1.ExactNotTaken);
     if (EL0.ExactNotTaken != getCouldNotCompute() &&
-        EL1.ExactNotTaken != getCouldNotCompute() && PoisonSafe) {
-      BECount =
-          getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
+        EL1.ExactNotTaken != getCouldNotCompute()) {
+      BECount = getUMinFromMismatchedTypes(
+          EL0.ExactNotTaken, EL1.ExactNotTaken,
+          /*Sequential=*/!isa<BinaryOperator>(ExitCond));
 
       // If EL0.ExactNotTaken was zero and ExitCond was a short-circuit form,
       // it should have been simplified to zero (see the condition (3) above)
@@ -8203,6 +8434,26 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
   const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
   const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
 
+  ExitLimit EL = computeExitLimitFromICmp(L, Pred, LHS, RHS, ControlsExit,
+                                          AllowPredicates);
+  if (EL.hasAnyInfo()) return EL;
+
+  auto *ExhaustiveCount =
+      computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
+
+  if (!isa<SCEVCouldNotCompute>(ExhaustiveCount))
+    return ExhaustiveCount;
+
+  return computeShiftCompareExitLimit(ExitCond->getOperand(0),
+                                      ExitCond->getOperand(1), L, OriginalPred);
+}
+ScalarEvolution::ExitLimit
+ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
+                                          ICmpInst::Predicate Pred,
+                                          const SCEV *LHS, const SCEV *RHS,
+                                          bool ControlsExit,
+                                          bool AllowPredicates) {
+
   // Try to evaluate any dependencies out of the loop.
   LHS = getSCEVAtScope(LHS, L);
   RHS = getSCEVAtScope(RHS, L);
@@ -8312,14 +8563,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
     break;
   }
 
-  auto *ExhaustiveCount =
-      computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
-
-  if (!isa<SCEVCouldNotCompute>(ExhaustiveCount))
-    return ExhaustiveCount;
-
-  return computeShiftCompareExitLimit(ExitCond->getOperand(0),
-                                      ExitCond->getOperand(1), L, OriginalPred);
+  return getCouldNotCompute();
 }
 
 ScalarEvolution::ExitLimit
@@ -8941,7 +9185,8 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
   case scUMaxExpr:
   case scSMinExpr:
   case scUMinExpr:
-    return nullptr; // TODO: smax, umax, smin, umax.
+  case scSequentialUMinExpr:
+    return nullptr; // TODO: smax, umax, smin, umax, umin_seq.
   }
   llvm_unreachable("Unknown SCEV kind!");
 }
@@ -9070,7 +9315,8 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
         return V;
       }
 
-  if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
+  if (isa<SCEVCommutativeExpr>(V) || isa<SCEVSequentialMinMaxExpr>(V)) {
+    const auto *Comm = cast<SCEVNAryExpr>(V);
     // Avoid performing the look-up in the common case where the specified
     // expression has no loop-variant portions.
     for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
@@ -9092,7 +9338,9 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
           return getMulExpr(NewOps, Comm->getNoWrapFlags());
         if (isa<SCEVMinMaxExpr>(Comm))
           return getMinMaxExpr(Comm->getSCEVType(), NewOps);
-        llvm_unreachable("Unknown commutative SCEV type!");
+        if (isa<SCEVSequentialMinMaxExpr>(Comm))
+          return getSequentialMinMaxExpr(Comm->getSCEVType(), NewOps);
+        llvm_unreachable("Unknown commutative / sequential min/max SCEV type!");
       }
     }
     // If we got here, all operands are loop invariant.
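The two hunks above drop the PoisonSafe special-casing and instead request a sequential unsigned min (printed as "umin_seq") whenever the exit condition is a short-circuiting select, since a select only conditionally evaluates its second operand and its count may therefore be poison. A minimal sketch of how a caller could combine two exit counts under that scheme, assuming only the getUMinFromMismatchedTypes(LHS, RHS, Sequential) overload introduced by this patch; the helper name combineExitCounts is made up for illustration and is not part of the change:

```cpp
// Hypothetical helper, not part of the patch: combine the not-taken counts of
// two exit conditions joined either by a bitwise and/or or by a select.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static const SCEV *combineExitCounts(ScalarEvolution &SE, Value *ExitCond,
                                     const SCEV *EL0, const SCEV *EL1) {
  if (isa<SCEVCouldNotCompute>(EL0) || isa<SCEVCouldNotCompute>(EL1))
    return SE.getCouldNotCompute();
  // A bitwise and/or evaluates both operands, so the commutative umin is
  // safe.  A select evaluates its second operand only conditionally, so ask
  // for the sequential form (umin_seq), which keeps operand order significant
  // and does not let poison in the second count leak into the result.
  bool Sequential = !isa<BinaryOperator>(ExitCond);
  return SE.getUMinFromMismatchedTypes(EL0, EL1, Sequential);
}
```

This mirrors the call in computeExitLimitFromCondFromBinOp shown above, where Sequential is exactly `!isa<BinaryOperator>(ExitCond)`.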
@@ -9153,32 +9401,11 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
     return AddRec;
   }
 
-  if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
-    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
-    if (Op == Cast->getOperand())
-      return Cast; // must be loop invariant
-    return getZeroExtendExpr(Op, Cast->getType());
-  }
-
-  if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
-    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
-    if (Op == Cast->getOperand())
-      return Cast; // must be loop invariant
-    return getSignExtendExpr(Op, Cast->getType());
-  }
-
-  if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
+  if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
     const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
     if (Op == Cast->getOperand())
       return Cast; // must be loop invariant
-    return getTruncateExpr(Op, Cast->getType());
-  }
-
-  if (const SCEVPtrToIntExpr *Cast = dyn_cast<SCEVPtrToIntExpr>(V)) {
-    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
-    if (Op == Cast->getOperand())
-      return Cast; // must be loop invariant
-    return getPtrToIntExpr(Op, Cast->getType());
+    return getCastExpr(Cast->getSCEVType(), Op, Cast->getType());
   }
 
   llvm_unreachable("Unknown SCEV type!");
@@ -11236,6 +11463,48 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
   return true;
 }
 
+bool ScalarEvolution::isImpliedCondOperandsViaShift(ICmpInst::Predicate Pred,
+                                                    const SCEV *LHS,
+                                                    const SCEV *RHS,
+                                                    const SCEV *FoundLHS,
+                                                    const SCEV *FoundRHS) {
+  // We want to imply LHS < RHS from LHS < (RHS >> shiftvalue). First, make
+  // sure that we are dealing with same LHS.
+  if (RHS == FoundRHS) {
+    std::swap(LHS, RHS);
+    std::swap(FoundLHS, FoundRHS);
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+  if (LHS != FoundLHS)
+    return false;
+
+  auto *SUFoundRHS = dyn_cast<SCEVUnknown>(FoundRHS);
+  if (!SUFoundRHS)
+    return false;
+
+  Value *Shiftee, *ShiftValue;
+
+  using namespace PatternMatch;
+  if (match(SUFoundRHS->getValue(),
+            m_LShr(m_Value(Shiftee), m_Value(ShiftValue)))) {
+    auto *ShifteeS = getSCEV(Shiftee);
+    // Prove one of the following:
+    // LHS <u (shiftee >> shiftvalue) && shiftee <=u RHS ---> LHS <u RHS
+    // LHS <=u (shiftee >> shiftvalue) && shiftee <=u RHS ---> LHS <=u RHS
+    // LHS <s (shiftee >> shiftvalue) && shiftee <=s RHS && shiftee >=s 0
+    //   ---> LHS <s RHS
+    // LHS <=s (shiftee >> shiftvalue) && shiftee <=s RHS && shiftee >=s 0
+    //   ---> LHS <=s RHS
+    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
+      return isKnownPredicate(ICmpInst::ICMP_ULE, ShifteeS, RHS);
+    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+      if (isKnownNonNegative(ShifteeS))
+        return isKnownPredicate(ICmpInst::ICMP_SLE, ShifteeS, RHS);
+  }
+
+  return false;
+}
+
 bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS,
                                             const SCEV *FoundLHS,
@@ -11247,6 +11516,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
   if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
     return true;
 
+  if (isImpliedCondOperandsViaShift(Pred, LHS, RHS, FoundLHS, FoundRHS))
+    return true;
+
   if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS,
                                           CtxI))
     return true;
@@ -11323,6 +11595,7 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
   case ICmpInst::ICMP_ULE:
     return
         // min(A, ...) <= A
+        // FIXME: what about umin_seq?
        IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) ||
        // A <= max(A, ...)
        IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
@@ -12723,7 +12996,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
   case scUMaxExpr:
   case scSMaxExpr:
   case scUMinExpr:
-  case scSMinExpr: {
+  case scSMinExpr:
+  case scSequentialUMinExpr: {
     bool HasVarying = false;
     for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
       LoopDisposition D = getLoopDisposition(Op, L);
@@ -12813,7 +13087,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
   case scUMaxExpr:
   case scSMaxExpr:
   case scUMinExpr:
-  case scSMinExpr: {
+  case scSMinExpr:
+  case scSequentialUMinExpr: {
     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
     bool Proper = true;
     for (const SCEV *NAryOp : NAry->operands()) {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TFUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/TFUtils.cpp
index 3d10479c4544..26bc63983b4e 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TFUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TFUtils.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/Support/Base64.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/JSON.h"
@@ -22,6 +23,7 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include "google/protobuf/struct.pb.h"
 #include "google/protobuf/text_format.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/c_api_experimental.h"
@@ -72,6 +74,14 @@ TFStatusPtr createTFStatus() {
 TFSessionOptionsPtr createTFSessionOptions() {
   return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
 }
+
+void serialize(const Message &SE, std::string *OutStr) {
+  if (ProtobufTextMode) {
+    TextFormat::PrintToString(SE, OutStr);
+  } else {
+    *OutStr = SE.SerializeAsString();
+  }
+}
 } // namespace
 
 namespace llvm {
@@ -307,19 +317,13 @@ public:
         IncludeReward(IncludeReward), FeatureLists(LoggedFeatureSpecs.size()) {}
 
   // flush the logged info to a stream and clear the log contents.
-  void flush(raw_ostream &OS) {
+  void flush(std::string *Str) {
     size_t NrRecords = getNrRecords();
     (void)NrRecords;
     tensorflow::SequenceExample SE;
     transferLog(SE);
     assert(isSelfConsistent(SE, NrRecords));
-    std::string OutStr;
-    if (ProtobufTextMode)
-      google::protobuf::TextFormat::PrintToString(SE, &OutStr);
-    else
-      OutStr = SE.SerializeAsString();
-
-    OS << OutStr;
+    serialize(SE, Str);
   }
 
   char *addNewTensor(size_t FeatureID) {
@@ -567,5 +571,31 @@ char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
   return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
 }
 
-void Logger::flush(raw_ostream &OS) { LoggerData->flush(OS); }
+void Logger::flush(std::string *Str) { LoggerData->flush(Str); }
+
+void Logger::flush(raw_ostream &OS) {
+  std::string Buff;
+  LoggerData->flush(&Buff);
+  OS << Buff;
+}
+
+void Logger::flushLogs(raw_ostream &OS,
+                       const StringMap<std::unique_ptr<Logger>> &Loggers) {
+  google::protobuf::Struct Msg;
+  for (const auto &NamedLogger : Loggers) {
+    tensorflow::SequenceExample SE;
+    const auto &Logger = NamedLogger.second;
+    std::string Unencoded;
+    if (Logger->LoggerData->getNrRecords() > 0)
+      Logger->flush(&Unencoded);
+
+    (*Msg.mutable_fields())[NamedLogger.first().str()]
+        .mutable_string_value()
+        ->append(ProtobufTextMode ? Unencoded : encodeBase64(Unencoded));
+  }
+
+  std::string OutStr;
+  serialize(Msg, &OutStr);
+  OS << OutStr;
+}
 #endif // defined(LLVM_HAVE_TF_API)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
index 6aa9a77391dc..25e9dee98e13 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -408,6 +408,16 @@ bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType,
   return TTIImpl->isLegalMaskedScatter(DataType, Alignment);
 }
 
+bool TargetTransformInfo::forceScalarizeMaskedGather(VectorType *DataType,
+                                                     Align Alignment) const {
+  return TTIImpl->forceScalarizeMaskedGather(DataType, Alignment);
+}
+
+bool TargetTransformInfo::forceScalarizeMaskedScatter(VectorType *DataType,
+                                                      Align Alignment) const {
+  return TTIImpl->forceScalarizeMaskedScatter(DataType, Alignment);
+}
+
 bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
   return TTIImpl->isLegalMaskedCompressStore(DataType);
 }
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
index fc378f97de0b..34358739f9a8 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
@@ -396,10 +396,10 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
       V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
 }
 
-unsigned llvm::ComputeMinSignedBits(const Value *V, const DataLayout &DL,
-                                    unsigned Depth, AssumptionCache *AC,
-                                    const Instruction *CxtI,
-                                    const DominatorTree *DT) {
+unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
+                                         unsigned Depth, AssumptionCache *AC,
+                                         const Instruction *CxtI,
+                                         const DominatorTree *DT) {
   unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
   return V->getType()->getScalarSizeInBits() - SignBits + 1;
 }
@@ -1593,7 +1593,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
     // If we have a known 1, its position is our upper bound.
     unsigned PossibleLZ = Known2.countMaxLeadingZeros();
-    // If this call is undefined for 0, the result will be less than 2^n.
+    // If this call is poison for 0 input, the result will be less than 2^n.
     if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
       PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
     unsigned LowBits = Log2_32(PossibleLZ)+1;
@@ -1604,7 +1604,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
     // If we have a known 1, its position is our upper bound.
     unsigned PossibleTZ = Known2.countMaxTrailingZeros();
-    // If this call is undefined for 0, the result will be less than 2^n.
+    // If this call is poison for 0 input, the result will be less than 2^n.
     if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
       PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
     unsigned LowBits = Log2_32(PossibleTZ)+1;
@@ -3248,125 +3248,6 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
   return std::max(FirstAnswer, Known.countMinSignBits());
 }
 
-/// This function computes the integer multiple of Base that equals V.
-/// If successful, it returns true and returns the multiple in
-/// Multiple. If unsuccessful, it returns false. It looks
-/// through SExt instructions only if LookThroughSExt is true.
-bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
-                           bool LookThroughSExt, unsigned Depth) {
-  assert(V && "No Value?");
-  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
-  assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
-
-  Type *T = V->getType();
-
-  ConstantInt *CI = dyn_cast<ConstantInt>(V);
-
-  if (Base == 0)
-    return false;
-
-  if (Base == 1) {
-    Multiple = V;
-    return true;
-  }
-
-  ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
-  Constant *BaseVal = ConstantInt::get(T, Base);
-  if (CO && CO == BaseVal) {
-    // Multiple is 1.
-    Multiple = ConstantInt::get(T, 1);
-    return true;
-  }
-
-  if (CI && CI->getZExtValue() % Base == 0) {
-    Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
-    return true;
-  }
-
-  if (Depth == MaxAnalysisRecursionDepth) return false;
-
-  Operator *I = dyn_cast<Operator>(V);
-  if (!I) return false;
-
-  switch (I->getOpcode()) {
-  default: break;
-  case Instruction::SExt:
-    if (!LookThroughSExt) return false;
-    // otherwise fall through to ZExt
-    LLVM_FALLTHROUGH;
-  case Instruction::ZExt:
-    return ComputeMultiple(I->getOperand(0), Base, Multiple,
-                           LookThroughSExt, Depth+1);
-  case Instruction::Shl:
-  case Instruction::Mul: {
-    Value *Op0 = I->getOperand(0);
-    Value *Op1 = I->getOperand(1);
-
-    if (I->getOpcode() == Instruction::Shl) {
-      ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
-      if (!Op1CI) return false;
-      // Turn Op0 << Op1 into Op0 * 2^Op1
-      APInt Op1Int = Op1CI->getValue();
-      uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
-      APInt API(Op1Int.getBitWidth(), 0);
-      API.setBit(BitToSet);
-      Op1 = ConstantInt::get(V->getContext(), API);
-    }
-
-    Value *Mul0 = nullptr;
-    if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
-      if (Constant *Op1C = dyn_cast<Constant>(Op1))
-        if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
-          if (Op1C->getType()->getPrimitiveSizeInBits().getFixedSize() <
-              MulC->getType()->getPrimitiveSizeInBits().getFixedSize())
-            Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
-          if (Op1C->getType()->getPrimitiveSizeInBits().getFixedSize() >
-              MulC->getType()->getPrimitiveSizeInBits().getFixedSize())
-            MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
-
-          // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
-          Multiple = ConstantExpr::getMul(MulC, Op1C);
-          return true;
-        }
-
-      if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
-        if (Mul0CI->getValue() == 1) {
-          // V == Base * Op1, so return Op1
-          Multiple = Op1;
-          return true;
-        }
-    }
-
-    Value *Mul1 = nullptr;
-    if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
-      if (Constant *Op0C = dyn_cast<Constant>(Op0))
-        if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
-          if (Op0C->getType()->getPrimitiveSizeInBits().getFixedSize() <
-              MulC->getType()->getPrimitiveSizeInBits().getFixedSize())
-            Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
-          if (Op0C->getType()->getPrimitiveSizeInBits().getFixedSize() >
-              MulC->getType()->getPrimitiveSizeInBits().getFixedSize())
-            MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
-
-          // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
-          Multiple = ConstantExpr::getMul(MulC, Op0C);
-          return true;
-        }
-
-      if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
-        if (Mul1CI->getValue() == 1) {
-          // V == Base * Op0, so return Op0
-          Multiple = Op0;
-          return true;
-        }
-    }
-  }
-  }
-
-  // We could not determine if V is a multiple of Base.
-  return false;
-}
-
 Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
                                             const TargetLibraryInfo *TLI) {
   const Function *F = CB.getCalledFunction();
@@ -6756,17 +6637,27 @@ Optional<bool> llvm::isImpliedByDomCondition(CmpInst::Predicate Pred,
 }
 
 static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
-                              APInt &Upper, const InstrInfoQuery &IIQ) {
+                              APInt &Upper, const InstrInfoQuery &IIQ,
+                              bool PreferSignedRange) {
   unsigned Width = Lower.getBitWidth();
   const APInt *C;
   switch (BO.getOpcode()) {
   case Instruction::Add:
     if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
-      // FIXME: If we have both nuw and nsw, we should reduce the range further.
-      if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
+      bool HasNSW = IIQ.hasNoSignedWrap(&BO);
+      bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
+
+      // If the caller expects a signed compare, then try to use a signed range.
+      // Otherwise if both no-wraps are set, use the unsigned range because it
+      // is never larger than the signed range. Example:
+      // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
+      if (PreferSignedRange && HasNSW && HasNUW)
+        HasNUW = false;
+
+      if (HasNUW) {
         // 'add nuw x, C' produces [C, UINT_MAX].
         Lower = *C;
-      } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
+      } else if (HasNSW) {
         if (C->isNegative()) {
           // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
           Lower = APInt::getSignedMinValue(Width);
@@ -7083,8 +6974,8 @@ static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
   }
 }
 
-ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
-                                         AssumptionCache *AC,
+ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
+                                         bool UseInstrInfo, AssumptionCache *AC,
                                          const Instruction *CtxI,
                                          const DominatorTree *DT,
                                          unsigned Depth) {
@@ -7102,7 +6993,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
   APInt Lower = APInt(BitWidth, 0);
   APInt Upper = APInt(BitWidth, 0);
   if (auto *BO = dyn_cast<BinaryOperator>(V))
-    setLimitsForBinOp(*BO, Lower, Upper, IIQ);
+    setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned);
   else if (auto *II = dyn_cast<IntrinsicInst>(V))
     setLimitsForIntrinsic(*II, Lower, Upper);
   else if (auto *SI = dyn_cast<SelectInst>(V))
@@ -7134,8 +7025,10 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
     // Currently we just use information from comparisons.
     if (!Cmp || Cmp->getOperand(0) != V)
       continue;
-    ConstantRange RHS = computeConstantRange(Cmp->getOperand(1), UseInstrInfo,
-                                             AC, I, DT, Depth + 1);
+    // TODO: Set "ForSigned" parameter via Cmp->isSigned()?
+    ConstantRange RHS =
+        computeConstantRange(Cmp->getOperand(1), /* ForSigned */ false,
+                             UseInstrInfo, AC, I, DT, Depth + 1);
     CR = CR.intersectWith(
         ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS));
   }
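The ValueTracking hunks above thread a ForSigned/PreferSignedRange flag down to setLimitsForBinOp, so the caller can pick whichever no-wrap fact gives the tighter range for the kind of comparison it is about to perform. A small standalone sketch of the trade-off named in the comment in the patch ("add nuw nsw i8 X, -2"); the ranges are written out by hand with llvm::ConstantRange rather than queried through computeConstantRange, and the whole block is illustrative only:

```cpp
// Standalone sketch, not part of the patch: the two ranges from the example
// "add nuw nsw i8 X, -2".  In-tree, the choice between them is made by
// setLimitsForBinOp via the new PreferSignedRange flag.
#include "llvm/IR/ConstantRange.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // nuw view: X + 254 cannot wrap unsigned, so the result lies in [254, 255].
  // ConstantRange bounds are half-open; [254, 0) wraps around to {254, 255}.
  ConstantRange FromNUW(APInt(8, 254), APInt(8, 0));
  // nsw view: X + (-2) cannot wrap signed, so the result lies in
  // [SINT_MIN, SINT_MAX - 2] = [-128, 125]; the upper bound 126 is exclusive.
  ConstantRange FromNSW(APInt::getSignedMinValue(8), APInt(8, 126));
  outs() << "unsigned (nuw) range: " << FromNUW << "\n";
  outs() << "signed (nsw) range:   " << FromNSW << "\n";
  return 0;
}
```

For an unsigned compare the two-element nuw range is clearly the better choice, while for a signed compare the nsw range is the one that makes sense, which is exactly the decision the PreferSignedRange flag encodes.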