Diffstat (limited to 'lib/Analysis')
99 files changed, 5867 insertions, 3581 deletions
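The dominant change across these files is the threading of a new AAQueryInfo object through the alias-analysis query interface: each public AAResults entry point now creates a fresh AAQueryInfo and forwards to an overload taking AAQueryInfo &, so recursive queries share one per-query cache (AliasCache, IsCapturedCache) instead of the state BasicAAResult previously kept as members. A condensed sketch of that wrapper pattern, assembled from the hunks below rather than a complete implementation:

#include "llvm/Analysis/AliasAnalysis.h"

using namespace llvm;

// Public entry point: create per-query state, then forward to the
// AAQueryInfo-taking overload. The same two-level pattern is repeated for
// pointsToConstantMemory() and every getModRefInfo() variant.
AliasResult AAResults::alias(const MemoryLocation &LocA,
                             const MemoryLocation &LocB) {
  AAQueryInfo AAQIP; // holds the AliasCache / IsCapturedCache for this query
  return alias(LocA, LocB, AAQIP);
}

// Internal overload: every registered AA implementation receives the same
// AAQueryInfo, so nested queries reuse one cache for the whole top-level query.
AliasResult AAResults::alias(const MemoryLocation &LocA,
                             const MemoryLocation &LocB, AAQueryInfo &AAQI) {
  for (const auto &AA : AAs) {
    auto Result = AA->alias(LocA, LocB, AAQI);
    if (Result != MayAlias)
      return Result;
  }
  return MayAlias;
}

In BasicAAResult, intermediate results are cached in AAQI.AliasCache keyed by an AAQueryInfo::LocPair, replacing the member AliasCache that previously had to be cleared with shrink_and_clear after every top-level query.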
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 3446aef39938..32241e355eb8 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -1,9 +1,8 @@ //==- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation --==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -80,12 +79,16 @@ AAResults::~AAResults() { bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv) { - // Check if the AA manager itself has been invalidated. + // AAResults preserves the AAManager by default, due to the stateless nature + // of AliasAnalysis. There is no need to check whether it has been preserved + // explicitly. Check if any module dependency was invalidated and caused the + // AAManager to be invalidated. Invalidate ourselves in that case. auto PAC = PA.getChecker<AAManager>(); - if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) - return true; // The manager needs to be blown away, clear everything. + if (!PAC.preservedWhenStateless()) + return true; - // Check all of the dependencies registered. + // Check if any of the function dependencies were invalidated, and invalidate + // ourselves in that case. for (AnalysisKey *ID : AADeps) if (Inv.invalidate(ID, F, PA)) return true; @@ -100,8 +103,14 @@ bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA, AliasResult AAResults::alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + AAQueryInfo AAQIP; + return alias(LocA, LocB, AAQIP); +} + +AliasResult AAResults::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB, AAQueryInfo &AAQI) { for (const auto &AA : AAs) { - auto Result = AA->alias(LocA, LocB); + auto Result = AA->alias(LocA, LocB, AAQI); if (Result != MayAlias) return Result; } @@ -110,8 +119,14 @@ AliasResult AAResults::alias(const MemoryLocation &LocA, bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) { + AAQueryInfo AAQIP; + return pointsToConstantMemory(Loc, AAQIP, OrLocal); +} + +bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc, + AAQueryInfo &AAQI, bool OrLocal) { for (const auto &AA : AAs) - if (AA->pointsToConstantMemory(Loc, OrLocal)) + if (AA->pointsToConstantMemory(Loc, AAQI, OrLocal)) return true; return false; @@ -132,10 +147,16 @@ ModRefInfo AAResults::getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { } ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) { + AAQueryInfo AAQIP; + return getModRefInfo(I, Call2, AAQIP); +} + +ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2, + AAQueryInfo &AAQI) { // We may have two calls. if (const auto *Call1 = dyn_cast<CallBase>(I)) { // Check if the two calls modify the same memory. - return getModRefInfo(Call1, Call2); + return getModRefInfo(Call1, Call2, AAQI); } else if (I->isFenceLike()) { // If this is a fence, just return ModRef. 
return ModRefInfo::ModRef; @@ -145,7 +166,7 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) { // is that if the call references what this instruction // defines, it must be clobbered by this location. const MemoryLocation DefLoc = MemoryLocation::get(I); - ModRefInfo MR = getModRefInfo(Call2, DefLoc); + ModRefInfo MR = getModRefInfo(Call2, DefLoc, AAQI); if (isModOrRefSet(MR)) return setModAndRef(MR); } @@ -154,10 +175,17 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) { ModRefInfo AAResults::getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) { + AAQueryInfo AAQIP; + return getModRefInfo(Call, Loc, AAQIP); +} + +ModRefInfo AAResults::getModRefInfo(const CallBase *Call, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { ModRefInfo Result = ModRefInfo::ModRef; for (const auto &AA : AAs) { - Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc)); + Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc, AAQI)); // Early-exit the moment we reach the bottom of the lattice. if (isNoModRef(Result)) @@ -215,10 +243,16 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call, ModRefInfo AAResults::getModRefInfo(const CallBase *Call1, const CallBase *Call2) { + AAQueryInfo AAQIP; + return getModRefInfo(Call1, Call2, AAQIP); +} + +ModRefInfo AAResults::getModRefInfo(const CallBase *Call1, + const CallBase *Call2, AAQueryInfo &AAQI) { ModRefInfo Result = ModRefInfo::ModRef; for (const auto &AA : AAs) { - Result = intersectModRef(Result, AA->getModRefInfo(Call1, Call2)); + Result = intersectModRef(Result, AA->getModRefInfo(Call1, Call2, AAQI)); // Early-exit the moment we reach the bottom of the lattice. if (isNoModRef(Result)) @@ -397,6 +431,12 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, AliasResult AR) { ModRefInfo AAResults::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc) { + AAQueryInfo AAQIP; + return getModRefInfo(L, Loc, AAQIP); +} +ModRefInfo AAResults::getModRefInfo(const LoadInst *L, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { // Be conservative in the face of atomic. if (isStrongerThan(L->getOrdering(), AtomicOrdering::Unordered)) return ModRefInfo::ModRef; @@ -404,7 +444,7 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L, // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. if (Loc.Ptr) { - AliasResult AR = alias(MemoryLocation::get(L), Loc); + AliasResult AR = alias(MemoryLocation::get(L), Loc, AAQI); if (AR == NoAlias) return ModRefInfo::NoModRef; if (AR == MustAlias) @@ -416,12 +456,18 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L, ModRefInfo AAResults::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc) { + AAQueryInfo AAQIP; + return getModRefInfo(S, Loc, AAQIP); +} +ModRefInfo AAResults::getModRefInfo(const StoreInst *S, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { // Be conservative in the face of atomic. if (isStrongerThan(S->getOrdering(), AtomicOrdering::Unordered)) return ModRefInfo::ModRef; if (Loc.Ptr) { - AliasResult AR = alias(MemoryLocation::get(S), Loc); + AliasResult AR = alias(MemoryLocation::get(S), Loc, AAQI); // If the store address cannot alias the pointer in question, then the // specified memory cannot be modified by the store. if (AR == NoAlias) @@ -429,7 +475,7 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S, // If the pointer is a pointer to constant memory, then it could not have // been modified by this store. 
- if (pointsToConstantMemory(Loc)) + if (pointsToConstantMemory(Loc, AAQI)) return ModRefInfo::NoModRef; // If the store address aliases the pointer as must alias, set Must. @@ -442,17 +488,31 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S, } ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) { + AAQueryInfo AAQIP; + return getModRefInfo(S, Loc, AAQIP); +} + +ModRefInfo AAResults::getModRefInfo(const FenceInst *S, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { // If we know that the location is a constant memory location, the fence // cannot modify this location. - if (Loc.Ptr && pointsToConstantMemory(Loc)) + if (Loc.Ptr && pointsToConstantMemory(Loc, AAQI)) return ModRefInfo::Ref; return ModRefInfo::ModRef; } ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc) { + AAQueryInfo AAQIP; + return getModRefInfo(V, Loc, AAQIP); +} + +ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { if (Loc.Ptr) { - AliasResult AR = alias(MemoryLocation::get(V), Loc); + AliasResult AR = alias(MemoryLocation::get(V), Loc, AAQI); // If the va_arg address cannot alias the pointer in question, then the // specified memory cannot be accessed by the va_arg. if (AR == NoAlias) @@ -460,7 +520,7 @@ ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, // If the pointer is a pointer to constant memory, then it could not have // been modified by this va_arg. - if (pointsToConstantMemory(Loc)) + if (pointsToConstantMemory(Loc, AAQI)) return ModRefInfo::NoModRef; // If the va_arg aliases the pointer as must alias, set Must. @@ -474,10 +534,17 @@ ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, const MemoryLocation &Loc) { + AAQueryInfo AAQIP; + return getModRefInfo(CatchPad, Loc, AAQIP); +} + +ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { if (Loc.Ptr) { // If the pointer is a pointer to constant memory, // then it could not have been modified by this catchpad. - if (pointsToConstantMemory(Loc)) + if (pointsToConstantMemory(Loc, AAQI)) return ModRefInfo::NoModRef; } @@ -487,10 +554,17 @@ ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, const MemoryLocation &Loc) { + AAQueryInfo AAQIP; + return getModRefInfo(CatchRet, Loc, AAQIP); +} + +ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { if (Loc.Ptr) { // If the pointer is a pointer to constant memory, // then it could not have been modified by this catchpad. - if (pointsToConstantMemory(Loc)) + if (pointsToConstantMemory(Loc, AAQI)) return ModRefInfo::NoModRef; } @@ -500,12 +574,19 @@ ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX, const MemoryLocation &Loc) { + AAQueryInfo AAQIP; + return getModRefInfo(CX, Loc, AAQIP); +} + +ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { // Acquire/Release cmpxchg has properties that matter for arbitrary addresses. 
if (isStrongerThanMonotonic(CX->getSuccessOrdering())) return ModRefInfo::ModRef; if (Loc.Ptr) { - AliasResult AR = alias(MemoryLocation::get(CX), Loc); + AliasResult AR = alias(MemoryLocation::get(CX), Loc, AAQI); // If the cmpxchg address does not alias the location, it does not access // it. if (AR == NoAlias) @@ -521,12 +602,19 @@ ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX, ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc) { + AAQueryInfo AAQIP; + return getModRefInfo(RMW, Loc, AAQIP); +} + +ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { // Acquire/Release atomicrmw has properties that matter for arbitrary addresses. if (isStrongerThanMonotonic(RMW->getOrdering())) return ModRefInfo::ModRef; if (Loc.Ptr) { - AliasResult AR = alias(MemoryLocation::get(RMW), Loc); + AliasResult AR = alias(MemoryLocation::get(RMW), Loc, AAQI); // If the atomicrmw address does not alias the location, it does not access // it. if (AR == NoAlias) diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 85dd4fe95b33..e83703867e09 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -1,9 +1,8 @@ //===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/AliasAnalysisSummary.cpp b/lib/Analysis/AliasAnalysisSummary.cpp index 2b4879453beb..2f3396a44117 100644 --- a/lib/Analysis/AliasAnalysisSummary.cpp +++ b/lib/Analysis/AliasAnalysisSummary.cpp @@ -73,28 +73,28 @@ AliasAttrs getExternallyVisibleAttrs(AliasAttrs Attr) { } Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue IValue, - CallSite CS) { + CallBase &Call) { auto Index = IValue.Index; - auto Value = (Index == 0) ? CS.getInstruction() : CS.getArgument(Index - 1); - if (Value->getType()->isPointerTy()) - return InstantiatedValue{Value, IValue.DerefLevel}; + auto *V = (Index == 0) ? 
&Call : Call.getArgOperand(Index - 1); + if (V->getType()->isPointerTy()) + return InstantiatedValue{V, IValue.DerefLevel}; return None; } Optional<InstantiatedRelation> -instantiateExternalRelation(ExternalRelation ERelation, CallSite CS) { - auto From = instantiateInterfaceValue(ERelation.From, CS); +instantiateExternalRelation(ExternalRelation ERelation, CallBase &Call) { + auto From = instantiateInterfaceValue(ERelation.From, Call); if (!From) return None; - auto To = instantiateInterfaceValue(ERelation.To, CS); + auto To = instantiateInterfaceValue(ERelation.To, Call); if (!To) return None; return InstantiatedRelation{*From, *To, ERelation.Offset}; } Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr, - CallSite CS) { - auto Value = instantiateInterfaceValue(EAttr.IValue, CS); + CallBase &Call) { + auto Value = instantiateInterfaceValue(EAttr.IValue, Call); if (!Value) return None; return InstantiatedAttr{*Value, EAttr.Attr}; diff --git a/lib/Analysis/AliasAnalysisSummary.h b/lib/Analysis/AliasAnalysisSummary.h index fb93a12420f8..fe75b03cedef 100644 --- a/lib/Analysis/AliasAnalysisSummary.h +++ b/lib/Analysis/AliasAnalysisSummary.h @@ -1,9 +1,8 @@ //=====- CFLSummary.h - Abstract stratified sets implementation. --------=====// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -38,7 +37,7 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/CallSite.h" +#include "llvm/IR/InstrTypes.h" #include <bitset> namespace llvm { @@ -196,12 +195,13 @@ struct AliasSummary { SmallVector<ExternalAttribute, 8> RetParamAttributes; }; -/// This is the result of instantiating InterfaceValue at a particular callsite +/// This is the result of instantiating InterfaceValue at a particular call struct InstantiatedValue { Value *Val; unsigned DerefLevel; }; -Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue, CallSite); +Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue IValue, + CallBase &Call); inline bool operator==(InstantiatedValue LHS, InstantiatedValue RHS) { return LHS.Val == RHS.Val && LHS.DerefLevel == RHS.DerefLevel; @@ -229,8 +229,8 @@ struct InstantiatedRelation { InstantiatedValue From, To; int64_t Offset; }; -Optional<InstantiatedRelation> instantiateExternalRelation(ExternalRelation, - CallSite); +Optional<InstantiatedRelation> +instantiateExternalRelation(ExternalRelation ERelation, CallBase &Call); /// This is the result of instantiating ExternalAttribute at a particular /// callsite @@ -238,8 +238,8 @@ struct InstantiatedAttr { InstantiatedValue IValue; AliasAttrs Attr; }; -Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute, - CallSite); +Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr, + CallBase &Call); } template <> struct DenseMapInfo<cflaa::InstantiatedValue> { diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index f6ad704cc914..a6e5b9fab558 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -1,9 +1,8 @@ //===- 
AliasSetTracker.cpp - Alias Sets Tracker implementation-------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -14,7 +13,9 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/GuardUtils.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -127,24 +128,24 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) { void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, LocationSize Size, const AAMDNodes &AAInfo, - bool KnownMustAlias) { + bool KnownMustAlias, bool SkipSizeUpdate) { assert(!Entry.hasAliasSet() && "Entry already in set!"); // Check to see if we have to downgrade to _may_ alias. - if (isMustAlias() && !KnownMustAlias) + if (isMustAlias()) if (PointerRec *P = getSomePointer()) { - AliasAnalysis &AA = AST.getAliasAnalysis(); - AliasResult Result = - AA.alias(MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()), - MemoryLocation(Entry.getValue(), Size, AAInfo)); - if (Result != MustAlias) { - Alias = SetMayAlias; - AST.TotalMayAliasSetSize += size(); - } else { - // First entry of must alias must have maximum size! + if (!KnownMustAlias) { + AliasAnalysis &AA = AST.getAliasAnalysis(); + AliasResult Result = AA.alias( + MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()), + MemoryLocation(Entry.getValue(), Size, AAInfo)); + if (Result != MustAlias) { + Alias = SetMayAlias; + AST.TotalMayAliasSetSize += size(); + } + assert(Result != NoAlias && "Cannot be part of must set!"); + } else if (!SkipSizeUpdate) P->updateSizeAndAAInfo(Size, AAInfo); - } - assert(Result != NoAlias && "Cannot be part of must set!"); } Entry.setAliasSet(this); @@ -184,14 +185,15 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) { Access = ModRefAccess; } -/// aliasesPointer - Return true if the specified pointer "may" (or must) -/// alias one of the members in the set. +/// aliasesPointer - If the specified pointer "may" (or must) alias one of the +/// members in the set return the appropriate AliasResult. Otherwise return +/// NoAlias. /// -bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size, - const AAMDNodes &AAInfo, - AliasAnalysis &AA) const { +AliasResult AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size, + const AAMDNodes &AAInfo, + AliasAnalysis &AA) const { if (AliasAny) - return true; + return MayAlias; if (Alias == SetMustAlias) { assert(UnknownInsts.empty() && "Illegal must alias set!"); @@ -208,9 +210,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size, // If this is a may-alias set, we have to check all of the pointers in the set // to be sure it doesn't alias the set... 
for (iterator I = begin(), E = end(); I != E; ++I) - if (AA.alias(MemoryLocation(Ptr, Size, AAInfo), - MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo()))) - return true; + if (AliasResult AR = AA.alias( + MemoryLocation(Ptr, Size, AAInfo), + MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo()))) + return AR; // Check the unknown instructions... if (!UnknownInsts.empty()) { @@ -218,10 +221,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size, if (auto *Inst = getUnknownInst(i)) if (isModOrRefSet( AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo)))) - return true; + return MayAlias; } - return false; + return NoAlias; } bool AliasSet::aliasesUnknownInst(const Instruction *Inst, @@ -288,25 +291,38 @@ void AliasSetTracker::clear() { AliasSets.clear(); } - /// mergeAliasSetsForPointer - Given a pointer, merge all alias sets that may /// alias the pointer. Return the unified set, or nullptr if no set that aliases -/// the pointer was found. +/// the pointer was found. MustAliasAll is updated to true/false if the pointer +/// is found to MustAlias all the sets it merged. AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr, LocationSize Size, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, + bool &MustAliasAll) { AliasSet *FoundSet = nullptr; + AliasResult AllAR = MustAlias; for (iterator I = begin(), E = end(); I != E;) { iterator Cur = I++; - if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue; + if (Cur->Forward) + continue; + + AliasResult AR = Cur->aliasesPointer(Ptr, Size, AAInfo, AA); + if (AR == NoAlias) + continue; + + AllAR = + AliasResult(AllAR & AR); // Possible downgrade to May/Partial, even No - if (!FoundSet) { // If this is the first alias set ptr can go into. - FoundSet = &*Cur; // Remember it. - } else { // Otherwise, we must merge the sets. - FoundSet->mergeSetIn(*Cur, *this); // Merge in contents. + if (!FoundSet) { + // If this is the first alias set ptr can go into, remember it. + FoundSet = &*Cur; + } else { + // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*Cur, *this); } } + MustAliasAll = (AllAR == MustAlias); return FoundSet; } @@ -316,10 +332,13 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { iterator Cur = I++; if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA)) continue; - if (!FoundSet) // If this is the first alias set ptr can go into. - FoundSet = &*Cur; // Remember it. - else // Otherwise, we must merge the sets. - FoundSet->mergeSetIn(*Cur, *this); // Merge in contents. + if (!FoundSet) { + // If this is the first alias set ptr can go into, remember it. + FoundSet = &*Cur; + } else { + // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*Cur, *this); + } } return FoundSet; } @@ -329,7 +348,7 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) { Value * const Pointer = const_cast<Value*>(MemLoc.Ptr); const LocationSize Size = MemLoc.Size; const AAMDNodes &AAInfo = MemLoc.AATags; - + AliasSet::PointerRec &Entry = getEntryFor(Pointer); if (AliasAnyAS) { @@ -348,6 +367,7 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) { return *AliasAnyAS; } + bool MustAliasAll = false; // Check to see if the pointer is already known. if (Entry.hasAliasSet()) { // If the size changed, we may need to merge several alias sets. @@ -356,20 +376,21 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) { // is NoAlias, mergeAliasSetsForPointer(undef, ...) 
will not find the // the right set for undef, even if it exists. if (Entry.updateSizeAndAAInfo(Size, AAInfo)) - mergeAliasSetsForPointer(Pointer, Size, AAInfo); + mergeAliasSetsForPointer(Pointer, Size, AAInfo, MustAliasAll); // Return the set! return *Entry.getAliasSet(*this)->getForwardedTarget(*this); } - if (AliasSet *AS = mergeAliasSetsForPointer(Pointer, Size, AAInfo)) { + if (AliasSet *AS = + mergeAliasSetsForPointer(Pointer, Size, AAInfo, MustAliasAll)) { // Add it to the alias set it aliases. - AS->addPointer(*this, Entry, Size, AAInfo); + AS->addPointer(*this, Entry, Size, AAInfo, MustAliasAll); return *AS; } // Otherwise create a new alias set to hold the loaded pointer. AliasSets.push_back(new AliasSet()); - AliasSets.back().addPointer(*this, Entry, Size, AAInfo); + AliasSets.back().addPointer(*this, Entry, Size, AAInfo, true); return AliasSets.back(); } @@ -422,14 +443,12 @@ void AliasSetTracker::addUnknown(Instruction *Inst) { if (!Inst->mayReadOrWriteMemory()) return; // doesn't alias anything - AliasSet *AS = findAliasSetForUnknownInst(Inst); - if (AS) { + if (AliasSet *AS = findAliasSetForUnknownInst(Inst)) { AS->addUnknownInst(Inst, AA); return; } AliasSets.push_back(new AliasSet()); - AS = &AliasSets.back(); - AS->addUnknownInst(Inst, AA); + AliasSets.back().addUnknownInst(Inst, AA); } void AliasSetTracker::add(Instruction *I) { @@ -516,6 +535,15 @@ void AliasSetTracker::add(const AliasSetTracker &AST) { } } +void AliasSetTracker::addAllInstructionsInLoopUsingMSSA() { + assert(MSSA && L && "MSSA and L must be available"); + for (const BasicBlock *BB : L->blocks()) + if (auto *Accesses = MSSA->getBlockAccesses(BB)) + for (auto &Access : *Accesses) + if (auto *MUD = dyn_cast<MemoryUseOrDef>(&Access)) + add(MUD->getMemoryInst()); +} + // deleteValue method - This method is used to remove a pointer value from the // AliasSetTracker entirely. It should be used when an instruction is deleted // from the program to update the AST. If you don't use this, you would have @@ -563,9 +591,8 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { I = PointerMap.find_as(From); // Add it to the alias set it aliases... AliasSet *AS = I->second->getAliasSet(*this); - AS->addPointer(*this, Entry, I->second->getSize(), - I->second->getAAInfo(), - true); + AS->addPointer(*this, Entry, I->second->getSize(), I->second->getAAInfo(), + true, true); } AliasSet &AliasSetTracker::mergeAllAliasSets() { diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index bb8742123a0f..d46a8d8e306c 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -1,9 +1,8 @@ //===-- Analysis.cpp ------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp index 8bfd24ccf77b..cf2f845dee0a 100644 --- a/lib/Analysis/AssumptionCache.cpp +++ b/lib/Analysis/AssumptionCache.cpp @@ -1,9 +1,8 @@ //===- AssumptionCache.cpp - Cache finding @llvm.assume calls -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -54,11 +53,11 @@ AssumptionCache::getOrInsertAffectedValues(Value *V) { return AVIP.first->second; } -void AssumptionCache::updateAffectedValues(CallInst *CI) { +static void findAffectedValues(CallInst *CI, + SmallVectorImpl<Value *> &Affected) { // Note: This code must be kept in-sync with the code in // computeKnownBitsFromAssume in ValueTracking. - SmallVector<Value *, 16> Affected; auto AddAffected = [&Affected](Value *V) { if (isa<Argument>(V)) { Affected.push_back(V); @@ -109,6 +108,11 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) { AddAffectedFromEq(B); } } +} + +void AssumptionCache::updateAffectedValues(CallInst *CI) { + SmallVector<Value *, 16> Affected; + findAffectedValues(CI, Affected); for (auto &AV : Affected) { auto &AVV = getOrInsertAffectedValues(AV); @@ -117,6 +121,18 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) { } } +void AssumptionCache::unregisterAssumption(CallInst *CI) { + SmallVector<Value *, 16> Affected; + findAffectedValues(CI, Affected); + + for (auto &AV : Affected) { + auto AVI = AffectedValues.find_as(AV); + if (AVI != AffectedValues.end()) + AffectedValues.erase(AVI); + } + remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }); +} + void AssumptionCache::AffectedValueCallbackVH::deleted() { auto AVI = AC->AffectedValues.find(getValPtr()); if (AVI != AC->AffectedValues.end()) @@ -241,6 +257,13 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) { return *IP.first->second; } +AssumptionCache *AssumptionCacheTracker::lookupAssumptionCache(Function &F) { + auto I = AssumptionCaches.find_as(&F); + if (I != AssumptionCaches.end()) + return I->second.get(); + return nullptr; +} + void AssumptionCacheTracker::verifyAnalysis() const { // FIXME: In the long term the verifier should not be controllable with a // flag. We should either fix all passes to correctly update the assumption diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 332eeaa00e73..3721c99883b8 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -1,9 +1,8 @@ //===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -117,25 +116,44 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA, /// Returns true if the pointer is to a function-local object that never /// escapes from the function. -static bool isNonEscapingLocalObject(const Value *V) { +static bool isNonEscapingLocalObject( + const Value *V, + SmallDenseMap<const Value *, bool, 8> *IsCapturedCache = nullptr) { + SmallDenseMap<const Value *, bool, 8>::iterator CacheIt; + if (IsCapturedCache) { + bool Inserted; + std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false}); + if (!Inserted) + // Found cached result, return it! + return CacheIt->second; + } + // If this is a local allocation, check to see if it escapes. - if (isa<AllocaInst>(V) || isNoAliasCall(V)) + if (isa<AllocaInst>(V) || isNoAliasCall(V)) { // Set StoreCaptures to True so that we can assume in our callers that the // pointer is not the result of a load instruction. Currently // PointerMayBeCaptured doesn't have any special analysis for the // StoreCaptures=false case; if it did, our callers could be refined to be // more precise. - return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + if (IsCapturedCache) + CacheIt->second = Ret; + return Ret; + } // If this is an argument that corresponds to a byval or noalias argument, // then it has not escaped before entering the function. Check if it escapes // inside the function. if (const Argument *A = dyn_cast<Argument>(V)) - if (A->hasByValAttr() || A->hasNoAliasAttr()) + if (A->hasByValAttr() || A->hasNoAliasAttr()) { // Note even if the argument is marked nocapture, we still need to check // for copies made inside the function. The nocapture attribute only // specifies that there are no copies made that outlive the function. - return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + if (IsCapturedCache) + CacheIt->second = Ret; + return Ret; + } return false; } @@ -613,7 +631,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, /// the function, with global constants being considered local to all /// functions. bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { + AAQueryInfo &AAQI, bool OrLocal) { assert(Visited.empty() && "Visited must be cleared after use!"); unsigned MaxLookup = 8; @@ -623,7 +641,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL); if (!Visited.insert(V).second) { Visited.clear(); - return AAResultBase::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); } // An alloca instruction defines local memory. @@ -637,7 +655,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, // others. GV may even be a declaration, not a definition. if (!GV->isConstant()) { Visited.clear(); - return AAResultBase::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); } continue; } @@ -655,7 +673,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, // Don't bother inspecting phi nodes with many operands. 
if (PN->getNumIncomingValues() > MaxLookup) { Visited.clear(); - return AAResultBase::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); } for (Value *IncValue : PN->incoming_values()) Worklist.push_back(IncValue); @@ -664,7 +682,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, // Otherwise be conservative. Visited.clear(); - return AAResultBase::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); } while (!Worklist.empty() && --MaxLookup); Visited.clear(); @@ -799,24 +817,25 @@ static bool notDifferentParent(const Value *O1, const Value *O2) { #endif AliasResult BasicAAResult::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { + const MemoryLocation &LocB, + AAQueryInfo &AAQI) { assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && "BasicAliasAnalysis doesn't support interprocedural queries."); // If we have a directly cached entry for these locations, we have recursed // through this once, so just return the cached results. Notably, when this // happens, we don't clear the cache. - auto CacheIt = AliasCache.find(LocPair(LocA, LocB)); - if (CacheIt != AliasCache.end()) + auto CacheIt = AAQI.AliasCache.find(AAQueryInfo::LocPair(LocA, LocB)); + if (CacheIt != AAQI.AliasCache.end()) + return CacheIt->second; + + CacheIt = AAQI.AliasCache.find(AAQueryInfo::LocPair(LocB, LocA)); + if (CacheIt != AAQI.AliasCache.end()) return CacheIt->second; AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr, - LocB.Size, LocB.AATags); - // AliasCache rarely has more than 1 or 2 elements, always use - // shrink_and_clear so it quickly returns to the inline capacity of the - // SmallDenseMap if it ever grows larger. - // FIXME: This should really be shrink_to_inline_capacity_and_clear(). - AliasCache.shrink_and_clear(); + LocB.Size, LocB.AATags, AAQI); + VisitedPhiBBs.clear(); return Alias; } @@ -828,7 +847,8 @@ AliasResult BasicAAResult::alias(const MemoryLocation &LocA, /// say much about this query. We do, however, use simple "address taken" /// analysis on local objects. ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, - const MemoryLocation &Loc) { + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { assert(notDifferentParent(Call, Loc.Ptr) && "AliasAnalysis query involving multiple functions!"); @@ -855,7 +875,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, // then the call can not mod/ref the pointer unless the call takes the pointer // as an argument, and itself doesn't capture it. if (!isa<Constant>(Object) && Call != Object && - isNonEscapingLocalObject(Object)) { + isNonEscapingLocalObject(Object, &AAQI.IsCapturedCache)) { // Optimistically assume that call doesn't touch Object and check this // assumption in the following loop. @@ -881,11 +901,11 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, // If this is a no-capture pointer argument, see if we can tell that it // is impossible to alias the pointer we're checking. - AliasResult AR = - getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object)); + AliasResult AR = getBestAAResults().alias(MemoryLocation(*CI), + MemoryLocation(Object), AAQI); if (AR != MustAlias) IsMustAlias = false; - // Operand doesnt alias 'Object', continue looking for other aliases + // Operand doesn't alias 'Object', continue looking for other aliases if (AR == NoAlias) continue; // Operand aliases 'Object', but call doesn't modify it. 
Strengthen @@ -928,7 +948,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, if (isMallocOrCallocLikeFn(Call, &TLI)) { // Be conservative if the accessed pointer may alias the allocation - // fallback to the generic handling below. - if (getBestAAResults().alias(MemoryLocation(Call), Loc) == NoAlias) + if (getBestAAResults().alias(MemoryLocation(Call), Loc, AAQI) == NoAlias) return ModRefInfo::NoModRef; } @@ -940,11 +960,11 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, AliasResult SrcAA, DestAA; if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst), - Loc)) == MustAlias) + Loc, AAQI)) == MustAlias) // Loc is exactly the memcpy source thus disjoint from memcpy dest. return ModRefInfo::Ref; if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst), - Loc)) == MustAlias) + Loc, AAQI)) == MustAlias) // The converse case. return ModRefInfo::Mod; @@ -1000,11 +1020,12 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, return ModRefInfo::Ref; // The AAResultBase base class has some smarts, lets use them. - return AAResultBase::getModRefInfo(Call, Loc); + return AAResultBase::getModRefInfo(Call, Loc, AAQI); } ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1, - const CallBase *Call2) { + const CallBase *Call2, + AAQueryInfo &AAQI) { // While the assume intrinsic is marked as arbitrarily writing so that // proper control dependencies will be maintained, it never aliases any // particular memory location. @@ -1020,7 +1041,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1, // heap state at the point the guard is issued needs to be consistent in case // the guard invokes the "deopt" continuation. - // NB! This function is *not* commutative, so we specical case two + // NB! This function is *not* commutative, so we special case two // possibilities for guard intrinsics. if (isIntrinsicCall(Call1, Intrinsic::experimental_guard)) @@ -1034,7 +1055,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1, : ModRefInfo::NoModRef; // The AAResultBase base class has some smarts, lets use them. - return AAResultBase::getModRefInfo(Call1, Call2); + return AAResultBase::getModRefInfo(Call1, Call2, AAQI); } /// Provide ad-hoc rules to disambiguate accesses through two GEP operators, @@ -1266,11 +1287,10 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, /// We know that V1 is a GEP, but we don't know anything about V2. /// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for /// V2. -AliasResult -BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size, - const AAMDNodes &V1AAInfo, const Value *V2, - LocationSize V2Size, const AAMDNodes &V2AAInfo, - const Value *UnderlyingV1, const Value *UnderlyingV2) { +AliasResult BasicAAResult::aliasGEP( + const GEPOperator *GEP1, LocationSize V1Size, const AAMDNodes &V1AAInfo, + const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, + const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI) { DecomposedGEP DecompGEP1, DecompGEP2; unsigned MaxPointerSize = getMaxPointerSize(DL); DecompGEP1.StructOffset = DecompGEP1.OtherOffset = APInt(MaxPointerSize, 0); @@ -1306,14 +1326,14 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size, // Do the base pointers alias? 
AliasResult BaseAlias = aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(), - UnderlyingV2, LocationSize::unknown(), AAMDNodes()); + UnderlyingV2, LocationSize::unknown(), AAMDNodes(), AAQI); // Check for geps of non-aliasing underlying pointers where the offsets are // identical. if ((BaseAlias == MayAlias) && V1Size == V2Size) { // Do the base pointers alias assuming type and size. - AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo, - UnderlyingV2, V2Size, V2AAInfo); + AliasResult PreciseBaseAlias = aliasCheck( + UnderlyingV1, V1Size, V1AAInfo, UnderlyingV2, V2Size, V2AAInfo, AAQI); if (PreciseBaseAlias == NoAlias) { // See if the computed offset from the common pointer tells us about the // relation of the resulting pointer. @@ -1368,9 +1388,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size, if (V1Size == LocationSize::unknown() && V2Size == LocationSize::unknown()) return MayAlias; - AliasResult R = - aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(), V2, - LocationSize::unknown(), V2AAInfo, nullptr, UnderlyingV2); + AliasResult R = aliasCheck(UnderlyingV1, LocationSize::unknown(), + AAMDNodes(), V2, LocationSize::unknown(), + V2AAInfo, AAQI, nullptr, UnderlyingV2); if (R != MustAlias) { // If V2 may alias GEP base pointer, conservatively returns MayAlias. // If V2 is known not to alias GEP base pointer, then the two values @@ -1504,37 +1524,35 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) { /// Provides a bunch of ad-hoc rules to disambiguate a Select instruction /// against another. -AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, - LocationSize SISize, - const AAMDNodes &SIAAInfo, - const Value *V2, LocationSize V2Size, - const AAMDNodes &V2AAInfo, - const Value *UnderV2) { +AliasResult +BasicAAResult::aliasSelect(const SelectInst *SI, LocationSize SISize, + const AAMDNodes &SIAAInfo, const Value *V2, + LocationSize V2Size, const AAMDNodes &V2AAInfo, + const Value *UnderV2, AAQueryInfo &AAQI) { // If the values are Selects with the same condition, we can do a more precise // check: just check for aliases between the values on corresponding arms. if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) if (SI->getCondition() == SI2->getCondition()) { - AliasResult Alias = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo, - SI2->getTrueValue(), V2Size, V2AAInfo); + AliasResult Alias = + aliasCheck(SI->getTrueValue(), SISize, SIAAInfo, SI2->getTrueValue(), + V2Size, V2AAInfo, AAQI); if (Alias == MayAlias) return MayAlias; AliasResult ThisAlias = aliasCheck(SI->getFalseValue(), SISize, SIAAInfo, - SI2->getFalseValue(), V2Size, V2AAInfo); + SI2->getFalseValue(), V2Size, V2AAInfo, AAQI); return MergeAliasResults(ThisAlias, Alias); } // If both arms of the Select node NoAlias or MustAlias V2, then returns // NoAlias / MustAlias. Otherwise, returns MayAlias. 
- AliasResult Alias = - aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), - SISize, SIAAInfo, UnderV2); + AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), + SISize, SIAAInfo, AAQI, UnderV2); if (Alias == MayAlias) return MayAlias; - AliasResult ThisAlias = - aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo, - UnderV2); + AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), + SISize, SIAAInfo, AAQI, UnderV2); return MergeAliasResults(ThisAlias, Alias); } @@ -1544,7 +1562,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, const AAMDNodes &PNAAInfo, const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, - const Value *UnderV2) { + const Value *UnderV2, AAQueryInfo &AAQI) { // Track phi nodes we have visited. We use this information when we determine // value equivalence. VisitedPhiBBs.insert(PN->getParent()); @@ -1554,8 +1572,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, // on corresponding edges. if (const PHINode *PN2 = dyn_cast<PHINode>(V2)) if (PN2->getParent() == PN->getParent()) { - LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo), - MemoryLocation(V2, V2Size, V2AAInfo)); + AAQueryInfo::LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo), + MemoryLocation(V2, V2Size, V2AAInfo)); if (PN > V2) std::swap(Locs.first, Locs.second); // Analyse the PHIs' inputs under the assumption that the PHIs are @@ -1566,25 +1584,33 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, // that causes a MayAlias. // Pretend the phis do not alias. AliasResult Alias = NoAlias; - assert(AliasCache.count(Locs) && - "There must exist an entry for the phi node"); - AliasResult OrigAliasResult = AliasCache[Locs]; - AliasCache[Locs] = NoAlias; + AliasResult OrigAliasResult; + { + // Limited lifetime iterator invalidated by the aliasCheck call below. + auto CacheIt = AAQI.AliasCache.find(Locs); + assert((CacheIt != AAQI.AliasCache.end()) && + "There must exist an entry for the phi node"); + OrigAliasResult = CacheIt->second; + CacheIt->second = NoAlias; + } for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { AliasResult ThisAlias = aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo, PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), - V2Size, V2AAInfo); + V2Size, V2AAInfo, AAQI); Alias = MergeAliasResults(ThisAlias, Alias); if (Alias == MayAlias) break; } // Reset if speculation failed. - if (Alias != NoAlias) - AliasCache[Locs] = OrigAliasResult; - + if (Alias != NoAlias) { + auto Pair = + AAQI.AliasCache.insert(std::make_pair(Locs, OrigAliasResult)); + assert(!Pair.second && "Entry must have existed"); + Pair.first->second = OrigAliasResult; + } return Alias; } @@ -1658,9 +1684,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, if (isRecursive) PNSize = LocationSize::unknown(); - AliasResult Alias = - aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], - PNSize, PNAAInfo, UnderV2); + AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, + PNAAInfo, AAQI, UnderV2); // Early exit if the check of the first PHI source against V2 is MayAlias. // Other results are not possible. 
@@ -1673,7 +1698,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, Value *V = V1Srcs[i]; AliasResult ThisAlias = - aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, UnderV2); + aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, AAQI, UnderV2); Alias = MergeAliasResults(ThisAlias, Alias); if (Alias == MayAlias) break; @@ -1687,7 +1712,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, AAMDNodes V1AAInfo, const Value *V2, LocationSize V2Size, AAMDNodes V2AAInfo, - const Value *O1, const Value *O2) { + AAQueryInfo &AAQI, const Value *O1, + const Value *O2) { // If either of the memory references is empty, it doesn't matter what the // pointer values are. if (V1Size.isZero() || V2Size.isZero()) @@ -1755,9 +1781,11 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, // temporary store the nocapture argument's value in a temporary memory // location if that memory location doesn't escape. Or it may pass a // nocapture value to other functions as long as they don't capture it. - if (isEscapeSource(O1) && isNonEscapingLocalObject(O2)) + if (isEscapeSource(O1) && + isNonEscapingLocalObject(O2, &AAQI.IsCapturedCache)) return NoAlias; - if (isEscapeSource(O2) && isNonEscapingLocalObject(O1)) + if (isEscapeSource(O2) && + isNonEscapingLocalObject(O1, &AAQI.IsCapturedCache)) return NoAlias; } @@ -1772,12 +1800,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, // Check the cache before climbing up use-def chains. This also terminates // otherwise infinitely recursive queries. - LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo), - MemoryLocation(V2, V2Size, V2AAInfo)); + AAQueryInfo::LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo), + MemoryLocation(V2, V2Size, V2AAInfo)); if (V1 > V2) std::swap(Locs.first, Locs.second); - std::pair<AliasCacheTy::iterator, bool> Pair = - AliasCache.insert(std::make_pair(Locs, MayAlias)); + std::pair<AAQueryInfo::AliasCacheT::iterator, bool> Pair = + AAQI.AliasCache.try_emplace(Locs, MayAlias); if (!Pair.second) return Pair.first->second; @@ -1791,9 +1819,13 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, } if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { AliasResult Result = - aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2); - if (Result != MayAlias) - return AliasCache[Locs] = Result; + aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2, AAQI); + if (Result != MayAlias) { + auto ItInsPair = AAQI.AliasCache.insert(std::make_pair(Locs, Result)); + assert(!ItInsPair.second && "Entry must have existed"); + ItInsPair.first->second = Result; + return Result; + } } if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { @@ -1803,10 +1835,13 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, std::swap(V1AAInfo, V2AAInfo); } if (const PHINode *PN = dyn_cast<PHINode>(V1)) { - AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, - V2, V2Size, V2AAInfo, O2); - if (Result != MayAlias) - return AliasCache[Locs] = Result; + AliasResult Result = + aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2, AAQI); + if (Result != MayAlias) { + Pair = AAQI.AliasCache.try_emplace(Locs, Result); + assert(!Pair.second && "Entry must have existed"); + return Pair.first->second = Result; + } } if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) { @@ -1817,9 +1852,12 @@ AliasResult BasicAAResult::aliasCheck(const Value 
*V1, LocationSize V1Size, } if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) { AliasResult Result = - aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2); - if (Result != MayAlias) - return AliasCache[Locs] = Result; + aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2, AAQI); + if (Result != MayAlias) { + Pair = AAQI.AliasCache.try_emplace(Locs, Result); + assert(!Pair.second && "Entry must have existed"); + return Pair.first->second = Result; + } } // If both pointers are pointing into the same object and one of them @@ -1827,14 +1865,19 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, if (O1 == O2) if (V1Size.isPrecise() && V2Size.isPrecise() && (isObjectSize(O1, V1Size.getValue(), DL, TLI, NullIsValidLocation) || - isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation))) - return AliasCache[Locs] = PartialAlias; + isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation))) { + Pair = AAQI.AliasCache.try_emplace(Locs, PartialAlias); + assert(!Pair.second && "Entry must have existed"); + return Pair.first->second = PartialAlias; + } // Recurse back into the best AA results we have, potentially with refined // memory locations. We have already ensured that BasicAA has a MayAlias // cache result for these, so any recursion back into BasicAA won't loop. - AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second); - return AliasCache[Locs] = Result; + AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second, AAQI); + Pair = AAQI.AliasCache.try_emplace(Locs, Result); + assert(!Pair.second && "Entry must have existed"); + return Pair.first->second = Result; } /// Check whether two Values can be considered equivalent. @@ -1863,7 +1906,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V, // the Values cannot come from different iterations of a potential cycle the // phi nodes could be involved in. for (auto *P : VisitedPhiBBs) - if (isPotentiallyReachable(&P->front(), Inst, DT, LI)) + if (isPotentiallyReachable(&P->front(), Inst, nullptr, DT, LI)) return false; return true; diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index ef27c36517ea..de183bbde173 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -1,9 +1,8 @@ //===- BlockFrequencyInfo.cpp - Block Frequency Analysis ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -204,11 +203,12 @@ BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const { } Optional<uint64_t> -BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB) const { +BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB, + bool AllowSynthetic) const { if (!BFI) return None; - return BFI->getBlockProfileCount(*getFunction(), BB); + return BFI->getBlockProfileCount(*getFunction(), BB, AllowSynthetic); } Optional<uint64_t> diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 08ebcc47a807..0db6dd04a7e8 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -1,9 +1,8 @@ //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -558,14 +557,17 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const { Optional<uint64_t> BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F, - const BlockNode &Node) const { - return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency()); + const BlockNode &Node, + bool AllowSynthetic) const { + return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency(), + AllowSynthetic); } Optional<uint64_t> BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F, - uint64_t Freq) const { - auto EntryCount = F.getEntryCount(); + uint64_t Freq, + bool AllowSynthetic) const { + auto EntryCount = F.getEntryCount(AllowSynthetic); if (!EntryCount) return None; // Use 128 bit APInt to do the arithmetic to avoid overflow. diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 7f544b27fe9d..5eb95003f5d8 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -1,9 +1,8 @@ //===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -661,8 +660,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, if (!CI) return false; + auto GetConstantInt = [](Value *V) { + if (auto *I = dyn_cast<BitCastInst>(V)) + return dyn_cast<ConstantInt>(I->getOperand(0)); + return dyn_cast<ConstantInt>(V); + }; + Value *RHS = CI->getOperand(1); - ConstantInt *CV = dyn_cast<ConstantInt>(RHS); + ConstantInt *CV = GetConstantInt(RHS); if (!CV) return false; diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp index aa880a62b754..18b83d6838cc 100644 --- a/lib/Analysis/CFG.cpp +++ b/lib/Analysis/CFG.cpp @@ -1,9 +1,8 @@ //===-- CFG.cpp - BasicBlock analysis --------------------------------------==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -13,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CFG.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" @@ -120,22 +120,33 @@ static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) { return L; } -// True if there is a loop which contains both BB1 and BB2. -static bool loopContainsBoth(const LoopInfo *LI, - const BasicBlock *BB1, const BasicBlock *BB2) { - const Loop *L1 = getOutermostLoop(LI, BB1); - const Loop *L2 = getOutermostLoop(LI, BB2); - return L1 != nullptr && L1 == L2; -} - bool llvm::isPotentiallyReachableFromMany( SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB, - const DominatorTree *DT, const LoopInfo *LI) { + const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT, + const LoopInfo *LI) { // When the stop block is unreachable, it's dominated from everywhere, // regardless of whether there's a path between the two blocks. if (DT && !DT->isReachableFromEntry(StopBB)) DT = nullptr; + // We can't skip directly from a block that dominates the stop block if the + // exclusion block is potentially in between. + if (ExclusionSet && !ExclusionSet->empty()) + DT = nullptr; + + // Normally any block in a loop is reachable from any other block in a loop, + // however excluded blocks might partition the body of a loop to make that + // untrue. + SmallPtrSet<const Loop *, 8> LoopsWithHoles; + if (LI && ExclusionSet) { + for (auto BB : *ExclusionSet) { + if (const Loop *L = getOutermostLoop(LI, BB)) + LoopsWithHoles.insert(L); + } + } + + const Loop *StopLoop = LI ? getOutermostLoop(LI, StopBB) : nullptr; + // Limit the number of blocks we visit. The goal is to avoid run-away compile // times on large CFGs without hampering sensible code. Arbitrarily chosen. 
unsigned Limit = 32; @@ -146,10 +157,23 @@ bool llvm::isPotentiallyReachableFromMany( continue; if (BB == StopBB) return true; + if (ExclusionSet && ExclusionSet->count(BB)) + continue; if (DT && DT->dominates(BB, StopBB)) return true; - if (LI && loopContainsBoth(LI, BB, StopBB)) - return true; + + const Loop *Outer = nullptr; + if (LI) { + Outer = getOutermostLoop(LI, BB); + // If we're in a loop with a hole, not all blocks in the loop are + // reachable from all other blocks. That implies we can't simply jump to + // the loop's exit blocks, as that exit might need to pass through an + // excluded block. Clear Outer so we process BB's successors. + if (LoopsWithHoles.count(Outer)) + Outer = nullptr; + if (StopLoop && Outer == StopLoop) + return true; + } if (!--Limit) { // We haven't been able to prove it one way or the other. Conservatively @@ -157,7 +181,7 @@ bool llvm::isPotentiallyReachableFromMany( return true; } - if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : nullptr) { + if (Outer) { // All blocks in a single loop are reachable from all other blocks. From // any of these blocks, we can skip directly to the exits of the loop, // ignoring any other blocks inside the loop body. @@ -181,11 +205,13 @@ bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B, Worklist.push_back(const_cast<BasicBlock*>(A)); return isPotentiallyReachableFromMany(Worklist, const_cast<BasicBlock *>(B), - DT, LI); + nullptr, DT, LI); } -bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B, - const DominatorTree *DT, const LoopInfo *LI) { +bool llvm::isPotentiallyReachable( + const Instruction *A, const Instruction *B, + const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT, + const LoopInfo *LI) { assert(A->getParent()->getParent() == B->getParent()->getParent() && "This analysis is function-local!"); @@ -227,11 +253,20 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B, Worklist.push_back(const_cast<BasicBlock*>(A->getParent())); } - if (A->getParent() == &A->getParent()->getParent()->getEntryBlock()) - return true; - if (B->getParent() == &A->getParent()->getParent()->getEntryBlock()) - return false; + if (DT) { + if (DT->isReachableFromEntry(A->getParent()) && + !DT->isReachableFromEntry(B->getParent())) + return false; + if (!ExclusionSet || ExclusionSet->empty()) { + if (A->getParent() == &A->getParent()->getParent()->getEntryBlock() && + DT->isReachableFromEntry(B->getParent())) + return true; + if (B->getParent() == &A->getParent()->getParent()->getEntryBlock() && + DT->isReachableFromEntry(A->getParent())) + return false; + } + } return isPotentiallyReachableFromMany( - Worklist, const_cast<BasicBlock *>(B->getParent()), DT, LI); + Worklist, const_cast<BasicBlock *>(B->getParent()), ExclusionSet, DT, LI); } diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 6d01e9d5d447..619b675b58d8 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -1,9 +1,8 @@ //===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp index 1c61dd369a05..690e514d4f5c 100644 --- a/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -1,9 +1,8 @@ //===- CFLAndersAliasAnalysis.cpp - Unification-based Alias Analysis ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -613,7 +612,7 @@ static void initializeWorkList(std::vector<WorkListItem> &WorkList, for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { auto Src = InstantiatedValue{Val, I}; // If there's an assignment edge from X to Y, it means Y is reachable from - // X at S2 and X is reachable from Y at S1 + // X at S3 and X is reachable from Y at S1 for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) { propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet, WorkList); @@ -876,7 +875,8 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA, } AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { + const MemoryLocation &LocB, + AAQueryInfo &AAQI) { if (LocA.Ptr == LocB.Ptr) return MustAlias; @@ -886,11 +886,11 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA, // ConstantExpr, but every query needs to have at least one Value tied to a // Function, and neither GlobalValues nor ConstantExprs are. if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) - return AAResultBase::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB, AAQI); AliasResult QueryResult = query(LocA, LocB); if (QueryResult == MayAlias) - return AAResultBase::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB, AAQI); return QueryResult; } diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h index 12121d717433..21842ed36487 100644 --- a/lib/Analysis/CFLGraph.h +++ b/lib/Analysis/CFLGraph.h @@ -1,9 +1,8 @@ //===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -25,7 +24,6 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -154,7 +152,7 @@ public: } }; -///A builder class used to create CFLGraph instance from a given function +/// A builder class used to create CFLGraph instance from a given function /// The CFL-AA that uses this builder must provide its own type as a template /// argument. 
This is necessary for interprocedural processing: CFLGraphBuilder /// needs a way of obtaining the summary of other functions when callinsts are @@ -183,24 +181,23 @@ template <typename CFLAA> class CFLGraphBuilder { static bool hasUsefulEdges(ConstantExpr *CE) { // ConstantExpr doesn't have terminators, invokes, or fences, so only - // needs - // to check for compares. + // needs to check for compares. return CE->getOpcode() != Instruction::ICmp && CE->getOpcode() != Instruction::FCmp; } // Returns possible functions called by CS into the given SmallVectorImpl. // Returns true if targets found, false otherwise. - static bool getPossibleTargets(CallSite CS, + static bool getPossibleTargets(CallBase &Call, SmallVectorImpl<Function *> &Output) { - if (auto *Fn = CS.getCalledFunction()) { + if (auto *Fn = Call.getCalledFunction()) { Output.push_back(Fn); return true; } // TODO: If the call is indirect, we might be able to enumerate all - // potential - // targets of the call and return them, rather than just failing. + // potential targets of the call and return them, rather than just + // failing. return false; } @@ -294,6 +291,11 @@ template <typename CFLAA> class CFLGraphBuilder { addAssignEdge(Op2, &Inst); } + void visitUnaryOperator(UnaryOperator &Inst) { + auto *Src = Inst.getOperand(0); + addAssignEdge(Src, &Inst); + } + void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { auto *Ptr = Inst.getPointerOperand(); auto *Val = Inst.getNewValOperand(); @@ -370,11 +372,11 @@ template <typename CFLAA> class CFLGraphBuilder { return !Fn->hasExactDefinition(); } - bool tryInterproceduralAnalysis(CallSite CS, + bool tryInterproceduralAnalysis(CallBase &Call, const SmallVectorImpl<Function *> &Fns) { assert(Fns.size() > 0); - if (CS.arg_size() > MaxSupportedArgsInSummary) + if (Call.arg_size() > MaxSupportedArgsInSummary) return false; // Exit early if we'll fail anyway @@ -382,7 +384,7 @@ template <typename CFLAA> class CFLGraphBuilder { if (isFunctionExternal(Fn) || Fn->isVarArg()) return false; // Fail if the caller does not provide enough arguments - assert(Fn->arg_size() <= CS.arg_size()); + assert(Fn->arg_size() <= Call.arg_size()); if (!AA.getAliasSummary(*Fn)) return false; } @@ -393,7 +395,7 @@ template <typename CFLAA> class CFLGraphBuilder { auto &RetParamRelations = Summary->RetParamRelations; for (auto &Relation : RetParamRelations) { - auto IRelation = instantiateExternalRelation(Relation, CS); + auto IRelation = instantiateExternalRelation(Relation, Call); if (IRelation.hasValue()) { Graph.addNode(IRelation->From); Graph.addNode(IRelation->To); @@ -403,7 +405,7 @@ template <typename CFLAA> class CFLGraphBuilder { auto &RetParamAttributes = Summary->RetParamAttributes; for (auto &Attribute : RetParamAttributes) { - auto IAttr = instantiateExternalAttribute(Attribute, CS); + auto IAttr = instantiateExternalAttribute(Attribute, Call); if (IAttr.hasValue()) Graph.addNode(IAttr->IValue, IAttr->Attr); } @@ -412,37 +414,35 @@ template <typename CFLAA> class CFLGraphBuilder { return true; } - void visitCallSite(CallSite CS) { - auto Inst = CS.getInstruction(); - + void visitCallBase(CallBase &Call) { // Make sure all arguments and return value are added to the graph first - for (Value *V : CS.args()) + for (Value *V : Call.args()) if (V->getType()->isPointerTy()) addNode(V); - if (Inst->getType()->isPointerTy()) - addNode(Inst); + if (Call.getType()->isPointerTy()) + addNode(&Call); // Check if Inst is a call to a library function that // allocates/deallocates on the heap. 
Those kinds of functions do not // introduce any aliases. // TODO: address other common library functions such as realloc(), // strdup(), etc. - if (isMallocOrCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI)) + if (isMallocOrCallocLikeFn(&Call, &TLI) || isFreeCall(&Call, &TLI)) return; // TODO: Add support for noalias args/all the other fun function // attributes that we can tack on. SmallVector<Function *, 4> Targets; - if (getPossibleTargets(CS, Targets)) - if (tryInterproceduralAnalysis(CS, Targets)) + if (getPossibleTargets(Call, Targets)) + if (tryInterproceduralAnalysis(Call, Targets)) return; // Because the function is opaque, we need to note that anything // could have happened to the arguments (unless the function is marked // readonly or readnone), and that the result could alias just about // anything, too (unless the result is marked noalias). - if (!CS.onlyReadsMemory()) - for (Value *V : CS.args()) { + if (!Call.onlyReadsMemory()) + for (Value *V : Call.args()) { if (V->getType()->isPointerTy()) { // The argument itself escapes. Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped()); @@ -453,12 +453,12 @@ template <typename CFLAA> class CFLGraphBuilder { } } - if (Inst->getType()->isPointerTy()) { - auto *Fn = CS.getCalledFunction(); + if (Call.getType()->isPointerTy()) { + auto *Fn = Call.getCalledFunction(); if (Fn == nullptr || !Fn->returnDoesNotAlias()) // No need to call addNode() since we've added Inst at the // beginning of this function and we know it is not a global. - Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown()); + Graph.addAttr(InstantiatedValue{&Call, 0}, getAttrUnknown()); } } @@ -559,6 +559,7 @@ template <typename CFLAA> class CFLGraphBuilder { } case Instruction::Add: + case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: @@ -583,6 +584,11 @@ template <typename CFLAA> class CFLGraphBuilder { break; } + case Instruction::FNeg: { + addAssignEdge(CE->getOperand(0), CE); + break; + } + default: llvm_unreachable("Unknown instruction type encountered!"); } diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp index 30ce13578e54..44b1834f70bf 100644 --- a/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -1,9 +1,8 @@ //===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp index fd2292ced017..a0b3f83cca6a 100644 --- a/lib/Analysis/CGSCCPassManager.cpp +++ b/lib/Analysis/CGSCCPassManager.cpp @@ -1,9 +1,8 @@ //===- CGSCCPassManager.cpp - Managing & running CGSCC passes -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
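The visitCallBase() fallback above is the conservative core of the CFL graph construction: when the callee is opaque, every pointer argument is treated as escaping and as potentially overwritten (unless the call only reads memory), and a pointer result is treated as possibly aliasing anything (unless it is marked noalias). A toy sketch of that marking step, with a string-keyed map standing in for CFLGraph and its attribute lattice (all names here are illustrative):

#include <string>
#include <unordered_map>
#include <vector>

enum ToyAttr : unsigned { Escaped = 1, Unknown = 2 };

struct ToyGraph {
  // "value@deref-level" -> attribute bits
  std::unordered_map<std::string, unsigned> Attrs;
  void addAttr(const std::string &Key, unsigned A) { Attrs[Key] |= A; }
};

// Conservative handling of a call whose callee is unknown or has no summary.
void markOpaqueCall(ToyGraph &G, const std::vector<std::string> &PtrArgs,
                    bool OnlyReadsMemory, bool ReturnsPointer,
                    bool ReturnIsNoAlias, const std::string &Result) {
  if (!OnlyReadsMemory)
    for (const std::string &Arg : PtrArgs) {
      G.addAttr(Arg + "@0", Escaped); // the argument itself escapes
      G.addAttr(Arg + "@1", Unknown); // what it points to may have changed
    }
  if (ReturnsPointer && !ReturnIsNoAlias)
    G.addAttr(Result + "@0", Unknown); // result may alias almost anything
}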
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -111,6 +110,12 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, // ...getContext().yield(); } + // Before we mark all of *this* SCC's analyses as preserved below, intersect + // this with the cross-SCC preserved analysis set. This is used to allow + // CGSCC passes to mutate ancestor SCCs and still trigger proper invalidation + // for them. + UR.CrossSCCPA.intersect(PA); + // Invalidation was handled after each pass in the above loop for the current // SCC. Therefore, the remaining analysis results in the AnalysisManager are // preserved. We mark this with a set so that we don't need to inspect each diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp index 0da678e1611b..ec5e94d499be 100644 --- a/lib/Analysis/CallGraph.cpp +++ b/lib/Analysis/CallGraph.cpp @@ -1,9 +1,8 @@ //===- CallGraph.cpp - Build a Module's call graph ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -11,7 +10,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Module.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" @@ -64,25 +62,25 @@ void CallGraph::addToCallGraph(Function *F) { // If this function has external linkage or has its address taken, anything // could call it. if (!F->hasLocalLinkage() || F->hasAddressTaken()) - ExternalCallingNode->addCalledFunction(CallSite(), Node); + ExternalCallingNode->addCalledFunction(nullptr, Node); // If this function is not defined in this translation unit, it could call // anything. if (F->isDeclaration() && !F->isIntrinsic()) - Node->addCalledFunction(CallSite(), CallsExternalNode.get()); + Node->addCalledFunction(nullptr, CallsExternalNode.get()); // Look for calls by this function. for (BasicBlock &BB : *F) for (Instruction &I : BB) { - if (auto CS = CallSite(&I)) { - const Function *Callee = CS.getCalledFunction(); + if (auto *Call = dyn_cast<CallBase>(&I)) { + const Function *Callee = Call->getCalledFunction(); if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID())) // Indirect calls of intrinsics are not allowed so no need to check. // We can be more precise here by using TargetArg returned by // Intrinsic::isLeaf. - Node->addCalledFunction(CS, CallsExternalNode.get()); + Node->addCalledFunction(Call, CallsExternalNode.get()); else if (!Callee->isIntrinsic()) - Node->addCalledFunction(CS, getOrInsertFunction(Callee)); + Node->addCalledFunction(Call, getOrInsertFunction(Callee)); } } } @@ -185,10 +183,10 @@ LLVM_DUMP_METHOD void CallGraphNode::dump() const { print(dbgs()); } /// removeCallEdgeFor - This method removes the edge in the node for the /// specified call site. Note that this method takes linear time, so it /// should be used sparingly. 
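Most of the churn in this file (and in several files below) is the same mechanical substitution: instead of constructing the old CallSite adaptor around an Instruction, the code now tests for CallBase directly. A short illustration of the resulting idiom, assuming the LLVM headers of this tree (the function itself is illustrative, not part of the patch):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Walk every call or invoke in F via CallBase, the replacement for the
// CallSite wrapper whose header this patch drops from the includes.
static void forEachCallEdge(Function &F) {
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (auto *Call = dyn_cast<CallBase>(&I)) {
        Function *Callee = Call->getCalledFunction();
        if (Callee && Callee->isIntrinsic())
          continue; // intrinsics are not interesting call edges here
        // ... record an edge to Callee, or to an "external" node if null
        (void)Callee;
      }
}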
-void CallGraphNode::removeCallEdgeFor(CallSite CS) { +void CallGraphNode::removeCallEdgeFor(CallBase &Call) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callsite to remove!"); - if (I->first == CS.getInstruction()) { + if (I->first == &Call) { I->second->DropRef(); *I = CalledFunctions.back(); CalledFunctions.pop_back(); @@ -228,13 +226,13 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) { /// replaceCallEdge - This method replaces the edge in the node for the /// specified call site with a new one. Note that this method takes linear /// time, so it should be used sparingly. -void CallGraphNode::replaceCallEdge(CallSite CS, - CallSite NewCS, CallGraphNode *NewNode){ +void CallGraphNode::replaceCallEdge(CallBase &Call, CallBase &NewCall, + CallGraphNode *NewNode) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callsite to remove!"); - if (I->first == CS.getInstruction()) { + if (I->first == &Call) { I->second->DropRef(); - I->first = NewCS.getInstruction(); + I->first = &NewCall; I->second = NewNode; NewNode->AddRef(); return; diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp index 0aed57a39387..196ef400bc4e 100644 --- a/lib/Analysis/CallGraphSCCPass.cpp +++ b/lib/Analysis/CallGraphSCCPass.cpp @@ -1,9 +1,8 @@ //===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -20,7 +19,6 @@ #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Intrinsics.h" @@ -202,7 +200,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, /// This never happens in checking mode. bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, bool CheckingMode) { - DenseMap<Value*, CallGraphNode*> CallSites; + DenseMap<Value *, CallGraphNode *> Calls; LLVM_DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() << " nodes:\n"; @@ -231,21 +229,21 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { // If this call site is null, then the function pass deleted the call // entirely and the WeakTrackingVH nulled it out. + auto *Call = dyn_cast_or_null<CallBase>(I->first); if (!I->first || // If we've already seen this call site, then the FunctionPass RAUW'd // one call with another, which resulted in two "uses" in the edge // list of the same call. - CallSites.count(I->first) || + Calls.count(I->first) || // If the call edge is not from a call or invoke, or it is a // instrinsic call, then the function pass RAUW'd a call with // another value. This can happen when constant folding happens // of well known functions etc. 
- !CallSite(I->first) || - (CallSite(I->first).getCalledFunction() && - CallSite(I->first).getCalledFunction()->isIntrinsic() && - Intrinsic::isLeaf( - CallSite(I->first).getCalledFunction()->getIntrinsicID()))) { + !Call || + (Call->getCalledFunction() && + Call->getCalledFunction()->isIntrinsic() && + Intrinsic::isLeaf(Call->getCalledFunction()->getIntrinsicID()))) { assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); @@ -269,15 +267,14 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, continue; } - assert(!CallSites.count(I->first) && + assert(!Calls.count(I->first) && "Call site occurs in node multiple times"); - CallSite CS(I->first); - if (CS) { - Function *Callee = CS.getCalledFunction(); + if (Call) { + Function *Callee = Call->getCalledFunction(); // Ignore intrinsics because they're not really function calls. if (!Callee || !(Callee->isIntrinsic())) - CallSites.insert(std::make_pair(I->first, I->second)); + Calls.insert(std::make_pair(I->first, I->second)); } ++I; } @@ -288,23 +285,25 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, for (BasicBlock &BB : *F) for (Instruction &I : BB) { - CallSite CS(&I); - if (!CS) continue; - Function *Callee = CS.getCalledFunction(); - if (Callee && Callee->isIntrinsic()) continue; + auto *Call = dyn_cast<CallBase>(&I); + if (!Call) + continue; + Function *Callee = Call->getCalledFunction(); + if (Callee && Callee->isIntrinsic()) + continue; // If this call site already existed in the callgraph, just verify it - // matches up to expectations and remove it from CallSites. - DenseMap<Value*, CallGraphNode*>::iterator ExistingIt = - CallSites.find(CS.getInstruction()); - if (ExistingIt != CallSites.end()) { + // matches up to expectations and remove it from Calls. + DenseMap<Value *, CallGraphNode *>::iterator ExistingIt = + Calls.find(Call); + if (ExistingIt != Calls.end()) { CallGraphNode *ExistingNode = ExistingIt->second; - // Remove from CallSites since we have now seen it. - CallSites.erase(ExistingIt); + // Remove from Calls since we have now seen it. + Calls.erase(ExistingIt); // Verify that the callee is right. - if (ExistingNode->getFunction() == CS.getCalledFunction()) + if (ExistingNode->getFunction() == Call->getCalledFunction()) continue; // If we are in checking mode, we are not allowed to actually mutate @@ -312,7 +311,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, // callgraph is less precise than it could be (e.g. an indirect call // site could be turned direct), don't reject it in checking mode, and // don't tweak it to be more precise. - if (CheckingMode && CS.getCalledFunction() && + if (CheckingMode && Call->getCalledFunction() && ExistingNode->getFunction() == nullptr) continue; @@ -322,7 +321,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, // If not, we either went from a direct call to indirect, indirect to // direct, or direct to different direct. CallGraphNode *CalleeNode; - if (Function *Callee = CS.getCalledFunction()) { + if (Function *Callee = Call->getCalledFunction()) { CalleeNode = CG.getOrInsertFunction(Callee); // Keep track of whether we turned an indirect call into a direct // one. @@ -336,7 +335,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, } // Update the edge target in CGN. 
- CGN->replaceCallEdge(CS, CS, CalleeNode); + CGN->replaceCallEdge(*Call, *Call, CalleeNode); MadeChange = true; continue; } @@ -346,7 +345,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, // If the call site didn't exist in the CGN yet, add it. CallGraphNode *CalleeNode; - if (Function *Callee = CS.getCalledFunction()) { + if (Function *Callee = Call->getCalledFunction()) { CalleeNode = CG.getOrInsertFunction(Callee); ++NumDirectAdded; } else { @@ -354,7 +353,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, ++NumIndirectAdded; } - CGN->addCalledFunction(CS, CalleeNode); + CGN->addCalledFunction(Call, CalleeNode); MadeChange = true; } @@ -376,12 +375,12 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, // they are dangling pointers. WeakTrackingVH should save us for this, so // abort if // this happens. - assert(CallSites.empty() && "Dangling pointers found in call sites map"); + assert(Calls.empty() && "Dangling pointers found in call sites map"); // Periodically do an explicit clear to remove tombstones when processing // large scc's. if ((FunctionNo & 15) == 15) - CallSites.clear(); + Calls.clear(); } LLVM_DEBUG(if (MadeChange) { @@ -682,11 +681,28 @@ Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &OS, return new PrintCallGraphPass(Banner, OS); } +static std::string getDescription(const CallGraphSCC &SCC) { + std::string Desc = "SCC ("; + bool First = true; + for (CallGraphNode *CGN : SCC) { + if (First) + First = false; + else + Desc += ", "; + Function *F = CGN->getFunction(); + if (F) + Desc += F->getName(); + else + Desc += "<<null function>>"; + } + Desc += ")"; + return Desc; +} + bool CallGraphSCCPass::skipSCC(CallGraphSCC &SCC) const { - return !SCC.getCallGraph().getModule() - .getContext() - .getOptPassGate() - .shouldRunPass(this, SCC); + OptPassGate &Gate = + SCC.getCallGraph().getModule().getContext().getOptPassGate(); + return Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(SCC)); } char DummyCGSCCPass::ID = 0; diff --git a/lib/Analysis/CallPrinter.cpp b/lib/Analysis/CallPrinter.cpp index e7017e77652a..d24cbd104bf6 100644 --- a/lib/Analysis/CallPrinter.cpp +++ b/lib/Analysis/CallPrinter.cpp @@ -1,9 +1,8 @@ //===- CallPrinter.cpp - DOT printer for call graph -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 669f4f2835fa..adaa83a6c443 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -1,9 +1,8 @@ //===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -102,14 +101,14 @@ namespace { SmallVector<BasicBlock*, 32> Worklist; Worklist.append(succ_begin(BB), succ_end(BB)); - return !isPotentiallyReachableFromMany(Worklist, BB, DT); + return !isPotentiallyReachableFromMany(Worklist, BB, nullptr, DT); } // If the value is defined in the same basic block as use and BeforeHere, // there is no need to explore the use if BeforeHere dominates use. // Check whether there is a path from I to BeforeHere. if (BeforeHere != I && DT->dominates(BeforeHere, I) && - !isPotentiallyReachable(I, BeforeHere, DT)) + !isPotentiallyReachable(I, BeforeHere, nullptr, DT)) return true; return false; @@ -331,14 +330,32 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, AddUses(I); break; case Instruction::ICmp: { - // Don't count comparisons of a no-alias return value against null as - // captures. This allows us to ignore comparisons of malloc results - // with null, for example. - if (ConstantPointerNull *CPN = - dyn_cast<ConstantPointerNull>(I->getOperand(1))) + if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(1))) { + // Don't count comparisons of a no-alias return value against null as + // captures. This allows us to ignore comparisons of malloc results + // with null, for example. if (CPN->getType()->getAddressSpace() == 0) if (isNoAliasCall(V->stripPointerCasts())) break; + if (!I->getFunction()->nullPointerIsDefined()) { + auto *O = I->getOperand(0)->stripPointerCastsSameRepresentation(); + // An inbounds GEP can either be a valid pointer (pointing into + // or to the end of an allocation), or be null in the default + // address space. So for an inbounds GEPs there is no way to let + // the pointer escape using clever GEP hacking because doing so + // would make the pointer point outside of the allocated object + // and thus make the GEP result a poison value. + if (auto *GEP = dyn_cast<GetElementPtrInst>(O)) + if (GEP->isInBounds()) + break; + // Comparing a dereferenceable_or_null argument against null + // cannot lead to pointer escapes, because if it is not null it + // must be a valid (in-bounds) pointer. + bool CanBeNull; + if (O->getPointerDereferenceableBytes(I->getModule()->getDataLayout(), CanBeNull)) + break; + } + } // Comparison against value stored in global variable. Given the pointer // does not escape, its value cannot be guessed and stored separately in a // global variable. diff --git a/lib/Analysis/CmpInstAnalysis.cpp b/lib/Analysis/CmpInstAnalysis.cpp index 27071babec5c..a5757be2c4f4 100644 --- a/lib/Analysis/CmpInstAnalysis.cpp +++ b/lib/Analysis/CmpInstAnalysis.cpp @@ -1,9 +1,8 @@ //===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
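The expanded ICmp case above is a small decision procedure: comparing a pointer against null only counts as a capture if the comparison could actually reveal something about the pointer. A condensed sketch of that decision with the IR queries replaced by boolean inputs (the address-space check and the stripPointerCasts steps of the real code are folded into the inputs; all parameter names are illustrative):

// Returns true if `icmp eq/ne P, null` may capture information about P.
// Each early return mirrors one of the bail-outs in the patch.
bool nullCompareMayCapture(bool PointerIsNoAliasCall,
                           bool NullPointerIsDefined,
                           bool StripsToInBoundsGEP,
                           unsigned DereferenceableBytes) {
  if (PointerIsNoAliasCall)
    return false; // e.g. a malloc result compared against null
  if (!NullPointerIsDefined) {
    if (StripsToInBoundsGEP)
      return false; // inbounds GEP is a valid pointer or null, never a leak
    if (DereferenceableBytes > 0)
      return false; // dereferenceable_or_null: non-null implies valid
  }
  return true; // otherwise stay conservative and treat it as a capture
}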
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 46cc87d2b178..627d955c865f 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -1,9 +1,8 @@ //===- CodeMetrics.cpp - Code cost measurements ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,7 +15,6 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" @@ -126,14 +124,12 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, continue; // Special handling for calls. - if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - ImmutableCallSite CS(&I); - - if (const Function *F = CS.getCalledFunction()) { + if (const auto *Call = dyn_cast<CallBase>(&I)) { + if (const Function *F = Call->getCalledFunction()) { // If a function is both internal and has a single use, then it is // extremely likely to get inlined in the future (it was probably // exposed by an interleaved devirtualization pass). - if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse()) + if (!Call->isNoInline() && F->hasInternalLinkage() && F->hasOneUse()) ++NumInlineCandidates; // If this call is to function itself, then the function is recursive. @@ -148,7 +144,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, } else { // We don't want inline asm to count as a call - that would prevent loop // unrolling. The argument setup cost is still real, though. - if (!isa<InlineAsm>(CS.getCalledValue())) + if (!Call->isInlineAsm()) ++NumCalls; } } diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 5da29d6d2372..20231ca78b45 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1,9 +1,8 @@ //===-- ConstantFolding.cpp - Fold instructions into constants ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -26,6 +25,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/Config/config.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -516,7 +516,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, MapTy = Type::getInt64Ty(C->getContext()); else if (LoadTy->isVectorTy()) { MapTy = PointerType::getIntNTy(C->getContext(), - DL.getTypeAllocSizeInBits(LoadTy)); + DL.getTypeSizeInBits(LoadTy)); } else return nullptr; @@ -1000,7 +1000,9 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, const TargetLibraryInfo *TLI) { Type *DestTy = InstOrCE->getType(); - // Handle easy binops first. + if (Instruction::isUnaryOp(Opcode)) + return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL); + if (Instruction::isBinaryOp(Opcode)) return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL); @@ -1025,15 +1027,18 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, case Instruction::FCmp: llvm_unreachable("Invalid for compares"); case Instruction::Call: if (auto *F = dyn_cast<Function>(Ops.back())) { - ImmutableCallSite CS(cast<CallInst>(InstOrCE)); - if (canConstantFoldCallTo(CS, F)) - return ConstantFoldCall(CS, F, Ops.slice(0, Ops.size() - 1), TLI); + const auto *Call = cast<CallBase>(InstOrCE); + if (canConstantFoldCallTo(Call, F)) + return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI); } return nullptr; case Instruction::Select: return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]); case Instruction::ExtractElement: return ConstantExpr::getExtractElement(Ops[0], Ops[1]); + case Instruction::ExtractValue: + return ConstantExpr::getExtractValue( + Ops[0], dyn_cast<ExtractValueInst>(InstOrCE)->getIndices()); case Instruction::InsertElement: return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); case Instruction::ShuffleVector: @@ -1263,6 +1268,13 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, return ConstantExpr::getCompare(Predicate, Ops0, Ops1); } +Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, + const DataLayout &DL) { + assert(Instruction::isUnaryOp(Opcode)); + + return ConstantExpr::get(Opcode, Op); +} + Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL) { @@ -1367,8 +1379,8 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // Constant Folding for Calls // -bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) { - if (CS.isNoBuiltin() || CS.isStrictFP()) +bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { + if (Call->isNoBuiltin() || Call->isStrictFP()) return false; switch (F->getIntrinsicID()) { case Intrinsic::fabs: @@ -1414,6 +1426,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) { case Intrinsic::uadd_sat: case Intrinsic::ssub_sat: case Intrinsic::usub_sat: + case Intrinsic::smul_fix: + case Intrinsic::smul_fix_sat: case Intrinsic::convert_from_fp16: case Intrinsic::convert_to_fp16: case Intrinsic::bitreverse: @@ -1518,14 +1532,12 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) { namespace { Constant *GetConstantFoldFPValue(double V, Type *Ty) { - if (Ty->isHalfTy()) { + if (Ty->isHalfTy() || 
Ty->isFloatTy()) { APFloat APF(V); bool unused; - APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &unused); + APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused); return ConstantFP::get(Ty->getContext(), APF); } - if (Ty->isFloatTy()) - return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); llvm_unreachable("Can only constant fold half/float/double"); @@ -1641,522 +1653,538 @@ static bool getConstIntOrUndef(Value *Op, const APInt *&C) { return false; } -Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, - ArrayRef<Constant *> Operands, - const TargetLibraryInfo *TLI, - ImmutableCallSite CS) { - if (Operands.size() == 1) { - if (IntrinsicID == Intrinsic::is_constant) { - // We know we have a "Constant" argument. But we want to only - // return true for manifest constants, not those that depend on - // constants with unknowable values, e.g. GlobalValue or BlockAddress. - if (isManifestConstant(Operands[0])) - return ConstantInt::getTrue(Ty->getContext()); - return nullptr; - } - if (isa<UndefValue>(Operands[0])) { - // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN. - // ctpop() is between 0 and bitwidth, pick 0 for undef. - if (IntrinsicID == Intrinsic::cos || - IntrinsicID == Intrinsic::ctpop) - return Constant::getNullValue(Ty); - if (IntrinsicID == Intrinsic::bswap || - IntrinsicID == Intrinsic::bitreverse || - IntrinsicID == Intrinsic::launder_invariant_group || - IntrinsicID == Intrinsic::strip_invariant_group) - return Operands[0]; - } +static Constant *ConstantFoldScalarCall1(StringRef Name, + Intrinsic::ID IntrinsicID, + Type *Ty, + ArrayRef<Constant *> Operands, + const TargetLibraryInfo *TLI, + const CallBase *Call) { + assert(Operands.size() == 1 && "Wrong number of operands."); + + if (IntrinsicID == Intrinsic::is_constant) { + // We know we have a "Constant" argument. But we want to only + // return true for manifest constants, not those that depend on + // constants with unknowable values, e.g. GlobalValue or BlockAddress. + if (isManifestConstant(Operands[0])) + return ConstantInt::getTrue(Ty->getContext()); + return nullptr; + } + if (isa<UndefValue>(Operands[0])) { + // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN. + // ctpop() is between 0 and bitwidth, pick 0 for undef. + if (IntrinsicID == Intrinsic::cos || + IntrinsicID == Intrinsic::ctpop) + return Constant::getNullValue(Ty); + if (IntrinsicID == Intrinsic::bswap || + IntrinsicID == Intrinsic::bitreverse || + IntrinsicID == Intrinsic::launder_invariant_group || + IntrinsicID == Intrinsic::strip_invariant_group) + return Operands[0]; + } - if (isa<ConstantPointerNull>(Operands[0])) { - // launder(null) == null == strip(null) iff in addrspace 0 - if (IntrinsicID == Intrinsic::launder_invariant_group || - IntrinsicID == Intrinsic::strip_invariant_group) { - // If instruction is not yet put in a basic block (e.g. when cloning - // a function during inlining), CS caller may not be available. - // So check CS's BB first before querying CS.getCaller. - const Function *Caller = CS.getParent() ? 
CS.getCaller() : nullptr; - if (Caller && - !NullPointerIsDefined( - Caller, Operands[0]->getType()->getPointerAddressSpace())) { - return Operands[0]; - } - return nullptr; + if (isa<ConstantPointerNull>(Operands[0])) { + // launder(null) == null == strip(null) iff in addrspace 0 + if (IntrinsicID == Intrinsic::launder_invariant_group || + IntrinsicID == Intrinsic::strip_invariant_group) { + // If instruction is not yet put in a basic block (e.g. when cloning + // a function during inlining), Call's caller may not be available. + // So check Call's BB first before querying Call->getCaller. + const Function *Caller = + Call->getParent() ? Call->getCaller() : nullptr; + if (Caller && + !NullPointerIsDefined( + Caller, Operands[0]->getType()->getPointerAddressSpace())) { + return Operands[0]; } + return nullptr; } + } - if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) { - if (IntrinsicID == Intrinsic::convert_to_fp16) { - APFloat Val(Op->getValueAPF()); - - bool lost = false; - Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost); + if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) { + if (IntrinsicID == Intrinsic::convert_to_fp16) { + APFloat Val(Op->getValueAPF()); - return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt()); - } + bool lost = false; + Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost); - if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) - return nullptr; + return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt()); + } - if (IntrinsicID == Intrinsic::round) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToAway); - return ConstantFP::get(Ty->getContext(), V); - } + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) + return nullptr; - if (IntrinsicID == Intrinsic::floor) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardNegative); - return ConstantFP::get(Ty->getContext(), V); - } + if (IntrinsicID == Intrinsic::round) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmNearestTiesToAway); + return ConstantFP::get(Ty->getContext(), V); + } - if (IntrinsicID == Intrinsic::ceil) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardPositive); - return ConstantFP::get(Ty->getContext(), V); - } + if (IntrinsicID == Intrinsic::floor) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), V); + } - if (IntrinsicID == Intrinsic::trunc) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardZero); - return ConstantFP::get(Ty->getContext(), V); - } + if (IntrinsicID == Intrinsic::ceil) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), V); + } - if (IntrinsicID == Intrinsic::rint) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToEven); - return ConstantFP::get(Ty->getContext(), V); - } + if (IntrinsicID == Intrinsic::trunc) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), V); + } - if (IntrinsicID == Intrinsic::nearbyint) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToEven); - return ConstantFP::get(Ty->getContext(), V); - } + if (IntrinsicID == Intrinsic::rint) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); + } - /// We only fold functions with finite 
arguments. Folding NaN and inf is - /// likely to be aborted with an exception anyway, and some host libms - /// have known errors raising exceptions. - if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity()) - return nullptr; + if (IntrinsicID == Intrinsic::nearbyint) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); + } - /// Currently APFloat versions of these functions do not exist, so we use - /// the host native double versions. Float versions are not called - /// directly but for all these it is true (float)(f((double)arg)) == - /// f(arg). Long double not supported yet. - double V = getValueAsDouble(Op); + /// We only fold functions with finite arguments. Folding NaN and inf is + /// likely to be aborted with an exception anyway, and some host libms + /// have known errors raising exceptions. + if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity()) + return nullptr; - switch (IntrinsicID) { - default: break; - case Intrinsic::fabs: - return ConstantFoldFP(fabs, V, Ty); - case Intrinsic::log2: - return ConstantFoldFP(Log2, V, Ty); - case Intrinsic::log: - return ConstantFoldFP(log, V, Ty); - case Intrinsic::log10: - return ConstantFoldFP(log10, V, Ty); - case Intrinsic::exp: - return ConstantFoldFP(exp, V, Ty); - case Intrinsic::exp2: - return ConstantFoldFP(exp2, V, Ty); - case Intrinsic::sin: - return ConstantFoldFP(sin, V, Ty); - case Intrinsic::cos: - return ConstantFoldFP(cos, V, Ty); - case Intrinsic::sqrt: - return ConstantFoldFP(sqrt, V, Ty); - } + /// Currently APFloat versions of these functions do not exist, so we use + /// the host native double versions. Float versions are not called + /// directly but for all these it is true (float)(f((double)arg)) == + /// f(arg). Long double not supported yet. 
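The rounding intrinsics in the hunk above are folded without touching the host libm: the operand's APFloat is rounded to an integral value with the rounding mode that matches the intrinsic. That mechanism in isolation (assumes LLVM's APFloat from llvm/ADT/APFloat.h; the enum is an illustrative stand-in for the real Intrinsic IDs):

#include "llvm/ADT/APFloat.h"
using llvm::APFloat;

enum class RoundKind { Floor, Ceil, Trunc, Round, Rint };

// Fold a rounding operation on a constant by picking the APFloat rounding
// mode that corresponds to the operation, as the cases above do.
APFloat foldRounding(APFloat V, RoundKind K) {
  switch (K) {
  case RoundKind::Floor: V.roundToIntegral(APFloat::rmTowardNegative); break;
  case RoundKind::Ceil:  V.roundToIntegral(APFloat::rmTowardPositive); break;
  case RoundKind::Trunc: V.roundToIntegral(APFloat::rmTowardZero); break;
  case RoundKind::Round: V.roundToIntegral(APFloat::rmNearestTiesToAway); break;
  case RoundKind::Rint:  V.roundToIntegral(APFloat::rmNearestTiesToEven); break;
  }
  return V;
}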
+ double V = getValueAsDouble(Op); - if (!TLI) - return nullptr; + switch (IntrinsicID) { + default: break; + case Intrinsic::fabs: + return ConstantFoldFP(fabs, V, Ty); + case Intrinsic::log2: + return ConstantFoldFP(Log2, V, Ty); + case Intrinsic::log: + return ConstantFoldFP(log, V, Ty); + case Intrinsic::log10: + return ConstantFoldFP(log10, V, Ty); + case Intrinsic::exp: + return ConstantFoldFP(exp, V, Ty); + case Intrinsic::exp2: + return ConstantFoldFP(exp2, V, Ty); + case Intrinsic::sin: + return ConstantFoldFP(sin, V, Ty); + case Intrinsic::cos: + return ConstantFoldFP(cos, V, Ty); + case Intrinsic::sqrt: + return ConstantFoldFP(sqrt, V, Ty); + } - char NameKeyChar = Name[0]; - if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_') - NameKeyChar = Name[2]; - - switch (NameKeyChar) { - case 'a': - if ((Name == "acos" && TLI->has(LibFunc_acos)) || - (Name == "acosf" && TLI->has(LibFunc_acosf)) || - (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) || - (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite))) - return ConstantFoldFP(acos, V, Ty); - else if ((Name == "asin" && TLI->has(LibFunc_asin)) || - (Name == "asinf" && TLI->has(LibFunc_asinf)) || - (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) || - (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite))) - return ConstantFoldFP(asin, V, Ty); - else if ((Name == "atan" && TLI->has(LibFunc_atan)) || - (Name == "atanf" && TLI->has(LibFunc_atanf))) - return ConstantFoldFP(atan, V, Ty); - break; - case 'c': - if ((Name == "ceil" && TLI->has(LibFunc_ceil)) || - (Name == "ceilf" && TLI->has(LibFunc_ceilf))) - return ConstantFoldFP(ceil, V, Ty); - else if ((Name == "cos" && TLI->has(LibFunc_cos)) || - (Name == "cosf" && TLI->has(LibFunc_cosf))) - return ConstantFoldFP(cos, V, Ty); - else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) || - (Name == "coshf" && TLI->has(LibFunc_coshf)) || - (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) || - (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite))) - return ConstantFoldFP(cosh, V, Ty); - break; - case 'e': - if ((Name == "exp" && TLI->has(LibFunc_exp)) || - (Name == "expf" && TLI->has(LibFunc_expf)) || - (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) || - (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite))) - return ConstantFoldFP(exp, V, Ty); - if ((Name == "exp2" && TLI->has(LibFunc_exp2)) || - (Name == "exp2f" && TLI->has(LibFunc_exp2f)) || - (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) || - (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite))) - // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a - // C99 library. 
- return ConstantFoldBinaryFP(pow, 2.0, V, Ty); - break; - case 'f': - if ((Name == "fabs" && TLI->has(LibFunc_fabs)) || - (Name == "fabsf" && TLI->has(LibFunc_fabsf))) - return ConstantFoldFP(fabs, V, Ty); - else if ((Name == "floor" && TLI->has(LibFunc_floor)) || - (Name == "floorf" && TLI->has(LibFunc_floorf))) - return ConstantFoldFP(floor, V, Ty); - break; - case 'l': - if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) || - (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) || - (Name == "__log_finite" && V > 0 && - TLI->has(LibFunc_log_finite)) || - (Name == "__logf_finite" && V > 0 && - TLI->has(LibFunc_logf_finite))) - return ConstantFoldFP(log, V, Ty); - else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) || - (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) || - (Name == "__log10_finite" && V > 0 && - TLI->has(LibFunc_log10_finite)) || - (Name == "__log10f_finite" && V > 0 && - TLI->has(LibFunc_log10f_finite))) - return ConstantFoldFP(log10, V, Ty); - break; - case 'r': - if ((Name == "round" && TLI->has(LibFunc_round)) || - (Name == "roundf" && TLI->has(LibFunc_roundf))) - return ConstantFoldFP(round, V, Ty); - break; - case 's': - if ((Name == "sin" && TLI->has(LibFunc_sin)) || - (Name == "sinf" && TLI->has(LibFunc_sinf))) - return ConstantFoldFP(sin, V, Ty); - else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) || - (Name == "sinhf" && TLI->has(LibFunc_sinhf)) || - (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) || - (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite))) - return ConstantFoldFP(sinh, V, Ty); - else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) || - (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf))) - return ConstantFoldFP(sqrt, V, Ty); - break; - case 't': - if ((Name == "tan" && TLI->has(LibFunc_tan)) || - (Name == "tanf" && TLI->has(LibFunc_tanf))) - return ConstantFoldFP(tan, V, Ty); - else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) || - (Name == "tanhf" && TLI->has(LibFunc_tanhf))) - return ConstantFoldFP(tanh, V, Ty); - break; - default: - break; - } + if (!TLI) return nullptr; - } - if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) { - switch (IntrinsicID) { - case Intrinsic::bswap: - return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap()); - case Intrinsic::ctpop: - return ConstantInt::get(Ty, Op->getValue().countPopulation()); - case Intrinsic::bitreverse: - return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits()); - case Intrinsic::convert_from_fp16: { - APFloat Val(APFloat::IEEEhalf(), Op->getValue()); - - bool lost = false; - APFloat::opStatus status = Val.convert( - Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost); - - // Conversion is always precise. - (void)status; - assert(status == APFloat::opOK && !lost && - "Precision lost during fp16 constfolding"); - - return ConstantFP::get(Ty->getContext(), Val); - } - default: - return nullptr; - } - } + char NameKeyChar = Name[0]; + if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_') + NameKeyChar = Name[2]; - // Support ConstantVector in case we have an Undef in the top. 
- if (isa<ConstantVector>(Operands[0]) || - isa<ConstantDataVector>(Operands[0])) { - auto *Op = cast<Constant>(Operands[0]); - switch (IntrinsicID) { - default: break; - case Intrinsic::x86_sse_cvtss2si: - case Intrinsic::x86_sse_cvtss2si64: - case Intrinsic::x86_sse2_cvtsd2si: - case Intrinsic::x86_sse2_cvtsd2si64: - if (ConstantFP *FPOp = - dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) - return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), - /*roundTowardZero=*/false, Ty, - /*IsSigned*/true); - break; - case Intrinsic::x86_sse_cvttss2si: - case Intrinsic::x86_sse_cvttss2si64: - case Intrinsic::x86_sse2_cvttsd2si: - case Intrinsic::x86_sse2_cvttsd2si64: - if (ConstantFP *FPOp = - dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) - return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), - /*roundTowardZero=*/true, Ty, - /*IsSigned*/true); - break; - } + switch (NameKeyChar) { + case 'a': + if ((Name == "acos" && TLI->has(LibFunc_acos)) || + (Name == "acosf" && TLI->has(LibFunc_acosf)) || + (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) || + (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite))) + return ConstantFoldFP(acos, V, Ty); + else if ((Name == "asin" && TLI->has(LibFunc_asin)) || + (Name == "asinf" && TLI->has(LibFunc_asinf)) || + (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) || + (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite))) + return ConstantFoldFP(asin, V, Ty); + else if ((Name == "atan" && TLI->has(LibFunc_atan)) || + (Name == "atanf" && TLI->has(LibFunc_atanf))) + return ConstantFoldFP(atan, V, Ty); + break; + case 'c': + if ((Name == "ceil" && TLI->has(LibFunc_ceil)) || + (Name == "ceilf" && TLI->has(LibFunc_ceilf))) + return ConstantFoldFP(ceil, V, Ty); + else if ((Name == "cos" && TLI->has(LibFunc_cos)) || + (Name == "cosf" && TLI->has(LibFunc_cosf))) + return ConstantFoldFP(cos, V, Ty); + else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) || + (Name == "coshf" && TLI->has(LibFunc_coshf)) || + (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) || + (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite))) + return ConstantFoldFP(cosh, V, Ty); + break; + case 'e': + if ((Name == "exp" && TLI->has(LibFunc_exp)) || + (Name == "expf" && TLI->has(LibFunc_expf)) || + (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) || + (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite))) + return ConstantFoldFP(exp, V, Ty); + if ((Name == "exp2" && TLI->has(LibFunc_exp2)) || + (Name == "exp2f" && TLI->has(LibFunc_exp2f)) || + (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) || + (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite))) + // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a + // C99 library. 
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty); + break; + case 'f': + if ((Name == "fabs" && TLI->has(LibFunc_fabs)) || + (Name == "fabsf" && TLI->has(LibFunc_fabsf))) + return ConstantFoldFP(fabs, V, Ty); + else if ((Name == "floor" && TLI->has(LibFunc_floor)) || + (Name == "floorf" && TLI->has(LibFunc_floorf))) + return ConstantFoldFP(floor, V, Ty); + break; + case 'l': + if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) || + (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) || + (Name == "__log_finite" && V > 0 && + TLI->has(LibFunc_log_finite)) || + (Name == "__logf_finite" && V > 0 && + TLI->has(LibFunc_logf_finite))) + return ConstantFoldFP(log, V, Ty); + else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) || + (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) || + (Name == "__log10_finite" && V > 0 && + TLI->has(LibFunc_log10_finite)) || + (Name == "__log10f_finite" && V > 0 && + TLI->has(LibFunc_log10f_finite))) + return ConstantFoldFP(log10, V, Ty); + break; + case 'r': + if ((Name == "round" && TLI->has(LibFunc_round)) || + (Name == "roundf" && TLI->has(LibFunc_roundf))) + return ConstantFoldFP(round, V, Ty); + break; + case 's': + if ((Name == "sin" && TLI->has(LibFunc_sin)) || + (Name == "sinf" && TLI->has(LibFunc_sinf))) + return ConstantFoldFP(sin, V, Ty); + else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) || + (Name == "sinhf" && TLI->has(LibFunc_sinhf)) || + (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) || + (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite))) + return ConstantFoldFP(sinh, V, Ty); + else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) || + (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf))) + return ConstantFoldFP(sqrt, V, Ty); + break; + case 't': + if ((Name == "tan" && TLI->has(LibFunc_tan)) || + (Name == "tanf" && TLI->has(LibFunc_tanf))) + return ConstantFoldFP(tan, V, Ty); + else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) || + (Name == "tanhf" && TLI->has(LibFunc_tanhf))) + return ConstantFoldFP(tanh, V, Ty); + break; + default: + break; } - return nullptr; } - if (Operands.size() == 2) { - if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) { - if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) - return nullptr; - double Op1V = getValueAsDouble(Op1); - - if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) { - if (Op2->getType() != Op1->getType()) - return nullptr; + if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) { + switch (IntrinsicID) { + case Intrinsic::bswap: + return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap()); + case Intrinsic::ctpop: + return ConstantInt::get(Ty, Op->getValue().countPopulation()); + case Intrinsic::bitreverse: + return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits()); + case Intrinsic::convert_from_fp16: { + APFloat Val(APFloat::IEEEhalf(), Op->getValue()); + + bool lost = false; + APFloat::opStatus status = Val.convert( + Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost); + + // Conversion is always precise. 
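convert_from_fp16 above is folded by rebuilding the half value from its 16-bit payload and widening it to the destination semantics; since the conversion only widens, it can never lose precision, which the code checks with the assert just below. That step on its own (assumes llvm/ADT/APFloat.h and llvm/ADT/APInt.h; the destination semantics would normally come from the call's result type):

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include <cstdint>
using namespace llvm;

// Widen a raw IEEE-half bit pattern to DstSem (e.g. APFloat::IEEEsingle()
// or APFloat::IEEEdouble()). Widening from half is exact, so LosesInfo
// stays false and the conversion status is opOK.
APFloat foldConvertFromFP16(uint16_t Bits, const fltSemantics &DstSem) {
  APFloat Val(APFloat::IEEEhalf(), APInt(16, Bits));
  bool LosesInfo = false;
  Val.convert(DstSem, APFloat::rmNearestTiesToEven, &LosesInfo);
  return Val;
}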
+ (void)status; + assert(status == APFloat::opOK && !lost && + "Precision lost during fp16 constfolding"); + + return ConstantFP::get(Ty->getContext(), Val); + } + default: + return nullptr; + } + } - double Op2V = getValueAsDouble(Op2); - if (IntrinsicID == Intrinsic::pow) { - return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - } - if (IntrinsicID == Intrinsic::copysign) { - APFloat V1 = Op1->getValueAPF(); - const APFloat &V2 = Op2->getValueAPF(); - V1.copySign(V2); - return ConstantFP::get(Ty->getContext(), V1); - } + // Support ConstantVector in case we have an Undef in the top. + if (isa<ConstantVector>(Operands[0]) || + isa<ConstantDataVector>(Operands[0])) { + auto *Op = cast<Constant>(Operands[0]); + switch (IntrinsicID) { + default: break; + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + if (ConstantFP *FPOp = + dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) + return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/false, Ty, + /*IsSigned*/true); + break; + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: + if (ConstantFP *FPOp = + dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) + return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/true, Ty, + /*IsSigned*/true); + break; + } + } - if (IntrinsicID == Intrinsic::minnum) { - const APFloat &C1 = Op1->getValueAPF(); - const APFloat &C2 = Op2->getValueAPF(); - return ConstantFP::get(Ty->getContext(), minnum(C1, C2)); - } + return nullptr; +} - if (IntrinsicID == Intrinsic::maxnum) { - const APFloat &C1 = Op1->getValueAPF(); - const APFloat &C2 = Op2->getValueAPF(); - return ConstantFP::get(Ty->getContext(), maxnum(C1, C2)); - } +static Constant *ConstantFoldScalarCall2(StringRef Name, + Intrinsic::ID IntrinsicID, + Type *Ty, + ArrayRef<Constant *> Operands, + const TargetLibraryInfo *TLI, + const CallBase *Call) { + assert(Operands.size() == 2 && "Wrong number of operands."); - if (IntrinsicID == Intrinsic::minimum) { - const APFloat &C1 = Op1->getValueAPF(); - const APFloat &C2 = Op2->getValueAPF(); - return ConstantFP::get(Ty->getContext(), minimum(C1, C2)); - } + if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) { + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) + return nullptr; + double Op1V = getValueAsDouble(Op1); - if (IntrinsicID == Intrinsic::maximum) { - const APFloat &C1 = Op1->getValueAPF(); - const APFloat &C2 = Op2->getValueAPF(); - return ConstantFP::get(Ty->getContext(), maximum(C1, C2)); - } + if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) { + if (Op2->getType() != Op1->getType()) + return nullptr; - if (!TLI) - return nullptr; - if ((Name == "pow" && TLI->has(LibFunc_pow)) || - (Name == "powf" && TLI->has(LibFunc_powf)) || - (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) || - (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite))) - return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - if ((Name == "fmod" && TLI->has(LibFunc_fmod)) || - (Name == "fmodf" && TLI->has(LibFunc_fmodf))) - return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - if ((Name == "atan2" && TLI->has(LibFunc_atan2)) || - (Name == "atan2f" && TLI->has(LibFunc_atan2f)) || - (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) || - (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite))) - return ConstantFoldBinaryFP(atan2, Op1V, Op2V, 
Ty); - } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) { - if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) - return ConstantFP::get(Ty->getContext(), - APFloat((float)std::pow((float)Op1V, - (int)Op2C->getZExtValue()))); - if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy()) - return ConstantFP::get(Ty->getContext(), - APFloat((float)std::pow((float)Op1V, - (int)Op2C->getZExtValue()))); - if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy()) - return ConstantFP::get(Ty->getContext(), - APFloat((double)std::pow((double)Op1V, - (int)Op2C->getZExtValue()))); + double Op2V = getValueAsDouble(Op2); + if (IntrinsicID == Intrinsic::pow) { + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + } + if (IntrinsicID == Intrinsic::copysign) { + APFloat V1 = Op1->getValueAPF(); + const APFloat &V2 = Op2->getValueAPF(); + V1.copySign(V2); + return ConstantFP::get(Ty->getContext(), V1); } - return nullptr; - } - if (Operands[0]->getType()->isIntegerTy() && - Operands[1]->getType()->isIntegerTy()) { - const APInt *C0, *C1; - if (!getConstIntOrUndef(Operands[0], C0) || - !getConstIntOrUndef(Operands[1], C1)) - return nullptr; + if (IntrinsicID == Intrinsic::minnum) { + const APFloat &C1 = Op1->getValueAPF(); + const APFloat &C2 = Op2->getValueAPF(); + return ConstantFP::get(Ty->getContext(), minnum(C1, C2)); + } - switch (IntrinsicID) { - default: break; - case Intrinsic::smul_with_overflow: - case Intrinsic::umul_with_overflow: - // Even if both operands are undef, we cannot fold muls to undef - // in the general case. For example, on i2 there are no inputs - // that would produce { i2 -1, i1 true } as the result. - if (!C0 || !C1) - return Constant::getNullValue(Ty); - LLVM_FALLTHROUGH; - case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::usub_with_overflow: { - if (!C0 || !C1) - return UndefValue::get(Ty); + if (IntrinsicID == Intrinsic::maxnum) { + const APFloat &C1 = Op1->getValueAPF(); + const APFloat &C2 = Op2->getValueAPF(); + return ConstantFP::get(Ty->getContext(), maxnum(C1, C2)); + } - APInt Res; - bool Overflow; - switch (IntrinsicID) { - default: llvm_unreachable("Invalid case"); - case Intrinsic::sadd_with_overflow: - Res = C0->sadd_ov(*C1, Overflow); - break; - case Intrinsic::uadd_with_overflow: - Res = C0->uadd_ov(*C1, Overflow); - break; - case Intrinsic::ssub_with_overflow: - Res = C0->ssub_ov(*C1, Overflow); - break; - case Intrinsic::usub_with_overflow: - Res = C0->usub_ov(*C1, Overflow); - break; - case Intrinsic::smul_with_overflow: - Res = C0->smul_ov(*C1, Overflow); - break; - case Intrinsic::umul_with_overflow: - Res = C0->umul_ov(*C1, Overflow); - break; - } - Constant *Ops[] = { - ConstantInt::get(Ty->getContext(), Res), - ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow) - }; - return ConstantStruct::get(cast<StructType>(Ty), Ops); + if (IntrinsicID == Intrinsic::minimum) { + const APFloat &C1 = Op1->getValueAPF(); + const APFloat &C2 = Op2->getValueAPF(); + return ConstantFP::get(Ty->getContext(), minimum(C1, C2)); } - case Intrinsic::uadd_sat: - case Intrinsic::sadd_sat: - if (!C0 && !C1) - return UndefValue::get(Ty); - if (!C0 || !C1) - return Constant::getAllOnesValue(Ty); - if (IntrinsicID == Intrinsic::uadd_sat) - return ConstantInt::get(Ty, C0->uadd_sat(*C1)); - else - return ConstantInt::get(Ty, C0->sadd_sat(*C1)); - case Intrinsic::usub_sat: - case Intrinsic::ssub_sat: - if (!C0 && !C1) - return UndefValue::get(Ty); - if (!C0 || !C1) - return 
Constant::getNullValue(Ty); - if (IntrinsicID == Intrinsic::usub_sat) - return ConstantInt::get(Ty, C0->usub_sat(*C1)); - else - return ConstantInt::get(Ty, C0->ssub_sat(*C1)); - case Intrinsic::cttz: - case Intrinsic::ctlz: - assert(C1 && "Must be constant int"); - - // cttz(0, 1) and ctlz(0, 1) are undef. - if (C1->isOneValue() && (!C0 || C0->isNullValue())) - return UndefValue::get(Ty); - if (!C0) - return Constant::getNullValue(Ty); - if (IntrinsicID == Intrinsic::cttz) - return ConstantInt::get(Ty, C0->countTrailingZeros()); - else - return ConstantInt::get(Ty, C0->countLeadingZeros()); + + if (IntrinsicID == Intrinsic::maximum) { + const APFloat &C1 = Op1->getValueAPF(); + const APFloat &C2 = Op2->getValueAPF(); + return ConstantFP::get(Ty->getContext(), maximum(C1, C2)); } - return nullptr; + if (!TLI) + return nullptr; + if ((Name == "pow" && TLI->has(LibFunc_pow)) || + (Name == "powf" && TLI->has(LibFunc_powf)) || + (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) || + (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite))) + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + if ((Name == "fmod" && TLI->has(LibFunc_fmod)) || + (Name == "fmodf" && TLI->has(LibFunc_fmodf))) + return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); + if ((Name == "atan2" && TLI->has(LibFunc_atan2)) || + (Name == "atan2f" && TLI->has(LibFunc_atan2f)) || + (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) || + (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite))) + return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) { + if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) + return ConstantFP::get(Ty->getContext(), + APFloat((float)std::pow((float)Op1V, + (int)Op2C->getZExtValue()))); + if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy()) + return ConstantFP::get(Ty->getContext(), + APFloat((float)std::pow((float)Op1V, + (int)Op2C->getZExtValue()))); + if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy()) + return ConstantFP::get(Ty->getContext(), + APFloat((double)std::pow((double)Op1V, + (int)Op2C->getZExtValue()))); } + return nullptr; + } - // Support ConstantVector in case we have an Undef in the top. - if ((isa<ConstantVector>(Operands[0]) || - isa<ConstantDataVector>(Operands[0])) && - // Check for default rounding mode. - // FIXME: Support other rounding modes? - isa<ConstantInt>(Operands[1]) && - cast<ConstantInt>(Operands[1])->getValue() == 4) { - auto *Op = cast<Constant>(Operands[0]); + if (Operands[0]->getType()->isIntegerTy() && + Operands[1]->getType()->isIntegerTy()) { + const APInt *C0, *C1; + if (!getConstIntOrUndef(Operands[0], C0) || + !getConstIntOrUndef(Operands[1], C1)) + return nullptr; + + switch (IntrinsicID) { + default: break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + // Even if both operands are undef, we cannot fold muls to undef + // in the general case. For example, on i2 there are no inputs + // that would produce { i2 -1, i1 true } as the result. 
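The i2 claim in the comment above can be checked exhaustively. This small standalone C++ program (an illustration, not part of the patch) enumerates every pair of 2-bit signed operands and confirms that no input produces the pair { i2 -1, i1 true }:

#include <cassert>
#include <cstdio>

int main() {
  for (int A = -2; A <= 1; ++A)
    for (int B = -2; B <= 1; ++B) {
      int Wide = A * B;                                 // infinitely precise product
      bool Overflow = Wide < -2 || Wide > 1;            // does it still fit in i2?
      int Res = (int)(((unsigned)Wide & 3u) ^ 2u) - 2;  // sign-extend the low two bits
      assert(!(Res == -1 && Overflow) && "counterexample to the comment");
    }
  std::puts("no i2 inputs yield { i2 -1, i1 true }");
}

Because no concrete operands can justify that result, folding an undef multiply to undef would let later passes pick a value the operation can never produce, so the fold conservatively returns zero instead (the getNullValue case right below).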
+ if (!C0 || !C1) + return Constant::getNullValue(Ty); + LLVM_FALLTHROUGH; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: { + if (!C0 || !C1) + return UndefValue::get(Ty); + + APInt Res; + bool Overflow; switch (IntrinsicID) { - default: break; - case Intrinsic::x86_avx512_vcvtss2si32: - case Intrinsic::x86_avx512_vcvtss2si64: - case Intrinsic::x86_avx512_vcvtsd2si32: - case Intrinsic::x86_avx512_vcvtsd2si64: - if (ConstantFP *FPOp = - dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) - return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), - /*roundTowardZero=*/false, Ty, - /*IsSigned*/true); + default: llvm_unreachable("Invalid case"); + case Intrinsic::sadd_with_overflow: + Res = C0->sadd_ov(*C1, Overflow); + break; + case Intrinsic::uadd_with_overflow: + Res = C0->uadd_ov(*C1, Overflow); break; - case Intrinsic::x86_avx512_vcvtss2usi32: - case Intrinsic::x86_avx512_vcvtss2usi64: - case Intrinsic::x86_avx512_vcvtsd2usi32: - case Intrinsic::x86_avx512_vcvtsd2usi64: - if (ConstantFP *FPOp = - dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) - return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), - /*roundTowardZero=*/false, Ty, - /*IsSigned*/false); + case Intrinsic::ssub_with_overflow: + Res = C0->ssub_ov(*C1, Overflow); + break; + case Intrinsic::usub_with_overflow: + Res = C0->usub_ov(*C1, Overflow); break; - case Intrinsic::x86_avx512_cvttss2si: - case Intrinsic::x86_avx512_cvttss2si64: - case Intrinsic::x86_avx512_cvttsd2si: - case Intrinsic::x86_avx512_cvttsd2si64: - if (ConstantFP *FPOp = - dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) - return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), - /*roundTowardZero=*/true, Ty, - /*IsSigned*/true); + case Intrinsic::smul_with_overflow: + Res = C0->smul_ov(*C1, Overflow); break; - case Intrinsic::x86_avx512_cvttss2usi: - case Intrinsic::x86_avx512_cvttss2usi64: - case Intrinsic::x86_avx512_cvttsd2usi: - case Intrinsic::x86_avx512_cvttsd2usi64: - if (ConstantFP *FPOp = - dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) - return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), - /*roundTowardZero=*/true, Ty, - /*IsSigned*/false); + case Intrinsic::umul_with_overflow: + Res = C0->umul_ov(*C1, Overflow); break; } + Constant *Ops[] = { + ConstantInt::get(Ty->getContext(), Res), + ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow) + }; + return ConstantStruct::get(cast<StructType>(Ty), Ops); + } + case Intrinsic::uadd_sat: + case Intrinsic::sadd_sat: + if (!C0 && !C1) + return UndefValue::get(Ty); + if (!C0 || !C1) + return Constant::getAllOnesValue(Ty); + if (IntrinsicID == Intrinsic::uadd_sat) + return ConstantInt::get(Ty, C0->uadd_sat(*C1)); + else + return ConstantInt::get(Ty, C0->sadd_sat(*C1)); + case Intrinsic::usub_sat: + case Intrinsic::ssub_sat: + if (!C0 && !C1) + return UndefValue::get(Ty); + if (!C0 || !C1) + return Constant::getNullValue(Ty); + if (IntrinsicID == Intrinsic::usub_sat) + return ConstantInt::get(Ty, C0->usub_sat(*C1)); + else + return ConstantInt::get(Ty, C0->ssub_sat(*C1)); + case Intrinsic::cttz: + case Intrinsic::ctlz: + assert(C1 && "Must be constant int"); + + // cttz(0, 1) and ctlz(0, 1) are undef. 
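For the saturating cases above, the scalar semantics being folded are simple enough to model with plain integers; here is a sketch of the uadd.sat behaviour, not the APInt code itself:

#include <cstdint>
#include <limits>

// Unsigned saturating add: on overflow the result clamps to the all-ones
// value instead of wrapping. This is also why uadd.sat with one undef operand
// folds to getAllOnesValue above: the undef side can always be chosen so the
// result is the all-ones value.
static uint32_t uaddSat(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;                               // unsigned wrap on overflow
  return Sum < A ? std::numeric_limits<uint32_t>::max() : Sum;
}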
+ if (C1->isOneValue() && (!C0 || C0->isNullValue())) + return UndefValue::get(Ty); + if (!C0) + return Constant::getNullValue(Ty); + if (IntrinsicID == Intrinsic::cttz) + return ConstantInt::get(Ty, C0->countTrailingZeros()); + else + return ConstantInt::get(Ty, C0->countLeadingZeros()); } + return nullptr; } - if (Operands.size() != 3) - return nullptr; + // Support ConstantVector in case we have an Undef in the top. + if ((isa<ConstantVector>(Operands[0]) || + isa<ConstantDataVector>(Operands[0])) && + // Check for default rounding mode. + // FIXME: Support other rounding modes? + isa<ConstantInt>(Operands[1]) && + cast<ConstantInt>(Operands[1])->getValue() == 4) { + auto *Op = cast<Constant>(Operands[0]); + switch (IntrinsicID) { + default: break; + case Intrinsic::x86_avx512_vcvtss2si32: + case Intrinsic::x86_avx512_vcvtss2si64: + case Intrinsic::x86_avx512_vcvtsd2si32: + case Intrinsic::x86_avx512_vcvtsd2si64: + if (ConstantFP *FPOp = + dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) + return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/false, Ty, + /*IsSigned*/true); + break; + case Intrinsic::x86_avx512_vcvtss2usi32: + case Intrinsic::x86_avx512_vcvtss2usi64: + case Intrinsic::x86_avx512_vcvtsd2usi32: + case Intrinsic::x86_avx512_vcvtsd2usi64: + if (ConstantFP *FPOp = + dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) + return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/false, Ty, + /*IsSigned*/false); + break; + case Intrinsic::x86_avx512_cvttss2si: + case Intrinsic::x86_avx512_cvttss2si64: + case Intrinsic::x86_avx512_cvttsd2si: + case Intrinsic::x86_avx512_cvttsd2si64: + if (ConstantFP *FPOp = + dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) + return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/true, Ty, + /*IsSigned*/true); + break; + case Intrinsic::x86_avx512_cvttss2usi: + case Intrinsic::x86_avx512_cvttss2usi64: + case Intrinsic::x86_avx512_cvttsd2usi: + case Intrinsic::x86_avx512_cvttsd2usi64: + if (ConstantFP *FPOp = + dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) + return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/true, Ty, + /*IsSigned*/false); + break; + } + } + return nullptr; +} + +static Constant *ConstantFoldScalarCall3(StringRef Name, + Intrinsic::ID IntrinsicID, + Type *Ty, + ArrayRef<Constant *> Operands, + const TargetLibraryInfo *TLI, + const CallBase *Call) { + assert(Operands.size() == 3 && "Wrong number of operands."); if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) { if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) { @@ -2179,6 +2207,43 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, } } + if (const auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) { + if (const auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) { + if (const auto *Op3 = dyn_cast<ConstantInt>(Operands[2])) { + switch (IntrinsicID) { + default: break; + case Intrinsic::smul_fix: + case Intrinsic::smul_fix_sat: { + // This code performs rounding towards negative infinity in case the + // result cannot be represented exactly for the given scale. Targets + // that do care about rounding should use a target hook for specifying + // how rounding should be done, and provide their own folding to be + // consistent with rounding. This is the same approach as used by + // DAGTypeLegalizer::ExpandIntRes_MULFIX. 
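Before the APInt implementation that follows, a 32-bit model of the same computation may help: multiply in twice the width, arithmetic-shift right by the scale (which rounds toward negative infinity), optionally clamp, then truncate. This is only an illustration assuming two's-complement arithmetic shifts, not the patch's code:

#include <algorithm>
#include <cstdint>

// Fixed-point signed multiply with Scale fractional bits, mirroring the
// smul.fix folding below: widen, multiply, arithmetic shift, optionally
// saturate to the signed 32-bit range, then truncate back.
static int32_t smulFix32(int32_t A, int32_t B, unsigned Scale, bool Saturate) {
  int64_t Product = ((int64_t)A * (int64_t)B) >> Scale; // arithmetic shift
  if (Saturate) {
    Product = std::min<int64_t>(Product, INT32_MAX);
    Product = std::max<int64_t>(Product, INT32_MIN);
  }
  return (int32_t)Product; // truncate to the original width
}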
+ APInt Lhs = Op1->getValue(); + APInt Rhs = Op2->getValue(); + unsigned Scale = Op3->getValue().getZExtValue(); + unsigned Width = Lhs.getBitWidth(); + assert(Scale < Width && "Illegal scale."); + unsigned ExtendedWidth = Width * 2; + APInt Product = (Lhs.sextOrSelf(ExtendedWidth) * + Rhs.sextOrSelf(ExtendedWidth)).ashr(Scale); + if (IntrinsicID == Intrinsic::smul_fix_sat) { + APInt MaxValue = + APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth); + APInt MinValue = + APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth); + Product = APIntOps::smin(Product, MaxValue); + Product = APIntOps::smax(Product, MinValue); + } + return ConstantInt::get(Ty->getContext(), + Product.sextOrTrunc(Width)); + } + } + } + } + } + if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) { const APInt *C0, *C1, *C2; if (!getConstIntOrUndef(Operands[0], C0) || @@ -2212,11 +2277,31 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, return nullptr; } -Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, - VectorType *VTy, ArrayRef<Constant *> Operands, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - ImmutableCallSite CS) { +static Constant *ConstantFoldScalarCall(StringRef Name, + Intrinsic::ID IntrinsicID, + Type *Ty, + ArrayRef<Constant *> Operands, + const TargetLibraryInfo *TLI, + const CallBase *Call) { + if (Operands.size() == 1) + return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call); + + if (Operands.size() == 2) + return ConstantFoldScalarCall2(Name, IntrinsicID, Ty, Operands, TLI, Call); + + if (Operands.size() == 3) + return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call); + + return nullptr; +} + +static Constant *ConstantFoldVectorCall(StringRef Name, + Intrinsic::ID IntrinsicID, + VectorType *VTy, + ArrayRef<Constant *> Operands, + const DataLayout &DL, + const TargetLibraryInfo *TLI, + const CallBase *Call) { SmallVector<Constant *, 4> Result(VTy->getNumElements()); SmallVector<Constant *, 4> Lane(Operands.size()); Type *Ty = VTy->getElementType(); @@ -2263,10 +2348,8 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) { // Gather a column of constants. for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { - // These intrinsics use a scalar type for their second argument. - if (J == 1 && - (IntrinsicID == Intrinsic::cttz || IntrinsicID == Intrinsic::ctlz || - IntrinsicID == Intrinsic::powi)) { + // Some intrinsics use a scalar type for certain arguments. + if (hasVectorInstrinsicScalarOpd(IntrinsicID, J)) { Lane[J] = Operands[J]; continue; } @@ -2279,7 +2362,8 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, } // Use the regular scalar folding to simplify this column. 
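The vector path here is just the scalar folder applied once per element. A toy model of that loop, with plain C++ containers standing in for the Constant machinery and IsScalarOperand playing the role of hasVectorInstrinsicScalarOpd for operands such as the second argument of cttz/ctlz/powi:

#include <optional>
#include <vector>

// Gather one scalar per operand for lane I (passing intrinsic-wide scalar
// operands through unchanged), fold the lane, and give up entirely if any
// lane fails to fold.
static std::optional<std::vector<int>>
foldVectorCall(const std::vector<std::vector<int>> &Ops, unsigned NumElts,
               bool (*IsScalarOperand)(unsigned),
               std::optional<int> (*FoldScalar)(const std::vector<int> &)) {
  std::vector<int> Result(NumElts);
  std::vector<int> Lane(Ops.size());
  for (unsigned I = 0; I != NumElts; ++I) {
    for (unsigned J = 0; J != Ops.size(); ++J)
      Lane[J] = IsScalarOperand(J) ? Ops[J][0] : Ops[J][I];
    std::optional<int> Folded = FoldScalar(Lane);
    if (!Folded)
      return std::nullopt;
    Result[I] = *Folded;
  }
  return Result;
}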
- Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, CS); + Constant *Folded = + ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call); if (!Folded) return nullptr; Result[I] = Folded; @@ -2290,11 +2374,10 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, } // end anonymous namespace -Constant * -llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F, - ArrayRef<Constant *> Operands, - const TargetLibraryInfo *TLI) { - if (CS.isNoBuiltin() || CS.isStrictFP()) +Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F, + ArrayRef<Constant *> Operands, + const TargetLibraryInfo *TLI) { + if (Call->isNoBuiltin() || Call->isStrictFP()) return nullptr; if (!F->hasName()) return nullptr; @@ -2304,17 +2387,19 @@ llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F, if (auto *VTy = dyn_cast<VectorType>(Ty)) return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands, - F->getParent()->getDataLayout(), TLI, CS); + F->getParent()->getDataLayout(), TLI, Call); - return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI, CS); + return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI, + Call); } -bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { +bool llvm::isMathLibCallNoop(const CallBase *Call, + const TargetLibraryInfo *TLI) { // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap // (and to some extent ConstantFoldScalarCall). - if (CS.isNoBuiltin() || CS.isStrictFP()) + if (Call->isNoBuiltin() || Call->isStrictFP()) return false; - Function *F = CS.getCalledFunction(); + Function *F = Call->getCalledFunction(); if (!F) return false; @@ -2322,8 +2407,8 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { if (!TLI || !TLI->getLibFunc(*F, Func)) return false; - if (CS.getNumArgOperands() == 1) { - if (ConstantFP *OpC = dyn_cast<ConstantFP>(CS.getArgOperand(0))) { + if (Call->getNumArgOperands() == 1) { + if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) { const APFloat &Op = OpC->getValueAPF(); switch (Func) { case LibFunc_logl: @@ -2421,9 +2506,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { } } - if (CS.getNumArgOperands() == 2) { - ConstantFP *Op0C = dyn_cast<ConstantFP>(CS.getArgOperand(0)); - ConstantFP *Op1C = dyn_cast<ConstantFP>(CS.getArgOperand(1)); + if (Call->getNumArgOperands() == 2) { + ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0)); + ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1)); if (Op0C && Op1C) { const APFloat &Op0 = Op0C->getValueAPF(); const APFloat &Op1 = Op1C->getValueAPF(); diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index 3d55bf20bb40..bf0cdbfd0c8b 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -1,9 +1,8 @@ //===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
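Stepping back to the ConstantFolding entry points updated above: callers now hand the CallBase straight to ConstantFoldCall instead of wrapping it in an ImmutableCallSite. A hedged caller-side sketch, assuming the surrounding pass already has a TargetLibraryInfo; the function name is illustrative, not from the patch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Collect constant arguments and pass the CallBase itself to the updated
// ConstantFoldCall(const CallBase *, Function *, ArrayRef<Constant *>, ...).
static Constant *tryFoldCall(CallBase &CB, const TargetLibraryInfo &TLI) {
  Function *F = CB.getCalledFunction();
  if (!F)
    return nullptr;                  // indirect call: nothing to fold
  SmallVector<Constant *, 4> ConstArgs;
  for (Value *A : CB.args()) {
    auto *C = dyn_cast<Constant>(A);
    if (!C)
      return nullptr;                // only fully-constant calls fold
    ConstArgs.push_back(C);
  }
  return ConstantFoldCall(&CB, F, ConstArgs, &TLI);
}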
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp index 4cafb7da16d3..c1043e446beb 100644 --- a/lib/Analysis/Delinearization.cpp +++ b/lib/Analysis/Delinearization.cpp @@ -1,9 +1,8 @@ //===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 34f785fb02be..01b8ff10d355 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -1,9 +1,8 @@ //===- DemandedBits.cpp - Determine demanded bits -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -340,6 +339,8 @@ void DemandedBits::performAnalysis() { Type *T = J->getType(); if (T->isIntOrIntVectorTy()) AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits()); + else + Visited.insert(J); Worklist.insert(J); } } @@ -355,16 +356,18 @@ void DemandedBits::performAnalysis() { LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI); APInt AOut; + bool InputIsKnownDead = false; if (UserI->getType()->isIntOrIntVectorTy()) { AOut = AliveBits[UserI]; LLVM_DEBUG(dbgs() << " Alive Out: 0x" << Twine::utohexstr(AOut.getLimitedValue())); + + // If all bits of the output are dead, then all bits of the input + // are also dead. + InputIsKnownDead = !AOut && !isAlwaysLive(UserI); } LLVM_DEBUG(dbgs() << "\n"); - if (!UserI->getType()->isIntOrIntVectorTy()) - Visited.insert(UserI); - KnownBits Known, Known2; bool KnownBitsComputed = false; // Compute the set of alive bits for each operand. These are anded into the @@ -381,10 +384,7 @@ void DemandedBits::performAnalysis() { if (T->isIntOrIntVectorTy()) { unsigned BitWidth = T->getScalarSizeInBits(); APInt AB = APInt::getAllOnesValue(BitWidth); - if (UserI->getType()->isIntOrIntVectorTy() && !AOut && - !isAlwaysLive(UserI)) { - // If all bits of the output are dead, then all bits of the input - // are also dead. + if (InputIsKnownDead) { AB = APInt(BitWidth, 0); } else { // Bits of each operand that are used to compute alive bits of the @@ -403,18 +403,13 @@ void DemandedBits::performAnalysis() { // If we've added to the set of alive bits (or the operand has not // been previously visited), then re-queue the operand to be visited // again. 
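The DemandedBits rework in the hunk that follows replaces a find-then-insert sequence with a single try_emplace. The shape of that pattern, with std::map standing in for DenseMap and a plain integer mask for APInt (a sketch, not the patch's types):

#include <cstdint>
#include <map>
#include <set>

// One lookup instead of find + insert: default-construct the entry if it is
// missing, then re-queue the operand only when it is new or its alive-bit
// mask actually grew.
static void addAliveBits(std::map<const void *, uint64_t> &AliveBits,
                         std::set<const void *> &Worklist, const void *I,
                         uint64_t AB) {
  auto Res = AliveBits.try_emplace(I);   // Res.second is true if newly inserted
  if (Res.second || (AB |= Res.first->second) != Res.first->second) {
    Res.first->second = AB;
    Worklist.insert(I);
  }
}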
- APInt ABPrev(BitWidth, 0); - auto ABI = AliveBits.find(I); - if (ABI != AliveBits.end()) - ABPrev = ABI->second; - - APInt ABNew = AB | ABPrev; - if (ABNew != ABPrev || ABI == AliveBits.end()) { - AliveBits[I] = std::move(ABNew); + auto Res = AliveBits.try_emplace(I); + if (Res.second || (AB |= Res.first->second) != Res.first->second) { + Res.first->second = std::move(AB); Worklist.insert(I); } } - } else if (I && !Visited.count(I)) { + } else if (I && Visited.insert(I).second) { Worklist.insert(I); } } diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 3f4dfa52e1da..75f269e84f9d 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -1,9 +1,8 @@ //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -110,6 +109,14 @@ STATISTIC(BanerjeeSuccesses, "Banerjee successes"); static cl::opt<bool> Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Try to delinearize array references.")); +static cl::opt<bool> DisableDelinearizationChecks( + "da-disable-delinearization-checks", cl::init(false), cl::Hidden, + cl::ZeroOrMore, + cl::desc( + "Disable checks that try to statically verify validity of " + "delinearized subscripts. Enabling this option may result in incorrect " + "dependence vectors for languages that allow the subscript of one " + "dimension to underflow or overflow into another dimension.")); //===----------------------------------------------------------------------===// // basics @@ -3317,19 +3324,20 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, // and dst. // FIXME: It may be better to record these sizes and add them as constraints // to the dependency checks. - for (int i = 1; i < size; ++i) { - if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr)) - return false; + if (!DisableDelinearizationChecks) + for (int i = 1; i < size; ++i) { + if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr)) + return false; - if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1])) - return false; + if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1])) + return false; - if (!isKnownNonNegative(DstSubscripts[i], DstPtr)) - return false; + if (!isKnownNonNegative(DstSubscripts[i], DstPtr)) + return false; - if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1])) - return false; - } + if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1])) + return false; + } LLVM_DEBUG({ dbgs() << "\nSrcSubscripts: "; @@ -3369,6 +3377,19 @@ static void dumpSmallBitVector(SmallBitVector &BV) { } #endif +bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // Check if the analysis itself has been invalidated. + auto PAC = PA.getChecker<DependenceAnalysis>(); + if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) + return true; + + // Check transitive dependencies. 
+ return Inv.invalidate<AAManager>(F, PA) || + Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || + Inv.invalidate<LoopAnalysis>(F, PA); +} + // depends - // Returns NULL if there is no dependence. // Otherwise, return a Dependence with as many details as possible. @@ -3510,7 +3531,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, // to either Separable or Coupled). // // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. - // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, + // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, // so Pair[3].Group = {0, 1, 3} and Done = false. // // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp index 7ba23854a3cc..0ccd59ef2bfd 100644 --- a/lib/Analysis/DivergenceAnalysis.cpp +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -1,9 +1,8 @@ //===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp index 8abc0e7d0df9..d9f43dd746ef 100644 --- a/lib/Analysis/DomPrinter.cpp +++ b/lib/Analysis/DomPrinter.cpp @@ -1,9 +1,8 @@ //===- DomPrinter.cpp - DOT printer for the dominance trees ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/DomTreeUpdater.cpp b/lib/Analysis/DomTreeUpdater.cpp new file mode 100644 index 000000000000..49215889cfd6 --- /dev/null +++ b/lib/Analysis/DomTreeUpdater.cpp @@ -0,0 +1,533 @@ +//===- DomTreeUpdater.cpp - DomTree/Post DomTree Updater --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the DomTreeUpdater class, which provides a uniform way +// to update dominator tree related data structures. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Support/GenericDomTree.h" +#include <algorithm> +#include <functional> +#include <utility> + +namespace llvm { + +bool DomTreeUpdater::isUpdateValid( + const DominatorTree::UpdateType Update) const { + const auto *From = Update.getFrom(); + const auto *To = Update.getTo(); + const auto Kind = Update.getKind(); + + // Discard updates by inspecting the current state of successors of From. + // Since isUpdateValid() must be called *after* the Terminator of From is + // altered we can determine if the update is unnecessary for batch updates + // or invalid for a single update. + const bool HasEdge = llvm::any_of( + successors(From), [To](const BasicBlock *B) { return B == To; }); + + // If the IR does not match the update, + // 1. In batch updates, this update is unnecessary. + // 2. When called by insertEdge*()/deleteEdge*(), this update is invalid. + // Edge does not exist in IR. + if (Kind == DominatorTree::Insert && !HasEdge) + return false; + + // Edge exists in IR. + if (Kind == DominatorTree::Delete && HasEdge) + return false; + + return true; +} + +bool DomTreeUpdater::isSelfDominance( + const DominatorTree::UpdateType Update) const { + // Won't affect DomTree and PostDomTree. + return Update.getFrom() == Update.getTo(); +} + +void DomTreeUpdater::applyDomTreeUpdates() { + // No pending DomTreeUpdates. + if (Strategy != UpdateStrategy::Lazy || !DT) + return; + + // Only apply updates not are applied by DomTree. + if (hasPendingDomTreeUpdates()) { + const auto I = PendUpdates.begin() + PendDTUpdateIndex; + const auto E = PendUpdates.end(); + assert(I < E && "Iterator range invalid; there should be DomTree updates."); + DT->applyUpdates(ArrayRef<DominatorTree::UpdateType>(I, E)); + PendDTUpdateIndex = PendUpdates.size(); + } +} + +void DomTreeUpdater::flush() { + applyDomTreeUpdates(); + applyPostDomTreeUpdates(); + dropOutOfDateUpdates(); +} + +void DomTreeUpdater::applyPostDomTreeUpdates() { + // No pending PostDomTreeUpdates. + if (Strategy != UpdateStrategy::Lazy || !PDT) + return; + + // Only apply updates not are applied by PostDomTree. + if (hasPendingPostDomTreeUpdates()) { + const auto I = PendUpdates.begin() + PendPDTUpdateIndex; + const auto E = PendUpdates.end(); + assert(I < E && + "Iterator range invalid; there should be PostDomTree updates."); + PDT->applyUpdates(ArrayRef<DominatorTree::UpdateType>(I, E)); + PendPDTUpdateIndex = PendUpdates.size(); + } +} + +void DomTreeUpdater::tryFlushDeletedBB() { + if (!hasPendingUpdates()) + forceFlushDeletedBB(); +} + +bool DomTreeUpdater::forceFlushDeletedBB() { + if (DeletedBBs.empty()) + return false; + + for (auto *BB : DeletedBBs) { + // After calling deleteBB or callbackDeleteBB under Lazy UpdateStrategy, + // validateDeleteBB() removes all instructions of DelBB and adds an + // UnreachableInst as its terminator. So we check whether the BasicBlock to + // delete only has an UnreachableInst inside. 
+ assert(BB->getInstList().size() == 1 && + isa<UnreachableInst>(BB->getTerminator()) && + "DelBB has been modified while awaiting deletion."); + BB->removeFromParent(); + eraseDelBBNode(BB); + delete BB; + } + DeletedBBs.clear(); + Callbacks.clear(); + return true; +} + +void DomTreeUpdater::recalculate(Function &F) { + + if (Strategy == UpdateStrategy::Eager) { + if (DT) + DT->recalculate(F); + if (PDT) + PDT->recalculate(F); + return; + } + + // There is little performance gain if we pend the recalculation under + // Lazy UpdateStrategy so we recalculate available trees immediately. + + // Prevent forceFlushDeletedBB() from erasing DomTree or PostDomTree nodes. + IsRecalculatingDomTree = IsRecalculatingPostDomTree = true; + + // Because all trees are going to be up-to-date after recalculation, + // flush awaiting deleted BasicBlocks. + forceFlushDeletedBB(); + if (DT) + DT->recalculate(F); + if (PDT) + PDT->recalculate(F); + + // Resume forceFlushDeletedBB() to erase DomTree or PostDomTree nodes. + IsRecalculatingDomTree = IsRecalculatingPostDomTree = false; + PendDTUpdateIndex = PendPDTUpdateIndex = PendUpdates.size(); + dropOutOfDateUpdates(); +} + +bool DomTreeUpdater::hasPendingUpdates() const { + return hasPendingDomTreeUpdates() || hasPendingPostDomTreeUpdates(); +} + +bool DomTreeUpdater::hasPendingDomTreeUpdates() const { + if (!DT) + return false; + return PendUpdates.size() != PendDTUpdateIndex; +} + +bool DomTreeUpdater::hasPendingPostDomTreeUpdates() const { + if (!PDT) + return false; + return PendUpdates.size() != PendPDTUpdateIndex; +} + +bool DomTreeUpdater::isBBPendingDeletion(llvm::BasicBlock *DelBB) const { + if (Strategy == UpdateStrategy::Eager || DeletedBBs.empty()) + return false; + return DeletedBBs.count(DelBB) != 0; +} + +// The DT and PDT require the nodes related to updates +// are not deleted when update functions are called. +// So BasicBlock deletions must be pended when the +// UpdateStrategy is Lazy. When the UpdateStrategy is +// Eager, the BasicBlock will be deleted immediately. +void DomTreeUpdater::deleteBB(BasicBlock *DelBB) { + validateDeleteBB(DelBB); + if (Strategy == UpdateStrategy::Lazy) { + DeletedBBs.insert(DelBB); + return; + } + + DelBB->removeFromParent(); + eraseDelBBNode(DelBB); + delete DelBB; +} + +void DomTreeUpdater::callbackDeleteBB( + BasicBlock *DelBB, std::function<void(BasicBlock *)> Callback) { + validateDeleteBB(DelBB); + if (Strategy == UpdateStrategy::Lazy) { + Callbacks.push_back(CallBackOnDeletion(DelBB, Callback)); + DeletedBBs.insert(DelBB); + return; + } + + DelBB->removeFromParent(); + eraseDelBBNode(DelBB); + Callback(DelBB); + delete DelBB; +} + +void DomTreeUpdater::eraseDelBBNode(BasicBlock *DelBB) { + if (DT && !IsRecalculatingDomTree) + if (DT->getNode(DelBB)) + DT->eraseNode(DelBB); + + if (PDT && !IsRecalculatingPostDomTree) + if (PDT->getNode(DelBB)) + PDT->eraseNode(DelBB); +} + +void DomTreeUpdater::validateDeleteBB(BasicBlock *DelBB) { + assert(DelBB && "Invalid push_back of nullptr DelBB."); + assert(pred_empty(DelBB) && "DelBB has one or more predecessors."); + // DelBB is unreachable and all its instructions are dead. + while (!DelBB->empty()) { + Instruction &I = DelBB->back(); + // Replace used instructions with an arbitrary value (undef). + if (!I.use_empty()) + I.replaceAllUsesWith(llvm::UndefValue::get(I.getType())); + DelBB->getInstList().pop_back(); + } + // Make sure DelBB has a valid terminator instruction. As long as DelBB is a + // Child of Function F it must contain valid IR. 
+ new UnreachableInst(DelBB->getContext(), DelBB); +} + +void DomTreeUpdater::applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates) { + if (!DT && !PDT) + return; + + if (Strategy == UpdateStrategy::Lazy) { + for (const auto U : Updates) + if (!isSelfDominance(U)) + PendUpdates.push_back(U); + + return; + } + + if (DT) + DT->applyUpdates(Updates); + if (PDT) + PDT->applyUpdates(Updates); +} + +void DomTreeUpdater::applyUpdatesPermissive( + ArrayRef<DominatorTree::UpdateType> Updates) { + if (!DT && !PDT) + return; + + SmallSet<std::pair<BasicBlock *, BasicBlock *>, 8> Seen; + SmallVector<DominatorTree::UpdateType, 8> DeduplicatedUpdates; + for (const auto U : Updates) { + auto Edge = std::make_pair(U.getFrom(), U.getTo()); + // Because it is illegal to submit updates that have already been applied + // and updates to an edge need to be strictly ordered, + // it is safe to infer the existence of an edge from the first update + // to this edge. + // If the first update to an edge is "Delete", it means that the edge + // existed before. If the first update to an edge is "Insert", it means + // that the edge didn't exist before. + // + // For example, if the user submits {{Delete, A, B}, {Insert, A, B}}, + // because + // 1. it is illegal to submit updates that have already been applied, + // i.e., user cannot delete an nonexistent edge, + // 2. updates to an edge need to be strictly ordered, + // So, initially edge A -> B existed. + // We can then safely ignore future updates to this edge and directly + // inspect the current CFG: + // a. If the edge still exists, because the user cannot insert an existent + // edge, so both {Delete, A, B}, {Insert, A, B} actually happened and + // resulted in a no-op. DTU won't submit any update in this case. + // b. If the edge doesn't exist, we can then infer that {Delete, A, B} + // actually happened but {Insert, A, B} was an invalid update which never + // happened. DTU will submit {Delete, A, B} in this case. + if (!isSelfDominance(U) && Seen.count(Edge) == 0) { + Seen.insert(Edge); + // If the update doesn't appear in the CFG, it means that + // either the change isn't made or relevant operations + // result in a no-op. + if (isUpdateValid(U)) { + if (isLazy()) + PendUpdates.push_back(U); + else + DeduplicatedUpdates.push_back(U); + } + } + } + + if (Strategy == UpdateStrategy::Lazy) + return; + + if (DT) + DT->applyUpdates(DeduplicatedUpdates); + if (PDT) + PDT->applyUpdates(DeduplicatedUpdates); +} + +DominatorTree &DomTreeUpdater::getDomTree() { + assert(DT && "Invalid acquisition of a null DomTree"); + applyDomTreeUpdates(); + dropOutOfDateUpdates(); + return *DT; +} + +PostDominatorTree &DomTreeUpdater::getPostDomTree() { + assert(PDT && "Invalid acquisition of a null PostDomTree"); + applyPostDomTreeUpdates(); + dropOutOfDateUpdates(); + return *PDT; +} + +void DomTreeUpdater::insertEdge(BasicBlock *From, BasicBlock *To) { + +#ifndef NDEBUG + assert(isUpdateValid({DominatorTree::Insert, From, To}) && + "Inserted edge does not appear in the CFG"); +#endif + + if (!DT && !PDT) + return; + + // Won't affect DomTree and PostDomTree; discard update. 
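Taken together, the interface above (applyUpdates, insertEdge/deleteEdge, deleteBB, getDomTree/flush) is typically driven from transform code along these lines. This is a hedged usage sketch based only on what this file exercises; the exact constructor overload and the block names are assumptions:

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

static void rewireAndPrune(DominatorTree &DT, PostDominatorTree &PDT,
                           BasicBlock *From, BasicBlock *To, BasicBlock *Dead) {
  // Lazy strategy: CFG edits are queued and only applied when a tree is used.
  DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
  DTU.applyUpdates({{DominatorTree::Insert, From, To},
                    {DominatorTree::Delete, From, Dead}});
  // Dead is assumed to already be unreachable; deleteBB requires it to have
  // no predecessors, and under Lazy the erase is pended until it is safe.
  DTU.deleteBB(Dead);
  DominatorTree &UpToDate = DTU.getDomTree(); // flushes pending DomTree updates
  (void)UpToDate;
}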
+ if (From == To) + return; + + if (Strategy == UpdateStrategy::Eager) { + if (DT) + DT->insertEdge(From, To); + if (PDT) + PDT->insertEdge(From, To); + return; + } + + PendUpdates.push_back({DominatorTree::Insert, From, To}); +} + +void DomTreeUpdater::insertEdgeRelaxed(BasicBlock *From, BasicBlock *To) { + if (From == To) + return; + + if (!DT && !PDT) + return; + + if (!isUpdateValid({DominatorTree::Insert, From, To})) + return; + + if (Strategy == UpdateStrategy::Eager) { + if (DT) + DT->insertEdge(From, To); + if (PDT) + PDT->insertEdge(From, To); + return; + } + + PendUpdates.push_back({DominatorTree::Insert, From, To}); +} + +void DomTreeUpdater::deleteEdge(BasicBlock *From, BasicBlock *To) { + +#ifndef NDEBUG + assert(isUpdateValid({DominatorTree::Delete, From, To}) && + "Deleted edge still exists in the CFG!"); +#endif + + if (!DT && !PDT) + return; + + // Won't affect DomTree and PostDomTree; discard update. + if (From == To) + return; + + if (Strategy == UpdateStrategy::Eager) { + if (DT) + DT->deleteEdge(From, To); + if (PDT) + PDT->deleteEdge(From, To); + return; + } + + PendUpdates.push_back({DominatorTree::Delete, From, To}); +} + +void DomTreeUpdater::deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To) { + if (From == To) + return; + + if (!DT && !PDT) + return; + + if (!isUpdateValid({DominatorTree::Delete, From, To})) + return; + + if (Strategy == UpdateStrategy::Eager) { + if (DT) + DT->deleteEdge(From, To); + if (PDT) + PDT->deleteEdge(From, To); + return; + } + + PendUpdates.push_back({DominatorTree::Delete, From, To}); +} + +void DomTreeUpdater::dropOutOfDateUpdates() { + if (Strategy == DomTreeUpdater::UpdateStrategy::Eager) + return; + + tryFlushDeletedBB(); + + // Drop all updates applied by both trees. + if (!DT) + PendDTUpdateIndex = PendUpdates.size(); + if (!PDT) + PendPDTUpdateIndex = PendUpdates.size(); + + const size_t dropIndex = std::min(PendDTUpdateIndex, PendPDTUpdateIndex); + const auto B = PendUpdates.begin(); + const auto E = PendUpdates.begin() + dropIndex; + assert(B <= E && "Iterator out of range."); + PendUpdates.erase(B, E); + // Calculate current index. 
+ PendDTUpdateIndex -= dropIndex; + PendPDTUpdateIndex -= dropIndex; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void DomTreeUpdater::dump() const { + raw_ostream &OS = llvm::dbgs(); + + OS << "Available Trees: "; + if (DT || PDT) { + if (DT) + OS << "DomTree "; + if (PDT) + OS << "PostDomTree "; + OS << "\n"; + } else + OS << "None\n"; + + OS << "UpdateStrategy: "; + if (Strategy == UpdateStrategy::Eager) { + OS << "Eager\n"; + return; + } else + OS << "Lazy\n"; + int Index = 0; + + auto printUpdates = + [&](ArrayRef<DominatorTree::UpdateType>::const_iterator begin, + ArrayRef<DominatorTree::UpdateType>::const_iterator end) { + if (begin == end) + OS << " None\n"; + Index = 0; + for (auto It = begin, ItEnd = end; It != ItEnd; ++It) { + auto U = *It; + OS << " " << Index << " : "; + ++Index; + if (U.getKind() == DominatorTree::Insert) + OS << "Insert, "; + else + OS << "Delete, "; + BasicBlock *From = U.getFrom(); + if (From) { + auto S = From->getName(); + if (!From->hasName()) + S = "(no name)"; + OS << S << "(" << From << "), "; + } else { + OS << "(badref), "; + } + BasicBlock *To = U.getTo(); + if (To) { + auto S = To->getName(); + if (!To->hasName()) + S = "(no_name)"; + OS << S << "(" << To << ")\n"; + } else { + OS << "(badref)\n"; + } + } + }; + + if (DT) { + const auto I = PendUpdates.begin() + PendDTUpdateIndex; + assert(PendUpdates.begin() <= I && I <= PendUpdates.end() && + "Iterator out of range."); + OS << "Applied but not cleared DomTreeUpdates:\n"; + printUpdates(PendUpdates.begin(), I); + OS << "Pending DomTreeUpdates:\n"; + printUpdates(I, PendUpdates.end()); + } + + if (PDT) { + const auto I = PendUpdates.begin() + PendPDTUpdateIndex; + assert(PendUpdates.begin() <= I && I <= PendUpdates.end() && + "Iterator out of range."); + OS << "Applied but not cleared PostDomTreeUpdates:\n"; + printUpdates(PendUpdates.begin(), I); + OS << "Pending PostDomTreeUpdates:\n"; + printUpdates(I, PendUpdates.end()); + } + + OS << "Pending DeletedBBs:\n"; + Index = 0; + for (auto BB : DeletedBBs) { + OS << " " << Index << " : "; + ++Index; + if (BB->hasName()) + OS << BB->getName() << "("; + else + OS << "(no_name)("; + OS << BB << ")\n"; + } + + OS << "Pending Callbacks:\n"; + Index = 0; + for (auto BB : Callbacks) { + OS << " " << Index << " : "; + ++Index; + if (BB->hasName()) + OS << BB->getName() << "("; + else + OS << "(no_name)("; + OS << BB << ")\n"; + } +} +#endif +} // namespace llvm diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp index de7f62cf4ecd..f9a554acb7ea 100644 --- a/lib/Analysis/DominanceFrontier.cpp +++ b/lib/Analysis/DominanceFrontier.cpp @@ -1,9 +1,8 @@ //===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/EHPersonalities.cpp b/lib/Analysis/EHPersonalities.cpp index 0df73aeebbdc..2242541696a4 100644 --- a/lib/Analysis/EHPersonalities.cpp +++ b/lib/Analysis/EHPersonalities.cpp @@ -1,9 +1,8 @@ //===- EHPersonalities.cpp - Compute EH-related information ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp index b28abcadca4a..0d6c0ffb18a8 100644 --- a/lib/Analysis/GlobalsModRef.cpp +++ b/lib/Analysis/GlobalsModRef.cpp @@ -1,9 +1,8 @@ //===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -514,7 +513,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { break; } - if (F->isDeclaration() || F->hasFnAttribute(Attribute::OptimizeNone)) { + if (F->isDeclaration() || F->hasOptNone()) { // Try to get mod/ref behaviour from function attributes. if (F->doesNotAccessMemory()) { // Can't do better than that! @@ -567,7 +566,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // Don't prove any properties based on the implementation of an optnone // function. Function attributes were already used as a best approximation // above. - if (Node->getFunction()->hasFnAttribute(Attribute::OptimizeNone)) + if (Node->getFunction()->hasOptNone()) continue; for (Instruction &I : instructions(Node->getFunction())) { @@ -597,7 +596,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { } // All non-call instructions we use the primary predicates for whether - // thay read or write memory. + // they read or write memory. if (I.mayReadFromMemory()) FI.addModRefInfo(ModRefInfo::Ref); if (I.mayWriteToMemory()) @@ -791,10 +790,10 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV, } // FIXME: It would be good to handle other obvious no-alias cases here, but - // it isn't clear how to do so reasonbly without building a small version + // it isn't clear how to do so reasonably without building a small version // of BasicAA into this code. We could recurse into AAResultBase::alias // here but that seems likely to go poorly as we're inside the - // implementation of such a query. Until then, just conservatievly retun + // implementation of such a query. Until then, just conservatively return // false. 
return false; } while (!Inputs.empty()); @@ -807,7 +806,8 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV, /// other is some random pointer, we know there cannot be an alias, because the /// address of the global isn't taken. AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { + const MemoryLocation &LocB, + AAQueryInfo &AAQI) { // Get the base object these pointers point to. const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL); const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL); @@ -882,11 +882,12 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA, if ((GV1 || GV2) && GV1 != GV2) return NoAlias; - return AAResultBase::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB, AAQI); } ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call, - const GlobalValue *GV) { + const GlobalValue *GV, + AAQueryInfo &AAQI) { if (Call->doesNotAccessMemory()) return ModRefInfo::NoModRef; ModRefInfo ConservativeResult = @@ -895,14 +896,15 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call, // Iterate through all the arguments to the called function. If any argument // is based on GV, return the conservative result. for (auto &A : Call->args()) { - SmallVector<Value*, 4> Objects; + SmallVector<const Value*, 4> Objects; GetUnderlyingObjects(A, Objects, DL); // All objects must be identified. if (!all_of(Objects, isIdentifiedObject) && // Try ::alias to see if all objects are known not to alias GV. - !all_of(Objects, [&](Value *V) { - return this->alias(MemoryLocation(V), MemoryLocation(GV)) == NoAlias; + !all_of(Objects, [&](const Value *V) { + return this->alias(MemoryLocation(V), MemoryLocation(GV), AAQI) == + NoAlias; })) return ConservativeResult; @@ -915,7 +917,8 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call, } ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, - const MemoryLocation &Loc) { + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { ModRefInfo Known = ModRefInfo::ModRef; // If we are asking for mod/ref info of a direct call with a pointer to a @@ -927,11 +930,11 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, if (NonAddressTakenGlobals.count(GV)) if (const FunctionInfo *FI = getFunctionInfo(F)) Known = unionModRef(FI->getModRefInfoForGlobal(*GV), - getModRefInfoForArgument(Call, GV)); + getModRefInfoForArgument(Call, GV, AAQI)); if (!isModOrRefSet(Known)) return ModRefInfo::NoModRef; // No need to query other mod/ref analyses - return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc)); + return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc, AAQI)); } GlobalsAAResult::GlobalsAAResult(const DataLayout &DL, diff --git a/lib/Analysis/GuardUtils.cpp b/lib/Analysis/GuardUtils.cpp index 08fa6abeafb5..cad92f6e56bb 100644 --- a/lib/Analysis/GuardUtils.cpp +++ b/lib/Analysis/GuardUtils.cpp @@ -1,9 +1,8 @@ //===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Utils that are used to perform analyzes related to guards and their @@ -19,3 +18,32 @@ bool llvm::isGuard(const User *U) { using namespace llvm::PatternMatch; return match(U, m_Intrinsic<Intrinsic::experimental_guard>()); } + +bool llvm::isGuardAsWidenableBranch(const User *U) { + Value *Condition, *WidenableCondition; + BasicBlock *GuardedBB, *DeoptBB; + if (!parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB, + DeoptBB)) + return false; + using namespace llvm::PatternMatch; + for (auto &Insn : *DeoptBB) { + if (match(&Insn, m_Intrinsic<Intrinsic::experimental_deoptimize>())) + return true; + if (Insn.mayHaveSideEffects()) + return false; + } + return false; +} + +bool llvm::parseWidenableBranch(const User *U, Value *&Condition, + Value *&WidenableCondition, + BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB) { + using namespace llvm::PatternMatch; + if (!match(U, m_Br(m_And(m_Value(Condition), m_Value(WidenableCondition)), + IfTrueBB, IfFalseBB))) + return false; + // TODO: At the moment, we only recognize the branch if the WC call in this + // specific position. We should generalize! + return match(WidenableCondition, + m_Intrinsic<Intrinsic::experimental_widenable_condition>()); +} diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp index aaebc4a481ec..ce285f82f720 100644 --- a/lib/Analysis/IVDescriptors.cpp +++ b/lib/Analysis/IVDescriptors.cpp @@ -1,9 +1,8 @@ //===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,6 +14,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -26,7 +26,6 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -252,6 +251,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Worklist.push_back(Start); VisitedInsts.insert(Start); + // Start with all flags set because we will intersect this with the reduction + // flags from all the reduction operations. 
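The flag handling introduced just below starts from the most permissive set and ANDs in the flags of every floating-point operation on the reduction chain, so the final descriptor only carries flags common to all of them. A plain bitmask model of that intersection (a sketch; FastMathFlags itself supplies getFast() and operator&=, as used in the hunk):

#include <cstdint>
#include <vector>

static uint8_t intersectReductionFlags(const std::vector<uint8_t> &PerOpFlags) {
  uint8_t Common = 0xFF;       // stands in for FastMathFlags::getFast()
  for (uint8_t F : PerOpFlags)
    Common &= F;               // keep only flags present on every FP op
  return Common;
}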
+ FastMathFlags FMF = FastMathFlags::getFast(); + // A value in the reduction can be used: // - By the reduction: // - Reduction operation: @@ -297,6 +300,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); if (!ReduxDesc.isRecurrence()) return false; + if (isa<FPMathOperator>(ReduxDesc.getPatternInst())) + FMF &= ReduxDesc.getPatternInst()->getFastMathFlags(); } bool IsASelect = isa<SelectInst>(Cur); @@ -442,7 +447,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, // Save the description of this reduction variable. RecurrenceDescriptor RD( - RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), + RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getMinMaxKind(), ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); RedDes = RD; @@ -550,9 +555,8 @@ RecurrenceDescriptor::isConditionalRdxPattern( RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr) { - bool FP = I->getType()->isFloatingPointTy(); Instruction *UAI = Prev.getUnsafeAlgebraInst(); - if (!UAI && FP && !I->isFast()) + if (!UAI && isa<FPMathOperator>(I) && !I->hasAllowReassoc()) UAI = I; // Found an unsafe (unvectorizable) algebra instruction. switch (I->getOpcode()) { @@ -1010,7 +1014,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, // If we started from an UnknownSCEV, and managed to build an addRecurrence // only after enabling Assume with PSCEV, this means we may have encountered // cast instructions that required adding a runtime check in order to - // guarantee the correctness of the AddRecurence respresentation of the + // guarantee the correctness of the AddRecurrence respresentation of the // induction. if (PhiScev != AR && SymbolicPhi) { SmallVector<Instruction *, 2> Casts; @@ -1049,6 +1053,13 @@ bool InductionDescriptor::isInductionPHI( Value *StartValue = Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); + + BasicBlock *Latch = AR->getLoop()->getLoopLatch(); + if (!Latch) + return false; + BinaryOperator *BOp = + dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch)); + const SCEV *Step = AR->getStepRecurrence(*SE); // Calculate the pointer stride and check if it is consecutive. // The stride may be a constant or a loop invariant integer value. @@ -1057,7 +1068,7 @@ bool InductionDescriptor::isInductionPHI( return false; if (PhiTy->isIntegerTy()) { - D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/nullptr, + D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp, CastsToIgnore); return true; } @@ -1084,6 +1095,6 @@ bool InductionDescriptor::isInductionPHI( return false; auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, true /* signed */); - D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); + D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue, BOp); return true; } diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 609e5e3a1448..681a0cf7e981 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -1,9 +1,8 @@ //===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp index d6e6e76af03c..6ff840efcb64 100644 --- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -1,9 +1,8 @@ //===-- IndirectCallPromotionAnalysis.cpp - Find promotion candidates ===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 6ddb3cbc01a3..0dec146e0465 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -1,9 +1,8 @@ //===- InlineCost.cpp - Cost analysis for inliner -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -28,7 +27,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -37,6 +35,7 @@ #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -51,19 +50,19 @@ static cl::opt<int> InlineThreshold( cl::desc("Control the amount of inlining to perform (default = 225)")); static cl::opt<int> HintThreshold( - "inlinehint-threshold", cl::Hidden, cl::init(325), + "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with inline hint")); static cl::opt<int> ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, - cl::init(45), + cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining cold callsites")); // We introduce this threshold to help performance of instrumentation based // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. 
static cl::opt<int> ColdThreshold( - "inlinecold-threshold", cl::Hidden, cl::init(45), + "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with cold attribute")); static cl::opt<int> @@ -77,7 +76,7 @@ static cl::opt<int> LocallyHotCallSiteThreshold( static cl::opt<int> ColdCallSiteRelFreq( "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, - cl::desc("Maxmimum block frequency, expressed as a percentage of caller's " + cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information.")); @@ -88,7 +87,7 @@ static cl::opt<int> HotCallSiteRelFreq( "profile information.")); static cl::opt<bool> OptComputeFullInlineCost( - "inline-cost-full", cl::Hidden, cl::init(false), + "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold.")); @@ -122,31 +121,43 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// The candidate callsite being analyzed. Please do not use this to do /// analysis in the caller function; we want the inline cost query to be /// easily cacheable. Instead, use the cover function paramHasAttr. - CallSite CandidateCS; + CallBase &CandidateCall; /// Tunable parameters that control the analysis. const InlineParams &Params; + /// Upper bound for the inlining cost. Bonuses are being applied to account + /// for speculative "expected profit" of the inlining decision. int Threshold; - int Cost; + + /// Inlining cost measured in abstract units, accounts for all the + /// instructions expected to be executed for a given function invocation. + /// Instructions that are statically proven to be dead based on call-site + /// arguments are not counted here. + int Cost = 0; + bool ComputeFullInlineCost; - bool IsCallerRecursive; - bool IsRecursiveCall; - bool ExposesReturnsTwice; - bool HasDynamicAlloca; - bool ContainsNoDuplicateCall; - bool HasReturn; - bool HasIndirectBr; - bool HasUninlineableIntrinsic; - bool InitsVargArgs; + bool IsCallerRecursive = false; + bool IsRecursiveCall = false; + bool ExposesReturnsTwice = false; + bool HasDynamicAlloca = false; + bool ContainsNoDuplicateCall = false; + bool HasReturn = false; + bool HasIndirectBr = false; + bool HasUninlineableIntrinsic = false; + bool InitsVargArgs = false; /// Number of bytes allocated statically by the callee. - uint64_t AllocatedSize; - unsigned NumInstructions, NumVectorInstructions; - int VectorBonus, TenPercentVectorBonus; - // Bonus to be applied when the callee has only one reachable basic block. - int SingleBBBonus; + uint64_t AllocatedSize = 0; + unsigned NumInstructions = 0; + unsigned NumVectorInstructions = 0; + + /// Bonus to be applied when percentage of vector instructions in callee is + /// high (see more details in updateThreshold). + int VectorBonus = 0; + /// Bonus to be applied when the callee has only one reachable basic block. + int SingleBBBonus = 0; /// While we walk the potentially-inlined instructions, we build up and /// maintain a mapping of simplified values specific to this callsite. The @@ -181,7 +192,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// loads. bool EnableLoadElimination; SmallPtrSet<Value *, 16> LoadAddrSet; - int LoadEliminationCost; + int LoadEliminationCost = 0; // Custom simplification helper routines. 
bool isAllocaDerivedArg(Value *V); @@ -196,7 +207,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool isGEPFree(GetElementPtrInst &GEP); bool canFoldInboundsGEP(GetElementPtrInst &I); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); - bool simplifyCallSite(Function *F, CallSite CS); + bool simplifyCallSite(Function *F, CallBase &Call); template <typename Callable> bool simplifyInstruction(Instruction &I, Callable Evaluate); ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); @@ -216,22 +227,28 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// attributes and callee hotness for PGO builds. The Callee is explicitly /// passed to support analyzing indirect calls whose target is inferred by /// analysis. - void updateThreshold(CallSite CS, Function &Callee); + void updateThreshold(CallBase &Call, Function &Callee); - /// Return true if size growth is allowed when inlining the callee at CS. - bool allowSizeGrowth(CallSite CS); + /// Return true if size growth is allowed when inlining the callee at \p Call. + bool allowSizeGrowth(CallBase &Call); - /// Return true if \p CS is a cold callsite. - bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI); + /// Return true if \p Call is a cold callsite. + bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI); - /// Return a higher threshold if \p CS is a hot callsite. - Optional<int> getHotCallSiteThreshold(CallSite CS, + /// Return a higher threshold if \p Call is a hot callsite. + Optional<int> getHotCallSiteThreshold(CallBase &Call, BlockFrequencyInfo *CallerBFI); // Custom analysis routines. InlineResult analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues); + /// Handle a capped 'int' increment for Cost. + void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) { + assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound"); + Cost = (int)std::min(UpperBound, Cost + Inc); + } + // Disable several entry points to the visitor so we don't accidentally use // them by declaring but not defining them here. 
void visit(Module *); @@ -256,11 +273,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitCmpInst(CmpInst &I); bool visitSub(BinaryOperator &I); bool visitBinaryOperator(BinaryOperator &I); + bool visitFNeg(UnaryOperator &I); bool visitLoad(LoadInst &I); bool visitStore(StoreInst &I); bool visitExtractValue(ExtractValueInst &I); bool visitInsertValue(InsertValueInst &I); - bool visitCallSite(CallSite CS); + bool visitCallBase(CallBase &Call); bool visitReturnInst(ReturnInst &RI); bool visitBranchInst(BranchInst &BI); bool visitSelectInst(SelectInst &SI); @@ -276,38 +294,29 @@ public: std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, - Function &Callee, CallSite CSArg, const InlineParams &Params) + Function &Callee, CallBase &Call, const InlineParams &Params) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), - CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold), - Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost || - Params.ComputeFullInlineCost || ORE), - IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), - ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0), - NumInstructions(0), NumVectorInstructions(0), VectorBonus(0), - SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} - - InlineResult analyzeCall(CallSite CS); + CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold), + ComputeFullInlineCost(OptComputeFullInlineCost || + Params.ComputeFullInlineCost || ORE), + EnableLoadElimination(true) {} + + InlineResult analyzeCall(CallBase &Call); int getThreshold() { return Threshold; } int getCost() { return Cost; } // Keep a bunch of stats about the cost savings found so we can print them // out when debugging. - unsigned NumConstantArgs; - unsigned NumConstantOffsetPtrArgs; - unsigned NumAllocaArgs; - unsigned NumConstantPtrCmps; - unsigned NumConstantPtrDiffs; - unsigned NumInstructionsSimplified; - unsigned SROACostSavings; - unsigned SROACostSavingsLost; + unsigned NumConstantArgs = 0; + unsigned NumConstantOffsetPtrArgs = 0; + unsigned NumAllocaArgs = 0; + unsigned NumConstantPtrCmps = 0; + unsigned NumConstantPtrDiffs = 0; + unsigned NumInstructionsSimplified = 0; + unsigned SROACostSavings = 0; + unsigned SROACostSavingsLost = 0; void dump(); }; @@ -342,7 +351,7 @@ bool CallAnalyzer::lookupSROAArgAndCost( void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) { // If we're no longer able to perform SROA we need to undo its cost savings // and prevent subsequent analysis. 
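The hunk just below is the first of many that route a raw "Cost +=" through the capped addCost helper defined earlier in this class. A minimal standalone sketch of that saturating accumulation, assuming nothing beyond the standard library (the CostTracker wrapper is illustrative, not part of the patch):

#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdint>

// Running inline cost with a saturating increment, mirroring the addCost
// helper added above: the total is clamped at UpperBound so repeated
// penalties cannot overflow the int accumulator.
struct CostTracker {
  int Cost = 0;
  void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
    assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
    Cost = static_cast<int>(std::min(UpperBound, Cost + Inc));
  }
};

int main() {
  CostTracker T;
  T.addCost(5);
  T.addCost(INT_MAX);        // a plain "Cost += Inc" would overflow here
  assert(T.Cost == INT_MAX); // instead the total saturates at the bound
  return 0;
}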
- Cost += CostIt->second; + addCost(CostIt->second); SROACostSavings -= CostIt->second; SROACostSavingsLost += CostIt->second; SROAArgCosts.erase(CostIt); @@ -366,7 +375,7 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, void CallAnalyzer::disableLoadElimination() { if (EnableLoadElimination) { - Cost += LoadEliminationCost; + addCost(LoadEliminationCost); LoadEliminationCost = 0; EnableLoadElimination = false; } @@ -701,7 +710,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { } bool CallAnalyzer::visitCastInst(CastInst &I) { - // Propagate constants through ptrtoint. + // Propagate constants through casts. if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType()); })) @@ -721,7 +730,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { case Instruction::FPToUI: case Instruction::FPToSI: if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) - Cost += InlineConstants::CallPenalty; + addCost(InlineConstants::CallPenalty); break; default: break; @@ -737,14 +746,14 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { })) return true; - // Disable any SROA on the argument to arbitrary unary operators. + // Disable any SROA on the argument to arbitrary unary instructions. disableSROA(Operand); return false; } bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) { - return CandidateCS.paramHasAttr(A->getArgNo(), Attr); + return CandidateCall.paramHasAttr(A->getArgNo(), Attr); } bool CallAnalyzer::isKnownNonNullInCallee(Value *V) { @@ -769,7 +778,7 @@ bool CallAnalyzer::isKnownNonNullInCallee(Value *V) { return false; } -bool CallAnalyzer::allowSizeGrowth(CallSite CS) { +bool CallAnalyzer::allowSizeGrowth(CallBase &Call) { // If the normal destination of the invoke or the parent block of the call // site is unreachable-terminated, there is little point in inlining this // unless there is literally zero cost. @@ -785,21 +794,21 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) { // For now, we are not handling this corner case here as it is rare in real // code. In future, we should elaborate this based on BPI and BFI in more // general threshold adjusting heuristics in updateThreshold(). - Instruction *Instr = CS.getInstruction(); - if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { + if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) { if (isa<UnreachableInst>(II->getNormalDest()->getTerminator())) return false; - } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator())) + } else if (isa<UnreachableInst>(Call.getParent()->getTerminator())) return false; return true; } -bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) { +bool CallAnalyzer::isColdCallSite(CallBase &Call, + BlockFrequencyInfo *CallerBFI) { // If global profile summary is available, then callsite's coldness is // determined based on that. if (PSI && PSI->hasProfileSummary()) - return PSI->isColdCallSite(CS, CallerBFI); + return PSI->isColdCallSite(CallSite(&Call), CallerBFI); // Otherwise we need BFI to be available. if (!CallerBFI) @@ -810,20 +819,21 @@ bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) { // complexity is not worth it unless this scaling shows up high in the // profiles. 
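The lines that follow implement the fallback described above: without a profile summary, a callsite counts as cold when its block frequency is only a small fraction of the caller's entry frequency. A rough standalone sketch of that comparison, using plain integers in place of LLVM's BlockFrequency and BranchProbability types:

#include <cstdint>

// A callsite is treated as cold when its block runs at most RelFreqPercent
// percent as often as the caller's entry block (2 mirrors the
// cold-callsite-rel-freq default).
static bool isRelativelyColdCallSite(uint64_t CallSiteFreq,
                                     uint64_t CallerEntryFreq,
                                     uint64_t RelFreqPercent = 2) {
  // Cross-multiply to stay in integer arithmetic; the real BlockFrequency
  // math also guards against overflow, which this sketch does not.
  return CallSiteFreq * 100 < CallerEntryFreq * RelFreqPercent;
}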
const BranchProbability ColdProb(ColdCallSiteRelFreq, 100); - auto CallSiteBB = CS.getInstruction()->getParent(); + auto CallSiteBB = Call.getParent(); auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB); auto CallerEntryFreq = - CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock())); + CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock())); return CallSiteFreq < CallerEntryFreq * ColdProb; } Optional<int> -CallAnalyzer::getHotCallSiteThreshold(CallSite CS, +CallAnalyzer::getHotCallSiteThreshold(CallBase &Call, BlockFrequencyInfo *CallerBFI) { // If global profile summary is available, then callsite's hotness is // determined based on that. - if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI)) + if (PSI && PSI->hasProfileSummary() && + PSI->isHotCallSite(CallSite(&Call), CallerBFI)) return Params.HotCallSiteThreshold; // Otherwise we need BFI to be available and to have a locally hot callsite @@ -835,7 +845,7 @@ CallAnalyzer::getHotCallSiteThreshold(CallSite CS, // potentially cache the computation of scaled entry frequency, but the added // complexity is not worth it unless this scaling shows up high in the // profiles. - auto CallSiteBB = CS.getInstruction()->getParent(); + auto CallSiteBB = Call.getParent(); auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency(); auto CallerEntryFreq = CallerBFI->getEntryFreq(); if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq) @@ -845,14 +855,14 @@ CallAnalyzer::getHotCallSiteThreshold(CallSite CS, return None; } -void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { +void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { // If no size growth is allowed for this inlining, set Threshold to 0. - if (!allowSizeGrowth(CS)) { + if (!allowSizeGrowth(Call)) { Threshold = 0; return; } - Function *Caller = CS.getCaller(); + Function *Caller = Call.getCaller(); // return min(A, B) if B is valid. auto MinIfValid = [](int A, Optional<int> B) { @@ -870,15 +880,6 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // basic block at the given callsite context. This is speculatively applied // and withdrawn if more than one basic block is seen. // - // Vector bonuses: We want to more aggressively inline vector-dense kernels - // and apply this bonus based on the percentage of vector instructions. A - // bonus is applied if the vector instructions exceed 50% and half that amount - // is applied if it exceeds 10%. Note that these bonuses are some what - // arbitrary and evolved over time by accident as much as because they are - // principled bonuses. - // FIXME: It would be nice to base the bonus values on something more - // scientific. - // // LstCallToStaticBonus: This large bonus is applied to ensure the inlining // of the last call to a static function as inlining such functions is // guaranteed to reduce code size. @@ -886,7 +887,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // These bonus percentages may be set to 0 based on properties of the caller // and the callsite. int SingleBBBonusPercent = 50; - int VectorBonusPercent = 150; + int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus; // Lambda to set all the above bonus and bonus percentages to 0. 
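For a sense of scale, the bonuses described above are simple percentages of the working threshold, applied speculatively and withdrawn later if the callee does not qualify. A small illustrative sketch of that arithmetic; the 150 default below only mirrors the previously hard-coded vector bonus, while the patch now takes the real value from TTI.getInlinerVectorBonusPercent():

// Compute the speculative bonuses as percentages of the current threshold.
struct Bonuses {
  int SingleBB;
  int Vector;
};

static Bonuses computeBonuses(int Threshold, int VectorBonusPercent = 150) {
  const int SingleBBBonusPercent = 50; // single-basic-block figure from above
  return {Threshold * SingleBBBonusPercent / 100,
          Threshold * VectorBonusPercent / 100};
}
// With the default inline threshold of 225: SingleBB = 112, Vector = 337.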
@@ -898,7 +899,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available // and reduce the threshold if the caller has the necessary attribute. - if (Caller->optForMinSize()) { + if (Caller->hasMinSize()) { Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold); // For minsize, we want to disable the single BB bonus and the vector // bonuses, but not the last-call-to-static bonus. Inlining the last call to @@ -906,12 +907,12 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // call/return instructions. SingleBBBonusPercent = 0; VectorBonusPercent = 0; - } else if (Caller->optForSize()) + } else if (Caller->hasOptSize()) Threshold = MinIfValid(Threshold, Params.OptSizeThreshold); // Adjust the threshold based on inlinehint attribute and profile based // hotness information if the caller does not have MinSize attribute. - if (!Caller->optForMinSize()) { + if (!Caller->hasMinSize()) { if (Callee.hasFnAttribute(Attribute::InlineHint)) Threshold = MaxIfValid(Threshold, Params.HintThreshold); @@ -923,15 +924,15 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // used (which adds hotness metadata to calls) or if caller's // BlockFrequencyInfo is available. BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr; - auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI); - if (!Caller->optForSize() && HotCallSiteThreshold) { + auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI); + if (!Caller->hasOptSize() && HotCallSiteThreshold) { LLVM_DEBUG(dbgs() << "Hot callsite.\n"); // FIXME: This should update the threshold only if it exceeds the // current threshold, but AutoFDO + ThinLTO currently relies on this // behavior to prevent inlining of hot callsites during ThinLTO // compile phase. Threshold = HotCallSiteThreshold.getValue(); - } else if (isColdCallSite(CS, CallerBFI)) { + } else if (isColdCallSite(Call, CallerBFI)) { LLVM_DEBUG(dbgs() << "Cold callsite.\n"); // Do not apply bonuses for a cold callsite including the // LastCallToStatic bonus. While this bonus might result in code size @@ -968,7 +969,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { VectorBonus = Threshold * VectorBonusPercent / 100; bool OnlyOneCallAndLocalLinkage = - F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); + F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction(); // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. It may seem odd to update // Cost in updateThreshold, but the bonus depends on the logic in this method. @@ -1087,10 +1088,34 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { // If the instruction is floating point, and the target says this operation // is expensive, this may eventually become a library call. Treat the cost - // as such. + // as such. Unless it's fneg which can be implemented with an xor. 
+ using namespace llvm::PatternMatch; if (I.getType()->isFloatingPointTy() && - TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) - Cost += InlineConstants::CallPenalty; + TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive && + !match(&I, m_FNeg(m_Value()))) + addCost(InlineConstants::CallPenalty); + + return false; +} + +bool CallAnalyzer::visitFNeg(UnaryOperator &I) { + Value *Op = I.getOperand(0); + Constant *COp = dyn_cast<Constant>(Op); + if (!COp) + COp = SimplifiedValues.lookup(Op); + + Value *SimpleV = SimplifyFNegInst(COp ? COp : Op, + cast<FPMathOperator>(I).getFastMathFlags(), + DL); + + if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) + SimplifiedValues[&I] = C; + + if (SimpleV) + return true; + + // Disable any SROA on arguments to arbitrary, unsimplified fneg. + disableSROA(Op); return false; } @@ -1173,62 +1198,61 @@ bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { /// analyzing the arguments and call itself with instsimplify. Returns true if /// it has simplified the callsite to some other entity (a constant), making it /// free. -bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { +bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) { // FIXME: Using the instsimplify logic directly for this is inefficient // because we have to continually rebuild the argument list even when no // simplifications can be performed. Until that is fixed with remapping // inside of instsimplify, directly constant fold calls here. - if (!canConstantFoldCallTo(CS, F)) + if (!canConstantFoldCallTo(&Call, F)) return false; // Try to re-map the arguments to constants. SmallVector<Constant *, 4> ConstantArgs; - ConstantArgs.reserve(CS.arg_size()); - for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; - ++I) { - Constant *C = dyn_cast<Constant>(*I); + ConstantArgs.reserve(Call.arg_size()); + for (Value *I : Call.args()) { + Constant *C = dyn_cast<Constant>(I); if (!C) - C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I)); + C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I)); if (!C) return false; // This argument doesn't map to a constant. ConstantArgs.push_back(C); } - if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) { - SimplifiedValues[CS.getInstruction()] = C; + if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) { + SimplifiedValues[&Call] = C; return true; } return false; } -bool CallAnalyzer::visitCallSite(CallSite CS) { - if (CS.hasFnAttr(Attribute::ReturnsTwice) && +bool CallAnalyzer::visitCallBase(CallBase &Call) { + if (Call.hasFnAttr(Attribute::ReturnsTwice) && !F.hasFnAttribute(Attribute::ReturnsTwice)) { // This aborts the entire analysis. ExposesReturnsTwice = true; return false; } - if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate()) + if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate()) ContainsNoDuplicateCall = true; - if (Function *F = CS.getCalledFunction()) { + if (Function *F = Call.getCalledFunction()) { // When we have a concrete function, first try to simplify it directly. - if (simplifyCallSite(F, CS)) + if (simplifyCallSite(F, Call)) return true; // Next check if it is an intrinsic we know about. // FIXME: Lift this into part of the InstVisitor. 
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) { switch (II->getIntrinsicID()) { default: - if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) + if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) disableLoadElimination(); - return Base::visitCallSite(CS); + return Base::visitCallBase(Call); case Intrinsic::load_relative: // This is normally lowered to 4 LLVM instructions. - Cost += 3 * InlineConstants::InstrCost; + addCost(3 * InlineConstants::InstrCost); return false; case Intrinsic::memset: @@ -1247,7 +1271,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { } } - if (F == CS.getInstruction()->getFunction()) { + if (F == Call.getFunction()) { // This flag will fully abort the analysis, so don't bother with anything // else. IsRecursiveCall = true; @@ -1257,34 +1281,34 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { if (TTI.isLoweredToCall(F)) { // We account for the average 1 instruction per call argument setup // here. - Cost += CS.arg_size() * InlineConstants::InstrCost; + addCost(Call.arg_size() * InlineConstants::InstrCost); // Everything other than inline ASM will also have a significant cost // merely from making the call. - if (!isa<InlineAsm>(CS.getCalledValue())) - Cost += InlineConstants::CallPenalty; + if (!isa<InlineAsm>(Call.getCalledValue())) + addCost(InlineConstants::CallPenalty); } - if (!CS.onlyReadsMemory()) + if (!Call.onlyReadsMemory()) disableLoadElimination(); - return Base::visitCallSite(CS); + return Base::visitCallBase(Call); } // Otherwise we're in a very special case -- an indirect function call. See // if we can be particularly clever about this. - Value *Callee = CS.getCalledValue(); + Value *Callee = Call.getCalledValue(); // First, pay the price of the argument setup. We account for the average // 1 instruction per call argument setup here. - Cost += CS.arg_size() * InlineConstants::InstrCost; + addCost(Call.arg_size() * InlineConstants::InstrCost); // Next, check if this happens to be an indirect function call to a known // function in this inline context. If not, we've done all we can. Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); if (!F) { - if (!CS.onlyReadsMemory()) + if (!Call.onlyReadsMemory()) disableLoadElimination(); - return Base::visitCallSite(CS); + return Base::visitCallBase(Call); } // If we have a constant that we are calling as a function, we can peer @@ -1294,9 +1318,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // out. Pretend to inline the function, with a custom threshold. auto IndirectCallParams = Params; IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; - CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS, + CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call, IndirectCallParams); - if (CA.analyzeCall(CS)) { + if (CA.analyzeCall(Call)) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. 
Cost -= std::max(0, CA.getThreshold() - CA.getCost()); @@ -1304,7 +1328,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { if (!F->onlyReadsMemory()) disableLoadElimination(); - return Base::visitCallSite(CS); + return Base::visitCallBase(Call); } bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { @@ -1438,7 +1462,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); if (CostLowerBound > Threshold && !ComputeFullInlineCost) { - Cost = CostLowerBound; + addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost); return false; } @@ -1452,7 +1476,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + 4 * InlineConstants::InstrCost; - Cost = std::min((int64_t)CostUpperBound, JTCost + Cost); + addCost(JTCost, (int64_t)CostUpperBound); return false; } @@ -1473,7 +1497,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // n + n / 2 - 1 = n * 3 / 2 - 1 if (NumCaseCluster <= 3) { // Suppose a comparison includes one compare and one conditional branch. - Cost += NumCaseCluster * 2 * InlineConstants::InstrCost; + addCost(NumCaseCluster * 2 * InlineConstants::InstrCost); return false; } @@ -1481,7 +1505,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; - Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost); + addCost(SwitchCost, (int64_t)CostUpperBound); return false; } @@ -1574,7 +1598,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB, if (Base::visit(&*I)) ++NumInstructionsSimplified; else - Cost += InlineConstants::InstrCost; + addCost(InlineConstants::InstrCost); using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. @@ -1595,7 +1619,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB, if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", - CandidateCS.getInstruction()) + &CandidateCall) << NV("Callee", &F) << " has uninlinable pattern (" << NV("InlineResult", IR.message) << ") and cost is not fully computed"; @@ -1612,14 +1636,14 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB, if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", - CandidateCS.getInstruction()) + &CandidateCall) << NV("Callee", &F) << " is " << NV("InlineResult", IR.message) << ". Cost is not fully computed"; }); return IR; } - // Check if we've past the maximum possible threshold so we don't spin in + // Check if we've passed the maximum possible threshold so we don't spin in // huge basic blocks that will never inline. if (Cost >= Threshold && !ComputeFullInlineCost) return false; @@ -1676,7 +1700,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { /// blocks to see if all their incoming edges are dead or not. void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { - // A CFG edge is dead if the predecessor is dead or the predessor has a + // A CFG edge is dead if the predecessor is dead or the predecessor has a // known successor which is not the one under exam. return (DeadBlocks.count(Pred) || (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ)); @@ -1712,7 +1736,7 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { /// factors and heuristics. 
If this method returns false but the computed cost /// is below the computed threshold, then inlining was forcibly disabled by /// some artifact of the routine. -InlineResult CallAnalyzer::analyzeCall(CallSite CS) { +InlineResult CallAnalyzer::analyzeCall(CallBase &Call) { ++NumCallsAnalyzed; // Perform some tweaks to the cost and threshold based on the direct @@ -1729,7 +1753,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) { assert(NumVectorInstructions == 0); // Update the threshold based on callsite properties - updateThreshold(CS, F); + updateThreshold(Call, F); // While Threshold depends on commandline options that can take negative // values, we want to enforce the invariant that the computed threshold and @@ -1745,7 +1769,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) { // Give out bonuses for the callsite, as the instructions setting them up // will be gone after inlining. - Cost -= getCallsiteCost(CS, DL); + addCost(-getCallsiteCost(Call, DL)); // If this function uses the coldcc calling convention, prefer not to inline // it. @@ -1759,14 +1783,11 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) { if (F.empty()) return true; - Function *Caller = CS.getInstruction()->getFunction(); + Function *Caller = Call.getFunction(); // Check if the caller function is recursive itself. for (User *U : Caller->users()) { - CallSite Site(U); - if (!Site) - continue; - Instruction *I = Site.getInstruction(); - if (I->getFunction() == Caller) { + CallBase *Call = dyn_cast<CallBase>(U); + if (Call && Call->getFunction() == Caller) { IsCallerRecursive = true; break; } @@ -1774,10 +1795,10 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) { // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. - CallSite::arg_iterator CAI = CS.arg_begin(); + auto CAI = Call.arg_begin(); for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); FAI != FAE; ++FAI, ++CAI) { - assert(CAI != CS.arg_end()); + assert(CAI != Call.arg_end()); if (Constant *C = dyn_cast<Constant>(CAI)) SimplifiedValues[&*FAI] = C; @@ -1826,14 +1847,18 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) { if (BB->empty()) continue; - // Disallow inlining a blockaddress. A blockaddress only has defined - // behavior for an indirect branch in the same function, and we do not - // currently support inlining indirect branches. But, the inliner may not - // see an indirect branch that ends up being dead code at a particular call - // site. If the blockaddress escapes the function, e.g., via a global - // variable, inlining may lead to an invalid cross-function reference. + // Disallow inlining a blockaddress with uses other than strictly callbr. + // A blockaddress only has defined behavior for an indirect branch in the + // same function, and we do not currently support inlining indirect + // branches. But, the inliner may not see an indirect branch that ends up + // being dead code at a particular call site. If the blockaddress escapes + // the function, e.g., via a global variable, inlining may lead to an + // invalid cross-function reference. + // FIXME: pr/39560: continue relaxing this overt restriction. if (BB->hasAddressTaken()) - return "blockaddress"; + for (User *U : BlockAddress::get(&*BB)->users()) + if (!isa<CallBrInst>(*U)) + return "blockaddress used outside of callbr"; // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail on out. 
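Earlier in this hunk, the caller-recursion check is ported from a CallSite scan to a direct walk over the caller's users, looking for a CallBase that lives inside the caller itself. A standalone mirror of that check, using hypothetical stand-in types rather than LLVM's IR classes:

#include <vector>

struct Function;

// A call instruction, reduced to the one fact the check needs: which
// function contains it. (Illustrative stand-in, not LLVM's CallBase.)
struct CallRecord {
  Function *EnclosingFunction;
};

struct Function {
  // Call instructions that reference (call) this function.
  std::vector<CallRecord *> Users;
};

// Mirrors the patch's loop over Caller->users(): the caller is flagged as
// recursive if any call that targets it is located inside the caller itself.
static bool isCallerRecursive(const Function &Caller) {
  for (const CallRecord *Call : Caller.Users)
    if (Call->EnclosingFunction == &Caller)
      return true;
  return false;
}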
@@ -1887,7 +1912,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) { } bool OnlyOneCallAndLocalLinkage = - F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); + F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction(); // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). @@ -1899,7 +1924,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) { // size, we penalise any call sites that perform loops. We do this after all // other costs here, so will likely only be dealing with relatively small // functions (and hence DT and LI will hopefully be cheap). - if (Caller->optForMinSize()) { + if (Caller->hasMinSize()) { DominatorTree DT(F); LoopInfo LI(DT); int NumLoops = 0; @@ -1909,7 +1934,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) { continue; NumLoops++; } - Cost += NumLoops * InlineConstants::CallPenalty; + addCost(NumLoops * InlineConstants::CallPenalty); } // We applied the maximum possible vector bonus at the beginning. Now, @@ -1953,13 +1978,13 @@ static bool functionsHaveCompatibleAttributes(Function *Caller, AttributeFuncs::areInlineCompatible(*Caller, *Callee); } -int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) { +int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) { int Cost = 0; - for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { - if (CS.isByValArgument(I)) { + for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) { + if (Call.isByValArgument(I)) { // We approximate the number of loads and stores needed by dividing the // size of the byval type by the target's pointer size. - PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); + PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType()); unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType()); unsigned AS = PTy->getAddressSpace(); unsigned PointerSize = DL.getPointerSizeInBits(AS); @@ -1987,16 +2012,16 @@ int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) { } InlineCost llvm::getInlineCost( - CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, + CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { - return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, + return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE); } InlineCost llvm::getInlineCost( - CallSite CS, Function *Callee, const InlineParams &Params, + CallBase &Call, Function *Callee, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, @@ -2012,9 +2037,9 @@ InlineCost llvm::getInlineCost( // argument is in the alloca address space (so it is a little bit complicated // to solve). 
unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace(); - for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) - if (CS.isByValArgument(I)) { - PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); + for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) + if (Call.isByValArgument(I)) { + PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType()); if (PTy->getAddressSpace() != AllocaAS) return llvm::InlineCost::getNever("byval arguments without alloca" " address space"); @@ -2022,20 +2047,21 @@ InlineCost llvm::getInlineCost( // Calls to functions with always-inline attributes should be inlined // whenever possible. - if (CS.hasFnAttr(Attribute::AlwaysInline)) { - if (isInlineViable(*Callee)) + if (Call.hasFnAttr(Attribute::AlwaysInline)) { + auto IsViable = isInlineViable(*Callee); + if (IsViable) return llvm::InlineCost::getAlways("always inline attribute"); - return llvm::InlineCost::getNever("inapplicable always inline attribute"); + return llvm::InlineCost::getNever(IsViable.message); } // Never inline functions with conflicting attributes (unless callee has // always-inline attribute). - Function *Caller = CS.getCaller(); + Function *Caller = Call.getCaller(); if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI)) return llvm::InlineCost::getNever("conflicting attributes"); // Don't inline this call if the caller has the optnone attribute. - if (Caller->hasFnAttribute(Attribute::OptimizeNone)) + if (Caller->hasOptNone()) return llvm::InlineCost::getNever("optnone attribute"); // Don't inline a function that treats null pointer as valid into a caller @@ -2052,15 +2078,15 @@ InlineCost llvm::getInlineCost( return llvm::InlineCost::getNever("noinline function attribute"); // Don't inline call sites marked noinline. - if (CS.isNoInline()) + if (Call.isNoInline()) return llvm::InlineCost::getNever("noinline call site attribute"); LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "... (caller:" << Caller->getName() << ")\n"); - CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS, - Params); - InlineResult ShouldInline = CA.analyzeCall(CS); + CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, + Call, Params); + InlineResult ShouldInline = CA.analyzeCall(Call); LLVM_DEBUG(CA.dump()); @@ -2073,42 +2099,50 @@ InlineCost llvm::getInlineCost( return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); } -bool llvm::isInlineViable(Function &F) { +InlineResult llvm::isInlineViable(Function &F) { bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { - // Disallow inlining of functions which contain indirect branches or - // blockaddresses. - if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken()) - return false; + // Disallow inlining of functions which contain indirect branches. + if (isa<IndirectBrInst>(BI->getTerminator())) + return "contains indirect branches"; + + // Disallow inlining of blockaddresses which are used by non-callbr + // instructions. + if (BI->hasAddressTaken()) + for (User *U : BlockAddress::get(&*BI)->users()) + if (!isa<CallBrInst>(*U)) + return "blockaddress used outside of callbr"; for (auto &II : *BI) { - CallSite CS(&II); - if (!CS) + CallBase *Call = dyn_cast<CallBase>(&II); + if (!Call) continue; // Disallow recursive calls. 
- if (&F == CS.getCalledFunction()) - return false; + if (&F == Call->getCalledFunction()) + return "recursive call"; // Disallow calls which expose returns-twice to a function not previously // attributed as such. - if (!ReturnsTwice && CS.isCall() && - cast<CallInst>(CS.getInstruction())->canReturnTwice()) - return false; + if (!ReturnsTwice && isa<CallInst>(Call) && + cast<CallInst>(Call)->canReturnTwice()) + return "exposes returns-twice attribute"; - if (CS.getCalledFunction()) - switch (CS.getCalledFunction()->getIntrinsicID()) { + if (Call->getCalledFunction()) + switch (Call->getCalledFunction()->getIntrinsicID()) { default: break; // Disallow inlining of @llvm.icall.branch.funnel because current // backend can't separate call targets from call arguments. case llvm::Intrinsic::icall_branch_funnel: + return "disallowed inlining of @llvm.icall.branch.funnel"; // Disallow inlining functions that call @llvm.localescape. Doing this // correctly would require major changes to the inliner. case llvm::Intrinsic::localescape: + return "disallowed inlining of @llvm.localescape"; // Disallow inlining of functions that initialize VarArgs with va_start. case llvm::Intrinsic::vastart: - return false; + return "contains VarArgs initialized with va_start"; } } } diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index 95ab6ee3db5b..943a99a5f46d 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -1,9 +1,8 @@ //===-- InstCount.cpp - Collects the count of all instructions ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/InstructionPrecedenceTracking.cpp b/lib/Analysis/InstructionPrecedenceTracking.cpp index 816126f407ca..35190ce3e11a 100644 --- a/lib/Analysis/InstructionPrecedenceTracking.cpp +++ b/lib/Analysis/InstructionPrecedenceTracking.cpp @@ -1,9 +1,8 @@ //===-- InstructionPrecedenceTracking.cpp -----------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Implements a class that is able to define some instructions as "special" @@ -20,6 +19,7 @@ #include "llvm/Analysis/InstructionPrecedenceTracking.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/PatternMatch.h" using namespace llvm; @@ -153,5 +153,8 @@ bool ImplicitControlFlowTracking::isSpecialInstruction( bool MemoryWriteTracking::isSpecialInstruction( const Instruction *Insn) const { + using namespace PatternMatch; + if (match(Insn, m_Intrinsic<Intrinsic::experimental_widenable_condition>())) + return false; return Insn->mayWriteToMemory(); } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index ccf907c144f0..e34bf6f4e43f 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1,9 +1,8 @@ //===- InstructionSimplify.cpp - Fold instruction operands ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -34,6 +33,8 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" @@ -50,6 +51,9 @@ STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumReassoc, "Number of reassociations"); static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned); +static Value *simplifyUnOp(unsigned, Value *, const SimplifyQuery &, unsigned); +static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &, + const SimplifyQuery &, unsigned); static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, @@ -655,32 +659,11 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); - // Even though we don't look through PHI nodes, we could be called on an - // instruction in an unreachable block, which may be on a cycle. 
- SmallPtrSet<Value *, 4> Visited; - Visited.insert(V); - do { - if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - if ((!AllowNonInbounds && !GEP->isInBounds()) || - !GEP->accumulateConstantOffset(DL, Offset)) - break; - V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { - V = cast<Operator>(V)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { - if (GA->isInterposable()) - break; - V = GA->getAliasee(); - } else { - if (auto CS = CallSite(V)) - if (Value *RV = CS.getReturnedArgOperand()) { - V = RV; - continue; - } - break; - } - assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!"); - } while (Visited.insert(V).second); + V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds); + // As that strip may trace through `addrspacecast`, need to sext or trunc + // the offset calculated. + IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); + Offset = Offset.sextOrTrunc(IntPtrTy->getIntegerBitWidth()); Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); if (V->getType()->isVectorTy()) @@ -1841,6 +1824,16 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Op1; } + // This is a similar pattern used for checking if a value is a power-of-2: + // (A - 1) & A --> 0 (if A is a power-of-2 or 0) + // A & (A - 1) --> 0 (if A is a power-of-2 or 0) + if (match(Op0, m_Add(m_Specific(Op1), m_AllOnes())) && + isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT)) + return Constant::getNullValue(Op1->getType()); + if (match(Op1, m_Add(m_Specific(Op0), m_AllOnes())) && + isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT)) + return Constant::getNullValue(Op0->getType()); + if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true)) return V; @@ -2280,12 +2273,12 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, // come from a pointer that cannot overlap with dynamically-allocated // memory within the lifetime of the current function (allocas, byval // arguments, globals), then determine the comparison result here. - SmallVector<Value *, 8> LHSUObjs, RHSUObjs; + SmallVector<const Value *, 8> LHSUObjs, RHSUObjs; GetUnderlyingObjects(LHS, LHSUObjs, DL); GetUnderlyingObjects(RHS, RHSUObjs, DL); // Is the set of underlying objects all noalias calls? - auto IsNAC = [](ArrayRef<Value *> Objects) { + auto IsNAC = [](ArrayRef<const Value *> Objects) { return all_of(Objects, isNoAliasCall); }; @@ -2295,8 +2288,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, // live with the compared-to allocation). For globals, we exclude symbols // that might be resolve lazily to symbols in another dynamically-loaded // library (and, thus, could be malloc'ed by the implementation). - auto IsAllocDisjoint = [](ArrayRef<Value *> Objects) { - return all_of(Objects, [](Value *V) { + auto IsAllocDisjoint = [](ArrayRef<const Value *> Objects) { + return all_of(Objects, [](const Value *V) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) return AI->getParent() && AI->getFunction() && AI->isStaticAlloca(); if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) @@ -2472,228 +2465,6 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, return nullptr; } -/// Many binary operators with a constant operand have an easy-to-compute -/// range of outputs. This can be used to fold a comparison to always true or -/// always false. 
-static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper, - const InstrInfoQuery &IIQ) { - unsigned Width = Lower.getBitWidth(); - const APInt *C; - switch (BO.getOpcode()) { - case Instruction::Add: - if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { - // FIXME: If we have both nuw and nsw, we should reduce the range further. - if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) { - // 'add nuw x, C' produces [C, UINT_MAX]. - Lower = *C; - } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) { - if (C->isNegative()) { - // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C]. - Lower = APInt::getSignedMinValue(Width); - Upper = APInt::getSignedMaxValue(Width) + *C + 1; - } else { - // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX]. - Lower = APInt::getSignedMinValue(Width) + *C; - Upper = APInt::getSignedMaxValue(Width) + 1; - } - } - } - break; - - case Instruction::And: - if (match(BO.getOperand(1), m_APInt(C))) - // 'and x, C' produces [0, C]. - Upper = *C + 1; - break; - - case Instruction::Or: - if (match(BO.getOperand(1), m_APInt(C))) - // 'or x, C' produces [C, UINT_MAX]. - Lower = *C; - break; - - case Instruction::AShr: - if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { - // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C]. - Lower = APInt::getSignedMinValue(Width).ashr(*C); - Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1; - } else if (match(BO.getOperand(0), m_APInt(C))) { - unsigned ShiftAmount = Width - 1; - if (!C->isNullValue() && IIQ.isExact(&BO)) - ShiftAmount = C->countTrailingZeros(); - if (C->isNegative()) { - // 'ashr C, x' produces [C, C >> (Width-1)] - Lower = *C; - Upper = C->ashr(ShiftAmount) + 1; - } else { - // 'ashr C, x' produces [C >> (Width-1), C] - Lower = C->ashr(ShiftAmount); - Upper = *C + 1; - } - } - break; - - case Instruction::LShr: - if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { - // 'lshr x, C' produces [0, UINT_MAX >> C]. - Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1; - } else if (match(BO.getOperand(0), m_APInt(C))) { - // 'lshr C, x' produces [C >> (Width-1), C]. - unsigned ShiftAmount = Width - 1; - if (!C->isNullValue() && IIQ.isExact(&BO)) - ShiftAmount = C->countTrailingZeros(); - Lower = C->lshr(ShiftAmount); - Upper = *C + 1; - } - break; - - case Instruction::Shl: - if (match(BO.getOperand(0), m_APInt(C))) { - if (IIQ.hasNoUnsignedWrap(&BO)) { - // 'shl nuw C, x' produces [C, C << CLZ(C)] - Lower = *C; - Upper = Lower.shl(Lower.countLeadingZeros()) + 1; - } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw? 
- if (C->isNegative()) { - // 'shl nsw C, x' produces [C << CLO(C)-1, C] - unsigned ShiftAmount = C->countLeadingOnes() - 1; - Lower = C->shl(ShiftAmount); - Upper = *C + 1; - } else { - // 'shl nsw C, x' produces [C, C << CLZ(C)-1] - unsigned ShiftAmount = C->countLeadingZeros() - 1; - Lower = *C; - Upper = C->shl(ShiftAmount) + 1; - } - } - } - break; - - case Instruction::SDiv: - if (match(BO.getOperand(1), m_APInt(C))) { - APInt IntMin = APInt::getSignedMinValue(Width); - APInt IntMax = APInt::getSignedMaxValue(Width); - if (C->isAllOnesValue()) { - // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] - // where C != -1 and C != 0 and C != 1 - Lower = IntMin + 1; - Upper = IntMax + 1; - } else if (C->countLeadingZeros() < Width - 1) { - // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C] - // where C != -1 and C != 0 and C != 1 - Lower = IntMin.sdiv(*C); - Upper = IntMax.sdiv(*C); - if (Lower.sgt(Upper)) - std::swap(Lower, Upper); - Upper = Upper + 1; - assert(Upper != Lower && "Upper part of range has wrapped!"); - } - } else if (match(BO.getOperand(0), m_APInt(C))) { - if (C->isMinSignedValue()) { - // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2]. - Lower = *C; - Upper = Lower.lshr(1) + 1; - } else { - // 'sdiv C, x' produces [-|C|, |C|]. - Upper = C->abs() + 1; - Lower = (-Upper) + 1; - } - } - break; - - case Instruction::UDiv: - if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { - // 'udiv x, C' produces [0, UINT_MAX / C]. - Upper = APInt::getMaxValue(Width).udiv(*C) + 1; - } else if (match(BO.getOperand(0), m_APInt(C))) { - // 'udiv C, x' produces [0, C]. - Upper = *C + 1; - } - break; - - case Instruction::SRem: - if (match(BO.getOperand(1), m_APInt(C))) { - // 'srem x, C' produces (-|C|, |C|). - Upper = C->abs(); - Lower = (-Upper) + 1; - } - break; - - case Instruction::URem: - if (match(BO.getOperand(1), m_APInt(C))) - // 'urem x, C' produces [0, C). - Upper = *C; - break; - - default: - break; - } -} - -/// Some intrinsics with a constant operand have an easy-to-compute range of -/// outputs. This can be used to fold a comparison to always true or always -/// false. -static void setLimitsForIntrinsic(IntrinsicInst &II, APInt &Lower, - APInt &Upper) { - unsigned Width = Lower.getBitWidth(); - const APInt *C; - switch (II.getIntrinsicID()) { - case Intrinsic::uadd_sat: - // uadd.sat(x, C) produces [C, UINT_MAX]. - if (match(II.getOperand(0), m_APInt(C)) || - match(II.getOperand(1), m_APInt(C))) - Lower = *C; - break; - case Intrinsic::sadd_sat: - if (match(II.getOperand(0), m_APInt(C)) || - match(II.getOperand(1), m_APInt(C))) { - if (C->isNegative()) { - // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)]. - Lower = APInt::getSignedMinValue(Width); - Upper = APInt::getSignedMaxValue(Width) + *C + 1; - } else { - // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX]. - Lower = APInt::getSignedMinValue(Width) + *C; - Upper = APInt::getSignedMaxValue(Width) + 1; - } - } - break; - case Intrinsic::usub_sat: - // usub.sat(C, x) produces [0, C]. - if (match(II.getOperand(0), m_APInt(C))) - Upper = *C + 1; - // usub.sat(x, C) produces [0, UINT_MAX - C]. - else if (match(II.getOperand(1), m_APInt(C))) - Upper = APInt::getMaxValue(Width) - *C + 1; - break; - case Intrinsic::ssub_sat: - if (match(II.getOperand(0), m_APInt(C))) { - if (C->isNegative()) { - // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)]. 
- Lower = APInt::getSignedMinValue(Width); - Upper = *C - APInt::getSignedMinValue(Width) + 1; - } else { - // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX]. - Lower = *C - APInt::getSignedMaxValue(Width); - Upper = APInt::getSignedMaxValue(Width) + 1; - } - } else if (match(II.getOperand(1), m_APInt(C))) { - if (C->isNegative()) { - // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]: - Lower = APInt::getSignedMinValue(Width) - *C; - Upper = APInt::getSignedMaxValue(Width) + 1; - } else { - // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C]. - Lower = APInt::getSignedMinValue(Width); - Upper = APInt::getSignedMaxValue(Width) - *C + 1; - } - } - break; - default: - break; - } -} - static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const InstrInfoQuery &IIQ) { Type *ITy = GetCompareTy(RHS); // The return type. @@ -2721,22 +2492,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, if (RHS_CR.isFullSet()) return ConstantInt::getTrue(ITy); - // Find the range of possible values for binary operators. - unsigned Width = C->getBitWidth(); - APInt Lower = APInt(Width, 0); - APInt Upper = APInt(Width, 0); - if (auto *BO = dyn_cast<BinaryOperator>(LHS)) - setLimitsForBinOp(*BO, Lower, Upper, IIQ); - else if (auto *II = dyn_cast<IntrinsicInst>(LHS)) - setLimitsForIntrinsic(*II, Lower, Upper); - - ConstantRange LHS_CR = - Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true); - - if (auto *I = dyn_cast<Instruction>(LHS)) - if (auto *Ranges = IIQ.getMetadata(I, LLVMContext::MD_range)) - LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges)); - + ConstantRange LHS_CR = computeConstantRange(LHS, IIQ.UseInstrInfo); if (!LHS_CR.isFullSet()) { if (RHS_CR.contains(LHS_CR)) return ConstantInt::getTrue(ITy); @@ -3062,44 +2818,6 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return nullptr; } -static Value *simplifyICmpWithAbsNabs(CmpInst::Predicate Pred, Value *Op0, - Value *Op1) { - // We need a comparison with a constant. - const APInt *C; - if (!match(Op1, m_APInt(C))) - return nullptr; - - // matchSelectPattern returns the negation part of an abs pattern in SP1. - // If the negate has an NSW flag, abs(INT_MIN) is undefined. Without that - // constraint, we can't make a contiguous range for the result of abs. - ICmpInst::Predicate AbsPred = ICmpInst::BAD_ICMP_PREDICATE; - Value *SP0, *SP1; - SelectPatternFlavor SPF = matchSelectPattern(Op0, SP0, SP1).Flavor; - if (SPF == SelectPatternFlavor::SPF_ABS && - cast<Instruction>(SP1)->hasNoSignedWrap()) - // The result of abs(X) is >= 0 (with nsw). - AbsPred = ICmpInst::ICMP_SGE; - if (SPF == SelectPatternFlavor::SPF_NABS) - // The result of -abs(X) is <= 0. - AbsPred = ICmpInst::ICMP_SLE; - - if (AbsPred == ICmpInst::BAD_ICMP_PREDICATE) - return nullptr; - - // If there is no intersection between abs/nabs and the range of this icmp, - // the icmp must be false. If the abs/nabs range is a subset of the icmp - // range, the icmp must be true. 
- APInt Zero = APInt::getNullValue(C->getBitWidth()); - ConstantRange AbsRange = ConstantRange::makeExactICmpRegion(AbsPred, Zero); - ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(Pred, *C); - if (AbsRange.intersectWith(CmpRange).isEmptySet()) - return getFalse(GetCompareTy(Op0)); - if (CmpRange.contains(AbsRange)) - return getTrue(GetCompareTy(Op0)); - - return nullptr; -} - /// Simplify integer comparisons where at least one operand of the compare /// matches an integer min/max idiom. static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, @@ -3319,9 +3037,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, std::swap(LHS, RHS); Pred = CmpInst::getSwappedPredicate(Pred); } + assert(!isa<UndefValue>(LHS) && "Unexpected icmp undef,%X"); Type *ITy = GetCompareTy(LHS); // The return type. + // For EQ and NE, we can always pick a value for the undef to make the + // predicate pass or fail, so we can return undef. + // Matches behavior in llvm::ConstantFoldCompareInstruction. + if (isa<UndefValue>(RHS) && ICmpInst::isEquality(Pred)) + return UndefValue::get(ITy); + // icmp X, X -> true/false // icmp X, undef -> true/false because undef could be X. if (LHS == RHS || isa<UndefValue>(RHS)) @@ -3531,9 +3256,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse)) return V; - if (Value *V = simplifyICmpWithAbsNabs(Pred, LHS, RHS)) - return V; - // Simplify comparisons of related pointers using a powerful, recursive // GEP-walk when we have target data available.. if (LHS->getType()->isPointerTy()) @@ -3647,6 +3369,8 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } // Handle fcmp with constant RHS. + // TODO: Use match with a specific FP value, so these work with vectors with + // undef lanes. const APFloat *C; if (match(RHS, m_APFloat(C))) { // Check whether the constant is an infinity. @@ -3675,28 +3399,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } } - if (C->isZero()) { - switch (Pred) { - case FCmpInst::FCMP_OGE: - if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI)) - return getTrue(RetTy); - break; - case FCmpInst::FCMP_UGE: - if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) - return getTrue(RetTy); - break; - case FCmpInst::FCMP_ULT: - if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI)) - return getFalse(RetTy); - break; - case FCmpInst::FCMP_OLT: - if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) - return getFalse(RetTy); - break; - default: - break; - } - } else if (C->isNegative()) { + if (C->isNegative() && !C->isNegZero()) { assert(!C->isNaN() && "Unexpected NaN constant!"); // TODO: We can catch more cases by using a range check rather than // relying on CannotBeOrderedLessThanZero. @@ -3719,6 +3422,67 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; } } + + // Check comparison of [minnum/maxnum with constant] with other constant. + const APFloat *C2; + if ((match(LHS, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_APFloat(C2))) && + C2->compare(*C) == APFloat::cmpLessThan) || + (match(LHS, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_APFloat(C2))) && + C2->compare(*C) == APFloat::cmpGreaterThan)) { + bool IsMaxNum = + cast<IntrinsicInst>(LHS)->getIntrinsicID() == Intrinsic::maxnum; + // The ordered relationship and minnum/maxnum guarantee that we do not + // have NaN constants, so ordered/unordered preds are handled the same. 
+ switch (Pred) { + case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_UEQ: + // minnum(X, LesserC) == C --> false + // maxnum(X, GreaterC) == C --> false + return getFalse(RetTy); + case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_UNE: + // minnum(X, LesserC) != C --> true + // maxnum(X, GreaterC) != C --> true + return getTrue(RetTy); + case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_UGE: + case FCmpInst::FCMP_OGT: case FCmpInst::FCMP_UGT: + // minnum(X, LesserC) >= C --> false + // minnum(X, LesserC) > C --> false + // maxnum(X, GreaterC) >= C --> true + // maxnum(X, GreaterC) > C --> true + return ConstantInt::get(RetTy, IsMaxNum); + case FCmpInst::FCMP_OLE: case FCmpInst::FCMP_ULE: + case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_ULT: + // minnum(X, LesserC) <= C --> true + // minnum(X, LesserC) < C --> true + // maxnum(X, GreaterC) <= C --> false + // maxnum(X, GreaterC) < C --> false + return ConstantInt::get(RetTy, !IsMaxNum); + default: + // TRUE/FALSE/ORD/UNO should be handled before this. + llvm_unreachable("Unexpected fcmp predicate"); + } + } + } + + if (match(RHS, m_AnyZeroFP())) { + switch (Pred) { + case FCmpInst::FCMP_OGE: + case FCmpInst::FCMP_ULT: + // Positive or zero X >= 0.0 --> true + // Positive or zero X < 0.0 --> false + if ((FMF.noNaNs() || isKnownNeverNaN(LHS, Q.TLI)) && + CannotBeOrderedLessThanZero(LHS, Q.TLI)) + return Pred == FCmpInst::FCMP_OGE ? getTrue(RetTy) : getFalse(RetTy); + break; + case FCmpInst::FCMP_UGE: + case FCmpInst::FCMP_OLT: + // Positive or zero or nan X >= 0.0 --> true + // Positive or zero or nan X < 0.0 --> false + if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) + return Pred == FCmpInst::FCMP_UGE ? getTrue(RetTy) : getFalse(RetTy); + break; + default: + break; + } } // If the comparison is with the result of a select instruction, check whether @@ -3904,27 +3668,44 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, Pred == ICmpInst::ICMP_EQ)) return V; - // Test for zero-shift-guard-ops around funnel shifts. These are used to - // avoid UB from oversized shifts in raw IR rotate patterns, but the - // intrinsics do not have that problem. + // Test for a bogus zero-shift-guard-op around funnel-shift or rotate. Value *ShAmt; auto isFsh = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(), m_Value(ShAmt)), m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X), m_Value(ShAmt))); - // (ShAmt != 0) ? fshl(X, *, ShAmt) : X --> fshl(X, *, ShAmt) - // (ShAmt != 0) ? fshr(*, X, ShAmt) : X --> fshr(*, X, ShAmt) // (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X // (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X - if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt) - return Pred == ICmpInst::ICMP_NE ? TrueVal : X; - - // (ShAmt == 0) ? X : fshl(X, *, ShAmt) --> fshl(X, *, ShAmt) - // (ShAmt == 0) ? X : fshr(*, X, ShAmt) --> fshr(*, X, ShAmt) + if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt && + Pred == ICmpInst::ICMP_EQ) + return X; // (ShAmt != 0) ? X : fshl(X, *, ShAmt) --> X // (ShAmt != 0) ? X : fshr(*, X, ShAmt) --> X - if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt) - return Pred == ICmpInst::ICMP_EQ ? FalseVal : X; + if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt && + Pred == ICmpInst::ICMP_NE) + return X; + + // Test for a zero-shift-guard-op around rotates. These are used to + // avoid UB from oversized shifts in raw IR rotate patterns, but the + // intrinsics do not have that problem. 
+ // We do not allow this transform for the general funnel shift case because + // that would not preserve the poison safety of the original code. + auto isRotate = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X), + m_Deferred(X), + m_Value(ShAmt)), + m_Intrinsic<Intrinsic::fshr>(m_Value(X), + m_Deferred(X), + m_Value(ShAmt))); + // (ShAmt != 0) ? fshl(X, X, ShAmt) : X --> fshl(X, X, ShAmt) + // (ShAmt != 0) ? fshr(X, X, ShAmt) : X --> fshr(X, X, ShAmt) + if (match(TrueVal, isRotate) && FalseVal == X && CmpLHS == ShAmt && + Pred == ICmpInst::ICMP_NE) + return TrueVal; + // (ShAmt == 0) ? X : fshl(X, X, ShAmt) --> fshl(X, X, ShAmt) + // (ShAmt == 0) ? X : fshr(X, X, ShAmt) --> fshr(X, X, ShAmt) + if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt && + Pred == ICmpInst::ICMP_EQ) + return FalseVal; } // Check for other compares that behave like bit test. @@ -4218,6 +3999,17 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx, if (isa<UndefValue>(Idx)) return UndefValue::get(Vec->getType()); + // Inserting an undef scalar? Assume it is the same value as the existing + // vector element. + if (isa<UndefValue>(Val)) + return Vec; + + // If we are extracting a value from a vector, then inserting it into the same + // place, that's the input vector: + // insertelt Vec, (extractelt Vec, Idx), Idx --> Vec + if (match(Val, m_ExtractElement(m_Specific(Vec), m_Specific(Idx)))) + return Vec; + return nullptr; } @@ -4495,6 +4287,33 @@ Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); } +static Constant *foldConstant(Instruction::UnaryOps Opcode, + Value *&Op, const SimplifyQuery &Q) { + if (auto *C = dyn_cast<Constant>(Op)) + return ConstantFoldUnaryOpOperand(Opcode, C, Q.DL); + return nullptr; +} + +/// Given the operand for an FNeg, see if we can fold the result. If not, this +/// returns null. +static Value *simplifyFNegInst(Value *Op, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (Constant *C = foldConstant(Instruction::FNeg, Op, Q)) + return C; + + Value *X; + // fneg (fneg X) ==> X + if (match(Op, m_FNeg(m_Value(X)))) + return X; + + return nullptr; +} + +Value *llvm::SimplifyFNegInst(Value *Op, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::simplifyFNegInst(Op, FMF, Q, RecursionLimit); +} + static Constant *propagateNaN(Constant *In) { // If the input is a vector with undef elements, just return a default NaN. if (!In->isNaN()) @@ -4536,16 +4355,22 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) return Op0; - // With nnan: (+/-0.0 - X) + X --> 0.0 (and commuted variant) + // With nnan: -X + X --> 0.0 (and commuted variant) // We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN. 
// Negative zeros are allowed because we always end up with positive zero: // X = -0.0: (-0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0 // X = -0.0: ( 0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0 // X = 0.0: (-0.0 - ( 0.0)) + ( 0.0) == (-0.0) + ( 0.0) == 0.0 // X = 0.0: ( 0.0 - ( 0.0)) + ( 0.0) == ( 0.0) + ( 0.0) == 0.0 - if (FMF.noNaNs() && (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) || - match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0))))) - return ConstantFP::getNullValue(Op0->getType()); + if (FMF.noNaNs()) { + if (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) || + match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0)))) + return ConstantFP::getNullValue(Op0->getType()); + + if (match(Op0, m_FNeg(m_Specific(Op1))) || + match(Op1, m_FNeg(m_Specific(Op0)))) + return ConstantFP::getNullValue(Op0->getType()); + } // (X - Y) + Y --> X // Y + (X - Y) --> X @@ -4578,14 +4403,17 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, return Op0; // fsub -0.0, (fsub -0.0, X) ==> X + // fsub -0.0, (fneg X) ==> X Value *X; if (match(Op0, m_NegZeroFP()) && - match(Op1, m_FSub(m_NegZeroFP(), m_Value(X)))) + match(Op1, m_FNeg(m_Value(X)))) return X; // fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored. + // fsub 0.0, (fneg X) ==> X if signed zeros are ignored. if (FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()) && - match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X)))) + (match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X))) || + match(Op1, m_FNeg(m_Value(X))))) return X; // fsub nnan x, x ==> 0.0 @@ -4722,6 +4550,42 @@ Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, //=== Helper functions for higher up the class hierarchy. +/// Given the operand for a UnaryOperator, see if we can fold the result. +/// If not, this returns null. +static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q, + unsigned MaxRecurse) { + switch (Opcode) { + case Instruction::FNeg: + return simplifyFNegInst(Op, FastMathFlags(), Q, MaxRecurse); + default: + llvm_unreachable("Unexpected opcode"); + } +} + +/// Given the operand for a UnaryOperator, see if we can fold the result. +/// If not, this returns null. +/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the +/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp. +static Value *simplifyFPUnOp(unsigned Opcode, Value *Op, + const FastMathFlags &FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + switch (Opcode) { + case Instruction::FNeg: + return simplifyFNegInst(Op, FMF, Q, MaxRecurse); + default: + return simplifyUnOp(Opcode, Op, Q, MaxRecurse); + } +} + +Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) { + return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit); +} + +Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit); +} + /// Given operands for a BinaryOperator, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, @@ -4885,22 +4749,6 @@ static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset, return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy); } -static bool maskIsAllZeroOrUndef(Value *Mask) { - auto *ConstMask = dyn_cast<Constant>(Mask); - if (!ConstMask) - return false; - if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask)) - return true; - for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E; - ++I) { - if (auto *MaskElt = ConstMask->getAggregateElement(I)) - if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt)) - continue; - return false; - } - return true; -} - static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, const SimplifyQuery &Q) { // Idempotent functions return the same result when called repeatedly. @@ -4941,8 +4789,32 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, case Intrinsic::log2: // log2(exp2(x)) -> x if (Q.CxtI->hasAllowReassoc() && - match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) return X; + (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) || + match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0), + m_Value(X))))) return X; + break; + case Intrinsic::log10: + // log10(pow(10.0, x)) -> x + if (Q.CxtI->hasAllowReassoc() && + match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0), + m_Value(X)))) return X; break; + case Intrinsic::floor: + case Intrinsic::trunc: + case Intrinsic::ceil: + case Intrinsic::round: + case Intrinsic::nearbyint: + case Intrinsic::rint: { + // floor (sitofp x) -> sitofp x + // floor (uitofp x) -> uitofp x + // + // Converting from int always results in a finite integral number or + // infinity. For either of those inputs, these rounding functions always + // return the same value, so the rounding can be eliminated. + if (match(Op0, m_SIToFP(m_Value())) || match(Op0, m_UIToFP(m_Value()))) + return Op0; + break; + } default: break; } @@ -4960,16 +4832,19 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, // X - X -> { 0, false } if (Op0 == Op1) return Constant::getNullValue(ReturnType); - // X - undef -> undef - // undef - X -> undef - if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) - return UndefValue::get(ReturnType); - break; + LLVM_FALLTHROUGH; case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: - // X + undef -> undef - if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) - return UndefValue::get(ReturnType); + // X - undef -> { undef, false } + // undef - X -> { undef, false } + // X + undef -> { undef, false } + // undef + x -> { undef, false } + if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) { + return ConstantStruct::get( + cast<StructType>(ReturnType), + {UndefValue::get(ReturnType->getStructElementType(0)), + Constant::getNullValue(ReturnType->getStructElementType(1))}); + } break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: @@ -5085,26 +4960,28 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, return nullptr; } -template <typename IterTy> -static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, - const SimplifyQuery &Q) { +static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { + // Intrinsics with no operands have some kind of side effect. Don't simplify. 
- unsigned NumOperands = std::distance(ArgBegin, ArgEnd); - if (NumOperands == 0) + unsigned NumOperands = Call->getNumArgOperands(); + if (!NumOperands) return nullptr; + Function *F = cast<Function>(Call->getCalledFunction()); Intrinsic::ID IID = F->getIntrinsicID(); if (NumOperands == 1) - return simplifyUnaryIntrinsic(F, ArgBegin[0], Q); + return simplifyUnaryIntrinsic(F, Call->getArgOperand(0), Q); if (NumOperands == 2) - return simplifyBinaryIntrinsic(F, ArgBegin[0], ArgBegin[1], Q); + return simplifyBinaryIntrinsic(F, Call->getArgOperand(0), + Call->getArgOperand(1), Q); // Handle intrinsics with 3 or more arguments. switch (IID) { - case Intrinsic::masked_load: { - Value *MaskArg = ArgBegin[2]; - Value *PassthruArg = ArgBegin[3]; + case Intrinsic::masked_load: + case Intrinsic::masked_gather: { + Value *MaskArg = Call->getArgOperand(2); + Value *PassthruArg = Call->getArgOperand(3); // If the mask is all zeros or undef, the "passthru" argument is the result. if (maskIsAllZeroOrUndef(MaskArg)) return PassthruArg; @@ -5112,7 +4989,8 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, } case Intrinsic::fshl: case Intrinsic::fshr: { - Value *Op0 = ArgBegin[0], *Op1 = ArgBegin[1], *ShAmtArg = ArgBegin[2]; + Value *Op0 = Call->getArgOperand(0), *Op1 = Call->getArgOperand(1), + *ShAmtArg = Call->getArgOperand(2); // If both operands are undef, the result is undef. if (match(Op0, m_Undef()) && match(Op1, m_Undef())) @@ -5120,15 +4998,14 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, // If shift amount is undef, assume it is zero. if (match(ShAmtArg, m_Undef())) - return ArgBegin[IID == Intrinsic::fshl ? 0 : 1]; + return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1); const APInt *ShAmtC; if (match(ShAmtArg, m_APInt(ShAmtC))) { // If there's effectively no shift, return the 1st arg or 2nd arg. - // TODO: For vectors, we could check each element of a non-splat constant. APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth()); if (ShAmtC->urem(BitWidth).isNullValue()) - return ArgBegin[IID == Intrinsic::fshl ? 0 : 1]; + return Call->getArgOperand(IID == Intrinsic::fshl ? 
0 : 1); } return nullptr; } @@ -5137,58 +5014,36 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, } } -template <typename IterTy> -static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin, - IterTy ArgEnd, const SimplifyQuery &Q, - unsigned MaxRecurse) { - Type *Ty = V->getType(); - if (PointerType *PTy = dyn_cast<PointerType>(Ty)) - Ty = PTy->getElementType(); - FunctionType *FTy = cast<FunctionType>(Ty); +Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) { + Value *Callee = Call->getCalledValue(); // call undef -> undef // call null -> undef - if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V)) - return UndefValue::get(FTy->getReturnType()); + if (isa<UndefValue>(Callee) || isa<ConstantPointerNull>(Callee)) + return UndefValue::get(Call->getType()); - Function *F = dyn_cast<Function>(V); + Function *F = dyn_cast<Function>(Callee); if (!F) return nullptr; if (F->isIntrinsic()) - if (Value *Ret = simplifyIntrinsic(F, ArgBegin, ArgEnd, Q)) + if (Value *Ret = simplifyIntrinsic(Call, Q)) return Ret; - if (!canConstantFoldCallTo(CS, F)) + if (!canConstantFoldCallTo(Call, F)) return nullptr; SmallVector<Constant *, 4> ConstantArgs; - ConstantArgs.reserve(ArgEnd - ArgBegin); - for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) { - Constant *C = dyn_cast<Constant>(*I); + unsigned NumArgs = Call->getNumArgOperands(); + ConstantArgs.reserve(NumArgs); + for (auto &Arg : Call->args()) { + Constant *C = dyn_cast<Constant>(&Arg); if (!C) return nullptr; ConstantArgs.push_back(C); } - return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI); -} - -Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, - User::op_iterator ArgBegin, User::op_iterator ArgEnd, - const SimplifyQuery &Q) { - return ::SimplifyCall(CS, V, ArgBegin, ArgEnd, Q, RecursionLimit); -} - -Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, - ArrayRef<Value *> Args, const SimplifyQuery &Q) { - return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit); -} - -Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) { - CallSite CS(const_cast<Instruction*>(ICS.getInstruction())); - return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), - Q, RecursionLimit); + return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI); } /// See if we can compute a simplified version of this instruction. @@ -5203,6 +5058,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, default: Result = ConstantFoldInstruction(I, Q.DL, Q.TLI); break; + case Instruction::FNeg: + Result = SimplifyFNegInst(I->getOperand(0), I->getFastMathFlags(), Q); + break; case Instruction::FAdd: Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), I->getFastMathFlags(), Q); @@ -5327,8 +5185,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, Result = SimplifyPHINode(cast<PHINode>(I), Q); break; case Instruction::Call: { - CallSite CS(cast<CallInst>(I)); - Result = SimplifyCall(CS, Q); + Result = SimplifyCall(cast<CallInst>(I), Q); break; } #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp index 6d5de22cb93f..07d6e27c13be 100644 --- a/lib/Analysis/Interval.cpp +++ b/lib/Analysis/Interval.cpp @@ -1,9 +1,8 @@ //===- Interval.cpp - Interval class code ---------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp index c777d91b67c6..d12db010db6a 100644 --- a/lib/Analysis/IntervalPartition.cpp +++ b/lib/Analysis/IntervalPartition.cpp @@ -1,9 +1,8 @@ //===- IntervalPartition.cpp - Interval Partition module code -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp deleted file mode 100644 index 000fe5ddad54..000000000000 --- a/lib/Analysis/IteratedDominanceFrontier.cpp +++ /dev/null @@ -1,110 +0,0 @@ -//===- IteratedDominanceFrontier.cpp - Compute IDF ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Compute iterated dominance frontiers using a linear time algorithm. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" -#include <queue> - -namespace llvm { - -template <class NodeTy, bool IsPostDom> -void IDFCalculator<NodeTy, IsPostDom>::calculate( - SmallVectorImpl<BasicBlock *> &PHIBlocks) { - // Use a priority queue keyed on dominator tree level so that inserted nodes - // are handled from the bottom of the dominator tree upwards. We also augment - // the level with a DFS number to ensure that the blocks are ordered in a - // deterministic way. - typedef std::pair<DomTreeNode *, std::pair<unsigned, unsigned>> - DomTreeNodePair; - typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, - less_second> IDFPriorityQueue; - IDFPriorityQueue PQ; - - DT.updateDFSNumbers(); - - for (BasicBlock *BB : *DefBlocks) { - if (DomTreeNode *Node = DT.getNode(BB)) - PQ.push({Node, std::make_pair(Node->getLevel(), Node->getDFSNumIn())}); - } - - SmallVector<DomTreeNode *, 32> Worklist; - SmallPtrSet<DomTreeNode *, 32> VisitedPQ; - SmallPtrSet<DomTreeNode *, 32> VisitedWorklist; - - while (!PQ.empty()) { - DomTreeNodePair RootPair = PQ.top(); - PQ.pop(); - DomTreeNode *Root = RootPair.first; - unsigned RootLevel = RootPair.second.first; - - // Walk all dominator tree children of Root, inspecting their CFG edges with - // targets elsewhere on the dominator tree. Only targets whose level is at - // most Root's level are added to the iterated dominance frontier of the - // definition set. 
- - Worklist.clear(); - Worklist.push_back(Root); - VisitedWorklist.insert(Root); - - while (!Worklist.empty()) { - DomTreeNode *Node = Worklist.pop_back_val(); - BasicBlock *BB = Node->getBlock(); - // Succ is the successor in the direction we are calculating IDF, so it is - // successor for IDF, and predecessor for Reverse IDF. - auto DoWork = [&](BasicBlock *Succ) { - DomTreeNode *SuccNode = DT.getNode(Succ); - - // Quickly skip all CFG edges that are also dominator tree edges instead - // of catching them below. - if (SuccNode->getIDom() == Node) - return; - - const unsigned SuccLevel = SuccNode->getLevel(); - if (SuccLevel > RootLevel) - return; - - if (!VisitedPQ.insert(SuccNode).second) - return; - - BasicBlock *SuccBB = SuccNode->getBlock(); - if (useLiveIn && !LiveInBlocks->count(SuccBB)) - return; - - PHIBlocks.emplace_back(SuccBB); - if (!DefBlocks->count(SuccBB)) - PQ.push(std::make_pair( - SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn()))); - }; - - if (GD) { - for (auto Pair : children< - std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, NodeTy>>( - {GD, BB})) - DoWork(Pair.second); - } else { - for (auto *Succ : children<NodeTy>(BB)) - DoWork(Succ); - } - - for (auto DomChild : *Node) { - if (VisitedWorklist.insert(DomChild).second) - Worklist.push_back(DomChild); - } - } - } -} - -template class IDFCalculator<BasicBlock *, false>; -template class IDFCalculator<Inverse<BasicBlock *>, true>; -} diff --git a/lib/Analysis/LazyBlockFrequencyInfo.cpp b/lib/Analysis/LazyBlockFrequencyInfo.cpp index 93c23bca96af..439758560284 100644 --- a/lib/Analysis/LazyBlockFrequencyInfo.cpp +++ b/lib/Analysis/LazyBlockFrequencyInfo.cpp @@ -1,9 +1,8 @@ //===- LazyBlockFrequencyInfo.cpp - Lazy Block Frequency Analysis ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp index 429b78c3a47e..f2592c26b373 100644 --- a/lib/Analysis/LazyBranchProbabilityInfo.cpp +++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp @@ -1,9 +1,8 @@ //===- LazyBranchProbabilityInfo.cpp - Lazy Branch Probability Analysis ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index 3f22ada803c9..797fcf516429 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -1,9 +1,8 @@ //===- LazyCallGraph.cpp - Analysis of a Module's call graph --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -173,6 +172,19 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref); } + // Externally visible aliases of internal functions are also viable entry + // edges to the module. + for (auto &A : M.aliases()) { + if (A.hasLocalLinkage()) + continue; + if (Function* F = dyn_cast<Function>(A.getAliasee())) { + LLVM_DEBUG(dbgs() << " Adding '" << F->getName() + << "' with alias '" << A.getName() + << "' to entry set of the graph.\n"); + addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(*F), Edge::Ref); + } + } + // Now add entry nodes for functions reachable via initializers to globals. SmallVector<Constant *, 16> Worklist; SmallPtrSet<Constant *, 16> Visited; diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 110c085d3f35..542ff709d475 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -1,9 +1,8 @@ //===- LazyValueInfo.cpp - Value constraint analysis ------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -423,10 +422,18 @@ namespace { BasicBlock *BB); Optional<ConstantRange> getRangeForOperand(unsigned Op, Instruction *I, BasicBlock *BB); + bool solveBlockValueBinaryOpImpl( + ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB, + std::function<ConstantRange(const ConstantRange &, + const ConstantRange &)> OpFn); bool solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BBI, BasicBlock *BB); bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI, BasicBlock *BB); + bool solveBlockValueOverflowIntrinsic( + ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB); + bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II, + BasicBlock *BB); void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, ValueLatticeElement &BBLV, Instruction *BBI); @@ -625,7 +632,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res, // and the like to prove non-nullness, but it's not clear that's worth it // compile time wise. The context-insensitive value walk done inside // isKnownNonZero gets most of the profitable cases at much less expense. - // This does mean that we have a sensativity to where the defining + // This does mean that we have a sensitivity to where the defining // instruction is placed, even if it could legally be hoisted much higher. // That is unfortunate. 
PointerType *PT = dyn_cast<PointerType>(BBI->getType()); @@ -639,6 +646,14 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res, if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI)) return solveBlockValueBinaryOp(Res, BO, BB); + + if (auto *EVI = dyn_cast<ExtractValueInst>(BBI)) + if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand())) + if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0) + return solveBlockValueOverflowIntrinsic(Res, WO, BB); + + if (auto *II = dyn_cast<IntrinsicInst>(BBI)) + return solveBlockValueIntrinsic(Res, II, BB); } LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() @@ -824,7 +839,9 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( if (!GuardDecl || GuardDecl->use_empty()) return; - for (Instruction &I : make_range(BBI->getIterator().getReverse(), + if (BBI->getIterator() == BBI->getParent()->begin()) + return; + for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()), BBI->getParent()->rend())) { Value *Cond = nullptr; if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond)))) @@ -892,7 +909,28 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV, return true; } - // TODO: ABS, NABS from the SelectPatternResult + if (SPR.Flavor == SPF_ABS) { + if (LHS == SI->getTrueValue()) { + BBLV = ValueLatticeElement::getRange(TrueCR.abs()); + return true; + } + if (LHS == SI->getFalseValue()) { + BBLV = ValueLatticeElement::getRange(FalseCR.abs()); + return true; + } + } + + if (SPR.Flavor == SPF_NABS) { + ConstantRange Zero(APInt::getNullValue(TrueCR.getBitWidth())); + if (LHS == SI->getTrueValue()) { + BBLV = ValueLatticeElement::getRange(Zero.sub(TrueCR.abs())); + return true; + } + if (LHS == SI->getFalseValue()) { + BBLV = ValueLatticeElement::getRange(Zero.sub(FalseCR.abs())); + return true; + } + } } // Can we constrain the facts about the true and false values by using the @@ -962,7 +1000,7 @@ Optional<ConstantRange> LazyValueInfoImpl::getRangeForOperand(unsigned Op, const unsigned OperandBitWidth = DL.getTypeSizeInBits(I->getOperand(Op)->getType()); - ConstantRange Range = ConstantRange(OperandBitWidth); + ConstantRange Range = ConstantRange::getFull(OperandBitWidth); if (hasBlockValue(I->getOperand(Op), BB)) { ValueLatticeElement Val = getBlockValue(I->getOperand(Op), BB); intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I); @@ -1018,56 +1056,83 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV, return true; } +bool LazyValueInfoImpl::solveBlockValueBinaryOpImpl( + ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB, + std::function<ConstantRange(const ConstantRange &, + const ConstantRange &)> OpFn) { + // Figure out the ranges of the operands. If that fails, use a + // conservative range, but apply the transfer rule anyways. This + // lets us pick up facts from expressions like "and i32 (call i32 + // @foo()), 32" + Optional<ConstantRange> LHSRes = getRangeForOperand(0, I, BB); + Optional<ConstantRange> RHSRes = getRangeForOperand(1, I, BB); + if (!LHSRes.hasValue() || !RHSRes.hasValue()) + // More work to do before applying this transfer rule. 
+ return false; + + ConstantRange LHSRange = LHSRes.getValue(); + ConstantRange RHSRange = RHSRes.getValue(); + BBLV = ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange)); + return true; +} + bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BO, BasicBlock *BB) { assert(BO->getOperand(0)->getType()->isSized() && "all operands to binary operators are sized"); - - // Filter out operators we don't know how to reason about before attempting to - // recurse on our operand(s). This can cut a long search short if we know - // we're not going to be able to get any useful information anyways. - switch (BO->getOpcode()) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - case Instruction::UDiv: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - // continue into the code below - break; - default: - // Unhandled instructions are overdefined. + if (BO->getOpcode() == Instruction::Xor) { + // Xor is the only operation not supported by ConstantRange::binaryOp(). LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined (unknown binary operator).\n"); BBLV = ValueLatticeElement::getOverdefined(); return true; - }; - - // Figure out the ranges of the operands. If that fails, use a - // conservative range, but apply the transfer rule anyways. This - // lets us pick up facts from expressions like "and i32 (call i32 - // @foo()), 32" - Optional<ConstantRange> LHSRes = getRangeForOperand(0, BO, BB); - Optional<ConstantRange> RHSRes = getRangeForOperand(1, BO, BB); + } - if (!LHSRes.hasValue() || !RHSRes.hasValue()) - // More work to do before applying this transfer rule. - return false; + return solveBlockValueBinaryOpImpl(BBLV, BO, BB, + [BO](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.binaryOp(BO->getOpcode(), CR2); + }); +} - ConstantRange LHSRange = LHSRes.getValue(); - ConstantRange RHSRange = RHSRes.getValue(); +bool LazyValueInfoImpl::solveBlockValueOverflowIntrinsic( + ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB) { + return solveBlockValueBinaryOpImpl(BBLV, WO, BB, + [WO](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.binaryOp(WO->getBinaryOp(), CR2); + }); +} - // NOTE: We're currently limited by the set of operations that ConstantRange - // can evaluate symbolically. Enhancing that set will allows us to analyze - // more definitions. 
- Instruction::BinaryOps BinOp = BO->getOpcode(); - BBLV = ValueLatticeElement::getRange(LHSRange.binaryOp(BinOp, RHSRange)); - return true; +bool LazyValueInfoImpl::solveBlockValueIntrinsic( + ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB) { + switch (II->getIntrinsicID()) { + case Intrinsic::uadd_sat: + return solveBlockValueBinaryOpImpl(BBLV, II, BB, + [](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.uadd_sat(CR2); + }); + case Intrinsic::usub_sat: + return solveBlockValueBinaryOpImpl(BBLV, II, BB, + [](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.usub_sat(CR2); + }); + case Intrinsic::sadd_sat: + return solveBlockValueBinaryOpImpl(BBLV, II, BB, + [](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.sadd_sat(CR2); + }); + case Intrinsic::ssub_sat: + return solveBlockValueBinaryOpImpl(BBLV, II, BB, + [](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.ssub_sat(CR2); + }); + default: + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined (unknown intrinsic).\n"); + BBLV = ValueLatticeElement::getOverdefined(); + return true; + } } static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, @@ -1133,6 +1198,28 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, return ValueLatticeElement::getOverdefined(); } +// Handle conditions of the form +// extractvalue(op.with.overflow(%x, C), 1). +static ValueLatticeElement getValueFromOverflowCondition( + Value *Val, WithOverflowInst *WO, bool IsTrueDest) { + // TODO: This only works with a constant RHS for now. We could also compute + // the range of the RHS, but this doesn't fit into the current structure of + // the edge value calculation. + const APInt *C; + if (WO->getLHS() != Val || !match(WO->getRHS(), m_APInt(C))) + return ValueLatticeElement::getOverdefined(); + + // Calculate the possible values of %x for which no overflow occurs. + ConstantRange NWR = ConstantRange::makeExactNoWrapRegion( + WO->getBinaryOp(), *C, WO->getNoWrapKind()); + + // If overflow is false, %x is constrained to NWR. If overflow is true, %x is + // constrained to it's inverse (all values that might cause overflow). + if (IsTrueDest) + NWR = NWR.inverse(); + return ValueLatticeElement::getRange(NWR); +} + static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest, DenseMap<Value*, ValueLatticeElement> &Visited); @@ -1143,6 +1230,11 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest, if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond)) return getValueFromICmpCondition(Val, ICI, isTrueDest); + if (auto *EVI = dyn_cast<ExtractValueInst>(Cond)) + if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand())) + if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 1) + return getValueFromOverflowCondition(Val, WO, isTrueDest); + // Handle conditions in the form of (cond1 && cond2), we know that on the // true dest path both of the conditions hold. 
Similarly for conditions of // the form (cond1 || cond2), we know that on the false dest path neither @@ -1575,14 +1667,14 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB, ValueLatticeElement Result = getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); if (Result.isUndefined()) - return ConstantRange(Width, /*isFullSet=*/false); + return ConstantRange::getEmpty(Width); if (Result.isConstantRange()) return Result.getConstantRange(); // We represent ConstantInt constants as constant ranges but other kinds // of integer constants, i.e. ConstantExpr will be tagged as constants assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) && "ConstantInt value must be represented as constantrange"); - return ConstantRange(Width, /*isFullSet=*/true); + return ConstantRange::getFull(Width); } /// Determine whether the specified value is known to be a @@ -1614,14 +1706,14 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V, getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); if (Result.isUndefined()) - return ConstantRange(Width, /*isFullSet=*/false); + return ConstantRange::getEmpty(Width); if (Result.isConstantRange()) return Result.getConstantRange(); // We represent ConstantInt constants as constant ranges but other kinds // of integer constants, i.e. ConstantExpr will be tagged as constants assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) && "ConstantInt value must be represented as constantrange"); - return ConstantRange(Width, /*isFullSet=*/true); + return ConstantRange::getFull(Width); } static LazyValueInfo::Tristate @@ -1711,7 +1803,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, // through would still be correct. const DataLayout &DL = CxtI->getModule()->getDataLayout(); if (V->getType()->isPointerTy() && C->isNullValue() && - isKnownNonZero(V->stripPointerCasts(), DL)) { + isKnownNonZero(V->stripPointerCastsSameRepresentation(), DL)) { if (Pred == ICmpInst::ICMP_EQ) return LazyValueInfo::False; else if (Pred == ICmpInst::ICMP_NE) diff --git a/lib/Analysis/LegacyDivergenceAnalysis.cpp b/lib/Analysis/LegacyDivergenceAnalysis.cpp index 5540859ebdda..52212e1c42aa 100644 --- a/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -1,10 +1,9 @@ //===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis //Implementation -==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 5d0a627f8426..d28b8a189d4b 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -1,9 +1,8 @@ //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -268,10 +267,14 @@ void Lint::visitCallSite(CallSite CS) { if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) { AttributeList PAL = CS.getAttributes(); unsigned ArgNo = 0; - for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) { + for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; + ++BI, ++ArgNo) { // Skip ByVal arguments since they will be memcpy'd to the callee's // stack so we're not really passing the pointer anyway. - if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal)) + if (PAL.hasParamAttribute(ArgNo, Attribute::ByVal)) + continue; + // If both arguments are readonly, they have no dependence. + if (Formal->onlyReadsMemory() && CS.onlyReadsMemory(ArgNo)) continue; if (AI != BI && (*BI)->getType()->isPointerTy()) { AliasResult Result = AA->alias(*AI, *BI); diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 8129795bc0c1..31da4e9ec783 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -1,9 +1,8 @@ //===- Loads.cpp - Local load analysis ------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -126,7 +125,8 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, Visited); } -bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, + unsigned Align, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { @@ -134,8 +134,6 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that // information here. - Type *VTy = V->getType(); - Type *Ty = VTy->getPointerElementType(); // Require ABI alignment for loads without alignment specification if (Align == 0) @@ -146,14 +144,16 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, SmallPtrSet<const Value *, 32> Visited; return ::isDereferenceableAndAlignedPointer( - V, Align, APInt(DL.getIndexTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL, - CtxI, DT, Visited); + V, Align, + APInt(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)), + DL, CtxI, DT, Visited); } -bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, +bool llvm::isDereferenceablePointer(const Value *V, Type *Ty, + const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { - return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT); + return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT); } /// Test if A and B will obviously have the same value. @@ -198,7 +198,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { /// /// This uses the pointee type to determine how many bytes need to be safe to /// load from the pointer. 
-bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, +bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { @@ -209,7 +209,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, // If DT is not specified we can't make context-sensitive query const Instruction* CtxI = DT ? ScanFrom : nullptr; - if (isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT)) + if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT)) return true; int64_t ByteOffset = 0; @@ -281,9 +281,17 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, Value *AccessedPtr; unsigned AccessedAlign; if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + // Ignore volatile loads. The execution of a volatile load cannot + // be used to prove an address is backed by regular memory; it can, + // for example, point to an MMIO register. + if (LI->isVolatile()) + continue; AccessedPtr = LI->getPointerOperand(); AccessedAlign = LI->getAlignment(); } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + // Ignore volatile stores (see comment for loads). + if (SI->isVolatile()) + continue; AccessedPtr = SI->getPointerOperand(); AccessedAlign = SI->getAlignment(); } else @@ -306,7 +314,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, return false; } -/// DefMaxInstsToScan - the default number of maximum instructions +bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align, + const DataLayout &DL, + Instruction *ScanFrom, + const DominatorTree *DT) { + APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)); + return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT); +} + + /// DefMaxInstsToScan - the default number of maximum instructions /// to scan in the block, used by FindAvailableLoadedValue(). /// FindAvailableLoadedValue() was introduced in r60148, to improve jump /// threading in part by eliminating partially redundant loads. diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 7f3480f512ab..36bd9a8b7ea7 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -1,9 +1,8 @@ //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -843,7 +842,7 @@ void AccessAnalysis::processMemAccesses() { bool SetHasWrite = false; // Map of pointers to last access encountered. - typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap; + typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap; UnderlyingObjToAccessMap ObjToLastAccess; // Set of access to check after all writes have been processed. @@ -904,13 +903,13 @@ void AccessAnalysis::processMemAccesses() { // Create sets of pointers connected by a shared alias set and // underlying object. 
- typedef SmallVector<Value *, 16> ValueVector; + typedef SmallVector<const Value *, 16> ValueVector; ValueVector TempObjects; GetUnderlyingObjects(Ptr, TempObjects, DL, LI); LLVM_DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n"); - for (Value *UnderlyingObj : TempObjects) { + for (const Value *UnderlyingObj : TempObjects) { // nullptr never alias, don't join sets for pointer that have "null" // in their UnderlyingObjects list. if (isa<ConstantPointerNull>(UnderlyingObj) && @@ -1014,7 +1013,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, return 0; } - // The accesss function must stride over the innermost loop. + // The access function must stride over the innermost loop. if (Lp != AR->getLoop()) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << *Ptr << " SCEV: " << *AR << "\n"); @@ -1086,7 +1085,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, if (Assume) { // We can avoid this case by adding a run-time check. LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either " - << "inbouds or in address space 0 may wrap:\n" + << "inbounds or in address space 0 may wrap:\n" << "LAA: Pointer: " << *Ptr << "\n" << "LAA: SCEV: " << *AR << "\n" << "LAA: Added an overflow assumption\n"); @@ -1145,10 +1144,9 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL, std::iota(SortedIndices.begin(), SortedIndices.end(), 0); // Sort the memory accesses and keep the order of their uses in UseOrder. - std::stable_sort(SortedIndices.begin(), SortedIndices.end(), - [&OffValPairs](unsigned Left, unsigned Right) { - return OffValPairs[Left].first < OffValPairs[Right].first; - }); + llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) { + return OffValPairs[Left].first < OffValPairs[Right].first; + }); // Check if the order is consecutive already. if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) { @@ -1346,7 +1344,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, // where Step is the absolute stride of the memory accesses in bytes, // then there is no dependence. // - // Ratioanle: + // Rationale: // We basically want to check if the absolute distance (|Dist/Step|) // is >= the loop iteration count (or > BackedgeTakenCount). // This is equivalent to the Strong SIV Test (Practical Dependence Testing, @@ -1369,7 +1367,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, // The dependence distance can be positive/negative, so we sign extend Dist; // The multiplication of the absolute stride in bytes and the - // backdgeTakenCount is non-negative, so we zero extend Product. + // backedgeTakenCount is non-negative, so we zero extend Product. if (DistTypeSize > ProductTypeSize) CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType()); else @@ -1780,6 +1778,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, unsigned NumReads = 0; unsigned NumReadWrites = 0; + bool HasComplexMemInst = false; + + // A runtime check is only legal to insert if there are no convergent calls. + HasConvergentOp = false; + PtrRtChecking->Pointers.clear(); PtrRtChecking->Need = false; @@ -1787,8 +1790,25 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, // For each block. for (BasicBlock *BB : TheLoop->blocks()) { - // Scan the BB and collect legal loads and stores. + // Scan the BB and collect legal loads and stores. Also detect any + // convergent instructions. 
for (Instruction &I : *BB) { + if (auto *Call = dyn_cast<CallBase>(&I)) { + if (Call->isConvergent()) + HasConvergentOp = true; + } + + // With both a non-vectorizable memory instruction and a convergent + // operation, found in this loop, no reason to continue the search. + if (HasComplexMemInst && HasConvergentOp) { + CanVecMem = false; + return; + } + + // Avoid hitting recordAnalysis multiple times. + if (HasComplexMemInst) + continue; + // If this is a load, save it. If this instruction can read from memory // but is not a load, then we quit. Notice that we don't handle function // calls that read or write. @@ -1807,12 +1827,18 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, continue; auto *Ld = dyn_cast<LoadInst>(&I); - if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { + if (!Ld) { + recordAnalysis("CantVectorizeInstruction", Ld) + << "instruction cannot be vectorized"; + HasComplexMemInst = true; + continue; + } + if (!Ld->isSimple() && !IsAnnotatedParallel) { recordAnalysis("NonSimpleLoad", Ld) << "read with atomic ordering or volatile read"; LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n"); - CanVecMem = false; - return; + HasComplexMemInst = true; + continue; } NumLoads++; Loads.push_back(Ld); @@ -1828,15 +1854,15 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, if (!St) { recordAnalysis("CantVectorizeInstruction", St) << "instruction cannot be vectorized"; - CanVecMem = false; - return; + HasComplexMemInst = true; + continue; } if (!St->isSimple() && !IsAnnotatedParallel) { recordAnalysis("NonSimpleStore", St) << "write with atomic ordering or volatile write"; LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n"); - CanVecMem = false; - return; + HasComplexMemInst = true; + continue; } NumStores++; Stores.push_back(St); @@ -1847,6 +1873,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, } // Next instr. } // Next block. + if (HasComplexMemInst) { + CanVecMem = false; + return; + } + // Now we have two lists that hold the loads and the stores. // Next, we find the pointers that they use. @@ -1964,7 +1995,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, } LLVM_DEBUG( - dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); + dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n"); CanVecMem = true; if (Accesses.isDependencyCheckNeeded()) { @@ -1999,6 +2030,15 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, } } + if (HasConvergentOp) { + recordAnalysis("CantInsertRuntimeCheckWithConvergent") + << "cannot add control dependency to convergent operation"; + LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check " + "would be needed with a convergent operation\n"); + CanVecMem = false; + return; + } + if (CanVecMem) LLVM_DEBUG( dbgs() << "LAA: No unsafe dependent memory operations in loop. We" @@ -2252,7 +2292,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { // Match the types so we can compare the stride and the BETakenCount. // The Stride can be positive/negative, so we sign extend Stride; - // The backdgeTakenCount is non-negative, so we zero extend BETakenCount. + // The backedgeTakenCount is non-negative, so we zero extend BETakenCount. 
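
The analyzeLoop changes in this file follow a deliberate two-flag pattern: keep scanning after the first non-vectorizable memory instruction so every offender still gets a remark, and independently remember whether any call is convergent, since a memory runtime check would add a control dependency that convergent operations cannot tolerate. A generic sketch of that scan with stand-in types (Inst, NeedRuntimeCheck, and the field names are illustrative, not LLVM API):

    #include <vector>

    struct Inst {
      bool Convergent;  // models CallBase::isConvergent()
      bool ComplexMem;  // models "memory instruction we cannot vectorize"
    };

    static bool scanLoopBody(const std::vector<Inst> &Body, bool NeedRuntimeCheck) {
      bool HasComplexMemInst = false;
      bool HasConvergentOp = false;
      for (const Inst &I : Body) {
        if (I.Convergent)
          HasConvergentOp = true;
        // Both problems found: nothing more to learn, bail immediately.
        if (HasComplexMemInst && HasConvergentOp)
          return false;
        if (HasComplexMemInst)
          continue; // keep scanning only to look for convergent calls
        if (I.ComplexMem)
          HasComplexMemInst = true; // defer the bail-out until the scan ends
      }
      if (HasComplexMemInst)
        return false;
      // Runtime checks are only legal when no convergent operation is present.
      return !(NeedRuntimeCheck && HasConvergentOp);
    }
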
const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType()); uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType()); @@ -2287,6 +2327,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)), DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false), + HasConvergentOp(false), HasDependenceInvolvingLoopInvariantAddress(false) { if (canAnalyzeLoop()) analyzeLoop(AA, LI, TLI, DT); @@ -2303,6 +2344,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { OS << "\n"; } + if (HasConvergentOp) + OS.indent(Depth) << "Has convergent operation in loop\n"; + if (Report) OS.indent(Depth) << "Report: " << Report->getMsg() << "\n"; diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp index 2a3b29d7fbca..a10a87ce113b 100644 --- a/lib/Analysis/LoopAnalysisManager.cpp +++ b/lib/Analysis/LoopAnalysisManager.cpp @@ -1,9 +1,8 @@ //===- LoopAnalysisManager.cpp - Loop analysis management -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -19,11 +18,6 @@ using namespace llvm; namespace llvm { -/// Enables memory ssa as a dependency for loop passes in legacy pass manager. -cl::opt<bool> EnableMSSALoopDependency( - "enable-mssa-loop-dependency", cl::Hidden, cl::init(false), - cl::desc("Enable MemorySSA dependency for loop pass manager")); - // Explicit template instantiations and specialization definitions for core // template typedefs. template class AllAnalysesOn<Loop>; @@ -147,8 +141,6 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() { PA.preserve<LoopAnalysis>(); PA.preserve<LoopAnalysisManagerFunctionProxy>(); PA.preserve<ScalarEvolutionAnalysis>(); - if (EnableMSSALoopDependency) - PA.preserve<MemorySSAAnalysis>(); // FIXME: What we really want to do here is preserve an AA category, but that // concept doesn't exist yet. PA.preserve<AAManager>(); diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index ef2b1257015c..aa5da0859805 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -1,9 +1,8 @@ //===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,8 +17,12 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" @@ -65,15 +68,16 @@ bool Loop::hasLoopInvariantOperands(const Instruction *I) const { return all_of(I->operands(), [this](Value *V) { return isLoopInvariant(V); }); } -bool Loop::makeLoopInvariant(Value *V, bool &Changed, - Instruction *InsertPt) const { +bool Loop::makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt, + MemorySSAUpdater *MSSAU) const { if (Instruction *I = dyn_cast<Instruction>(V)) - return makeLoopInvariant(I, Changed, InsertPt); + return makeLoopInvariant(I, Changed, InsertPt, MSSAU); return true; // All non-instructions are loop-invariant. } bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, - Instruction *InsertPt) const { + Instruction *InsertPt, + MemorySSAUpdater *MSSAU) const { // Test if the value is already loop-invariant. if (isLoopInvariant(I)) return true; @@ -94,11 +98,14 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, } // Don't hoist instructions with loop-variant operands. for (Value *Operand : I->operands()) - if (!makeLoopInvariant(Operand, Changed, InsertPt)) + if (!makeLoopInvariant(Operand, Changed, InsertPt, MSSAU)) return false; // Hoist. I->moveBefore(InsertPt); + if (MSSAU) + if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(I)) + MSSAU->moveToPlace(MUD, InsertPt->getParent(), MemorySSA::End); // There is possibility of hoisting this instruction above some arbitrary // condition. Any metadata defined on it can be control dependent on this @@ -110,24 +117,37 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, return true; } -PHINode *Loop::getCanonicalInductionVariable() const { +bool Loop::getIncomingAndBackEdge(BasicBlock *&Incoming, + BasicBlock *&Backedge) const { BasicBlock *H = getHeader(); - BasicBlock *Incoming = nullptr, *Backedge = nullptr; + Incoming = nullptr; + Backedge = nullptr; pred_iterator PI = pred_begin(H); assert(PI != pred_end(H) && "Loop must have at least one backedge!"); Backedge = *PI++; if (PI == pred_end(H)) - return nullptr; // dead loop + return false; // dead loop Incoming = *PI++; if (PI != pred_end(H)) - return nullptr; // multiple backedges? + return false; // multiple backedges? if (contains(Incoming)) { if (contains(Backedge)) - return nullptr; + return false; std::swap(Incoming, Backedge); } else if (!contains(Backedge)) + return false; + + assert(Incoming && Backedge && "expected non-null incoming and backedges"); + return true; +} + +PHINode *Loop::getCanonicalInductionVariable() const { + BasicBlock *H = getHeader(); + + BasicBlock *Incoming = nullptr, *Backedge = nullptr; + if (!getIncomingAndBackEdge(Incoming, Backedge)) return nullptr; // Loop over all of the PHI nodes, looking for a canonical indvar. @@ -146,6 +166,218 @@ PHINode *Loop::getCanonicalInductionVariable() const { return nullptr; } +/// Get the latch condition instruction. 
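
The new Loop::getIncomingAndBackEdge helper above rests on one invariant: a usable loop header has exactly two predecessors, one inside the loop (the backedge) and one outside (the incoming edge), in either order. The same classification over a toy CFG, as a self-contained sketch (Block and the LoopBlocks set are stand-ins, not LLVM types):

    #include <unordered_set>
    #include <utility>
    #include <vector>

    struct Block { std::vector<Block *> Preds; };

    static bool classifyHeaderPreds(Block *Header,
                                    const std::unordered_set<Block *> &LoopBlocks,
                                    Block *&Incoming, Block *&Backedge) {
      if (Header->Preds.size() != 2)
        return false;                    // dead loop or multiple backedges
      Incoming = Header->Preds[0];
      Backedge = Header->Preds[1];
      bool IncomingInLoop = LoopBlocks.count(Incoming) != 0;
      bool BackedgeInLoop = LoopBlocks.count(Backedge) != 0;
      if (IncomingInLoop == BackedgeInLoop)
        return false;                    // both inside or both outside: malformed
      if (IncomingInLoop)
        std::swap(Incoming, Backedge);   // make Backedge the in-loop predecessor
      return true;
    }
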
+static ICmpInst *getLatchCmpInst(const Loop &L) { + if (BasicBlock *Latch = L.getLoopLatch()) + if (BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator())) + if (BI->isConditional()) + return dyn_cast<ICmpInst>(BI->getCondition()); + + return nullptr; +} + +/// Return the final value of the loop induction variable if found. +static Value *findFinalIVValue(const Loop &L, const PHINode &IndVar, + const Instruction &StepInst) { + ICmpInst *LatchCmpInst = getLatchCmpInst(L); + if (!LatchCmpInst) + return nullptr; + + Value *Op0 = LatchCmpInst->getOperand(0); + Value *Op1 = LatchCmpInst->getOperand(1); + if (Op0 == &IndVar || Op0 == &StepInst) + return Op1; + + if (Op1 == &IndVar || Op1 == &StepInst) + return Op0; + + return nullptr; +} + +Optional<Loop::LoopBounds> Loop::LoopBounds::getBounds(const Loop &L, + PHINode &IndVar, + ScalarEvolution &SE) { + InductionDescriptor IndDesc; + if (!InductionDescriptor::isInductionPHI(&IndVar, &L, &SE, IndDesc)) + return None; + + Value *InitialIVValue = IndDesc.getStartValue(); + Instruction *StepInst = IndDesc.getInductionBinOp(); + if (!InitialIVValue || !StepInst) + return None; + + const SCEV *Step = IndDesc.getStep(); + Value *StepInstOp1 = StepInst->getOperand(1); + Value *StepInstOp0 = StepInst->getOperand(0); + Value *StepValue = nullptr; + if (SE.getSCEV(StepInstOp1) == Step) + StepValue = StepInstOp1; + else if (SE.getSCEV(StepInstOp0) == Step) + StepValue = StepInstOp0; + + Value *FinalIVValue = findFinalIVValue(L, IndVar, *StepInst); + if (!FinalIVValue) + return None; + + return LoopBounds(L, *InitialIVValue, *StepInst, StepValue, *FinalIVValue, + SE); +} + +using Direction = Loop::LoopBounds::Direction; + +ICmpInst::Predicate Loop::LoopBounds::getCanonicalPredicate() const { + BasicBlock *Latch = L.getLoopLatch(); + assert(Latch && "Expecting valid latch"); + + BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator()); + assert(BI && BI->isConditional() && "Expecting conditional latch branch"); + + ICmpInst *LatchCmpInst = dyn_cast<ICmpInst>(BI->getCondition()); + assert(LatchCmpInst && + "Expecting the latch compare instruction to be a CmpInst"); + + // Need to inverse the predicate when first successor is not the loop + // header + ICmpInst::Predicate Pred = (BI->getSuccessor(0) == L.getHeader()) + ? 
LatchCmpInst->getPredicate() + : LatchCmpInst->getInversePredicate(); + + if (LatchCmpInst->getOperand(0) == &getFinalIVValue()) + Pred = ICmpInst::getSwappedPredicate(Pred); + + // Need to flip strictness of the predicate when the latch compare instruction + // is not using StepInst + if (LatchCmpInst->getOperand(0) == &getStepInst() || + LatchCmpInst->getOperand(1) == &getStepInst()) + return Pred; + + // Cannot flip strictness of NE and EQ + if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) + return ICmpInst::getFlippedStrictnessPredicate(Pred); + + Direction D = getDirection(); + if (D == Direction::Increasing) + return ICmpInst::ICMP_SLT; + + if (D == Direction::Decreasing) + return ICmpInst::ICMP_SGT; + + // If cannot determine the direction, then unable to find the canonical + // predicate + return ICmpInst::BAD_ICMP_PREDICATE; +} + +Direction Loop::LoopBounds::getDirection() const { + if (const SCEVAddRecExpr *StepAddRecExpr = + dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&getStepInst()))) + if (const SCEV *StepRecur = StepAddRecExpr->getStepRecurrence(SE)) { + if (SE.isKnownPositive(StepRecur)) + return Direction::Increasing; + if (SE.isKnownNegative(StepRecur)) + return Direction::Decreasing; + } + + return Direction::Unknown; +} + +Optional<Loop::LoopBounds> Loop::getBounds(ScalarEvolution &SE) const { + if (PHINode *IndVar = getInductionVariable(SE)) + return LoopBounds::getBounds(*this, *IndVar, SE); + + return None; +} + +PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const { + if (!isLoopSimplifyForm()) + return nullptr; + + BasicBlock *Header = getHeader(); + assert(Header && "Expected a valid loop header"); + ICmpInst *CmpInst = getLatchCmpInst(*this); + if (!CmpInst) + return nullptr; + + Instruction *LatchCmpOp0 = dyn_cast<Instruction>(CmpInst->getOperand(0)); + Instruction *LatchCmpOp1 = dyn_cast<Instruction>(CmpInst->getOperand(1)); + + for (PHINode &IndVar : Header->phis()) { + InductionDescriptor IndDesc; + if (!InductionDescriptor::isInductionPHI(&IndVar, this, &SE, IndDesc)) + continue; + + Instruction *StepInst = IndDesc.getInductionBinOp(); + + // case 1: + // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}] + // StepInst = IndVar + step + // cmp = StepInst < FinalValue + if (StepInst == LatchCmpOp0 || StepInst == LatchCmpOp1) + return &IndVar; + + // case 2: + // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}] + // StepInst = IndVar + step + // cmp = IndVar < FinalValue + if (&IndVar == LatchCmpOp0 || &IndVar == LatchCmpOp1) + return &IndVar; + } + + return nullptr; +} + +bool Loop::getInductionDescriptor(ScalarEvolution &SE, + InductionDescriptor &IndDesc) const { + if (PHINode *IndVar = getInductionVariable(SE)) + return InductionDescriptor::isInductionPHI(IndVar, this, &SE, IndDesc); + + return false; +} + +bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar, + ScalarEvolution &SE) const { + // Located in the loop header + BasicBlock *Header = getHeader(); + if (AuxIndVar.getParent() != Header) + return false; + + // No uses outside of the loop + for (User *U : AuxIndVar.users()) + if (const Instruction *I = dyn_cast<Instruction>(U)) + if (!contains(I)) + return false; + + InductionDescriptor IndDesc; + if (!InductionDescriptor::isInductionPHI(&AuxIndVar, this, &SE, IndDesc)) + return false; + + // The step instruction opcode should be add or sub. 
+ if (IndDesc.getInductionOpcode() != Instruction::Add && + IndDesc.getInductionOpcode() != Instruction::Sub) + return false; + + // Incremented by a loop invariant step for each loop iteration + return SE.isLoopInvariant(IndDesc.getStep(), this); +} + +bool Loop::isCanonical(ScalarEvolution &SE) const { + InductionDescriptor IndDesc; + if (!getInductionDescriptor(SE, IndDesc)) + return false; + + ConstantInt *Init = dyn_cast_or_null<ConstantInt>(IndDesc.getStartValue()); + if (!Init || !Init->isZero()) + return false; + + if (IndDesc.getInductionOpcode() != Instruction::Add) + return false; + + ConstantInt *Step = IndDesc.getConstIntStepValue(); + if (!Step || !Step->isOne()) + return false; + + return true; +} + // Check that 'BB' doesn't have any uses outside of the 'L' static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, DominatorTree &DT) { @@ -200,8 +432,11 @@ bool Loop::isLoopSimplifyForm() const { bool Loop::isSafeToClone() const { // Return false if any loop blocks contain indirectbrs, or there are any calls // to noduplicate functions. + // FIXME: it should be ok to clone CallBrInst's if we correctly update the + // operand list to reflect the newly cloned labels. for (BasicBlock *BB : this->blocks()) { - if (isa<IndirectBrInst>(BB->getTerminator())) + if (isa<IndirectBrInst>(BB->getTerminator()) || + isa<CallBrInst>(BB->getTerminator())) return false; for (Instruction &I : *BB) @@ -242,48 +477,20 @@ void Loop::setLoopID(MDNode *LoopID) const { assert((!LoopID || LoopID->getOperand(0) == LoopID) && "Loop ID should refer to itself"); - BasicBlock *H = getHeader(); - for (BasicBlock *BB : this->blocks()) { - Instruction *TI = BB->getTerminator(); - for (BasicBlock *Successor : successors(TI)) { - if (Successor == H) { - TI->setMetadata(LLVMContext::MD_loop, LoopID); - break; - } - } - } + SmallVector<BasicBlock *, 4> LoopLatches; + getLoopLatches(LoopLatches); + for (BasicBlock *BB : LoopLatches) + BB->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID); } void Loop::setLoopAlreadyUnrolled() { - MDNode *LoopID = getLoopID(); - // First remove any existing loop unrolling metadata. - SmallVector<Metadata *, 4> MDs; - // Reserve first location for self reference to the LoopID metadata node. - MDs.push_back(nullptr); - - if (LoopID) { - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - bool IsUnrollMetadata = false; - MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); - if (MD) { - const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); - IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); - } - if (!IsUnrollMetadata) - MDs.push_back(LoopID->getOperand(i)); - } - } - - // Add unroll(disable) metadata to disable future unrolling. LLVMContext &Context = getHeader()->getContext(); - SmallVector<Metadata *, 1> DisableOperands; - DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); - MDNode *DisableNode = MDNode::get(Context, DisableOperands); - MDs.push_back(DisableNode); - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. 
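
The rewritten setLoopAlreadyUnrolled (its replacement appears just below) routes through a generic helper that drops operands matching a prefix and appends new attributes, while preserving the self-referential first operand that llvm.loop metadata requires. The self-reference trick on its own, as a hedged sketch built only from metadata calls visible in this patch (the function name is illustrative):

    #include "llvm/ADT/None.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    // Build a distinct loop ID of the form
    //   !0 = distinct !{!0, !1}
    //   !1 = !{!"llvm.loop.unroll.disable"}
    static MDNode *makeUnrollDisabledLoopID(LLVMContext &Ctx) {
      MDNode *Disable =
          MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable"));
      // Reserve operand 0 with a temporary node, then point it at the node itself.
      TempMDTuple Temp = MDNode::getTemporary(Ctx, None);
      Metadata *Ops[] = {Temp.get(), Disable};
      MDNode *LoopID = MDNode::getDistinct(Ctx, Ops);
      LoopID->replaceOperandWith(0, LoopID);
      return LoopID;
    }
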
- NewLoopID->replaceOperandWith(0, NewLoopID); + MDNode *DisableUnrollMD = + MDNode::get(Context, MDString::get(Context, "llvm.loop.unroll.disable")); + MDNode *LoopID = getLoopID(); + MDNode *NewLoopID = makePostTransformationMetadata( + Context, LoopID, {"llvm.loop.unroll."}, {DisableUnrollMD}); setLoopID(NewLoopID); } @@ -761,6 +968,46 @@ bool llvm::isValidAsAccessGroup(MDNode *Node) { return Node->getNumOperands() == 0 && Node->isDistinct(); } +MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context, + MDNode *OrigLoopID, + ArrayRef<StringRef> RemovePrefixes, + ArrayRef<MDNode *> AddAttrs) { + // First remove any existing loop metadata related to this transformation. + SmallVector<Metadata *, 4> MDs; + + // Reserve first location for self reference to the LoopID metadata node. + TempMDTuple TempNode = MDNode::getTemporary(Context, None); + MDs.push_back(TempNode.get()); + + // Remove metadata for the transformation that has been applied or that became + // outdated. + if (OrigLoopID) { + for (unsigned i = 1, ie = OrigLoopID->getNumOperands(); i < ie; ++i) { + bool IsVectorMetadata = false; + Metadata *Op = OrigLoopID->getOperand(i); + if (MDNode *MD = dyn_cast<MDNode>(Op)) { + const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + if (S) + IsVectorMetadata = + llvm::any_of(RemovePrefixes, [S](StringRef Prefix) -> bool { + return S->getString().startswith(Prefix); + }); + } + if (!IsVectorMetadata) + MDs.push_back(Op); + } + } + + // Add metadata to avoid reapplying a transformation, such as + // llvm.loop.unroll.disable and llvm.loop.isvectorized. + MDs.append(AddAttrs.begin(), AddAttrs.end()); + + MDNode *NewLoopID = MDNode::getDistinct(Context, MDs); + // Replace the temporary node with a self-reference. + NewLoopID->replaceOperandWith(0, NewLoopID); + return NewLoopID; +} + //===----------------------------------------------------------------------===// // LoopInfo implementation // @@ -792,7 +1039,7 @@ void LoopInfoWrapperPass::verifyAnalysis() const { void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); } void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const { diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index a68f114b83a0..4ab3798039d8 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -1,9 +1,8 @@ //===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -23,6 +22,7 @@ #include "llvm/IR/PassTimingInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -210,6 +210,8 @@ bool LPPassManager::runOnFunction(Function &F) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); + llvm::TimeTraceScope LoopPassScope("RunLoopPass", P->getPassName()); + dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, CurrentLoop->getHeader()->getName()); dumpRequiredSet(P); @@ -384,16 +386,20 @@ void LoopPass::assignPassManager(PMStack &PMS, LPPM->add(this); } +static std::string getDescription(const Loop &L) { + return "loop"; +} + bool LoopPass::skipLoop(const Loop *L) const { const Function *F = L->getHeader()->getParent(); if (!F) return false; // Check the opt bisect limit. - LLVMContext &Context = F->getContext(); - if (!Context.getOptPassGate().shouldRunPass(this, *L)) + OptPassGate &Gate = F->getContext().getOptPassGate(); + if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(*L))) return true; // Check for the OptimizeNone attribute. - if (F->hasFnAttribute(Attribute::OptimizeNone)) { + if (F->hasOptNone()) { // FIXME: Report this to dbgs() only once per function. LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName() << "' in function " << F->getName() << "\n"); diff --git a/lib/Analysis/LoopUnrollAnalyzer.cpp b/lib/Analysis/LoopUnrollAnalyzer.cpp index c8b91a7a1a51..1728b5e9f6d2 100644 --- a/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -1,9 +1,8 @@ //===- LoopUnrollAnalyzer.cpp - Unrolling Effect Estimation -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index 907b321b231a..6e1bb50e8893 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -1,9 +1,8 @@ //===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp index 4a136c5a0c6d..77ebf89d9a08 100644 --- a/lib/Analysis/MemDerefPrinter.cpp +++ b/lib/Analysis/MemDerefPrinter.cpp @@ -1,9 +1,8 @@ //===- MemDerefPrinter.cpp - Printer for isDereferenceablePointer ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -54,9 +53,10 @@ bool MemDerefPrinter::runOnFunction(Function &F) { for (auto &I: instructions(F)) { if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { Value *PO = LI->getPointerOperand(); - if (isDereferenceablePointer(PO, DL)) + if (isDereferenceablePointer(PO, LI->getType(), DL)) Deref.push_back(PO); - if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL)) + if (isDereferenceableAndAlignedPointer(PO, LI->getType(), + LI->getAlignment(), DL)) DerefAndAligned.insert(PO); } } diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 686ad294378c..729dad463657 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -1,9 +1,8 @@ //===- MemoryBuiltins.cpp - Identify calls to memory builtins -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -264,6 +263,19 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, return getAllocationData(V, AllocLike, TLI, LookThroughBitCast).hasValue(); } +/// Tests if a value is a call or invoke to a library function that +/// reallocates memory (e.g., realloc). +bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast).hasValue(); +} + +/// Tests if a functions is a call or invoke to a library function that +/// reallocates memory (e.g., realloc). +bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) { + return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue(); +} + /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. @@ -359,19 +371,8 @@ const CallInst *llvm::extractCallocCall(const Value *I, return isCallocLikeFn(I, TLI) ? 
cast<CallInst>(I) : nullptr; } -/// isFreeCall - Returns non-null if the value is a call to the builtin free() -const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { - bool IsNoBuiltinCall; - const Function *Callee = - getCalledFunction(I, /*LookThroughBitCast=*/false, IsNoBuiltinCall); - if (Callee == nullptr || IsNoBuiltinCall) - return nullptr; - - StringRef FnName = Callee->getName(); - LibFunc TLIFn; - if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) - return nullptr; - +/// isLibFreeFunction - Returns true if the function is a builtin free() +bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) { unsigned ExpectedNumParams; if (TLIFn == LibFunc_free || TLIFn == LibFunc_ZdlPv || // operator delete(void*) @@ -402,22 +403,39 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t) // delete[](void*, align_val_t, nothrow) ExpectedNumParams = 3; else - return nullptr; + return false; // Check free prototype. // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. - FunctionType *FTy = Callee->getFunctionType(); + FunctionType *FTy = F->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) - return nullptr; + return false; if (FTy->getNumParams() != ExpectedNumParams) + return false; + if (FTy->getParamType(0) != Type::getInt8PtrTy(F->getContext())) + return false; + + return true; +} + +/// isFreeCall - Returns non-null if the value is a call to the builtin free() +const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { + bool IsNoBuiltinCall; + const Function *Callee = + getCalledFunction(I, /*LookThroughBitCast=*/false, IsNoBuiltinCall); + if (Callee == nullptr || IsNoBuiltinCall) return nullptr; - if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext())) + + StringRef FnName = Callee->getName(); + LibFunc TLIFn; + if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) return nullptr; - return dyn_cast<CallInst>(I); + return isLibFreeFunction(Callee, TLIFn) ? dyn_cast<CallInst>(I) : nullptr; } + //===----------------------------------------------------------------------===// // Utility functions to compute size of objects. // @@ -442,10 +460,10 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, return true; } -ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - bool MustSucceed) { +Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize, + const DataLayout &DL, + const TargetLibraryInfo *TLI, + bool MustSucceed) { assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize && "ObjectSize must be a call to llvm.objectsize!"); @@ -462,13 +480,35 @@ ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize, EvalOptions.NullIsUnknownSize = cast<ConstantInt>(ObjectSize->getArgOperand(2))->isOne(); - // FIXME: Does it make sense to just return a failure value if the size won't - // fit in the output and `!MustSucceed`? 
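
Splitting isFreeCall into isLibFreeFunction (above) makes the name-and-prototype check reusable on a Function directly, in the same spirit as the new Function overload of isReallocLikeFn earlier in this file. A hedged usage sketch, using only the calls visible in this hunk (the wrapper name looksLikeFree is illustrative):

    #include "llvm/Analysis/MemoryBuiltins.h"
    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Does F have the name and prototype of a recognized deallocation function?
    static bool looksLikeFree(const Function &F, const TargetLibraryInfo &TLI) {
      LibFunc TLIFn;
      if (!TLI.getLibFunc(F.getName(), TLIFn) || !TLI.has(TLIFn))
        return false;
      return isLibFreeFunction(&F, TLIFn);
    }
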
- uint64_t Size; auto *ResultType = cast<IntegerType>(ObjectSize->getType()); - if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) && - isUIntN(ResultType->getBitWidth(), Size)) - return ConstantInt::get(ResultType, Size); + bool StaticOnly = cast<ConstantInt>(ObjectSize->getArgOperand(3))->isZero(); + if (StaticOnly) { + // FIXME: Does it make sense to just return a failure value if the size won't + // fit in the output and `!MustSucceed`? + uint64_t Size; + if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) && + isUIntN(ResultType->getBitWidth(), Size)) + return ConstantInt::get(ResultType, Size); + } else { + LLVMContext &Ctx = ObjectSize->getFunction()->getContext(); + ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, EvalOptions); + SizeOffsetEvalType SizeOffsetPair = + Eval.compute(ObjectSize->getArgOperand(0)); + + if (SizeOffsetPair != ObjectSizeOffsetEvaluator::unknown()) { + IRBuilder<TargetFolder> Builder(Ctx, TargetFolder(DL)); + Builder.SetInsertPoint(ObjectSize); + + // If we've outside the end of the object, then we can always access + // exactly 0 bytes. + Value *ResultSize = + Builder.CreateSub(SizeOffsetPair.first, SizeOffsetPair.second); + Value *UseZero = + Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second); + return Builder.CreateSelect(UseZero, ConstantInt::get(ResultType, 0), + ResultSize); + } + } if (!MustSucceed) return nullptr; @@ -684,7 +724,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ if (!GV.hasDefinitiveInitializer()) return unknown(); - APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getType()->getElementType())); + APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getValueType())); return std::make_pair(align(Size, GV.getAlignment()), Zero); } @@ -743,9 +783,12 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator( const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context, - bool RoundToAlign) - : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)), - RoundToAlign(RoundToAlign) { + ObjectSizeOpts EvalOpts) + : DL(DL), TLI(TLI), Context(Context), + Builder(Context, TargetFolder(DL), + IRBuilderCallbackInserter( + [&](Instruction *I) { InsertedInstructions.insert(I); })), + EvalOpts(EvalOpts) { // IntTy and Zero must be set for each compute() since the address space may // be different for later objects. } @@ -767,17 +810,21 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { if (CacheIt != CacheMap.end() && anyKnown(CacheIt->second)) CacheMap.erase(CacheIt); } + + // Erase any instructions we inserted as part of the traversal. 
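
The dynamic llvm.objectsize path added above boils down to one computation: the bytes reachable from a pointer are the object's size minus the pointer's offset into it, clamped to zero once the pointer is at or past the end; the emitted IR expresses the clamp with an icmp ult plus select. The same arithmetic in plain C++, for reference:

    #include <cstdint>

    // Bytes accessible from a pointer Offset bytes into an object of Size bytes.
    static uint64_t bytesRemaining(uint64_t Size, uint64_t Offset) {
      return Size < Offset ? 0 : Size - Offset;
    }

For example, Size = 16 with Offset = 20 yields 0, matching the select(UseZero, 0, ResultSize) form emitted above.
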
+ for (Instruction *I : InsertedInstructions) { + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } } SeenVals.clear(); + InsertedInstructions.clear(); return Result; } SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { - ObjectSizeOpts ObjSizeOptions; - ObjSizeOptions.RoundToAlign = RoundToAlign; - - ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, ObjSizeOptions); + ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, EvalOpts); SizeOffsetType Const = Visitor.compute(V); if (Visitor.bothKnown(Const)) return std::make_pair(ConstantInt::get(Context, Const.first), @@ -916,24 +963,28 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { if (!bothKnown(EdgeData)) { OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy)); OffsetPHI->eraseFromParent(); + InsertedInstructions.erase(OffsetPHI); SizePHI->replaceAllUsesWith(UndefValue::get(IntTy)); SizePHI->eraseFromParent(); + InsertedInstructions.erase(SizePHI); return unknown(); } SizePHI->addIncoming(EdgeData.first, PHI.getIncomingBlock(i)); OffsetPHI->addIncoming(EdgeData.second, PHI.getIncomingBlock(i)); } - Value *Size = SizePHI, *Offset = OffsetPHI, *Tmp; - if ((Tmp = SizePHI->hasConstantValue())) { + Value *Size = SizePHI, *Offset = OffsetPHI; + if (Value *Tmp = SizePHI->hasConstantValue()) { Size = Tmp; SizePHI->replaceAllUsesWith(Size); SizePHI->eraseFromParent(); + InsertedInstructions.erase(SizePHI); } - if ((Tmp = OffsetPHI->hasConstantValue())) { + if (Value *Tmp = OffsetPHI->hasConstantValue()) { Offset = Tmp; OffsetPHI->replaceAllUsesWith(Offset); OffsetPHI->eraseFromParent(); + InsertedInstructions.erase(OffsetPHI); } return std::make_pair(Size, Offset); } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index e22182b99e11..b25b655165d7 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -1,9 +1,8 @@ //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -327,7 +326,8 @@ static bool isVolatile(Instruction *Inst) { MemDepResult MemoryDependenceResults::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, - BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { + BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, + OrderedBasicBlock *OBB) { MemDepResult InvariantGroupDependency = MemDepResult::getUnknown(); if (QueryInst != nullptr) { if (auto *LI = dyn_cast<LoadInst>(QueryInst)) { @@ -338,7 +338,7 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom( } } MemDepResult SimpleDep = getSimplePointerDependencyFrom( - MemLoc, isLoad, ScanIt, BB, QueryInst, Limit); + MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, OBB); if (SimpleDep.isDef()) return SimpleDep; // Non-local invariant group dependency indicates there is non local Def @@ -439,14 +439,13 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, - BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { + BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, + OrderedBasicBlock *OBB) { bool isInvariantLoad = false; - if (!Limit) { - unsigned DefaultLimit = BlockScanLimit; - return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, - &DefaultLimit); - } + unsigned DefaultLimit = BlockScanLimit; + if (!Limit) + Limit = &DefaultLimit; // We must be careful with atomic accesses, as they may allow another thread // to touch this location, clobbering it. We are conservative: if the @@ -488,11 +487,14 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( const DataLayout &DL = BB->getModule()->getDataLayout(); - // Create a numbered basic block to lazily compute and cache instruction + // If the caller did not provide an ordered basic block, + // create one to lazily compute and cache instruction // positions inside a BB. This is used to provide fast queries for relative // position between two instructions in a BB and can be used by // AliasAnalysis::callCapturesBefore. - OrderedBasicBlock OBB(BB); + OrderedBasicBlock OBBTmp(BB); + if (!OBB) + OBB = &OBBTmp; // Return "true" if and only if the instruction I is either a non-simple // load or a non-simple store. @@ -673,7 +675,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // A release fence requires that all stores complete before it, but does // not prevent the reordering of following loads or stores 'before' the // fence. As a result, we look past it when finding a dependency for - // loads. DSE uses this to find preceeding stores to delete and thus we + // loads. DSE uses this to find preceding stores to delete and thus we // can't bypass the fence if the query instruction is a store. if (FenceInst *FI = dyn_cast<FenceInst>(Inst)) if (isLoad && FI->getOrdering() == AtomicOrdering::Release) @@ -683,7 +685,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc); // If necessary, perform additional analysis. 
if (isModAndRefSet(MR)) - MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB); + MR = AA.callCapturesBefore(Inst, MemLoc, &DT, OBB); switch (clearMust(MR)) { case ModRefInfo::NoModRef: // If the call has no effect on the queried pointer, just ignore it. @@ -709,7 +711,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( return MemDepResult::getNonFuncLocal(); } -MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) { +MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst, + OrderedBasicBlock *OBB) { Instruction *ScanPos = QueryInst; // Check for a cached result @@ -747,8 +750,9 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) { if (auto *II = dyn_cast<IntrinsicInst>(QueryInst)) isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; - LocalCache = getPointerDependencyFrom( - MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst); + LocalCache = + getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(), + QueryParent, QueryInst, nullptr, OBB); } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) { bool isReadOnly = AA.onlyReadsMemory(QueryCall); LocalCache = getCallDependencyFrom(QueryCall, isReadOnly, diff --git a/lib/Analysis/MemoryLocation.cpp b/lib/Analysis/MemoryLocation.cpp index 27e8d72b8e89..163830eee797 100644 --- a/lib/Analysis/MemoryLocation.cpp +++ b/lib/Analysis/MemoryLocation.cpp @@ -1,9 +1,8 @@ //===- MemoryLocation.cpp - Memory location descriptions -------------------==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp index 6a5567ed765b..17f5d9b9f0ad 100644 --- a/lib/Analysis/MemorySSA.cpp +++ b/lib/Analysis/MemorySSA.cpp @@ -1,9 +1,8 @@ //===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -82,6 +81,11 @@ bool llvm::VerifyMemorySSA = true; #else bool llvm::VerifyMemorySSA = false; #endif +/// Enables memory ssa as a dependency for loop passes in legacy pass manager. +cl::opt<bool> llvm::EnableMSSALoopDependency( + "enable-mssa-loop-dependency", cl::Hidden, cl::init(false), + cl::desc("Enable MemorySSA dependency for loop pass manager")); + static cl::opt<bool, true> VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA), cl::Hidden, cl::desc("Enable verification of MemorySSA.")); @@ -252,10 +256,10 @@ struct ClobberAlias { // Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being // ignored if IsClobber = false. 
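
Two of the MemoryDependenceAnalysis changes above share one idiom: an optional caller-supplied resource (the scan Limit, the OrderedBasicBlock) is used when present and replaced by a local default otherwise, so a chain of queries can share and deplete the same state instead of re-creating it per call. A generic sketch of the idiom (the names and the budget value are illustrative):

    // Use the caller's remaining budget if one was passed in; otherwise fall
    // back to a local default. Either way the same counter is decremented.
    static int scanInstructions(int NumInsts, unsigned *Limit = nullptr) {
      unsigned DefaultLimit = 100; // stand-in for the BlockScanLimit option
      if (!Limit)
        Limit = &DefaultLimit;
      int Scanned = 0;
      for (int I = 0; I < NumInsts && *Limit != 0; ++I) {
        --*Limit;
        ++Scanned;
      }
      return Scanned;
    }
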
-static ClobberAlias instructionClobbersQuery(const MemoryDef *MD, - const MemoryLocation &UseLoc, - const Instruction *UseInst, - AliasAnalysis &AA) { +template <typename AliasAnalysisType> +static ClobberAlias +instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, + const Instruction *UseInst, AliasAnalysisType &AA) { Instruction *DefInst = MD->getMemoryInst(); assert(DefInst && "Defining instruction not actually an instruction"); const auto *UseCall = dyn_cast<CallBase>(UseInst); @@ -300,10 +304,11 @@ static ClobberAlias instructionClobbersQuery(const MemoryDef *MD, return {isModSet(I), AR}; } +template <typename AliasAnalysisType> static ClobberAlias instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU, const MemoryLocOrCall &UseMLOC, - AliasAnalysis &AA) { + AliasAnalysisType &AA) { // FIXME: This is a temporary hack to allow a single instructionClobbersQuery // to exist while MemoryLocOrCall is pushed through places. if (UseMLOC.IsCall) @@ -346,12 +351,12 @@ struct UpwardsMemoryQuery { } // end anonymous namespace static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc, - AliasAnalysis &AA) { + BatchAAResults &AA) { Instruction *Inst = MD->getMemoryInst(); if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { switch (II->getIntrinsicID()) { case Intrinsic::lifetime_end: - return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc); + return AA.alias(MemoryLocation(II->getArgOperand(1)), Loc) == MustAlias; default: return false; } @@ -359,13 +364,14 @@ static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc, return false; } -static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA, +template <typename AliasAnalysisType> +static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, const Instruction *I) { // If the memory can't be changed, then loads of the memory can't be // clobbered. return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) || - AA.pointsToConstantMemory(cast<LoadInst>(I)-> - getPointerOperand())); + AA.pointsToConstantMemory(MemoryLocation( + cast<LoadInst>(I)->getPointerOperand()))); } /// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing @@ -381,10 +387,12 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA, /// \param Query The UpwardsMemoryQuery we used for our search. /// \param AA The AliasAnalysis we used for our search. /// \param AllowImpreciseClobber Always false, unless we do relaxed verify. -static void + +template <typename AliasAnalysisType> +LLVM_ATTRIBUTE_UNUSED static void checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, const MemoryLocation &StartLoc, const MemorySSA &MSSA, - const UpwardsMemoryQuery &Query, AliasAnalysis &AA, + const UpwardsMemoryQuery &Query, AliasAnalysisType &AA, bool AllowImpreciseClobber = false) { assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?"); @@ -474,7 +482,7 @@ namespace { /// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up /// in one class. -class ClobberWalker { +template <class AliasAnalysisType> class ClobberWalker { /// Save a few bytes by using unsigned instead of size_t. 
using ListIndex = unsigned; @@ -498,9 +506,10 @@ class ClobberWalker { }; const MemorySSA &MSSA; - AliasAnalysis &AA; + AliasAnalysisType &AA; DominatorTree &DT; UpwardsMemoryQuery *Query; + unsigned *UpwardWalkLimit; // Phi optimization bookkeeping SmallVector<DefPath, 32> Paths; @@ -539,6 +548,16 @@ class ClobberWalker { walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr, const MemoryAccess *SkipStopAt = nullptr) const { assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world"); + assert(UpwardWalkLimit && "Need a valid walk limit"); + bool LimitAlreadyReached = false; + // (*UpwardWalkLimit) may be 0 here, due to the loop in tryOptimizePhi. Set + // it to 1. This will not do any alias() calls. It either returns in the + // first iteration in the loop below, or is set back to 0 if all def chains + // are free of MemoryDefs. + if (!*UpwardWalkLimit) { + *UpwardWalkLimit = 1; + LimitAlreadyReached = true; + } for (MemoryAccess *Current : def_chain(Desc.Last)) { Desc.Last = Current; @@ -548,6 +567,10 @@ class ClobberWalker { if (auto *MD = dyn_cast<MemoryDef>(Current)) { if (MSSA.isLiveOnEntryDef(MD)) return {MD, true, MustAlias}; + + if (!--*UpwardWalkLimit) + return {Current, true, MayAlias}; + ClobberAlias CA = instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA); if (CA.IsClobber) @@ -555,6 +578,9 @@ class ClobberWalker { } } + if (LimitAlreadyReached) + *UpwardWalkLimit = 0; + assert(isa<MemoryPhi>(Desc.Last) && "Ended at a non-clobber that's not a phi?"); return {Desc.Last, false, MayAlias}; @@ -626,10 +652,12 @@ class ClobberWalker { SkipStopWhere = Query->OriginalAccess; } - UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere, + UpwardsWalkResult Res = walkToPhiOrClobber(Node, + /*StopAt=*/StopWhere, /*SkipStopAt=*/SkipStopWhere); if (Res.IsKnownClobber) { assert(Res.Result != StopWhere && Res.Result != SkipStopWhere); + // If this wasn't a cache hit, we hit a clobber when walking. That's a // failure. TerminatedPath Term{Res.Result, PathIndex}; @@ -662,7 +690,7 @@ class ClobberWalker { struct generic_def_path_iterator : public iterator_facade_base<generic_def_path_iterator<T, Walker>, std::forward_iterator_tag, T *> { - generic_def_path_iterator() = default; + generic_def_path_iterator() {} generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} T &operator*() const { return curNode(); } @@ -887,13 +915,19 @@ class ClobberWalker { } public: - ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT) + ClobberWalker(const MemorySSA &MSSA, AliasAnalysisType &AA, DominatorTree &DT) : MSSA(MSSA), AA(AA), DT(DT) {} + AliasAnalysisType *getAA() { return &AA; } /// Finds the nearest clobber for the given query, optimizing phis if /// possible. - MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) { + MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q, + unsigned &UpWalkLimit) { Query = &Q; + UpwardWalkLimit = &UpWalkLimit; + // Starting limit must be > 0. + if (!UpWalkLimit) + UpWalkLimit++; MemoryAccess *Current = Start; // This walker pretends uses don't exist. 
If we're handed one, silently grab @@ -918,13 +952,11 @@ public: } #ifdef EXPENSIVE_CHECKS - if (!Q.SkipSelfAccess) + if (!Q.SkipSelfAccess && *UpwardWalkLimit > 0) checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); #endif return Result; } - - void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); } }; struct RenamePassData { @@ -947,77 +979,99 @@ struct RenamePassData { namespace llvm { -class MemorySSA::ClobberWalkerBase { - ClobberWalker Walker; +template <class AliasAnalysisType> class MemorySSA::ClobberWalkerBase { + ClobberWalker<AliasAnalysisType> Walker; MemorySSA *MSSA; public: - ClobberWalkerBase(MemorySSA *M, AliasAnalysis *A, DominatorTree *D) + ClobberWalkerBase(MemorySSA *M, AliasAnalysisType *A, DominatorTree *D) : Walker(*M, *A, *D), MSSA(M) {} MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, - const MemoryLocation &); - // Second argument (bool), defines whether the clobber search should skip the + const MemoryLocation &, + unsigned &); + // Third argument (bool), defines whether the clobber search should skip the // original queried access. If true, there will be a follow-up query searching // for a clobber access past "self". Note that the Optimized access is not // updated if a new clobber is found by this SkipSelf search. If this // additional query becomes heavily used we may decide to cache the result. // Walker instantiations will decide how to set the SkipSelf bool. - MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, bool); - void verify(const MemorySSA *MSSA) { Walker.verify(MSSA); } + MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool); }; /// A MemorySSAWalker that does AA walks to disambiguate accesses. It no /// longer does caching on its own, but the name has been retained for the /// moment. 
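
The walker rework above threads an UpwardWalkLimit reference through every step of the def chain: each MemoryDef examined costs one unit, and when the shared budget is exhausted the walk stops and the current access is returned as a conservative clobber instead of spending further alias queries. A stand-alone sketch of a budgeted walk in that style (Def, isClobber, and the return convention are stand-ins, not MemorySSA API):

    #include <cstddef>
    #include <vector>

    struct Def { int Id; };

    // Walk defs from nearest to furthest. Returns the index of the def treated
    // as the clobber, or Chain.size() if nothing clobbers within budget.
    template <typename Pred>
    static std::size_t walkWithBudget(const std::vector<Def> &Chain, Pred isClobber,
                                      unsigned &Budget) {
      for (std::size_t I = 0; I != Chain.size(); ++I) {
        if (Budget == 0)
          return I;                // budget gone: conservatively stop here
        --Budget;                  // one (potential) alias query per step
        if (isClobber(Chain[I]))
          return I;
      }
      return Chain.size();         // clean walk: caller maps this to live-on-entry
    }
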
+template <class AliasAnalysisType> class MemorySSA::CachingWalker final : public MemorySSAWalker { - ClobberWalkerBase *Walker; + ClobberWalkerBase<AliasAnalysisType> *Walker; public: - CachingWalker(MemorySSA *M, ClobberWalkerBase *W) + CachingWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W) : MemorySSAWalker(M), Walker(W) {} ~CachingWalker() override = default; using MemorySSAWalker::getClobberingMemoryAccess; - MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) { + return Walker->getClobberingMemoryAccessBase(MA, UWL, false); + } MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, - const MemoryLocation &Loc) override; + const MemoryLocation &Loc, + unsigned &UWL) { + return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL); + } + + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override { + unsigned UpwardWalkLimit = MaxCheckLimit; + return getClobberingMemoryAccess(MA, UpwardWalkLimit); + } + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, + const MemoryLocation &Loc) override { + unsigned UpwardWalkLimit = MaxCheckLimit; + return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit); + } void invalidateInfo(MemoryAccess *MA) override { if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) MUD->resetOptimized(); } - - void verify(const MemorySSA *MSSA) override { - MemorySSAWalker::verify(MSSA); - Walker->verify(MSSA); - } }; +template <class AliasAnalysisType> class MemorySSA::SkipSelfWalker final : public MemorySSAWalker { - ClobberWalkerBase *Walker; + ClobberWalkerBase<AliasAnalysisType> *Walker; public: - SkipSelfWalker(MemorySSA *M, ClobberWalkerBase *W) + SkipSelfWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W) : MemorySSAWalker(M), Walker(W) {} ~SkipSelfWalker() override = default; using MemorySSAWalker::getClobberingMemoryAccess; - MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) { + return Walker->getClobberingMemoryAccessBase(MA, UWL, true); + } MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, - const MemoryLocation &Loc) override; + const MemoryLocation &Loc, + unsigned &UWL) { + return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL); + } + + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override { + unsigned UpwardWalkLimit = MaxCheckLimit; + return getClobberingMemoryAccess(MA, UpwardWalkLimit); + } + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, + const MemoryLocation &Loc) override { + unsigned UpwardWalkLimit = MaxCheckLimit; + return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit); + } void invalidateInfo(MemoryAccess *MA) override { if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) MUD->resetOptimized(); } - - void verify(const MemorySSA *MSSA) override { - MemorySSAWalker::verify(MSSA); - Walker->verify(MSSA); - } }; } // end namespace llvm @@ -1071,6 +1125,8 @@ MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal, void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, SmallPtrSetImpl<BasicBlock *> &Visited, bool SkipVisited, bool RenameAllUses) { + assert(Root && "Trying to rename accesses in an unreachable block"); + SmallVector<RenamePassData, 32> WorkStack; // Skip everything if we already renamed this block and we are skipping. 
// Note: You can't sink this into the if, because we need it to occur @@ -1154,9 +1210,20 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { } MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) - : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), + : AA(nullptr), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), SkipWalker(nullptr), NextID(0) { - buildMemorySSA(); + // Build MemorySSA using a batch alias analysis. This reuses the internal + // state that AA collects during an alias()/getModRefInfo() call. This is + // safe because there are no CFG changes while building MemorySSA and can + // significantly reduce the time spent by the compiler in AA, because we will + // make queries about all the instructions in the Function. + BatchAAResults BatchAA(*AA); + buildMemorySSA(BatchAA); + // Intentionally leave AA to nullptr while building so we don't accidently + // use non-batch AliasAnalysis. + this->AA = AA; + // Also create the walker here. + getWalker(); } MemorySSA::~MemorySSA() { @@ -1193,11 +1260,9 @@ namespace llvm { /// which is walking bottom-up. class MemorySSA::OptimizeUses { public: - OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA, - DominatorTree *DT) - : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) { - Walker = MSSA->getWalker(); - } + OptimizeUses(MemorySSA *MSSA, CachingWalker<BatchAAResults> *Walker, + BatchAAResults *BAA, DominatorTree *DT) + : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT) {} void optimizeUses(); @@ -1225,8 +1290,8 @@ private: DenseMap<MemoryLocOrCall, MemlocStackInfo> &); MemorySSA *MSSA; - MemorySSAWalker *Walker; - AliasAnalysis *AA; + CachingWalker<BatchAAResults> *Walker; + BatchAAResults *AA; DominatorTree *DT; }; @@ -1343,11 +1408,12 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock( continue; } bool FoundClobberResult = false; + unsigned UpwardWalkLimit = MaxCheckLimit; while (UpperBound > LocInfo.LowerBound) { if (isa<MemoryPhi>(VersionStack[UpperBound])) { // For phis, use the walker, see where we ended up, go there - Instruction *UseInst = MU->getMemoryInst(); - MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst); + MemoryAccess *Result = + Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit); // We are guaranteed to find it or something is wrong while (VersionStack[UpperBound] != Result) { assert(UpperBound != 0); @@ -1423,7 +1489,7 @@ void MemorySSA::placePHINodes( createMemoryPhi(BB); } -void MemorySSA::buildMemorySSA() { +void MemorySSA::buildMemorySSA(BatchAAResults &BAA) { // We create an access to represent "live on entry", for things like // arguments or users of globals, where the memory they use is defined before // the beginning of the function. We do not actually insert it into the IR. 
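
The constructor change just above is the heart of this file's rework: while MemorySSA is being built the CFG cannot change, so every alias query during construction is funneled through a single BatchAAResults, which is allowed to reuse state across queries; only once building is done does the analysis switch back to the plain AAResults pointer. A hedged sketch of the same batching idiom, using only calls that appear in this patch (the function name and the counting are illustrative):

    #include "llvm/ADT/None.h"
    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstIterator.h"
    using namespace llvm;

    // Run one getModRefInfo query per instruction through a shared
    // BatchAAResults so intermediate alias-analysis state can be reused.
    static unsigned countMemoryTouchingInsts(Function &F, AAResults &AA) {
      BatchAAResults BatchAA(AA);
      unsigned N = 0;
      for (Instruction &I : instructions(F))
        if (isModOrRefSet(BatchAA.getModRefInfo(&I, None)))
          ++N;
      return N;
    }
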
@@ -1445,7 +1511,7 @@ void MemorySSA::buildMemorySSA() { AccessList *Accesses = nullptr; DefsList *Defs = nullptr; for (Instruction &I : B) { - MemoryUseOrDef *MUD = createNewAccess(&I); + MemoryUseOrDef *MUD = createNewAccess(&I, &BAA); if (!MUD) continue; @@ -1469,9 +1535,9 @@ void MemorySSA::buildMemorySSA() { SmallPtrSet<BasicBlock *, 16> Visited; renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); - CachingWalker *Walker = getWalkerImpl(); - - OptimizeUses(this, Walker, AA, DT).optimizeUses(); + ClobberWalkerBase<BatchAAResults> WalkerBase(this, &BAA, DT); + CachingWalker<BatchAAResults> WalkerLocal(this, &WalkerBase); + OptimizeUses(this, &WalkerLocal, &BAA, DT).optimizeUses(); // Mark the uses in unreachable blocks as live on entry, so that they go // somewhere. @@ -1482,14 +1548,16 @@ void MemorySSA::buildMemorySSA() { MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); } -MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { +MemorySSA::CachingWalker<AliasAnalysis> *MemorySSA::getWalkerImpl() { if (Walker) return Walker.get(); if (!WalkerBase) - WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT); + WalkerBase = + llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); - Walker = llvm::make_unique<CachingWalker>(this, WalkerBase.get()); + Walker = + llvm::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get()); return Walker.get(); } @@ -1498,9 +1566,11 @@ MemorySSAWalker *MemorySSA::getSkipSelfWalker() { return SkipWalker.get(); if (!WalkerBase) - WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT); + WalkerBase = + llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); - SkipWalker = llvm::make_unique<SkipSelfWalker>(this, WalkerBase.get()); + SkipWalker = + llvm::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get()); return SkipWalker.get(); } @@ -1619,7 +1689,7 @@ MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I, MemoryAccess *Definition, const MemoryUseOrDef *Template) { assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI"); - MemoryUseOrDef *NewAccess = createNewAccess(I, Template); + MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template); assert( NewAccess != nullptr && "Tried to create a memory access for a non-memory touching instruction"); @@ -1642,7 +1712,9 @@ static inline bool isOrdered(const Instruction *I) { } /// Helper function to create new memory accesses +template <typename AliasAnalysisType> MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, + AliasAnalysisType *AAP, const MemoryUseOrDef *Template) { // The assume intrinsic has a control dependency which we model by claiming // that it writes arbitrarily. Ignore that fake memory dependency here. @@ -1657,7 +1729,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr; Use = dyn_cast_or_null<MemoryUse>(Template) != nullptr; #if !defined(NDEBUG) - ModRefInfo ModRef = AA->getModRefInfo(I, None); + ModRefInfo ModRef = AAP->getModRefInfo(I, None); bool DefCheck, UseCheck; DefCheck = isModSet(ModRef) || isOrdered(I); UseCheck = isRefSet(ModRef); @@ -1665,7 +1737,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, #endif } else { // Find out what affect this instruction has on memory. - ModRefInfo ModRef = AA->getModRefInfo(I, None); + ModRefInfo ModRef = AAP->getModRefInfo(I, None); // The isOrdered check is used to ensure that volatiles end up as defs // (atomics end up as ModRef right now anyway). 
Until we separate the // ordering chain from the memory chain, this enables people to see at least @@ -1718,7 +1790,7 @@ void MemorySSA::removeFromLookups(MemoryAccess *MA) { MUD->setDefiningAccess(nullptr); // Invalidate our walker's cache if necessary if (!isa<MemoryUse>(MA)) - Walker->invalidateInfo(MA); + getWalker()->invalidateInfo(MA); Value *MemoryInst; if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) @@ -1778,35 +1850,16 @@ void MemorySSA::verifyMemorySSA() const { verifyDomination(F); verifyOrdering(F); verifyDominationNumbers(F); - Walker->verify(this); - verifyClobberSanity(F); -} - -/// Check sanity of the clobbering instruction for access MA. -void MemorySSA::checkClobberSanityAccess(const MemoryAccess *MA) const { - if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) { - if (!MUD->isOptimized()) - return; - auto *I = MUD->getMemoryInst(); - auto Loc = MemoryLocation::getOrNone(I); - if (Loc == None) - return; - auto *Clobber = MUD->getOptimized(); - UpwardsMemoryQuery Q(I, MUD); - checkClobberSanity(MUD, Clobber, *Loc, *this, Q, *AA, true); - } -} - -void MemorySSA::verifyClobberSanity(const Function &F) const { -#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS) - for (const BasicBlock &BB : F) { - const AccessList *Accesses = getBlockAccesses(&BB); - if (!Accesses) - continue; - for (const MemoryAccess &MA : *Accesses) - checkClobberSanityAccess(&MA); - } -#endif + // Previously, the verification used to also verify that the clobberingAccess + // cached by MemorySSA is the same as the clobberingAccess found at a later + // query to AA. This does not hold true in general due to the current fragility + // of BasicAA which has arbitrary caps on the things it analyzes before giving + // up. As a result, transformations that are correct, will lead to BasicAA + // returning different Alias answers before and after that transformation. + // Invalidating MemorySSA is not an option, as the results in BasicAA can be so + // random, in the worst case we'd need to rebuild MemorySSA from scratch after + // every transformation, which defeats the purpose of using it. For such an + // example, see test4 added in D51960. } /// Verify that all of the blocks we believe to have valid domination numbers @@ -2162,6 +2215,15 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT)); } +bool MemorySSAAnalysis::Result::invalidate( + Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + auto PAC = PA.getChecker<MemorySSAAnalysis>(); + return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) || + Inv.invalidate<AAManager>(F, PA) || + Inv.invalidate<DominatorTreeAnalysis>(F, PA); +} + PreservedAnalyses MemorySSAPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { OS << "MemorySSA for function: " << F.getName() << "\n"; @@ -2210,8 +2272,11 @@ MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} /// the MemoryAccess that actually clobbers Loc. 
/// /// \returns our clobbering memory access -MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase( - MemoryAccess *StartingAccess, const MemoryLocation &Loc) { +template <typename AliasAnalysisType> +MemoryAccess * +MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase( + MemoryAccess *StartingAccess, const MemoryLocation &Loc, + unsigned &UpwardWalkLimit) { if (isa<MemoryPhi>(StartingAccess)) return StartingAccess; @@ -2239,7 +2304,8 @@ MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase( ? StartingUseOrDef->getDefiningAccess() : StartingUseOrDef; - MemoryAccess *Clobber = Walker.findClobber(DefiningAccess, Q); + MemoryAccess *Clobber = + Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit); LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n"); LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); @@ -2247,9 +2313,10 @@ MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase( return Clobber; } +template <typename AliasAnalysisType> MemoryAccess * -MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA, - bool SkipSelf) { +MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase( + MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) { auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA); // If this is a MemoryPhi, we can't do anything. if (!StartingAccess) @@ -2275,7 +2342,7 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA, UpwardsMemoryQuery Q(I, StartingAccess); - if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) { + if (isUseTriviallyOptimizableToLiveOnEntry(*Walker.getAA(), I)) { MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef(); StartingAccess->setOptimized(LiveOnEntry); StartingAccess->setOptimizedAccessType(None); @@ -2295,7 +2362,7 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA, return DefiningAccess; } - OptimizedAccess = Walker.findClobber(DefiningAccess, Q); + OptimizedAccess = Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit); StartingAccess->setOptimized(OptimizedAccess); if (MSSA->isLiveOnEntryDef(OptimizedAccess)) StartingAccess->setOptimizedAccessType(None); @@ -2311,10 +2378,10 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA, MemoryAccess *Result; if (SkipSelf && isa<MemoryPhi>(OptimizedAccess) && - isa<MemoryDef>(StartingAccess)) { + isa<MemoryDef>(StartingAccess) && UpwardWalkLimit) { assert(isa<MemoryDef>(Q.OriginalAccess)); Q.SkipSelfAccess = true; - Result = Walker.findClobber(OptimizedAccess, Q); + Result = Walker.findClobber(OptimizedAccess, Q, UpwardWalkLimit); } else Result = OptimizedAccess; @@ -2325,28 +2392,6 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA, } MemoryAccess * -MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) { - return Walker->getClobberingMemoryAccessBase(MA, false); -} - -MemoryAccess * -MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA, - const MemoryLocation &Loc) { - return Walker->getClobberingMemoryAccessBase(MA, Loc); -} - -MemoryAccess * -MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA) { - return Walker->getClobberingMemoryAccessBase(MA, true); -} - -MemoryAccess * -MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA, - const MemoryLocation &Loc) { - return 
Walker->getClobberingMemoryAccessBase(MA, Loc); -} - -MemoryAccess * DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) { if (auto *Use = dyn_cast<MemoryUseOrDef>(MA)) return Use->getDefiningAccess(); diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp index 6c817d203684..4c1feee7fd9a 100644 --- a/lib/Analysis/MemorySSAUpdater.cpp +++ b/lib/Analysis/MemorySSAUpdater.cpp @@ -1,9 +1,8 @@ //===-- MemorySSAUpdater.cpp - Memory SSA Updater--------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------===// // @@ -73,7 +72,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // potential phi node. This will insert phi nodes if we cycle in order to // break the cycle and have an operand. for (auto *Pred : predecessors(BB)) - PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef)); + if (MSSA->DT->isReachableFromEntry(Pred)) + PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef)); + else + PhiOps.push_back(MSSA->getLiveOnEntryDef()); // Now try to simplify the ops to avoid placing a phi. // This may return null if we never created a phi yet, that's okay @@ -157,8 +159,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd( DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) { auto *Defs = MSSA->getWritableBlockDefs(BB); - if (Defs) + if (Defs) { + CachedPreviousDef.insert({BB, &*Defs->rbegin()}); return &*Defs->rbegin(); + } return getPreviousDefRecursive(BB, CachedPreviousDef); } @@ -270,6 +274,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // Also make sure we skip ourselves to avoid self references. if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD) continue; + // Defs are automatically unoptimized when the user is set to MD below, + // because the isOptimized() call will fail to find the same ID. U.set(MD); } } @@ -277,6 +283,9 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // and that def is now our defining access. MD->setDefiningAccess(DefBefore); + // Remember the index where we may insert new phis below. + unsigned NewPhiIndex = InsertedPHIs.size(); + SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end()); if (!DefBeforeSameBlock) { // If there was a local def before us, we must have the same effect it @@ -290,9 +299,56 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // backwards to find the def. To make that work, we'd have to track whether // getDefRecursive only ever used the single predecessor case. These types // of paths also only exist in between CFG simplifications. + + // If this is the first def in the block and this insert is in an arbitrary + // place, compute IDF and place phis. 
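insertDef, continued in the hunk just below, now computes the iterated dominance frontier itself and places MemoryPhis there when the new def is the first one in its block. For orientation, a hedged sketch of how a transform typically hands a newly created store to the updater so this machinery runs; the helper name addDefFor is hypothetical, and it assumes NewStore modifies memory so the created access is a MemoryDef.

    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/Analysis/MemorySSAUpdater.h"
    using namespace llvm;

    // NewStore was just inserted at the end of BB by some transform.
    static void addDefFor(Instruction *NewStore, BasicBlock *BB,
                          MemorySSAUpdater &MSSAU) {
      // Create the access; insertDef then finds the real defining access and
      // places any MemoryPhis that become necessary.
      MemoryAccess *NewAccess = MSSAU.createMemoryAccessInBB(
          NewStore, /*Definition=*/nullptr, BB, MemorySSA::End);
      MSSAU.insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
    }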
+ auto Iter = MD->getDefsIterator(); + ++Iter; + auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end(); + if (Iter == IterEnd) { + ForwardIDFCalculator IDFs(*MSSA->DT); + SmallVector<BasicBlock *, 32> IDFBlocks; + SmallPtrSet<BasicBlock *, 2> DefiningBlocks; + DefiningBlocks.insert(MD->getBlock()); + IDFs.setDefiningBlocks(DefiningBlocks); + IDFs.calculate(IDFBlocks); + SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs; + for (auto *BBIDF : IDFBlocks) + if (!MSSA->getMemoryAccess(BBIDF)) { + auto *MPhi = MSSA->createMemoryPhi(BBIDF); + NewInsertedPHIs.push_back(MPhi); + // Add the phis created into the IDF blocks to NonOptPhis, so they are + // not optimized out as trivial by the call to getPreviousDefFromEnd + // below. Once they are complete, all these Phis are added to the + // FixupList, and removed from NonOptPhis inside fixupDefs(). + NonOptPhis.insert(MPhi); + } + + for (auto &MPhi : NewInsertedPHIs) { + auto *BBIDF = MPhi->getBlock(); + for (auto *Pred : predecessors(BBIDF)) { + DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef; + MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), + Pred); + } + } + + // Re-take the index where we're adding the new phis, because the above + // call to getPreviousDefFromEnd, may have inserted into InsertedPHIs. + NewPhiIndex = InsertedPHIs.size(); + for (auto &MPhi : NewInsertedPHIs) { + InsertedPHIs.push_back(&*MPhi); + FixupList.push_back(&*MPhi); + } + } + FixupList.push_back(MD); } + // Remember the index where we stopped inserting new phis above, since the + // fixupDefs call in the loop below may insert more, that are already minimal. + unsigned NewPhiIndexEnd = InsertedPHIs.size(); + while (!FixupList.empty()) { unsigned StartingPHISize = InsertedPHIs.size(); fixupDefs(FixupList); @@ -300,6 +356,12 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // Put any new phis on the fixup list, and process them FixupList.append(InsertedPHIs.begin() + StartingPHISize, InsertedPHIs.end()); } + + // Optimize potentially non-minimal phis added in this method. + unsigned NewPhiSize = NewPhiIndexEnd - NewPhiIndex; + if (NewPhiSize) + tryRemoveTrivialPhis(ArrayRef<WeakVH>(&InsertedPHIs[NewPhiIndex], NewPhiSize)); + // Now that all fixups are done, rename all uses if we are asked. if (RenameUses) { SmallPtrSet<BasicBlock *, 16> Visited; @@ -401,8 +463,8 @@ void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) { } } -void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From, - BasicBlock *To) { +void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From, + const BasicBlock *To) { if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) { bool Found = false; MPhi->unorderedDeleteIncomingIf([&](const MemoryAccess *, BasicBlock *B) { @@ -420,7 +482,8 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From, void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, const ValueToValueMapTy &VMap, - PhiToDefMap &MPhiMap) { + PhiToDefMap &MPhiMap, + bool CloneWasSimplified) { auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * { MemoryAccess *InsnDefining = MA; if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) { @@ -450,16 +513,60 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, // instructions. This occurs in LoopRotate when cloning instructions // from the old header to the old preheader. 
The cloned instruction may // also be a simplified Value, not an Instruction (see LoopRotate). + // Also in LoopRotate, even when it's an instruction, due to it being + // simplified, it may be a Use rather than a Def, so we cannot use MUD as + // template. Calls coming from updateForClonedBlockIntoPred, ensure this. if (Instruction *NewInsn = dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) { MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess( - NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), MUD); + NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), + CloneWasSimplified ? nullptr : MUD); MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); } } } } +void MemorySSAUpdater::updatePhisWhenInsertingUniqueBackedgeBlock( + BasicBlock *Header, BasicBlock *Preheader, BasicBlock *BEBlock) { + auto *MPhi = MSSA->getMemoryAccess(Header); + if (!MPhi) + return; + + // Create phi node in the backedge block and populate it with the same + // incoming values as MPhi. Skip incoming values coming from Preheader. + auto *NewMPhi = MSSA->createMemoryPhi(BEBlock); + bool HasUniqueIncomingValue = true; + MemoryAccess *UniqueValue = nullptr; + for (unsigned I = 0, E = MPhi->getNumIncomingValues(); I != E; ++I) { + BasicBlock *IBB = MPhi->getIncomingBlock(I); + MemoryAccess *IV = MPhi->getIncomingValue(I); + if (IBB != Preheader) { + NewMPhi->addIncoming(IV, IBB); + if (HasUniqueIncomingValue) { + if (!UniqueValue) + UniqueValue = IV; + else if (UniqueValue != IV) + HasUniqueIncomingValue = false; + } + } + } + + // Update incoming edges into MPhi. Remove all but the incoming edge from + // Preheader. Add an edge from NewMPhi + auto *AccFromPreheader = MPhi->getIncomingValueForBlock(Preheader); + MPhi->setIncomingValue(0, AccFromPreheader); + MPhi->setIncomingBlock(0, Preheader); + for (unsigned I = MPhi->getNumIncomingValues() - 1; I >= 1; --I) + MPhi->unorderedDeleteIncoming(I); + MPhi->addIncoming(NewMPhi, BEBlock); + + // If NewMPhi is a trivial phi, remove it. Its use in the header MPhi will be + // replaced with the unique value. + if (HasUniqueIncomingValue) + removeMemoryAccess(NewMPhi); +} + void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef<BasicBlock *> ExitBlocks, const ValueToValueMapTy &VMap, @@ -543,10 +650,13 @@ void MemorySSAUpdater::updateForClonedBlockIntoPred( // Defs from BB being used in BB will be replaced with the cloned defs from // VM. The uses of BB's Phi (if it exists) in BB will be replaced by the // incoming def into the Phi from P1. + // Instructions cloned into the predecessor are in practice sometimes + // simplified, so disable the use of the template, and create an access from + // scratch. PhiToDefMap MPhiMap; if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB)) MPhiMap[MPhi] = MPhi->getIncomingValueForBlock(P1); - cloneUsesAndDefs(BB, P1, VM, MPhiMap); + cloneUsesAndDefs(BB, P1, VM, MPhiMap, /*CloneWasSimplified=*/true); } template <typename Iter> @@ -599,7 +709,7 @@ void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates, if (!RevDeleteUpdates.empty()) { // Update for inserted edges: use newDT and snapshot CFG as if deletes had - // not occured. + // not occurred. // FIXME: This creates a new DT, so it's more expensive to do mix // delete/inserts vs just inserts. We can do an incremental update on the DT // to revert deletes, than re-delete the edges. 
Teaching DT to do this, is @@ -697,7 +807,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, // Map a BB to its predecessors: added + previously existing. To get a // deterministic order, store predecessors as SetVectors. The order in each - // will be defined by teh order in Updates (fixed) and the order given by + // will be defined by the order in Updates (fixed) and the order given by // children<> (also fixed). Since we further iterate over these ordered sets, // we lose the information of multiple edges possibly existing between two // blocks, so we'll keep and EdgeCount map for that. @@ -756,15 +866,15 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, for (auto *BB : NewBlocks) PredMap.erase(BB); - SmallVector<BasicBlock *, 8> BlocksToProcess; SmallVector<BasicBlock *, 16> BlocksWithDefsToReplace; + SmallVector<WeakVH, 8> InsertedPhis; // First create MemoryPhis in all blocks that don't have one. Create in the // order found in Updates, not in PredMap, to get deterministic numbering. for (auto &Edge : Updates) { BasicBlock *BB = Edge.getTo(); if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB)) - MSSA->createMemoryPhi(BB); + InsertedPhis.push_back(MSSA->createMemoryPhi(BB)); } // Now we'll fill in the MemoryPhis with the right incoming values. @@ -831,10 +941,6 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, for (auto *Pred : PrevBlockSet) for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I) NewPhi->addIncoming(DefP1, Pred); - - // Insert BB in the set of blocks that now have definition. We'll use this - // to compute IDF and add Phis there next. - BlocksToProcess.push_back(BB); } // Get all blocks that used to dominate BB and no longer do after adding @@ -849,22 +955,41 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, GetNoLongerDomBlocks(PrevIDom, NewIDom, BlocksWithDefsToReplace); } + tryRemoveTrivialPhis(InsertedPhis); + // Create the set of blocks that now have a definition. We'll use this to + // compute IDF and add Phis there next. + SmallVector<BasicBlock *, 8> BlocksToProcess; + for (auto &VH : InsertedPhis) + if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) + BlocksToProcess.push_back(MPhi->getBlock()); + // Compute IDF and add Phis in all IDF blocks that do not have one. SmallVector<BasicBlock *, 32> IDFBlocks; if (!BlocksToProcess.empty()) { - ForwardIDFCalculator IDFs(DT); + ForwardIDFCalculator IDFs(DT, GD); SmallPtrSet<BasicBlock *, 16> DefiningBlocks(BlocksToProcess.begin(), BlocksToProcess.end()); IDFs.setDefiningBlocks(DefiningBlocks); IDFs.calculate(IDFBlocks); + + SmallSetVector<MemoryPhi *, 4> PhisToFill; + // First create all needed Phis. + for (auto *BBIDF : IDFBlocks) + if (!MSSA->getMemoryAccess(BBIDF)) { + auto *IDFPhi = MSSA->createMemoryPhi(BBIDF); + InsertedPhis.push_back(IDFPhi); + PhisToFill.insert(IDFPhi); + } + // Then update or insert their correct incoming values. for (auto *BBIDF : IDFBlocks) { - if (auto *IDFPhi = MSSA->getMemoryAccess(BBIDF)) { + auto *IDFPhi = MSSA->getMemoryAccess(BBIDF); + assert(IDFPhi && "Phi must exist"); + if (!PhisToFill.count(IDFPhi)) { // Update existing Phi. // FIXME: some updates may be redundant, try to optimize and skip some. 
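applyInsertUpdates, whose diff continues below, expects the DominatorTree to already reflect the inserted edges, and it accepts the same update records the dominator tree APIs use. A hedged sketch of that calling convention; the helper name notifyEdgeInserted is hypothetical.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/MemorySSAUpdater.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    // A transform has just added the CFG edge From -> To in the IR.
    static void notifyEdgeInserted(BasicBlock *From, BasicBlock *To,
                                   DominatorTree &DT, MemorySSAUpdater &MSSAU) {
      SmallVector<DominatorTree::UpdateType, 1> Updates;
      Updates.push_back({DominatorTree::Insert, From, To});
      DT.applyUpdates(Updates);              // update the dominator tree first
      MSSAU.applyInsertUpdates(Updates, DT); // then MemorySSA, using the new DT
    }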
for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I) IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I))); } else { - IDFPhi = MSSA->createMemoryPhi(BBIDF); for (auto &Pair : children<GraphDiffInvBBPair>({GD, BBIDF})) { BasicBlock *Pi = Pair.second; IDFPhi->addIncoming(GetLastDef(Pi), Pi); @@ -907,6 +1032,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, } } } + tryRemoveTrivialPhis(InsertedPhis); } // Move What before Where in the MemorySSA IR. @@ -1052,7 +1178,7 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor( } } -void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { +void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) { assert(!MSSA->isLiveOnEntryDef(MA) && "Trying to remove the live on entry def"); // We can only delete phi nodes if they have no uses, or we can replace all @@ -1071,6 +1197,8 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess(); } + SmallSetVector<MemoryPhi *, 4> PhisToCheck; + // Re-point the uses at our defining access if (!isa<MemoryUse>(MA) && !MA->use_empty()) { // Reset optimized on users of this store, and reset the uses. @@ -1090,6 +1218,9 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { Use &U = *MA->use_begin(); if (auto *MUD = dyn_cast<MemoryUseOrDef>(U.getUser())) MUD->resetOptimized(); + if (OptimizePhis) + if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U.getUser())) + PhisToCheck.insert(MP); U.set(NewDefTarget); } } @@ -1098,10 +1229,25 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { // are doing things here MSSA->removeFromLookups(MA); MSSA->removeFromLists(MA); + + // Optionally optimize Phi uses. This will recursively remove trivial phis. + if (!PhisToCheck.empty()) { + SmallVector<WeakVH, 16> PhisToOptimize{PhisToCheck.begin(), + PhisToCheck.end()}; + PhisToCheck.clear(); + + unsigned PhisSize = PhisToOptimize.size(); + while (PhisSize-- > 0) + if (MemoryPhi *MP = + cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val())) { + auto OperRange = MP->operands(); + tryRemoveTrivialPhi(MP, OperRange); + } + } } void MemorySSAUpdater::removeBlocks( - const SmallPtrSetImpl<BasicBlock *> &DeadBlocks) { + const SmallSetVector<BasicBlock *, 8> &DeadBlocks) { // First delete all uses of BB in MemoryPhis. for (BasicBlock *BB : DeadBlocks) { Instruction *TI = BB->getTerminator(); @@ -1133,6 +1279,51 @@ void MemorySSAUpdater::removeBlocks( } } +void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) { + for (auto &VH : UpdatedPHIs) + if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) { + auto OperRange = MPhi->operands(); + tryRemoveTrivialPhi(MPhi, OperRange); + } +} + +void MemorySSAUpdater::changeToUnreachable(const Instruction *I) { + const BasicBlock *BB = I->getParent(); + // Remove memory accesses in BB for I and all following instructions. + auto BBI = I->getIterator(), BBE = BB->end(); + // FIXME: If this becomes too expensive, iterate until the first instruction + // with a memory access, then iterate over MemoryAccesses. + while (BBI != BBE) + removeMemoryAccess(&*(BBI++)); + // Update phis in BB's successors to remove BB. + SmallVector<WeakVH, 16> UpdatedPHIs; + for (const BasicBlock *Successor : successors(BB)) { + removeDuplicatePhiEdgesBetween(BB, Successor); + if (MemoryPhi *MPhi = MSSA->getMemoryAccess(Successor)) { + MPhi->unorderedDeleteIncomingBlock(BB); + UpdatedPHIs.push_back(MPhi); + } + } + // Optimize trivial phis. 
+ tryRemoveTrivialPhis(UpdatedPHIs); +} + +void MemorySSAUpdater::changeCondBranchToUnconditionalTo(const BranchInst *BI, + const BasicBlock *To) { + const BasicBlock *BB = BI->getParent(); + SmallVector<WeakVH, 16> UpdatedPHIs; + for (const BasicBlock *Succ : successors(BB)) { + removeDuplicatePhiEdgesBetween(BB, Succ); + if (Succ != To) + if (auto *MPhi = MSSA->getMemoryAccess(Succ)) { + MPhi->unorderedDeleteIncomingBlock(BB); + UpdatedPHIs.push_back(MPhi); + } + } + // Optimize trivial phis. + tryRemoveTrivialPhis(UpdatedPHIs); +} + MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB( Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point) { diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index 1e321f17d59f..519242759824 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -1,9 +1,8 @@ //===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index 87f76d43bb1e..e25eb290a665 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -1,9 +1,8 @@ //===- ModuleSummaryAnalysis.cpp - Module summary index builder -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -71,6 +70,11 @@ cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC( "all-non-critical", "All non-critical edges."), clEnumValN(FunctionSummary::FSHT_All, "all", "All edges."))); +cl::opt<std::string> ModuleSummaryDotFile( + "module-summary-dot-file", cl::init(""), cl::Hidden, + cl::value_desc("filename"), + cl::desc("File to emit dot graph of new summary into.")); + // Walk through the operands of a given User via worklist iteration and populate // the set of GlobalValue references encountered. Invoked either on an // Instruction or a GlobalVariable (which walks its initializer). @@ -227,6 +231,13 @@ static bool isNonVolatileLoad(const Instruction *I) { return false; } +static bool isNonVolatileStore(const Instruction *I) { + if (const auto *SI = dyn_cast<StoreInst>(I)) + return !SI->isVolatile(); + + return false; +} + static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, const Function &F, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT, @@ -241,7 +252,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // Map from callee ValueId to profile count. Used to accumulate profile // counts for all static calls to a given callee. 
MapVector<ValueInfo, CalleeInfo> CallGraphEdges; - SetVector<ValueInfo> RefEdges; + SetVector<ValueInfo> RefEdges, LoadRefEdges, StoreRefEdges; SetVector<GlobalValue::GUID> TypeTests; SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls, TypeCheckedLoadVCalls; @@ -254,6 +265,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // list. findRefEdges(Index, &F, RefEdges, Visited); std::vector<const Instruction *> NonVolatileLoads; + std::vector<const Instruction *> NonVolatileStores; bool HasInlineAsmMaybeReferencingInternal = false; for (const BasicBlock &BB : F) @@ -261,12 +273,34 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, if (isa<DbgInfoIntrinsic>(I)) continue; ++NumInsts; - if (isNonVolatileLoad(&I)) { - // Postpone processing of non-volatile load instructions - // See comments below - Visited.insert(&I); - NonVolatileLoads.push_back(&I); - continue; + // Regular LTO module doesn't participate in ThinLTO import, + // so no reference from it can be read/writeonly, since this + // would require importing variable as local copy + if (IsThinLTO) { + if (isNonVolatileLoad(&I)) { + // Postpone processing of non-volatile load instructions + // See comments below + Visited.insert(&I); + NonVolatileLoads.push_back(&I); + continue; + } else if (isNonVolatileStore(&I)) { + Visited.insert(&I); + NonVolatileStores.push_back(&I); + // All references from second operand of store (destination address) + // can be considered write-only if they're not referenced by any + // non-store instruction. References from first operand of store + // (stored value) can't be treated either as read- or as write-only + // so we add them to RefEdges as we do with all other instructions + // except non-volatile load. + Value *Stored = I.getOperand(0); + if (auto *GV = dyn_cast<GlobalValue>(Stored)) + // findRefEdges will try to examine GV operands, so instead + // of calling it we should add GV to RefEdges directly. + RefEdges.insert(Index.getOrInsertValueInfo(GV)); + else if (auto *U = dyn_cast<User>(Stored)) + findRefEdges(Index, U, RefEdges, Visited); + continue; + } } findRefEdges(Index, &I, RefEdges, Visited); auto CS = ImmutableCallSite(&I); @@ -357,24 +391,61 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, } } - // By now we processed all instructions in a function, except - // non-volatile loads. All new refs we add in a loop below - // are obviously constant. All constant refs are grouped in the - // end of RefEdges vector, so we can use a single integer value - // to identify them. - unsigned RefCnt = RefEdges.size(); - for (const Instruction *I : NonVolatileLoads) { - Visited.erase(I); - findRefEdges(Index, I, RefEdges, Visited); - } - std::vector<ValueInfo> Refs = RefEdges.takeVector(); - // Regular LTO module doesn't participate in ThinLTO import, - // so no reference from it can be readonly, since this would - // require importing variable as local copy - if (IsThinLTO) - for (; RefCnt < Refs.size(); ++RefCnt) + std::vector<ValueInfo> Refs; + if (IsThinLTO) { + auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs, + SetVector<ValueInfo> &Edges, + SmallPtrSet<const User *, 8> &Cache) { + for (const auto *I : Instrs) { + Cache.erase(I); + findRefEdges(Index, I, Edges, Cache); + } + }; + + // By now we processed all instructions in a function, except + // non-volatile loads and non-volatile value stores. 
Let's find + // ref edges for both of instruction sets + AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited); + // We can add some values to the Visited set when processing load + // instructions which are also used by stores in NonVolatileStores. + // For example this can happen if we have following code: + // + // store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**) + // %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**) + // + // After processing loads we'll add bitcast to the Visited set, and if + // we use the same set while processing stores, we'll never see store + // to @bar and @bar will be mistakenly treated as readonly. + SmallPtrSet<const llvm::User *, 8> StoreCache; + AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache); + + // If both load and store instruction reference the same variable + // we won't be able to optimize it. Add all such reference edges + // to RefEdges set. + for (auto &VI : StoreRefEdges) + if (LoadRefEdges.remove(VI)) + RefEdges.insert(VI); + + unsigned RefCnt = RefEdges.size(); + // All new reference edges inserted in two loops below are either + // read or write only. They will be grouped in the end of RefEdges + // vector, so we can use a single integer value to identify them. + for (auto &VI : LoadRefEdges) + RefEdges.insert(VI); + + unsigned FirstWORef = RefEdges.size(); + for (auto &VI : StoreRefEdges) + RefEdges.insert(VI); + + Refs = RefEdges.takeVector(); + for (; RefCnt < FirstWORef; ++RefCnt) Refs[RefCnt].setReadOnly(); + for (; RefCnt < Refs.size(); ++RefCnt) + Refs[RefCnt].setWriteOnly(); + } else { + Refs = RefEdges.takeVector(); + } // Explicit add hot edges to enforce importing for designated GUIDs for // sample PGO, to enable the same inlines as the profiled optimized binary. for (auto &I : F.getImportGUIDs()) @@ -387,7 +458,8 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, bool NotEligibleForImport = NonRenamableLocal || HasInlineAsmMaybeReferencingInternal; GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport, - /* Live = */ false, F.isDSOLocal()); + /* Live = */ false, F.isDSOLocal(), + F.hasLinkOnceODRLinkage() && F.hasGlobalUnnamedAddr()); FunctionSummary::FFlags FunFlags{ F.hasFnAttribute(Attribute::ReadNone), F.hasFnAttribute(Attribute::ReadOnly), @@ -406,26 +478,134 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, Index.addGlobalValueSummary(F, std::move(FuncSummary)); } +/// Find function pointers referenced within the given vtable initializer +/// (or subset of an initializer) \p I. The starting offset of \p I within +/// the vtable initializer is \p StartingOffset. Any discovered function +/// pointers are added to \p VTableFuncs along with their cumulative offset +/// within the initializer. +static void findFuncPointers(const Constant *I, uint64_t StartingOffset, + const Module &M, ModuleSummaryIndex &Index, + VTableFuncList &VTableFuncs) { + // First check if this is a function pointer. + if (I->getType()->isPointerTy()) { + auto Fn = dyn_cast<Function>(I->stripPointerCasts()); + // We can disregard __cxa_pure_virtual as a possible call target, as + // calls to pure virtuals are UB. + if (Fn && Fn->getName() != "__cxa_pure_virtual") + VTableFuncs.push_back({Index.getOrInsertValueInfo(Fn), StartingOffset}); + return; + } + + // Walk through the elements in the constant struct or array and recursively + // look for virtual function pointers. 
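The read/write-only classification implemented by the computeFunctionSummary hunks above reduces to a simple rule, restated here as a sketch with hypothetical names (it is not the code ThinLTO runs): a referenced global is a read-only candidate if it is only ever read by non-volatile loads, a write-only candidate if it only ever appears as the destination of non-volatile stores, and an ordinary reference otherwise, including when it is the value being stored. The vtable-scanning helper introduced by the comment above continues below.

    // SeenLoad: referenced by a non-volatile load.
    // SeenStoreDest: referenced as the destination of a non-volatile store.
    // SeenOtherUse: any other reference (stored value, call operand, ...).
    enum class RefKind { ReadOnly, WriteOnly, Plain };

    static RefKind classifyRef(bool SeenLoad, bool SeenStoreDest,
                               bool SeenOtherUse) {
      if (SeenOtherUse || (SeenLoad && SeenStoreDest))
        return RefKind::Plain;     // mixed use: keep as a normal reference
      if (SeenLoad)
        return RefKind::ReadOnly;  // only read: eligible for read-only import
      if (SeenStoreDest)
        return RefKind::WriteOnly; // only written: write-only treatment
      return RefKind::Plain;       // not referenced in this function
    }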
+ const DataLayout &DL = M.getDataLayout(); + if (auto *C = dyn_cast<ConstantStruct>(I)) { + StructType *STy = dyn_cast<StructType>(C->getType()); + assert(STy); + const StructLayout *SL = DL.getStructLayout(C->getType()); + + for (StructType::element_iterator EB = STy->element_begin(), EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) { + auto Offset = SL->getElementOffset(EI - EB); + unsigned Op = SL->getElementContainingOffset(Offset); + findFuncPointers(cast<Constant>(I->getOperand(Op)), + StartingOffset + Offset, M, Index, VTableFuncs); + } + } else if (auto *C = dyn_cast<ConstantArray>(I)) { + ArrayType *ATy = C->getType(); + Type *EltTy = ATy->getElementType(); + uint64_t EltSize = DL.getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + findFuncPointers(cast<Constant>(I->getOperand(i)), + StartingOffset + i * EltSize, M, Index, VTableFuncs); + } + } +} + +// Identify the function pointers referenced by vtable definition \p V. +static void computeVTableFuncs(ModuleSummaryIndex &Index, + const GlobalVariable &V, const Module &M, + VTableFuncList &VTableFuncs) { + if (!V.isConstant()) + return; + + findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index, + VTableFuncs); + +#ifndef NDEBUG + // Validate that the VTableFuncs list is ordered by offset. + uint64_t PrevOffset = 0; + for (auto &P : VTableFuncs) { + // The findVFuncPointers traversal should have encountered the + // functions in offset order. We need to use ">=" since PrevOffset + // starts at 0. + assert(P.VTableOffset >= PrevOffset); + PrevOffset = P.VTableOffset; + } +#endif +} + +/// Record vtable definition \p V for each type metadata it references. static void -computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V, - DenseSet<GlobalValue::GUID> &CantBePromoted) { +recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index, + const GlobalVariable &V, + SmallVectorImpl<MDNode *> &Types) { + for (MDNode *Type : Types) { + auto TypeID = Type->getOperand(1).get(); + + uint64_t Offset = + cast<ConstantInt>( + cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) + ->getZExtValue(); + + if (auto *TypeId = dyn_cast<MDString>(TypeID)) + Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId->getString()) + .push_back({Offset, Index.getOrInsertValueInfo(&V)}); + } +} + +static void computeVariableSummary(ModuleSummaryIndex &Index, + const GlobalVariable &V, + DenseSet<GlobalValue::GUID> &CantBePromoted, + const Module &M, + SmallVectorImpl<MDNode *> &Types) { SetVector<ValueInfo> RefEdges; SmallPtrSet<const User *, 8> Visited; bool HasBlockAddress = findRefEdges(Index, &V, RefEdges, Visited); bool NonRenamableLocal = isNonRenamableLocal(V); GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal, - /* Live = */ false, V.isDSOLocal()); + /* Live = */ false, V.isDSOLocal(), + V.hasLinkOnceODRLinkage() && V.hasGlobalUnnamedAddr()); + + VTableFuncList VTableFuncs; + // If splitting is not enabled, then we compute the summary information + // necessary for index-based whole program devirtualization. + if (!Index.enableSplitLTOUnit()) { + Types.clear(); + V.getMetadata(LLVMContext::MD_type, Types); + if (!Types.empty()) { + // Identify the function pointers referenced by this vtable definition. + computeVTableFuncs(Index, V, M, VTableFuncs); + + // Record this vtable definition for each type metadata it references. 
+ recordTypeIdCompatibleVtableReferences(Index, V, Types); + } + } - // Don't mark variables we won't be able to internalize as read-only. - GlobalVarSummary::GVarFlags VarFlags( + // Don't mark variables we won't be able to internalize as read/write-only. + bool CanBeInternalized = !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() && - !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass()); + !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass(); + GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized); auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags, RefEdges.takeVector()); if (NonRenamableLocal) CantBePromoted.insert(V.getGUID()); if (HasBlockAddress) GVarSummary->setNotEligibleToImport(); + if (!VTableFuncs.empty()) + GVarSummary->setVTableFuncs(VTableFuncs); Index.addGlobalValueSummary(V, std::move(GVarSummary)); } @@ -434,12 +614,15 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, DenseSet<GlobalValue::GUID> &CantBePromoted) { bool NonRenamableLocal = isNonRenamableLocal(A); GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal, - /* Live = */ false, A.isDSOLocal()); + /* Live = */ false, A.isDSOLocal(), + A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr()); auto AS = llvm::make_unique<AliasSummary>(Flags); auto *Aliasee = A.getBaseObject(); - auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee); - assert(AliaseeSummary && "Alias expects aliasee summary to be parsed"); - AS->setAliasee(AliaseeSummary); + auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID()); + assert(AliaseeVI && "Alias expects aliasee summary to be available"); + assert(AliaseeVI.getSummaryList().size() == 1 && + "Expected a single entry per aliasee in per-module index"); + AS->setAliasee(AliaseeVI, AliaseeVI.getSummaryList()[0].get()); if (NonRenamableLocal) CantBePromoted.insert(A.getGUID()); Index.addGlobalValueSummary(A, std::move(AS)); @@ -507,7 +690,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage, /* NotEligibleToImport = */ true, /* Live = */ true, - /* Local */ GV->isDSOLocal()); + /* Local */ GV->isDSOLocal(), + GV->hasLinkOnceODRLinkage() && GV->hasGlobalUnnamedAddr()); CantBePromoted.insert(GV->getGUID()); // Create the appropriate summary type. if (Function *F = dyn_cast<Function>(GV)) { @@ -531,7 +715,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( } else { std::unique_ptr<GlobalVarSummary> Summary = llvm::make_unique<GlobalVarSummary>( - GVFlags, GlobalVarSummary::GVarFlags(), + GVFlags, GlobalVarSummary::GVarFlags(false, false), ArrayRef<ValueInfo>{}); Index.addGlobalValueSummary(*GV, std::move(Summary)); } @@ -568,10 +752,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( // Compute summaries for all variables defined in module, and save in the // index. 
+ SmallVector<MDNode *, 2> Types; for (const GlobalVariable &G : M.globals()) { if (G.isDeclaration()) continue; - computeVariableSummary(Index, G, CantBePromoted); + computeVariableSummary(Index, G, CantBePromoted, M, Types); } // Compute summaries for all aliases defined in module, and save in the @@ -626,6 +811,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( } } + if (!ModuleSummaryDotFile.empty()) { + std::error_code EC; + raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::F_None); + if (EC) + report_fatal_error(Twine("Failed to open dot file ") + + ModuleSummaryDotFile + ": " + EC.message() + "\n"); + Index.exportToDot(OSDot); + } + return Index; } diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp index 180c38ddacc2..b616cd6f762b 100644 --- a/lib/Analysis/MustExecute.cpp +++ b/lib/Analysis/MustExecute.cpp @@ -1,9 +1,8 @@ //===- MustExecute.cpp - Printer for isGuaranteedToExecute ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -194,7 +193,8 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop, SmallPtrSet<const BasicBlock *, 4> Predecessors; collectTransitivePredecessors(CurLoop, BB, Predecessors); - // Make sure that all successors of all predecessors of BB are either: + // Make sure that all successors of, all predecessors of BB which are not + // dominated by BB, are either: // 1) BB, // 2) Also predecessors of BB, // 3) Exit blocks which are not taken on 1st iteration. @@ -204,6 +204,12 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop, // Predecessor block may throw, so it has a side exit. if (blockMayThrow(Pred)) return false; + + // BB dominates Pred, so if Pred runs, BB must run. + // This is true when Pred is a loop latch. + if (DT->dominates(BB, Pred)) + continue; + for (auto *Succ : successors(Pred)) if (CheckedSuccessors.insert(Succ).second && Succ != BB && !Predecessors.count(Succ)) diff --git a/lib/Analysis/ObjCARCAliasAnalysis.cpp b/lib/Analysis/ObjCARCAliasAnalysis.cpp index 95ae1a6e744f..811033e73147 100644 --- a/lib/Analysis/ObjCARCAliasAnalysis.cpp +++ b/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -1,9 +1,8 @@ //===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -38,9 +37,10 @@ using namespace llvm; using namespace llvm::objcarc; AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { + const MemoryLocation &LocB, + AAQueryInfo &AAQI) { if (!EnableARCOpts) - return AAResultBase::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB, AAQI); // First, strip off no-ops, including ObjC-specific no-ops, and try making a // precise alias query. 
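Every ObjCARC override above, continued in the next hunk, now takes an AAQueryInfo and forwards it when chaining to AAResultBase, so cached state survives for the whole top-level query. A hedged sketch of the shape any AAResult implementation takes after this change; MyAAResult is a hypothetical name.

    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    class MyAAResult : public AAResultBase<MyAAResult> {
      friend AAResultBase<MyAAResult>;

    public:
      AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
                        AAQueryInfo &AAQI) {
        // Custom reasoning would go here; anything inconclusive falls through
        // to the rest of the AA chain, passing the same AAQueryInfo along.
        return AAResultBase::alias(LocA, LocB, AAQI);
      }
    };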
@@ -48,7 +48,7 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA, const Value *SB = GetRCIdentityRoot(LocB.Ptr); AliasResult Result = AAResultBase::alias(MemoryLocation(SA, LocA.Size, LocA.AATags), - MemoryLocation(SB, LocB.Size, LocB.AATags)); + MemoryLocation(SB, LocB.Size, LocB.AATags), AAQI); if (Result != MayAlias) return Result; @@ -57,7 +57,7 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA, const Value *UA = GetUnderlyingObjCPtr(SA, DL); const Value *UB = GetUnderlyingObjCPtr(SB, DL); if (UA != SA || UB != SB) { - Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB)); + Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB), AAQI); // We can't use MustAlias or PartialAlias results here because // GetUnderlyingObjCPtr may return an offsetted pointer value. if (Result == NoAlias) @@ -70,22 +70,23 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA, } bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { + AAQueryInfo &AAQI, bool OrLocal) { if (!EnableARCOpts) - return AAResultBase::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); // First, strip off no-ops, including ObjC-specific no-ops, and try making // a precise alias query. const Value *S = GetRCIdentityRoot(Loc.Ptr); if (AAResultBase::pointsToConstantMemory( - MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal)) + MemoryLocation(S, Loc.Size, Loc.AATags), AAQI, OrLocal)) return true; // If that failed, climb to the underlying object, including climbing through // ObjC-specific no-ops, and try making an imprecise alias query. const Value *U = GetUnderlyingObjCPtr(S, DL); if (U != S) - return AAResultBase::pointsToConstantMemory(MemoryLocation(U), OrLocal); + return AAResultBase::pointsToConstantMemory(MemoryLocation(U), AAQI, + OrLocal); // If that failed, fail. We don't need to chain here, since that's covered // by the earlier precise query. @@ -107,9 +108,10 @@ FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) { } ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call, - const MemoryLocation &Loc) { + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { if (!EnableARCOpts) - return AAResultBase::getModRefInfo(Call, Loc); + return AAResultBase::getModRefInfo(Call, Loc, AAQI); switch (GetBasicARCInstKind(Call)) { case ARCInstKind::Retain: @@ -128,7 +130,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call, break; } - return AAResultBase::getModRefInfo(Call, Loc); + return AAResultBase::getModRefInfo(Call, Loc, AAQI); } ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) { diff --git a/lib/Analysis/ObjCARCAnalysisUtils.cpp b/lib/Analysis/ObjCARCAnalysisUtils.cpp index d6db6386c38b..56d1cb421225 100644 --- a/lib/Analysis/ObjCARCAnalysisUtils.cpp +++ b/lib/Analysis/ObjCARCAnalysisUtils.cpp @@ -1,9 +1,8 @@ //===- ObjCARCAnalysisUtils.cpp -------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/ObjCARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp index 31c432711834..0e96c6e975c9 100644 --- a/lib/Analysis/ObjCARCInstKind.cpp +++ b/lib/Analysis/ObjCARCInstKind.cpp @@ -1,9 +1,8 @@ //===- ARCInstKind.cpp - ObjC ARC Optimization ----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -482,6 +481,41 @@ bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) { llvm_unreachable("covered switch isn't covered?"); } +/// Test if the given class represents instructions which do nothing if +/// passed a global variable. +bool llvm::objcarc::IsNoopOnGlobal(ARCInstKind Class) { + switch (Class) { + case ARCInstKind::Retain: + case ARCInstKind::RetainRV: + case ARCInstKind::ClaimRV: + case ARCInstKind::Release: + case ARCInstKind::Autorelease: + case ARCInstKind::AutoreleaseRV: + case ARCInstKind::RetainBlock: + case ARCInstKind::FusedRetainAutorelease: + case ARCInstKind::FusedRetainAutoreleaseRV: + return true; + case ARCInstKind::AutoreleasepoolPush: + case ARCInstKind::AutoreleasepoolPop: + case ARCInstKind::LoadWeakRetained: + case ARCInstKind::StoreWeak: + case ARCInstKind::InitWeak: + case ARCInstKind::LoadWeak: + case ARCInstKind::MoveWeak: + case ARCInstKind::CopyWeak: + case ARCInstKind::DestroyWeak: + case ARCInstKind::StoreStrong: + case ARCInstKind::IntrinsicUser: + case ARCInstKind::CallOrUser: + case ARCInstKind::Call: + case ARCInstKind::User: + case ARCInstKind::None: + case ARCInstKind::NoopCast: + return false; + } + llvm_unreachable("covered switch isn't covered?"); +} + /// Test if the given class represents instructions which are always safe /// to mark with the "tail" keyword. bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) { diff --git a/lib/Analysis/OptimizationRemarkEmitter.cpp b/lib/Analysis/OptimizationRemarkEmitter.cpp index 8ece0a2a3ed3..72c40a0be232 100644 --- a/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -1,9 +1,8 @@ //===- OptimizationRemarkEmitter.cpp - Optimization Diagnostic --*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp index 5f4fe0f7dda2..48f2a4020c66 100644 --- a/lib/Analysis/OrderedBasicBlock.cpp +++ b/lib/Analysis/OrderedBasicBlock.cpp @@ -1,9 +1,8 @@ //===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -86,3 +85,27 @@ bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) { return comesBefore(A, B); } + +void OrderedBasicBlock::eraseInstruction(const Instruction *I) { + if (LastInstFound != BB->end() && I == &*LastInstFound) { + if (LastInstFound == BB->begin()) { + LastInstFound = BB->end(); + NextInstPos = 0; + } else + LastInstFound--; + } + + NumberedInsts.erase(I); +} + +void OrderedBasicBlock::replaceInstruction(const Instruction *Old, + const Instruction *New) { + auto OI = NumberedInsts.find(Old); + if (OI == NumberedInsts.end()) + return; + + NumberedInsts.insert({New, OI->second}); + if (LastInstFound != BB->end() && Old == &*LastInstFound) + LastInstFound = New->getIterator(); + NumberedInsts.erase(Old); +} diff --git a/lib/Analysis/OrderedInstructions.cpp b/lib/Analysis/OrderedInstructions.cpp index 7b155208c02e..458c0a7de6c2 100644 --- a/lib/Analysis/OrderedInstructions.cpp +++ b/lib/Analysis/OrderedInstructions.cpp @@ -1,9 +1,8 @@ //===-- OrderedInstructions.cpp - Instruction dominance function ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 858f08f6537a..7f77ab146c4c 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -1,9 +1,8 @@ //===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/PhiValues.cpp b/lib/Analysis/PhiValues.cpp index 729227c86697..49749bc44746 100644 --- a/lib/Analysis/PhiValues.cpp +++ b/lib/Analysis/PhiValues.cpp @@ -1,9 +1,8 @@ //===- PhiValues.cpp - Phi Value Analysis ---------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index e6b660fe26d7..4afe22bd5342 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -1,9 +1,8 @@ //===- PostDominators.cpp - Post-Dominator Calculation --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp index 1d70c75f2e1c..dce19d6d546e 100644 --- a/lib/Analysis/ProfileSummaryInfo.cpp +++ b/lib/Analysis/ProfileSummaryInfo.cpp @@ -1,9 +1,8 @@ //===- ProfileSummaryInfo.cpp - Global profile summary information --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -61,10 +60,9 @@ static cl::opt<int> ProfileSummaryColdCount( // Find the summary entry for a desired percentile of counts. static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS, uint64_t Percentile) { - auto Compare = [](const ProfileSummaryEntry &Entry, uint64_t Percentile) { + auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) { return Entry.Cutoff < Percentile; - }; - auto It = std::lower_bound(DS.begin(), DS.end(), Percentile, Compare); + }); // The required percentile has to be <= one of the percentiles in the // detailed summary. if (It == DS.end()) @@ -80,7 +78,14 @@ static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS, bool ProfileSummaryInfo::computeSummary() { if (Summary) return true; - auto *SummaryMD = M.getProfileSummary(); + // First try to get context sensitive ProfileSummary. + auto *SummaryMD = M.getProfileSummary(/* IsCS */ true); + if (SummaryMD) { + Summary.reset(ProfileSummary::getFromMD(SummaryMD)); + return true; + } + // This will actually return PSK_Instr or PSK_Sample summary. 
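The getEntryForPercentile hunk above replaces std::lower_bound with the range-based llvm::partition_point: given a range partitioned by the predicate, it returns the first element for which the predicate is false, here the first summary entry whose Cutoff is at or above the requested percentile. A self-contained sketch of the idiom; Entry and firstAtOrAbove are hypothetical names. The computeSummary hunk continues just below.

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    struct Entry {
      unsigned Cutoff;
    };

    // Requires Sorted to be ordered (hence partitioned) by Cutoff.
    static const Entry *firstAtOrAbove(const std::vector<Entry> &Sorted,
                                       unsigned Percentile) {
      auto It = llvm::partition_point(
          Sorted, [=](const Entry &E) { return E.Cutoff < Percentile; });
      return It == Sorted.end() ? nullptr : &*It;
    }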
+ SummaryMD = M.getProfileSummary(/* IsCS */ false); if (!SummaryMD) return false; Summary.reset(ProfileSummary::getFromMD(SummaryMD)); @@ -89,7 +94,8 @@ bool ProfileSummaryInfo::computeSummary() { Optional<uint64_t> ProfileSummaryInfo::getProfileCount(const Instruction *Inst, - BlockFrequencyInfo *BFI) { + BlockFrequencyInfo *BFI, + bool AllowSynthetic) { if (!Inst) return None; assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) && @@ -105,7 +111,7 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst, return None; } if (BFI) - return BFI->getBlockProfileCount(Inst->getParent()); + return BFI->getBlockProfileCount(Inst->getParent(), AllowSynthetic); return None; } diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp index 1fdaf4d55b59..9a834ba4866a 100644 --- a/lib/Analysis/PtrUseVisitor.cpp +++ b/lib/Analysis/PtrUseVisitor.cpp @@ -1,9 +1,8 @@ //===- PtrUseVisitor.cpp - InstVisitors over a pointers uses --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -35,5 +34,11 @@ bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) { if (!IsOffsetKnown) return false; - return GEPI.accumulateConstantOffset(DL, Offset); + APInt TmpOffset(DL.getIndexTypeSizeInBits(GEPI.getType()), 0); + if (GEPI.accumulateConstantOffset(DL, TmpOffset)) { + Offset += TmpOffset.sextOrTrunc(Offset.getBitWidth()); + return true; + } + + return false; } diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 2bd611350f46..8ba38adfb0d2 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -1,9 +1,8 @@ //===- RegionInfo.cpp - SESE region detection analysis --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Detects single entry single exit regions in the control flow graph. diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index a101ff109199..6c0d17b45c62 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -1,9 +1,8 @@ //===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -279,12 +278,17 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O, return new PrintRegionPass(Banner, O); } +static std::string getDescription(const Region &R) { + return "region"; +} + bool RegionPass::skipRegion(Region &R) const { Function &F = *R.getEntry()->getParent(); - if (!F.getContext().getOptPassGate().shouldRunPass(this, R)) + OptPassGate &Gate = F.getContext().getOptPassGate(); + if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(R))) return true; - if (F.hasFnAttribute(Attribute::OptimizeNone)) { + if (F.hasOptNone()) { // Report this only once per function. if (R.getEntry() == &F.getEntryBlock()) LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName() diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index 5986b8c4e0c3..5bdcb31fbe99 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -1,9 +1,8 @@ //===- RegionPrinter.cpp - Print regions tree pass ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Print out the region tree of a function using dotty/graphviz. diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index e5134f2eeda9..bc2cfd6fcc42 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -1,9 +1,8 @@ //===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -203,15 +202,20 @@ static cl::opt<unsigned> MaxConstantEvolvingDepth( cl::desc("Maximum depth of recursive constant evolving"), cl::init(32)); static cl::opt<unsigned> - MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden, - cl::desc("Maximum depth of recursive SExt/ZExt"), - cl::init(8)); + MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden, + cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"), + cl::init(8)); static cl::opt<unsigned> MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, cl::desc("Max coefficients in AddRec during evolving"), cl::init(8)); +static cl::opt<unsigned> + HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden, + cl::desc("Size of the expression which is considered huge"), + cl::init(4096)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -273,7 +277,9 @@ void SCEV::print(raw_ostream &OS) const { case scAddExpr: case scMulExpr: case scUMaxExpr: - case scSMaxExpr: { + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: { const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this); const char *OpStr = nullptr; switch (NAry->getSCEVType()) { @@ -281,6 +287,12 @@ void SCEV::print(raw_ostream &OS) const { case scMulExpr: OpStr = " * "; break; case scUMaxExpr: OpStr = " umax "; break; case scSMaxExpr: OpStr = " smax "; break; + case scUMinExpr: + OpStr = " umin "; + break; + case scSMinExpr: + OpStr = " smin "; + break; } OS << "("; for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); @@ -349,6 +361,8 @@ Type *SCEV::getType() const { case scMulExpr: case scUMaxExpr: case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: return cast<SCEVNAryExpr>(this)->getType(); case scAddExpr: return cast<SCEVAddExpr>(this)->getType(); @@ -393,7 +407,7 @@ bool SCEV::isNonConstantNegative() const { } SCEVCouldNotCompute::SCEVCouldNotCompute() : - SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} + SCEV(FoldingSetNodeIDRef(), scCouldNotCompute, 0) {} bool SCEVCouldNotCompute::classof(const SCEV *S) { return S->getSCEVType() == scCouldNotCompute; @@ -422,7 +436,7 @@ ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) { SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, unsigned SCEVTy, const SCEV *op, Type *ty) - : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} + : SCEV(ID, SCEVTy, computeExpressionSize(op)), Op(op), Ty(ty) {} SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, Type *ty) @@ -713,7 +727,9 @@ static int CompareSCEVComplexity( case scAddExpr: case scMulExpr: case scSMaxExpr: - case scUMaxExpr: { + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: { const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); @@ -795,11 +811,10 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, } // Do the rough sort by complexity. - std::stable_sort(Ops.begin(), Ops.end(), - [&](const SCEV *LHS, const SCEV *RHS) { - return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, - LHS, RHS, DT) < 0; - }); + llvm::stable_sort(Ops, [&](const SCEV *LHS, const SCEV *RHS) { + return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT) < + 0; + }); // Now that we are sorted by complexity, group elements of the same // complexity. 
Note that this is, at worst, N^2, but the vector is likely to @@ -846,6 +861,17 @@ static inline int sizeOfSCEV(const SCEV *S) { return F.Size; } +/// Returns true if the subtree of \p S contains at least HugeExprThreshold +/// nodes. +static bool isHugeExpression(const SCEV *S) { + return S->getExpressionSize() >= HugeExprThreshold; +} + +/// Returns true of \p Ops contains a huge SCEV (see definition above). +static bool hasHugeExpression(ArrayRef<const SCEV *> Ops) { + return any_of(Ops, isHugeExpression); +} + namespace { struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> { @@ -913,6 +939,8 @@ public: void visitUDivExpr(const SCEVUDivExpr *Numerator) {} void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} + void visitSMinExpr(const SCEVSMinExpr *Numerator) {} + void visitUMinExpr(const SCEVUMinExpr *Numerator) {} void visitUnknown(const SCEVUnknown *Numerator) {} void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} @@ -1219,8 +1247,8 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, // SCEV Expression folder implementations //===----------------------------------------------------------------------===// -const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, - Type *Ty) { +const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, + unsigned Depth) { assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && "This is not a truncating conversion!"); assert(isSCEVable(Ty) && @@ -1241,15 +1269,23 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, // trunc(trunc(x)) --> trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) - return getTruncateExpr(ST->getOperand(), Ty); + return getTruncateExpr(ST->getOperand(), Ty, Depth + 1); // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) - return getTruncateOrSignExtend(SS->getOperand(), Ty); + return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1); // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) - return getTruncateOrZeroExtend(SZ->getOperand(), Ty); + return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1); + + if (Depth > MaxCastDepth) { + SCEV *S = + new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); + return S; + } // trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and // trunc(x1 * ... * xN) --> trunc(x1) * ... 
* trunc(xN), @@ -1261,7 +1297,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, unsigned numTruncs = 0; for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2; ++i) { - const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty); + const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1); if (!isa<SCEVCastExpr>(CommOp->getOperand(i)) && isa<SCEVTruncateExpr>(S)) numTruncs++; Operands.push_back(S); @@ -1285,7 +1321,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { SmallVector<const SCEV *, 4> Operands; for (const SCEV *Op : AddRec->operands()) - Operands.push_back(getTruncateExpr(Op, Ty)); + Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1)); return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } @@ -1619,7 +1655,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { ID.AddPointer(Ty); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - if (Depth > MaxExtDepth) { + if (Depth > MaxCastDepth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); @@ -1637,7 +1673,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { unsigned NewBits = getTypeSizeInBits(Ty); if (CR.truncate(TruncBits).zeroExtend(NewBits).contains( CR.zextOrTrunc(NewBits))) - return getTruncateOrZeroExtend(X, Ty); + return getTruncateOrZeroExtend(X, Ty, Depth); } // If the input value is a chrec scev, and we can prove that the value @@ -1679,9 +1715,9 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. const SCEV *CastedMaxBECount = - getTruncateOrZeroExtend(MaxBECount, Start->getType()); - const SCEV *RecastedMaxBECount = - getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); + getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); + const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( + CastedMaxBECount, MaxBECount->getType(), Depth); if (MaxBECount == RecastedMaxBECount) { Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. @@ -1930,7 +1966,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // Limit recursion depth. - if (Depth > MaxExtDepth) { + if (Depth > MaxCastDepth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); @@ -1948,7 +1984,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { unsigned NewBits = getTypeSizeInBits(Ty); if (CR.truncate(TruncBits).signExtend(NewBits).contains( CR.sextOrTrunc(NewBits))) - return getTruncateOrSignExtend(X, Ty); + return getTruncateOrSignExtend(X, Ty, Depth); } if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) { @@ -2023,9 +2059,9 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. 
const SCEV *CastedMaxBECount = - getTruncateOrZeroExtend(MaxBECount, Start->getType()); - const SCEV *RecastedMaxBECount = - getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); + getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); + const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( + CastedMaxBECount, MaxBECount->getType(), Depth); if (MaxBECount == RecastedMaxBECount) { Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. @@ -2295,7 +2331,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, // can't-overflow flags for the operation if possible. static SCEV::NoWrapFlags StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, - const SmallVectorImpl<const SCEV *> &Ops, + const ArrayRef<const SCEV *> Ops, SCEV::NoWrapFlags Flags) { using namespace std::placeholders; @@ -2405,7 +2441,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } // Limit recursion calls depth. - if (Depth > MaxArithDepth) + if (Depth > MaxArithDepth || hasHugeExpression(Ops)) return getOrCreateAddExpr(Ops, Flags); // Okay, check to see if the same value occurs in the operand list more than @@ -2743,7 +2779,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } const SCEV * -ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops, +ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops, SCEV::NoWrapFlags Flags) { FoldingSetNodeID ID; ID.AddInteger(scAddExpr); @@ -2765,7 +2801,7 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops, } const SCEV * -ScalarEvolution::getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops, +ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops, const Loop *L, SCEV::NoWrapFlags Flags) { FoldingSetNodeID ID; ID.AddInteger(scAddRecExpr); @@ -2788,7 +2824,7 @@ ScalarEvolution::getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops, } const SCEV * -ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops, +ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops, SCEV::NoWrapFlags Flags) { FoldingSetNodeID ID; ID.AddInteger(scMulExpr); @@ -2884,7 +2920,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); // Limit recursion calls depth. - if (Depth > MaxArithDepth) + if (Depth > MaxArithDepth || hasHugeExpression(Ops)) return getOrCreateMulExpr(Ops, Flags); // If there are any constants, fold them together. @@ -3057,7 +3093,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // Limit max number of arguments to avoid creation of unreasonably big // SCEVAddRecs with very complex operands. 
if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 > - MaxAddRecSize) + MaxAddRecSize || isHugeExpression(AddRec) || + isHugeExpression(OtherAddRec)) continue; bool Overflow = false; @@ -3090,7 +3127,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1)); } if (!Overflow) { - const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(), + const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); if (Ops.size() == 2) return NewAddRec; Ops[Idx] = NewAddRec; @@ -3493,209 +3530,166 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, return getAddExpr(BaseExpr, TotalOffset, Wrap); } -const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, - const SCEV *RHS) { - SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; - return getSMaxExpr(Ops); +std::tuple<const SCEV *, FoldingSetNodeID, void *> +ScalarEvolution::findExistingSCEVInCache(int SCEVType, + ArrayRef<const SCEV *> Ops) { + FoldingSetNodeID ID; + void *IP = nullptr; + ID.AddInteger(SCEVType); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + return std::tuple<const SCEV *, FoldingSetNodeID, void *>( + UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP); } -const SCEV * -ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { - assert(!Ops.empty() && "Cannot get empty smax!"); +const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind, + SmallVectorImpl<const SCEV *> &Ops) { + assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && - "SCEVSMaxExpr operand types don't match!"); + "Operand types don't match!"); #endif + bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr; + bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr; + // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); + // Check if we have created the same expression before. + if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) { + return S; + } + // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { ++Idx; assert(Idx < Ops.size()); + auto FoldOp = [&](const APInt &LHS, const APInt &RHS) { + if (Kind == scSMaxExpr) + return APIntOps::smax(LHS, RHS); + else if (Kind == scSMinExpr) + return APIntOps::smin(LHS, RHS); + else if (Kind == scUMaxExpr) + return APIntOps::umax(LHS, RHS); + else if (Kind == scUMinExpr) + return APIntOps::umin(LHS, RHS); + llvm_unreachable("Unknown SCEV min/max opcode"); + }; + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { // We found two constants, fold them together! ConstantInt *Fold = ConstantInt::get( - getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt())); + getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; LHSC = cast<SCEVConstant>(Ops[0]); } - // If we are left with a constant minimum-int, strip it off. - if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) { + bool IsMinV = LHSC->getValue()->isMinValue(IsSigned); + bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned); + + if (IsMax ? 
IsMinV : IsMaxV) { + // If we are left with a constant minimum(/maximum)-int, strip it off. Ops.erase(Ops.begin()); --Idx; - } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) { - // If we have an smax with a constant maximum-int, it will always be - // maximum-int. - return Ops[0]; + } else if (IsMax ? IsMaxV : IsMinV) { + // If we have a max(/min) with a constant maximum(/minimum)-int, + // it will always be the extremum. + return LHSC; } if (Ops.size() == 1) return Ops[0]; } - // Find the first SMax - while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) + // Find the first operation of the same kind + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind) ++Idx; - // Check to see if one of the operands is an SMax. If so, expand its operands - // onto our operand list, and recurse to simplify. + // Check to see if one of the operands is of the same kind. If so, expand its + // operands onto our operand list, and recurse to simplify. if (Idx < Ops.size()) { - bool DeletedSMax = false; - while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) { + bool DeletedAny = false; + while (Ops[Idx]->getSCEVType() == Kind) { + const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]); Ops.erase(Ops.begin()+Idx); - Ops.append(SMax->op_begin(), SMax->op_end()); - DeletedSMax = true; + Ops.append(SMME->op_begin(), SMME->op_end()); + DeletedAny = true; } - if (DeletedSMax) - return getSMaxExpr(Ops); + if (DeletedAny) + return getMinMaxExpr(Kind, Ops); } // Okay, check to see if the same value occurs in the operand list twice. If // so, delete one. Since we sorted the list, these values are required to // be adjacent. - for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) - // X smax Y smax Y --> X smax Y - // X smax Y --> X, if X is always greater than Y - if (Ops[i] == Ops[i+1] || - isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { - Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); - --i; --e; - } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { - Ops.erase(Ops.begin()+i, Ops.begin()+i+1); - --i; --e; + llvm::CmpInst::Predicate GEPred = + IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + llvm::CmpInst::Predicate LEPred = + IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred; + llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred; + for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) { + if (Ops[i] == Ops[i + 1] || + isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) { + // X op Y op Y --> X op Y + // X op Y --> X, if we know X, Y are ordered appropriately + Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2); + --i; + --e; + } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i], + Ops[i + 1])) { + // X op Y --> Y, if we know X, Y are ordered appropriately + Ops.erase(Ops.begin() + i, Ops.begin() + i + 1); + --i; + --e; } + } if (Ops.size() == 1) return Ops[0]; assert(!Ops.empty() && "Reduced smax down to nothing!"); - // Okay, it looks like we really DO need an smax expr. Check to see if we + // Okay, it looks like we really DO need an expr. Check to see if we // already have one, otherwise create a new one. 
+ const SCEV *ExistingSCEV; FoldingSetNodeID ID; - ID.AddInteger(scSMaxExpr); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - ID.AddPointer(Ops[i]); - void *IP = nullptr; - if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + void *IP; + std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops); + if (ExistingSCEV) + return ExistingSCEV; const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); - SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), - O, Ops.size()); + SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr( + ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } -const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, - const SCEV *RHS) { +const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; - return getUMaxExpr(Ops); + return getSMaxExpr(Ops); } -const SCEV * -ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { - assert(!Ops.empty() && "Cannot get empty umax!"); - if (Ops.size() == 1) return Ops[0]; -#ifndef NDEBUG - Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); - for (unsigned i = 1, e = Ops.size(); i != e; ++i) - assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && - "SCEVUMaxExpr operand types don't match!"); -#endif - - // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI, DT); - - // If there are any constants, fold them together. - unsigned Idx = 0; - if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { - ++Idx; - assert(Idx < Ops.size()); - while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { - // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get( - getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt())); - Ops[0] = getConstant(Fold); - Ops.erase(Ops.begin()+1); // Erase the folded element - if (Ops.size() == 1) return Ops[0]; - LHSC = cast<SCEVConstant>(Ops[0]); - } - - // If we are left with a constant minimum-int, strip it off. - if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) { - Ops.erase(Ops.begin()); - --Idx; - } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) { - // If we have an umax with a constant maximum-int, it will always be - // maximum-int. - return Ops[0]; - } - - if (Ops.size() == 1) return Ops[0]; - } - - // Find the first UMax - while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) - ++Idx; - - // Check to see if one of the operands is a UMax. If so, expand its operands - // onto our operand list, and recurse to simplify. - if (Idx < Ops.size()) { - bool DeletedUMax = false; - while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) { - Ops.erase(Ops.begin()+Idx); - Ops.append(UMax->op_begin(), UMax->op_end()); - DeletedUMax = true; - } - - if (DeletedUMax) - return getUMaxExpr(Ops); - } - - // Okay, check to see if the same value occurs in the operand list twice. If - // so, delete one. Since we sorted the list, these values are required to - // be adjacent. 
- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) - // X umax Y umax Y --> X umax Y - // X umax Y --> X, if X is always greater than Y - if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning( - ICmpInst::ICMP_UGE, Ops[i], Ops[i + 1])) { - Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2); - --i; --e; - } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, Ops[i], - Ops[i + 1])) { - Ops.erase(Ops.begin() + i, Ops.begin() + i + 1); - --i; --e; - } - - if (Ops.size() == 1) return Ops[0]; +const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { + return getMinMaxExpr(scSMaxExpr, Ops); +} - assert(!Ops.empty() && "Reduced umax down to nothing!"); +const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) { + SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; + return getUMaxExpr(Ops); +} - // Okay, it looks like we really DO need a umax expr. Check to see if we - // already have one, otherwise create a new one. - FoldingSetNodeID ID; - ID.AddInteger(scUMaxExpr); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - ID.AddPointer(Ops[i]); - void *IP = nullptr; - if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); - std::uninitialized_copy(Ops.begin(), Ops.end(), O); - SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), - O, Ops.size()); - UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); - return S; +const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { + return getMinMaxExpr(scUMaxExpr, Ops); } const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, @@ -3705,11 +3699,7 @@ const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, } const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) { - // ~smax(~x, ~y, ~z) == smin(x, y, z). - SmallVector<const SCEV *, 2> NotOps; - for (auto *S : Ops) - NotOps.push_back(getNotSCEV(S)); - return getNotSCEV(getSMaxExpr(NotOps)); + return getMinMaxExpr(scSMinExpr, Ops); } const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, @@ -3719,16 +3709,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, } const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) { - assert(!Ops.empty() && "At least one operand must be!"); - // Trivial case. - if (Ops.size() == 1) - return Ops[0]; - - // ~umax(~x, ~y, ~z) == umin(x, y, z). - SmallVector<const SCEV *, 2> NotOps; - for (auto *S : Ops) - NotOps.push_back(getNotSCEV(S)); - return getNotSCEV(getUMaxExpr(NotOps)); + return getMinMaxExpr(scUMinExpr, Ops); } const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { @@ -3892,7 +3873,7 @@ void ScalarEvolution::eraseValueFromMap(Value *V) { } /// Check whether value has nuw/nsw/exact set but SCEV does not. -/// TODO: In reality it is better to check the poison recursevely +/// TODO: In reality it is better to check the poison recursively /// but this is better than nothing. 
static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) { if (auto *I = dyn_cast<Instruction>(V)) { @@ -3970,12 +3951,45 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags); } +/// If Expr computes ~A, return A else return nullptr +static const SCEV *MatchNotExpr(const SCEV *Expr) { + const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr); + if (!Add || Add->getNumOperands() != 2 || + !Add->getOperand(0)->isAllOnesValue()) + return nullptr; + + const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1)); + if (!AddRHS || AddRHS->getNumOperands() != 2 || + !AddRHS->getOperand(0)->isAllOnesValue()) + return nullptr; + + return AddRHS->getOperand(1); +} + /// Return a SCEV corresponding to ~V = -1-V const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) return getConstant( cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); + // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y) + if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) { + auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) { + SmallVector<const SCEV *, 2> MatchedOperands; + for (const SCEV *Operand : MME->operands()) { + const SCEV *Matched = MatchNotExpr(Operand); + if (!Matched) + return (const SCEV *)nullptr; + MatchedOperands.push_back(Matched); + } + return getMinMaxExpr( + SCEVMinMaxExpr::negate(static_cast<SCEVTypes>(MME->getSCEVType())), + MatchedOperands); + }; + if (const SCEV *Replaced = MatchMinMaxNegation(MME)) + return Replaced; + } + Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); const SCEV *AllOnes = @@ -4022,29 +4036,28 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth); } -const SCEV * -ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) { +const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty, + unsigned Depth) { Type *SrcTy = V->getType(); assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) - return getTruncateExpr(V, Ty); - return getZeroExtendExpr(V, Ty); + return getTruncateExpr(V, Ty, Depth); + return getZeroExtendExpr(V, Ty, Depth); } -const SCEV * -ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, - Type *Ty) { +const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty, + unsigned Depth) { Type *SrcTy = V->getType(); assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) - return getTruncateExpr(V, Ty); - return getSignExtendExpr(V, Ty); + return getTruncateExpr(V, Ty, Depth); + return getSignExtendExpr(V, Ty, Depth); } const SCEV * @@ -4530,52 +4543,21 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) { if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0) break; - auto *CI = dyn_cast<CallInst>(EVI->getAggregateOperand()); - if (!CI) + auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()); + if (!WO) break; - if (auto *F = CI->getCalledFunction()) - switch (F->getIntrinsicID()) { - case Intrinsic::sadd_with_overflow: - case 
Intrinsic::uadd_with_overflow: - if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT)) - return BinaryOp(Instruction::Add, CI->getArgOperand(0), - CI->getArgOperand(1)); - - // Now that we know that all uses of the arithmetic-result component of - // CI are guarded by the overflow check, we can go ahead and pretend - // that the arithmetic is non-overflowing. - if (F->getIntrinsicID() == Intrinsic::sadd_with_overflow) - return BinaryOp(Instruction::Add, CI->getArgOperand(0), - CI->getArgOperand(1), /* IsNSW = */ true, - /* IsNUW = */ false); - else - return BinaryOp(Instruction::Add, CI->getArgOperand(0), - CI->getArgOperand(1), /* IsNSW = */ false, - /* IsNUW*/ true); - case Intrinsic::ssub_with_overflow: - case Intrinsic::usub_with_overflow: - if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT)) - return BinaryOp(Instruction::Sub, CI->getArgOperand(0), - CI->getArgOperand(1)); - - // The same reasoning as sadd/uadd above. - if (F->getIntrinsicID() == Intrinsic::ssub_with_overflow) - return BinaryOp(Instruction::Sub, CI->getArgOperand(0), - CI->getArgOperand(1), /* IsNSW = */ true, - /* IsNUW = */ false); - else - return BinaryOp(Instruction::Sub, CI->getArgOperand(0), - CI->getArgOperand(1), /* IsNSW = */ false, - /* IsNUW = */ true); - case Intrinsic::smul_with_overflow: - case Intrinsic::umul_with_overflow: - return BinaryOp(Instruction::Mul, CI->getArgOperand(0), - CI->getArgOperand(1)); - default: - break; - } - break; + Instruction::BinaryOps BinOp = WO->getBinaryOp(); + bool Signed = WO->isSigned(); + // TODO: Should add nuw/nsw flags for mul as well. + if (BinOp == Instruction::Mul || !isOverflowIntrinsicNoWrap(WO, DT)) + return BinaryOp(BinOp, WO->getLHS(), WO->getRHS()); + + // Now that we know that all uses of the arithmetic-result component of + // CI are guarded by the overflow check, we can go ahead and pretend + // that the arithmetic is non-overflowing. + return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(), + /* IsNSW = */ Signed, /* IsNUW = */ !Signed); } default: @@ -5009,7 +4991,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN, // overflow. if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) - (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); + (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags); return PHISCEV; } @@ -5196,6 +5178,8 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S, switch (S->getSCEVType()) { case scConstant: case scTruncate: case scZeroExtend: case scSignExtend: case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: // These expressions are available if their operand(s) is/are. return true; @@ -5551,6 +5535,9 @@ ScalarEvolution::getRangeRef(const SCEV *S, DenseMap<const SCEV *, ConstantRange> &Cache = SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges : SignedRanges; + ConstantRange::PreferredRangeType RangeType = + SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED + ? ConstantRange::Unsigned : ConstantRange::Signed; // See if we've computed this range already. 
DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S); @@ -5581,53 +5568,60 @@ ScalarEvolution::getRangeRef(const SCEV *S, ConstantRange X = getRangeRef(Add->getOperand(0), SignHint); for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) X = X.add(getRangeRef(Add->getOperand(i), SignHint)); - return setRange(Add, SignHint, ConservativeResult.intersectWith(X)); + return setRange(Add, SignHint, + ConservativeResult.intersectWith(X, RangeType)); } if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint); for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint)); - return setRange(Mul, SignHint, ConservativeResult.intersectWith(X)); + return setRange(Mul, SignHint, + ConservativeResult.intersectWith(X, RangeType)); } if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint); for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) X = X.smax(getRangeRef(SMax->getOperand(i), SignHint)); - return setRange(SMax, SignHint, ConservativeResult.intersectWith(X)); + return setRange(SMax, SignHint, + ConservativeResult.intersectWith(X, RangeType)); } if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint); for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) X = X.umax(getRangeRef(UMax->getOperand(i), SignHint)); - return setRange(UMax, SignHint, ConservativeResult.intersectWith(X)); + return setRange(UMax, SignHint, + ConservativeResult.intersectWith(X, RangeType)); } if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint); ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint); return setRange(UDiv, SignHint, - ConservativeResult.intersectWith(X.udiv(Y))); + ConservativeResult.intersectWith(X.udiv(Y), RangeType)); } if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint); return setRange(ZExt, SignHint, - ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); + ConservativeResult.intersectWith(X.zeroExtend(BitWidth), + RangeType)); } if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { ConstantRange X = getRangeRef(SExt->getOperand(), SignHint); return setRange(SExt, SignHint, - ConservativeResult.intersectWith(X.signExtend(BitWidth))); + ConservativeResult.intersectWith(X.signExtend(BitWidth), + RangeType)); } if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint); return setRange(Trunc, SignHint, - ConservativeResult.intersectWith(X.truncate(BitWidth))); + ConservativeResult.intersectWith(X.truncate(BitWidth), + RangeType)); } if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { @@ -5637,7 +5631,7 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) if (!C->getValue()->isZero()) ConservativeResult = ConservativeResult.intersectWith( - ConstantRange(C->getAPInt(), APInt(BitWidth, 0))); + ConstantRange(C->getAPInt(), APInt(BitWidth, 0)), RangeType); // If there's no signed wrap, and all the operands have the same sign or // zero, the value won't ever change sign. 
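The getRangeRef changes above thread a ConstantRange::PreferredRangeType into every intersectWith call, so that when the exact intersection of two ranges cannot be represented as a single range, the result is biased toward the signedness the caller is querying: an unsigned hint keeps the tightest unsigned range, a signed hint the tightest signed one. A minimal sketch of that selection, using only the ConstantRange API that appears in this patch; the helper name and its bool parameter are illustrative:

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// Pick the intersection result that is tightest for the requested hint,
// mirroring how getRangeRef derives RangeType from SignHint above.
static ConstantRange intersectForHint(const ConstantRange &A,
                                      const ConstantRange &B,
                                      bool UnsignedHint) {
  ConstantRange::PreferredRangeType RT =
      UnsignedHint ? ConstantRange::Unsigned : ConstantRange::Signed;
  return A.intersectWith(B, RT);
}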
@@ -5651,11 +5645,11 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (AllNonNeg) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(APInt(BitWidth, 0), - APInt::getSignedMinValue(BitWidth))); + APInt::getSignedMinValue(BitWidth)), RangeType); else if (AllNonPos) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth), - APInt(BitWidth, 1))); + APInt(BitWidth, 1)), RangeType); } // TODO: non-affine addrec @@ -5668,14 +5662,14 @@ ScalarEvolution::getRangeRef(const SCEV *S, BitWidth); if (!RangeFromAffine.isFullSet()) ConservativeResult = - ConservativeResult.intersectWith(RangeFromAffine); + ConservativeResult.intersectWith(RangeFromAffine, RangeType); auto RangeFromFactoring = getRangeViaFactoring( AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount, BitWidth); if (!RangeFromFactoring.isFullSet()) ConservativeResult = - ConservativeResult.intersectWith(RangeFromFactoring); + ConservativeResult.intersectWith(RangeFromFactoring, RangeType); } } @@ -5686,7 +5680,8 @@ ScalarEvolution::getRangeRef(const SCEV *S, // Check if the IR explicitly contains !range metadata. Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue()); if (MDRange.hasValue()) - ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue()); + ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(), + RangeType); // Split here to avoid paying the compile-time cost of calling both // computeKnownBits and ComputeNumSignBits. This restriction can be lifted @@ -5697,8 +5692,8 @@ ScalarEvolution::getRangeRef(const SCEV *S, KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT); if (Known.One != ~Known.Zero + 1) ConservativeResult = - ConservativeResult.intersectWith(ConstantRange(Known.One, - ~Known.Zero + 1)); + ConservativeResult.intersectWith( + ConstantRange(Known.One, ~Known.Zero + 1), RangeType); } else { assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && "generalize as needed!"); @@ -5706,7 +5701,8 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (NS > 1) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), - APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1)); + APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1), + RangeType); } // A range of Phi is a subset of union of all ranges of its input. @@ -5721,7 +5717,8 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (RangeFromOps.isFullSet()) break; } - ConservativeResult = ConservativeResult.intersectWith(RangeFromOps); + ConservativeResult = + ConservativeResult.intersectWith(RangeFromOps, RangeType); bool Erased = PendingPhiRanges.erase(Phi); assert(Erased && "Failed to erase Phi properly?"); (void) Erased; @@ -5751,7 +5748,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step, // FullRange), then we don't know anything about the final range either. // Return FullRange. if (StartRange.isFullSet()) - return ConstantRange(BitWidth, /* isFullSet = */ true); + return ConstantRange::getFull(BitWidth); // If Step is signed and negative, then we use its absolute value, but we also // note that we're moving in the opposite direction. @@ -5767,7 +5764,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step, // Check if Offset is more than full span of BitWidth. If it is, the // expression is guaranteed to overflow. 
if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount)) - return ConstantRange(BitWidth, /* isFullSet = */ true); + return ConstantRange::getFull(BitWidth); // Offset is by how much the expression can change. Checks above guarantee no // overflow here. @@ -5786,7 +5783,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step, // range (due to wrap around). This means that the expression can take any // value in this bitwidth, and we have to return full range. if (StartRange.contains(MovedBoundary)) - return ConstantRange(BitWidth, /* isFullSet = */ true); + return ConstantRange::getFull(BitWidth); APInt NewLower = Descending ? std::move(MovedBoundary) : std::move(StartLower); @@ -5794,12 +5791,8 @@ static ConstantRange getRangeForAffineARHelper(APInt Step, Descending ? std::move(StartUpper) : std::move(MovedBoundary); NewUpper += 1; - // If we end up with full range, return a proper full range. - if (NewLower == NewUpper) - return ConstantRange(BitWidth, /* isFullSet = */ true); - // No overflow detected, return [StartLower, StartUpper + Offset + 1) range. - return ConstantRange(std::move(NewLower), std::move(NewUpper)); + return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper)); } ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, @@ -5832,7 +5825,7 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, MaxBECountValue, BitWidth, /* Signed = */ false); // Finally, intersect signed and unsigned ranges. - return SR.intersectWith(UR); + return SR.intersectWith(UR, ConstantRange::Smallest); } ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start, @@ -5916,17 +5909,17 @@ ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start, SelectPattern StartPattern(*this, BitWidth, Start); if (!StartPattern.isRecognized()) - return ConstantRange(BitWidth, /* isFullSet = */ true); + return ConstantRange::getFull(BitWidth); SelectPattern StepPattern(*this, BitWidth, Step); if (!StepPattern.isRecognized()) - return ConstantRange(BitWidth, /* isFullSet = */ true); + return ConstantRange::getFull(BitWidth); if (StartPattern.Condition != StepPattern.Condition) { // We don't handle this case today; but we could, by considering four // possibilities below instead of two. I'm not sure if there are cases where // that will help over what getRange already does, though. - return ConstantRange(BitWidth, /* isFullSet = */ true); + return ConstantRange::getFull(BitWidth); } // NB! Calling ScalarEvolution::getConstant is fine, but we should not try to @@ -6128,7 +6121,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // to obey basic rules for definitions dominating uses which this // analysis depends on. if (!DT.isReachableFromEntry(I->getParent())) - return getUnknown(V); + return getUnknown(UndefValue::get(V->getType())); } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) return getConstant(CI); else if (isa<ConstantPointerNull>(V)) @@ -6744,6 +6737,28 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { return BackedgeTakenCounts.find(L)->second = std::move(Result); } +void ScalarEvolution::forgetAllLoops() { + // This method is intended to forget all info about loops. It should + // invalidate caches as if the following happened: + // - The trip counts of all loops have changed arbitrarily + // - Every llvm::Value has been updated in place to produce a different + // result. 
+ BackedgeTakenCounts.clear(); + PredicatedBackedgeTakenCounts.clear(); + LoopPropertiesCache.clear(); + ConstantEvolutionLoopExitValue.clear(); + ValueExprMap.clear(); + ValuesAtScopes.clear(); + LoopDispositions.clear(); + BlockDispositions.clear(); + UnsignedRanges.clear(); + SignedRanges.clear(); + ExprValueMap.clear(); + HasRecMap.clear(); + MinTrailingZerosCache.clear(); + PredicatedSCEVRewrites.clear(); +} + void ScalarEvolution::forgetLoop(const Loop *L) { // Drop any stored trip count value. auto RemoveLoopFromBackedgeMap = @@ -6972,8 +6987,8 @@ ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M, /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( - SmallVectorImpl<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo> - &&ExitCounts, + ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo> + ExitCounts, bool Complete, const SCEV *MaxCount, bool MaxOrZero) : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) { using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; @@ -7256,6 +7271,14 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( if (EL0.ExactNotTaken == EL1.ExactNotTaken) BECount = EL0.ExactNotTaken; } + // There are cases (e.g. PR26207) where computeExitLimitFromCond is able + // to be more aggressive when computing BECount than when computing + // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and + // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken + // to not. + if (isa<SCEVCouldNotCompute>(MaxBECount) && + !isa<SCEVCouldNotCompute>(BECount)) + MaxBECount = getConstant(getUnsignedRangeMax(BECount)); return ExitLimit(BECount, MaxBECount, false, {&EL0.Predicates, &EL1.Predicates}); @@ -7651,7 +7674,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( static bool CanConstantFold(const Instruction *I) { if (isa<BinaryOperator>(I) || isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) || - isa<LoadInst>(I)) + isa<LoadInst>(I) || isa<ExtractValueInst>(I)) return true; if (const CallInst *CI = dyn_cast<CallInst>(I)) @@ -8075,7 +8098,9 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { } case scSMaxExpr: case scUMaxExpr: - break; // TODO: smax, umax. + case scSMinExpr: + case scUMinExpr: + break; // TODO: smax, umax, smin, umax. } return nullptr; } @@ -8087,44 +8112,64 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // exit value from the loop without using SCEVs. if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) { if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) { - const Loop *LI = this->LI[I->getParent()]; - if (LI && LI->getParentLoop() == L) // Looking for loop exit value. - if (PHINode *PN = dyn_cast<PHINode>(I)) - if (PN->getParent() == LI->getHeader()) { - // Okay, there is no closed form solution for the PHI node. Check - // to see if the loop that contains it has a known backedge-taken - // count. If so, we may be able to force computation of the exit - // value. - const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); - if (const SCEVConstant *BTCC = - dyn_cast<SCEVConstant>(BackedgeTakenCount)) { - - // This trivial case can show up in some degenerate cases where - // the incoming IR has not yet been fully simplified. 
- if (BTCC->getValue()->isZero()) { - Value *InitValue = nullptr; - bool MultipleInitValues = false; - for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { - if (!LI->contains(PN->getIncomingBlock(i))) { - if (!InitValue) - InitValue = PN->getIncomingValue(i); - else if (InitValue != PN->getIncomingValue(i)) { - MultipleInitValues = true; - break; - } - } - if (!MultipleInitValues && InitValue) - return getSCEV(InitValue); + if (PHINode *PN = dyn_cast<PHINode>(I)) { + const Loop *LI = this->LI[I->getParent()]; + // Looking for loop exit value. + if (LI && LI->getParentLoop() == L && + PN->getParent() == LI->getHeader()) { + // Okay, there is no closed form solution for the PHI node. Check + // to see if the loop that contains it has a known backedge-taken + // count. If so, we may be able to force computation of the exit + // value. + const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); + // This trivial case can show up in some degenerate cases where + // the incoming IR has not yet been fully simplified. + if (BackedgeTakenCount->isZero()) { + Value *InitValue = nullptr; + bool MultipleInitValues = false; + for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { + if (!LI->contains(PN->getIncomingBlock(i))) { + if (!InitValue) + InitValue = PN->getIncomingValue(i); + else if (InitValue != PN->getIncomingValue(i)) { + MultipleInitValues = true; + break; } } - // Okay, we know how many times the containing loop executes. If - // this is a constant evolving PHI node, get the final value at - // the specified iteration number. - Constant *RV = - getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI); - if (RV) return getSCEV(RV); } + if (!MultipleInitValues && InitValue) + return getSCEV(InitValue); } + // Do we have a loop invariant value flowing around the backedge + // for a loop which must execute the backedge? + if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && + isKnownPositive(BackedgeTakenCount) && + PN->getNumIncomingValues() == 2) { + unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0 : 1; + const SCEV *OnBackedge = getSCEV(PN->getIncomingValue(InLoopPred)); + if (IsAvailableOnEntry(LI, DT, OnBackedge, PN->getParent())) + return OnBackedge; + } + if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) { + // Okay, we know how many times the containing loop executes. If + // this is a constant evolving PHI node, get the final value at + // the specified iteration number. + Constant *RV = + getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI); + if (RV) return getSCEV(RV); + } + } + + // If there is a single-input Phi, evaluate it at our scope. If we can + // prove that this replacement does not break LCSSA form, use new value. + if (PN->getNumOperands() == 1) { + const SCEV *Input = getSCEV(PN->getOperand(0)); + const SCEV *InputAtScope = getSCEVAtScope(Input, L); + // TODO: We can generalize it using LI.replacementPreservesLCSSAForm, + // for the simplest case just support constants. + if (isa<SCEVConstant>(InputAtScope)) return InputAtScope; + } + } // Okay, this is an expression that we cannot symbolically evaluate // into a SCEV. 
Check to see if it's possible to symbolically evaluate @@ -8198,13 +8243,11 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { NewOps.push_back(OpAtScope); } if (isa<SCEVAddExpr>(Comm)) - return getAddExpr(NewOps); + return getAddExpr(NewOps, Comm->getNoWrapFlags()); if (isa<SCEVMulExpr>(Comm)) - return getMulExpr(NewOps); - if (isa<SCEVSMaxExpr>(Comm)) - return getSMaxExpr(NewOps); - if (isa<SCEVUMaxExpr>(Comm)) - return getUMaxExpr(NewOps); + return getMulExpr(NewOps, Comm->getNoWrapFlags()); + if (isa<SCEVMinMaxExpr>(Comm)) + return getMinMaxExpr(Comm->getSCEVType(), NewOps); llvm_unreachable("Unknown commutative SCEV type!"); } } @@ -10045,41 +10088,15 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, getNotSCEV(FoundLHS)); } -/// If Expr computes ~A, return A else return nullptr -static const SCEV *MatchNotExpr(const SCEV *Expr) { - const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr); - if (!Add || Add->getNumOperands() != 2 || - !Add->getOperand(0)->isAllOnesValue()) - return nullptr; - - const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1)); - if (!AddRHS || AddRHS->getNumOperands() != 2 || - !AddRHS->getOperand(0)->isAllOnesValue()) - return nullptr; - - return AddRHS->getOperand(1); -} - -/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values? -template<typename MaxExprType> -static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr, - const SCEV *Candidate) { - const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr); - if (!MaxExpr) return false; - - return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end(); -} - -/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values? -template<typename MaxExprType> -static bool IsMinConsistingOf(ScalarEvolution &SE, - const SCEV *MaybeMinExpr, - const SCEV *Candidate) { - const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr); - if (!MaybeMaxExpr) +/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values? +template <typename MinMaxExprType> +static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr, + const SCEV *Candidate) { + const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr); + if (!MinMaxExpr) return false; - return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate)); + return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end(); } static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, @@ -10128,20 +10145,20 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE, LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLE: return - // min(A, ...) <= A - IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) || - // A <= max(A, ...) - IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS); + // min(A, ...) <= A + IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) || + // A <= max(A, ...) + IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS); case ICmpInst::ICMP_UGE: std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ICmpInst::ICMP_ULE: return - // min(A, ...) <= A - IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) || - // A <= max(A, ...) - IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS); + // min(A, ...) <= A + IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) || + // A <= max(A, ...) + IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS); } llvm_unreachable("covered switch fell through?!"); @@ -10691,13 +10708,10 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, IsSigned ? 
APIntOps::smax(getSignedRangeMin(RHS), Limit) : APIntOps::umax(getUnsignedRangeMin(RHS), Limit); - - const SCEV *MaxBECount = getCouldNotCompute(); - if (isa<SCEVConstant>(BECount)) - MaxBECount = BECount; - else - MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), - getConstant(MinStride), false); + const SCEV *MaxBECount = isa<SCEVConstant>(BECount) + ? BECount + : computeBECount(getConstant(MaxStart - MinEnd), + getConstant(MinStride), false); if (isa<SCEVCouldNotCompute>(MaxBECount)) MaxBECount = BECount; @@ -10806,8 +10820,6 @@ static inline bool containsUndefs(const SCEV *S) { return SCEVExprContains(S, [](const SCEV *S) { if (const auto *SU = dyn_cast<SCEVUnknown>(S)) return isa<UndefValue>(SU->getValue()); - else if (const auto *SC = dyn_cast<SCEVConstant>(S)) - return isa<UndefValue>(SC->getValue()); return false; }); } @@ -11402,19 +11414,23 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; - SmallVector<BasicBlock *, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - if (ExitBlocks.size() != 1) + SmallVector<BasicBlock *, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + if (ExitingBlocks.size() != 1) OS << "<multiple exits> "; - if (SE->hasLoopInvariantBackedgeTakenCount(L)) { - OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L); - } else { - OS << "Unpredictable backedge-taken count. "; - } + if (SE->hasLoopInvariantBackedgeTakenCount(L)) + OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L) << "\n"; + else + OS << "Unpredictable backedge-taken count.\n"; - OS << "\n" - "Loop "; + if (ExitingBlocks.size() > 1) + for (BasicBlock *ExitingBlock : ExitingBlocks) { + OS << " exit count for " << ExitingBlock->getName() << ": " + << *SE->getExitCount(L, ExitingBlock) << "\n"; + } + + OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; @@ -11611,7 +11627,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { case scAddExpr: case scMulExpr: case scUMaxExpr: - case scSMaxExpr: { + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: { bool HasVarying = false; for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) { LoopDisposition D = getLoopDisposition(Op, L); @@ -11698,7 +11716,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { case scAddExpr: case scMulExpr: case scUMaxExpr: - case scSMaxExpr: { + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: { const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); bool Proper = true; for (const SCEV *NAryOp : NAry->operands()) { diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 289d4f8ae49a..96da0a24cddd 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -1,9 +1,8 @@ //===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -23,7 +22,7 @@ using namespace llvm; AliasResult SCEVAAResult::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { + const MemoryLocation &LocB, AAQueryInfo &AAQI) { // If either of the memory references is empty, it doesn't matter what the // pointer values are. This allows the code below to ignore this special // case. @@ -86,11 +85,12 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA, AO ? AAMDNodes() : LocA.AATags), MemoryLocation(BO ? BO : LocB.Ptr, BO ? LocationSize::unknown() : LocB.Size, - BO ? AAMDNodes() : LocB.AATags)) == NoAlias) + BO ? AAMDNodes() : LocB.AATags), + AAQI) == NoAlias) return NoAlias; // Forward the query to the next analysis. - return AAResultBase::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB, AAQI); } /// Given an expression, try to find a base value. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index ca5cf1663b83..e8a95d35482c 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1,9 +1,8 @@ //===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -61,12 +60,10 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, // instructions that might be inserted before BIP. if (BasicBlock::iterator(CI) != IP || BIP == IP) { // Create a new cast, and leave the old cast in place in case - // it is being used as an insert point. Clear its operand - // so that it doesn't hold anything live. + // it is being used as an insert point. Ret = CastInst::Create(Op, V, Ty, "", &*IP); Ret->takeName(CI); CI->replaceAllUsesWith(Ret); - CI->setOperand(0, UndefValue::get(V->getType())); break; } Ret = CI; @@ -167,9 +164,11 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { } /// InsertBinop - Insert the specified binary operator, doing a small amount -/// of work to avoid inserting an obviously redundant operation. +/// of work to avoid inserting an obviously redundant operation, and hoisting +/// to an outer loop when the opportunity is there and it is safe. Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, - Value *LHS, Value *RHS) { + Value *LHS, Value *RHS, + SCEV::NoWrapFlags Flags, bool IsSafeToHoist) { // Fold a binop with constant operands. if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) @@ -188,20 +187,22 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, if (isa<DbgInfoIntrinsic>(IP)) ScanLimit++; - // Conservatively, do not use any instruction which has any of wrap/exact - // flags installed. - // TODO: Instead of simply disable poison instructions we can be clever - // here and match SCEV to this instruction. 
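// Illustrative sketch (not part of this patch): a self-contained restatement of
// the reuse rule introduced in this InsertBinop hunk. The struct below is a
// stand-in for an LLVM binary operator, not the real IR class; only the logic
// is taken from the patch: an existing instruction may be reused for the
// expansion only when its nsw/nuw flags match the requested SCEV no-wrap flags
// exactly, and any 'exact' flag conservatively blocks reuse.
struct ExistingBinOpFlags {
  bool HasNSW = false;  // no-signed-wrap on the candidate instruction
  bool HasNUW = false;  // no-unsigned-wrap on the candidate instruction
  bool IsExact = false; // exact flag (udiv/sdiv/lshr/ashr)
};

inline bool canReuseForExpansion(const ExistingBinOpFlags &Existing,
                                 bool WantNSW, bool WantNUW) {
  if (Existing.IsExact)
    return false;
  return Existing.HasNSW == WantNSW && Existing.HasNUW == WantNUW;
}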
- auto canGeneratePoison = [](Instruction *I) { - if (isa<OverflowingBinaryOperator>(I) && - (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())) - return true; + auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) { + // Ensure that no-wrap flags match. + if (isa<OverflowingBinaryOperator>(I)) { + if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW)) + return true; + if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW)) + return true; + } + // Conservatively, do not use any instruction which has any of exact + // flags installed. if (isa<PossiblyExactOperator>(I) && I->isExact()) return true; return false; }; if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && - IP->getOperand(1) == RHS && !canGeneratePoison(&*IP)) + IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP)) return &*IP; if (IP == BlockBegin) break; } @@ -211,19 +212,25 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc(); SCEVInsertPointGuard Guard(Builder, this); - // Move the insertion point out of as many loops as we can. - while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { - if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) break; + if (IsSafeToHoist) { + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; - // Ok, move up a level. - Builder.SetInsertPoint(Preheader->getTerminator()); + // Ok, move up a level. + Builder.SetInsertPoint(Preheader->getTerminator()); + } } // If we haven't found this binop, insert it. Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS)); BO->setDebugLoc(Loc); + if (Flags & SCEV::FlagNUW) + BO->setHasNoUnsignedWrap(); + if (Flags & SCEV::FlagNSW) + BO->setHasNoSignedWrap(); rememberInstruction(BO); return BO; @@ -695,7 +702,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // Sort by loop. Use a stable sort so that constants follow non-constants and // pointer operands precede non-pointer operands. - std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT)); + llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT)); // Emit instructions to add all the operands. Hoist as much as possible // out of loops, and form meaningful getelementptrs where possible. @@ -735,7 +742,8 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // Instead of doing a negate and add, just do a subtract. Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty); Sum = InsertNoopCastOfTo(Sum, Ty); - Sum = InsertBinop(Instruction::Sub, Sum, W); + Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap, + /*IsSafeToHoist*/ true); ++I; } else { // A simple add. @@ -743,7 +751,8 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { Sum = InsertNoopCastOfTo(Sum, Ty); // Canonicalize a constant to the RHS. if (isa<Constant>(Sum)) std::swap(Sum, W); - Sum = InsertBinop(Instruction::Add, Sum, W); + Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(), + /*IsSafeToHoist*/ true); ++I; } } @@ -762,7 +771,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); // Sort by loop. Use a stable sort so that constants follow non-constants. 
- std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT)); + llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT)); // Emit instructions to mul all the operands. Hoist as much as possible // out of loops. @@ -795,9 +804,13 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { if (Exponent & 1) Result = P; for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) { - P = InsertBinop(Instruction::Mul, P, P); + P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap, + /*IsSafeToHoist*/ true); if (Exponent & BinExp) - Result = Result ? InsertBinop(Instruction::Mul, Result, P) : P; + Result = Result ? InsertBinop(Instruction::Mul, Result, P, + SCEV::FlagAnyWrap, + /*IsSafeToHoist*/ true) + : P; } I = E; @@ -812,7 +825,8 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { } else if (I->second->isAllOnesValue()) { // Instead of doing a multiply by negative one, just do a negate. Prod = InsertNoopCastOfTo(Prod, Ty); - Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod); + Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod, + SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); ++I; } else { // A simple mul. @@ -824,10 +838,16 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { if (match(W, m_Power2(RHS))) { // Canonicalize Prod*(1<<C) to Prod<<C. assert(!Ty->isVectorTy() && "vector types are not SCEVable"); + auto NWFlags = S->getNoWrapFlags(); + // clear nsw flag if shl will produce poison value. + if (RHS->logBase2() == RHS->getBitWidth() - 1) + NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW); Prod = InsertBinop(Instruction::Shl, Prod, - ConstantInt::get(Ty, RHS->logBase2())); + ConstantInt::get(Ty, RHS->logBase2()), NWFlags, + /*IsSafeToHoist*/ true); } else { - Prod = InsertBinop(Instruction::Mul, Prod, W); + Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(), + /*IsSafeToHoist*/ true); } } } @@ -843,11 +863,13 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { const APInt &RHS = SC->getAPInt(); if (RHS.isPowerOf2()) return InsertBinop(Instruction::LShr, LHS, - ConstantInt::get(Ty, RHS.logBase2())); + ConstantInt::get(Ty, RHS.logBase2()), + SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); } Value *RHS = expandCodeFor(S->getRHS(), Ty); - return InsertBinop(Instruction::UDiv, LHS, RHS); + return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap, + /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS())); } /// Move parts of Base into Rest to leave Base with the minimal @@ -1634,7 +1656,8 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. - if (S->getOperand(i)->getType() != Ty) { + Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } @@ -1658,7 +1681,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. 
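// Illustrative sketch (not part of this patch): a plain-integer model of the
// repeated-squaring loop that visitMulExpr (above) uses to expand a SCEV power
// into O(log2 E) multiplies; ordinary integers stand in for the expanded IR
// values. Related detail from the same hunk: when the multiplier is
// 1 << (BitWidth - 1), the shl form drops nsw because, roughly, shl nsw i8 1, 7
// would stand for 1 * 128, which overflows signed i8 and yields poison, even
// though a multiply by the same bit pattern (-128) did not.
#include <cstdint>

inline uint64_t expandPow(uint64_t P, uint64_t Exponent) {
  uint64_t Result = (Exponent & 1) ? P : 1;
  for (uint64_t BinExp = 2; BinExp != 0 && BinExp <= Exponent; BinExp <<= 1) {
    P *= P;                 // InsertBinop(Mul, P, P)
    if (Exponent & BinExp)  // fold the current square into the result
      Result *= P;
  }
  return Result;
}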
- if (S->getOperand(i)->getType() != Ty) { + Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } @@ -1676,6 +1700,56 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { return LHS; } +Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); + Type *Ty = LHS->getType(); + for (int i = S->getNumOperands() - 2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + +Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); + Type *Ty = LHS->getType(); + for (int i = S->getNumOperands() - 2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpULT(LHS, RHS); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, Instruction *IP) { setInsertPoint(IP); @@ -1732,49 +1806,55 @@ Value *SCEVExpander::expand(const SCEV *S) { // Compute an insertion point for this SCEV object. Hoist the instructions // as far out in the loop nest as possible. Instruction *InsertPt = &*Builder.GetInsertPoint(); - for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; - L = L->getParentLoop()) - if (SE.isLoopInvariant(S, L)) { - if (!L) break; - if (BasicBlock *Preheader = L->getLoopPreheader()) - InsertPt = Preheader->getTerminator(); - else { - // LSR sets the insertion point for AddRec start/step values to the - // block start to simplify value reuse, even though it's an invalid - // position. SCEVExpander must correct for this in all cases. - InsertPt = &*L->getHeader()->getFirstInsertionPt(); - } - } else { - // We can move insertion point only if there is no div or rem operations - // otherwise we are risky to move it over the check for zero denominator. - auto SafeToHoist = [](const SCEV *S) { - return !SCEVExprContains(S, [](const SCEV *S) { - if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) { - if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS())) - // Division by non-zero constants can be hoisted. 
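// Illustrative sketch (not part of this patch): a plain-integer model of how
// the new visitSMinExpr above expands an n-ary smin. Expansion starts from the
// last operand and folds each earlier operand in with an (icmp slt, select)
// pair; std::min plays the role of that cmp+select here. Assumes at least one
// operand, as SCEV min/max expressions always have.
#include <algorithm>
#include <cstdint>
#include <vector>

inline int64_t expandSMin(const std::vector<int64_t> &Ops) {
  int64_t LHS = Ops.back();                       // start from the last operand
  for (auto I = Ops.rbegin() + 1, E = Ops.rend(); I != E; ++I)
    LHS = std::min(LHS, *I);                      // icmp slt + select "smin"
  return LHS;
}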
- return SC->getValue()->isZero(); - // All other divisions should not be moved as they may be - // divisions by zero and should be kept within the - // conditions of the surrounding loops that guard their - // execution (see PR35406). - return true; - } - return false; - }); - }; - // If the SCEV is computable at this level, insert it into the header - // after the PHIs (and after any other instructions that we've inserted - // there) so that it is guaranteed to dominate any user inside the loop. - if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) && - SafeToHoist(S)) - InsertPt = &*L->getHeader()->getFirstInsertionPt(); - while (InsertPt->getIterator() != Builder.GetInsertPoint() && - (isInsertedInstruction(InsertPt) || - isa<DbgInfoIntrinsic>(InsertPt))) { - InsertPt = &*std::next(InsertPt->getIterator()); + + // We can move insertion point only if there is no div or rem operations + // otherwise we are risky to move it over the check for zero denominator. + auto SafeToHoist = [](const SCEV *S) { + return !SCEVExprContains(S, [](const SCEV *S) { + if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) { + if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS())) + // Division by non-zero constants can be hoisted. + return SC->getValue()->isZero(); + // All other divisions should not be moved as they may be + // divisions by zero and should be kept within the + // conditions of the surrounding loops that guard their + // execution (see PR35406). + return true; + } + return false; + }); + }; + if (SafeToHoist(S)) { + for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; + L = L->getParentLoop()) { + if (SE.isLoopInvariant(S, L)) { + if (!L) break; + if (BasicBlock *Preheader = L->getLoopPreheader()) + InsertPt = Preheader->getTerminator(); + else + // LSR sets the insertion point for AddRec start/step values to the + // block start to simplify value reuse, even though it's an invalid + // position. SCEVExpander must correct for this in all cases. + InsertPt = &*L->getHeader()->getFirstInsertionPt(); + } else { + // If the SCEV is computable at this level, insert it into the header + // after the PHIs (and after any other instructions that we've inserted + // there) so that it is guaranteed to dominate any user inside the loop. + if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) + InsertPt = &*L->getHeader()->getFirstInsertionPt(); + while (InsertPt->getIterator() != Builder.GetInsertPoint() && + (isInsertedInstruction(InsertPt) || + isa<DbgInfoIntrinsic>(InsertPt))) + InsertPt = &*std::next(InsertPt->getIterator()); + break; } - break; } + } + + // IndVarSimplify sometimes sets the insertion point at the block start, even + // when there are PHIs at that point. We must correct for this. + if (isa<PHINode>(*InsertPt)) + InsertPt = &*InsertPt->getParent()->getFirstInsertionPt(); // Check to see if we already expanded this here. auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); @@ -2071,10 +2151,13 @@ bool SCEVExpander::isHighCostExpansionHelper( if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) { // If the divisor is a power of two and the SCEV type fits in a native - // integer, consider the division cheap irrespective of whether it occurs in - // the user code since it can be lowered into a right shift. + // integer (and the LHS not expensive), consider the division cheap + // irrespective of whether it occurs in the user code since it can be + // lowered into a right shift. 
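// Illustrative sketch (not part of this patch): why the power-of-two udiv in
// the comment above is considered cheap once the type fits a native integer
// and the numerator itself is not expensive: it lowers to one logical shift
// right.
#include <cstdint>

inline uint64_t udivByPowerOfTwo(uint64_t X, unsigned Log2Divisor) {
  // Same value as X / (1ULL << Log2Divisor), e.g. X / 16 == X >> 4.
  return X >> Log2Divisor;
}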
if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) if (SC->getAPInt().isPowerOf2()) { + if (isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, Processed)) + return true; const DataLayout &DL = L->getHeader()->getParent()->getParent()->getDataLayout(); unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth(); @@ -2102,7 +2185,7 @@ bool SCEVExpander::isHighCostExpansionHelper( // HowManyLessThans uses a Max expression whenever the loop is not guarded by // the exit condition. - if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) + if (isa<SCEVMinMaxExpr>(S)) return true; // Recurse past nary expressions, which commonly occur in the @@ -2339,6 +2422,24 @@ bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) { bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, ScalarEvolution &SE) { - return isSafeToExpand(S, SE) && SE.dominates(S, InsertionPoint->getParent()); + if (!isSafeToExpand(S, SE)) + return false; + // We have to prove that the expanded site of S dominates InsertionPoint. + // This is easy when not in the same block, but hard when S is an instruction + // to be expanded somewhere inside the same block as our insertion point. + // What we really need here is something analogous to an OrderedBasicBlock, + // but for the moment, we paper over the problem by handling two common and + // cheap to check cases. + if (SE.properlyDominates(S, InsertionPoint->getParent())) + return true; + if (SE.dominates(S, InsertionPoint->getParent())) { + if (InsertionPoint->getParent()->getTerminator() == InsertionPoint) + return true; + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) + for (const Value *V : InsertionPoint->operand_values()) + if (V == U->getValue()) + return true; + } + return false; } } diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index 3740039b8f86..209ae66ca53e 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -1,9 +1,8 @@ //===- ScalarEvolutionNormalization.cpp - See below -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp index 9a581fe46afc..094e4a3d5dc8 100644 --- a/lib/Analysis/ScopedNoAliasAA.cpp +++ b/lib/Analysis/ScopedNoAliasAA.cpp @@ -1,9 +1,8 @@ //===- ScopedNoAliasAA.cpp - Scoped No-Alias Alias Analysis ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -76,9 +75,10 @@ public: } // end anonymous namespace AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { + const MemoryLocation &LocB, + AAQueryInfo &AAQI) { if (!EnableScopedNoAlias) - return AAResultBase::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB, AAQI); // Get the attached MDNodes. const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope; @@ -92,13 +92,14 @@ AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA, return NoAlias; // If they may alias, chain to the next AliasAnalysis. - return AAResultBase::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB, AAQI); } ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call, - const MemoryLocation &Loc) { + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { if (!EnableScopedNoAlias) - return AAResultBase::getModRefInfo(Call, Loc); + return AAResultBase::getModRefInfo(Call, Loc, AAQI); if (!mayAliasInScopes(Loc.AATags.Scope, Call->getMetadata(LLVMContext::MD_noalias))) @@ -108,13 +109,14 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call, Loc.AATags.NoAlias)) return ModRefInfo::NoModRef; - return AAResultBase::getModRefInfo(Call, Loc); + return AAResultBase::getModRefInfo(Call, Loc, AAQI); } ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1, - const CallBase *Call2) { + const CallBase *Call2, + AAQueryInfo &AAQI) { if (!EnableScopedNoAlias) - return AAResultBase::getModRefInfo(Call1, Call2); + return AAResultBase::getModRefInfo(Call1, Call2, AAQI); if (!mayAliasInScopes(Call1->getMetadata(LLVMContext::MD_alias_scope), Call2->getMetadata(LLVMContext::MD_noalias))) @@ -124,7 +126,7 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1, Call1->getMetadata(LLVMContext::MD_noalias))) return ModRefInfo::NoModRef; - return AAResultBase::getModRefInfo(Call1, Call2); + return AAResultBase::getModRefInfo(Call1, Call2, AAQI); } static void collectMDInDomain(const MDNode *List, const MDNode *Domain, diff --git a/lib/Analysis/StackSafetyAnalysis.cpp b/lib/Analysis/StackSafetyAnalysis.cpp index 66b03845864f..4cf235db86eb 100644 --- a/lib/Analysis/StackSafetyAnalysis.cpp +++ b/lib/Analysis/StackSafetyAnalysis.cpp @@ -1,9 +1,8 @@ //===- StackSafetyAnalysis.cpp - Stack memory safety analysis -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -416,7 +415,9 @@ class StackSafetyDataFlowAnalysis { updateOneNode(F.first, F.second); } void runDataFlow(); +#ifndef NDEBUG void verifyFixedPoint(); +#endif public: StackSafetyDataFlowAnalysis( @@ -527,11 +528,13 @@ void StackSafetyDataFlowAnalysis::runDataFlow() { } } +#ifndef NDEBUG void StackSafetyDataFlowAnalysis::verifyFixedPoint() { WorkList.clear(); updateAllNodes(); assert(WorkList.empty()); } +#endif StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() { runDataFlow(); diff --git a/lib/Analysis/StratifiedSets.h b/lib/Analysis/StratifiedSets.h index 2f20cd12506c..60ea2451b0ef 100644 --- a/lib/Analysis/StratifiedSets.h +++ b/lib/Analysis/StratifiedSets.h @@ -1,9 +1,8 @@ //===- StratifiedSets.h - Abstract stratified sets implementation. --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/SyncDependenceAnalysis.cpp b/lib/Analysis/SyncDependenceAnalysis.cpp index e1a7e4476d12..3cf248a31142 100644 --- a/lib/Analysis/SyncDependenceAnalysis.cpp +++ b/lib/Analysis/SyncDependenceAnalysis.cpp @@ -1,10 +1,9 @@ //===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation //--===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -219,14 +218,9 @@ struct DivergencePropagator { template <typename SuccessorIterable> std::unique_ptr<ConstBlockSet> computeJoinPoints(const BasicBlock &RootBlock, - SuccessorIterable NodeSuccessors, const Loop *ParentLoop) { + SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) { assert(JoinBlocks); - // immediate post dominator (no join block beyond that block) - const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock)); - const auto *IpdNode = PdNode->getIDom(); - const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; - // bootstrap with branch targets for (const auto *SuccBlock : NodeSuccessors) { DefMap.emplace(SuccBlock, SuccBlock); @@ -341,13 +335,23 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) { // already available in cache? auto ItCached = CachedLoopExitJoins.find(&Loop); - if (ItCached != CachedLoopExitJoins.end()) + if (ItCached != CachedLoopExitJoins.end()) { return *ItCached->second; + } + + // dont propagte beyond the immediate post dom of the loop + const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader())); + const auto *IpdNode = PdNode->getIDom(); + const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; + while (PdBoundBlock && Loop.contains(PdBoundBlock)) { + IpdNode = IpdNode->getIDom(); + PdBoundBlock = IpdNode ? 
IpdNode->getBlock() : nullptr; + } // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>( - *Loop.getHeader(), LoopExits, Loop.getParentLoop()); + *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock); auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks)); assert(ItInserted.second); @@ -366,11 +370,16 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) { if (ItCached != CachedBranchJoins.end()) return *ItCached->second; + // dont propagate beyond the immediate post dominator of the branch + const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent())); + const auto *IpdNode = PdNode->getIDom(); + const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; + // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; const auto &TermBlock = *Term.getParent(); auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>( - TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock)); + TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock); auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks)); assert(ItInserted.second); diff --git a/lib/Analysis/SyntheticCountsUtils.cpp b/lib/Analysis/SyntheticCountsUtils.cpp index c2d7bb11a4cf..22766e5f07f5 100644 --- a/lib/Analysis/SyntheticCountsUtils.cpp +++ b/lib/Analysis/SyntheticCountsUtils.cpp @@ -1,9 +1,8 @@ //===--- SyntheticCountsUtils.cpp - synthetic counts propagation utils ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index 4643f75da42d..ef139d3257d2 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -1,9 +1,8 @@ //===-- TargetLibraryInfo.cpp - Runtime library information ----------------==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -24,6 +23,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary( "No vector functions library"), clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", "Accelerate framework"), + clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV", + "IBM MASS vector library"), clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", "Intel SVML library"))); @@ -50,6 +51,16 @@ static bool hasSinCosPiStret(const Triple &T) { return true; } +static bool hasBcmp(const Triple &TT) { + // Posix removed support from bcmp() in 2001, but the glibc and several + // implementations of the libc still have it. + if (TT.isOSLinux()) + return TT.isGNUEnvironment() || TT.isMusl(); + // Both NetBSD and OpenBSD are planning to remove the function. 
Windows does + // not have it. + return TT.isOSFreeBSD() || TT.isOSSolaris() || TT.isOSDarwin(); +} + /// Initialize the set of available library functions based on the specified /// target triple. This should be carefully written so that a missing target /// triple gets a sane set of defaults. @@ -78,8 +89,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ShouldSignExtI32Param = false; // PowerPC64, Sparc64, SystemZ need signext/zeroext on i32 parameters and // returns corresponding to C-level ints and unsigned ints. - if (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le || - T.getArch() == Triple::sparcv9 || T.getArch() == Triple::systemz) { + if (T.isPPC64() || T.getArch() == Triple::sparcv9 || + T.getArch() == Triple::systemz) { ShouldExtI32Param = true; ShouldExtI32Return = true; } @@ -142,6 +153,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_sincospif_stret); } + if (!hasBcmp(T)) + TLI.setUnavailable(LibFunc_bcmp); + if (T.isMacOSX() && T.getArch() == Triple::x86 && !T.isMacOSXVersionLT(10, 7)) { // x86-32 OSX has a scheme where fwrite and fputs (and some other functions @@ -153,33 +167,82 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setAvailableWithName(LibFunc_fputs, "fputs$UNIX2003"); } - // iprintf and friends are only available on XCore and TCE. - if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) { + // iprintf and friends are only available on XCore, TCE, and Emscripten. + if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce && + T.getOS() != Triple::Emscripten) { TLI.setUnavailable(LibFunc_iprintf); TLI.setUnavailable(LibFunc_siprintf); TLI.setUnavailable(LibFunc_fiprintf); } + // __small_printf and friends are only available on Emscripten. + if (T.getOS() != Triple::Emscripten) { + TLI.setUnavailable(LibFunc_small_printf); + TLI.setUnavailable(LibFunc_small_sprintf); + TLI.setUnavailable(LibFunc_small_fprintf); + } + if (T.isOSWindows() && !T.isOSCygMing()) { - // Win32 does not support long double + // XXX: The earliest documentation available at the moment is for VS2015/VC19: + // https://docs.microsoft.com/en-us/cpp/c-runtime-library/floating-point-support?view=vs-2015 + // XXX: In order to use an MSVCRT older than VC19, + // the specific library version must be explicit in the target triple, + // e.g., x86_64-pc-windows-msvc18. + bool hasPartialC99 = true; + if (T.isKnownWindowsMSVCEnvironment()) { + unsigned Major, Minor, Micro; + T.getEnvironmentVersion(Major, Minor, Micro); + hasPartialC99 = (Major == 0 || Major >= 19); + } + + // Latest targets support C89 math functions, in part. + bool isARM = (T.getArch() == Triple::aarch64 || + T.getArch() == Triple::arm); + bool hasPartialFloat = (isARM || + T.getArch() == Triple::x86_64); + + // Win32 does not support float C89 math functions, in general. 
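// Illustrative sketch (not part of this patch): the MSVC gating used above,
// restated stand-alone. An unversioned *-windows-msvc triple reports
// Major == 0 and is treated as a current runtime, so the partial C99 math
// support that arrived with VC19 is only switched off for explicitly older
// environments such as x86_64-pc-windows-msvc18.
inline bool hasPartialC99MathRuntime(unsigned MsvcMajorVersion) {
  return MsvcMajorVersion == 0 || MsvcMajorVersion >= 19;
}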
+ if (!hasPartialFloat) { + TLI.setUnavailable(LibFunc_acosf); + TLI.setUnavailable(LibFunc_asinf); + TLI.setUnavailable(LibFunc_atan2f); + TLI.setUnavailable(LibFunc_atanf); + TLI.setUnavailable(LibFunc_ceilf); + TLI.setUnavailable(LibFunc_cosf); + TLI.setUnavailable(LibFunc_coshf); + TLI.setUnavailable(LibFunc_expf); + TLI.setUnavailable(LibFunc_floorf); + TLI.setUnavailable(LibFunc_fmodf); + TLI.setUnavailable(LibFunc_log10f); + TLI.setUnavailable(LibFunc_logf); + TLI.setUnavailable(LibFunc_modff); + TLI.setUnavailable(LibFunc_powf); + TLI.setUnavailable(LibFunc_sinf); + TLI.setUnavailable(LibFunc_sinhf); + TLI.setUnavailable(LibFunc_sqrtf); + TLI.setUnavailable(LibFunc_tanf); + TLI.setUnavailable(LibFunc_tanhf); + } + if (!isARM) + TLI.setUnavailable(LibFunc_fabsf); + TLI.setUnavailable(LibFunc_frexpf); + TLI.setUnavailable(LibFunc_ldexpf); + + // Win32 does not support long double C89 math functions. TLI.setUnavailable(LibFunc_acosl); TLI.setUnavailable(LibFunc_asinl); - TLI.setUnavailable(LibFunc_atanl); TLI.setUnavailable(LibFunc_atan2l); + TLI.setUnavailable(LibFunc_atanl); TLI.setUnavailable(LibFunc_ceill); - TLI.setUnavailable(LibFunc_copysignl); TLI.setUnavailable(LibFunc_cosl); TLI.setUnavailable(LibFunc_coshl); TLI.setUnavailable(LibFunc_expl); - TLI.setUnavailable(LibFunc_fabsf); // Win32 and Win64 both lack fabsf TLI.setUnavailable(LibFunc_fabsl); TLI.setUnavailable(LibFunc_floorl); - TLI.setUnavailable(LibFunc_fmaxl); - TLI.setUnavailable(LibFunc_fminl); TLI.setUnavailable(LibFunc_fmodl); TLI.setUnavailable(LibFunc_frexpl); - TLI.setUnavailable(LibFunc_ldexpf); TLI.setUnavailable(LibFunc_ldexpl); + TLI.setUnavailable(LibFunc_log10l); TLI.setUnavailable(LibFunc_logl); TLI.setUnavailable(LibFunc_modfl); TLI.setUnavailable(LibFunc_powl); @@ -189,81 +252,66 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_tanl); TLI.setUnavailable(LibFunc_tanhl); - // Win32 only has C89 math - TLI.setUnavailable(LibFunc_acosh); - TLI.setUnavailable(LibFunc_acoshf); + // Win32 does not fully support C99 math functions. + if (!hasPartialC99) { + TLI.setUnavailable(LibFunc_acosh); + TLI.setUnavailable(LibFunc_acoshf); + TLI.setUnavailable(LibFunc_asinh); + TLI.setUnavailable(LibFunc_asinhf); + TLI.setUnavailable(LibFunc_atanh); + TLI.setUnavailable(LibFunc_atanhf); + TLI.setAvailableWithName(LibFunc_cabs, "_cabs"); + TLI.setUnavailable(LibFunc_cabsf); + TLI.setUnavailable(LibFunc_cbrt); + TLI.setUnavailable(LibFunc_cbrtf); + TLI.setAvailableWithName(LibFunc_copysign, "_copysign"); + TLI.setAvailableWithName(LibFunc_copysignf, "_copysignf"); + TLI.setUnavailable(LibFunc_exp2); + TLI.setUnavailable(LibFunc_exp2f); + TLI.setUnavailable(LibFunc_expm1); + TLI.setUnavailable(LibFunc_expm1f); + TLI.setUnavailable(LibFunc_fmax); + TLI.setUnavailable(LibFunc_fmaxf); + TLI.setUnavailable(LibFunc_fmin); + TLI.setUnavailable(LibFunc_fminf); + TLI.setUnavailable(LibFunc_log1p); + TLI.setUnavailable(LibFunc_log1pf); + TLI.setUnavailable(LibFunc_log2); + TLI.setUnavailable(LibFunc_log2f); + TLI.setAvailableWithName(LibFunc_logb, "_logb"); + if (hasPartialFloat) + TLI.setAvailableWithName(LibFunc_logbf, "_logbf"); + else + TLI.setUnavailable(LibFunc_logbf); + TLI.setUnavailable(LibFunc_rint); + TLI.setUnavailable(LibFunc_rintf); + TLI.setUnavailable(LibFunc_round); + TLI.setUnavailable(LibFunc_roundf); + TLI.setUnavailable(LibFunc_trunc); + TLI.setUnavailable(LibFunc_truncf); + } + + // Win32 does not support long double C99 math functions. 
TLI.setUnavailable(LibFunc_acoshl); - TLI.setUnavailable(LibFunc_asinh); - TLI.setUnavailable(LibFunc_asinhf); TLI.setUnavailable(LibFunc_asinhl); - TLI.setUnavailable(LibFunc_atanh); - TLI.setUnavailable(LibFunc_atanhf); TLI.setUnavailable(LibFunc_atanhl); - TLI.setUnavailable(LibFunc_cabs); - TLI.setUnavailable(LibFunc_cabsf); TLI.setUnavailable(LibFunc_cabsl); - TLI.setUnavailable(LibFunc_cbrt); - TLI.setUnavailable(LibFunc_cbrtf); TLI.setUnavailable(LibFunc_cbrtl); - TLI.setUnavailable(LibFunc_exp2); - TLI.setUnavailable(LibFunc_exp2f); + TLI.setUnavailable(LibFunc_copysignl); TLI.setUnavailable(LibFunc_exp2l); - TLI.setUnavailable(LibFunc_expm1); - TLI.setUnavailable(LibFunc_expm1f); TLI.setUnavailable(LibFunc_expm1l); - TLI.setUnavailable(LibFunc_log2); - TLI.setUnavailable(LibFunc_log2f); - TLI.setUnavailable(LibFunc_log2l); - TLI.setUnavailable(LibFunc_log1p); - TLI.setUnavailable(LibFunc_log1pf); + TLI.setUnavailable(LibFunc_fmaxl); + TLI.setUnavailable(LibFunc_fminl); TLI.setUnavailable(LibFunc_log1pl); - TLI.setUnavailable(LibFunc_logb); - TLI.setUnavailable(LibFunc_logbf); + TLI.setUnavailable(LibFunc_log2l); TLI.setUnavailable(LibFunc_logbl); - TLI.setUnavailable(LibFunc_nearbyint); - TLI.setUnavailable(LibFunc_nearbyintf); TLI.setUnavailable(LibFunc_nearbyintl); - TLI.setUnavailable(LibFunc_rint); - TLI.setUnavailable(LibFunc_rintf); TLI.setUnavailable(LibFunc_rintl); - TLI.setUnavailable(LibFunc_round); - TLI.setUnavailable(LibFunc_roundf); TLI.setUnavailable(LibFunc_roundl); - TLI.setUnavailable(LibFunc_trunc); - TLI.setUnavailable(LibFunc_truncf); TLI.setUnavailable(LibFunc_truncl); - // Win32 provides some C99 math with mangled names - TLI.setAvailableWithName(LibFunc_copysign, "_copysign"); - - if (T.getArch() == Triple::x86) { - // Win32 on x86 implements single-precision math functions as macros - TLI.setUnavailable(LibFunc_acosf); - TLI.setUnavailable(LibFunc_asinf); - TLI.setUnavailable(LibFunc_atanf); - TLI.setUnavailable(LibFunc_atan2f); - TLI.setUnavailable(LibFunc_ceilf); - TLI.setUnavailable(LibFunc_copysignf); - TLI.setUnavailable(LibFunc_cosf); - TLI.setUnavailable(LibFunc_coshf); - TLI.setUnavailable(LibFunc_expf); - TLI.setUnavailable(LibFunc_floorf); - TLI.setUnavailable(LibFunc_fminf); - TLI.setUnavailable(LibFunc_fmaxf); - TLI.setUnavailable(LibFunc_fmodf); - TLI.setUnavailable(LibFunc_logf); - TLI.setUnavailable(LibFunc_log10f); - TLI.setUnavailable(LibFunc_modff); - TLI.setUnavailable(LibFunc_powf); - TLI.setUnavailable(LibFunc_sinf); - TLI.setUnavailable(LibFunc_sinhf); - TLI.setUnavailable(LibFunc_sqrtf); - TLI.setUnavailable(LibFunc_tanf); - TLI.setUnavailable(LibFunc_tanhf); - } - - // Win32 does *not* provide these functions, but they are - // generally available on POSIX-compliant systems: + // Win32 does not support these functions, but + // they are generally available on POSIX-compliant systems. 
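// Illustrative sketch (not part of this patch): a simplified model of the
// "_chk" prototype handling added below for strlcpy/strlcat and similar
// fortified functions. The _chk variant carries one extra trailing object-size
// parameter, which is verified to be size_t and stripped before falling
// through to the plain function's check. ParamKind and the helper are
// stand-ins, not LLVM types, and the return-type check is omitted here.
#include <vector>

enum class ParamKind { Pointer, SizeT, Int32 };

inline bool isValidStrlcpyLikeProto(std::vector<ParamKind> Params,
                                    bool IsChkVariant) {
  if (IsChkVariant) {
    if (Params.empty() || Params.back() != ParamKind::SizeT)
      return false;
    Params.pop_back(); // strip the trailing object-size argument
  }
  return Params.size() == 3 && Params[0] == ParamKind::Pointer &&
         Params[1] == ParamKind::Pointer && Params[2] == ParamKind::SizeT;
}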
TLI.setUnavailable(LibFunc_access); TLI.setUnavailable(LibFunc_bcmp); TLI.setUnavailable(LibFunc_bcopy); @@ -318,12 +366,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_utime); TLI.setUnavailable(LibFunc_utimes); TLI.setUnavailable(LibFunc_write); - - // Win32 does *not* provide provide these functions, but they are - // specified by C99: - TLI.setUnavailable(LibFunc_atoll); - TLI.setUnavailable(LibFunc_frexpf); - TLI.setUnavailable(LibFunc_llabs); } switch (T.getOS()) { @@ -651,11 +693,21 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return ((NumParams == 2 || NumParams == 3) && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); + case LibFunc_strcat_chk: + --NumParams; + if (!IsSizeTTy(FTy.getParamType(NumParams))) + return false; + LLVM_FALLTHROUGH; case LibFunc_strcat: return (NumParams == 2 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0) == FTy.getReturnType() && FTy.getParamType(1) == FTy.getReturnType()); + case LibFunc_strncat_chk: + --NumParams; + if (!IsSizeTTy(FTy.getParamType(NumParams))) + return false; + LLVM_FALLTHROUGH; case LibFunc_strncat: return (NumParams == 3 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0) == FTy.getReturnType() && @@ -674,6 +726,19 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, FTy.getParamType(0) == FTy.getParamType(1) && FTy.getParamType(0) == PCharTy); + case LibFunc_strlcat_chk: + case LibFunc_strlcpy_chk: + --NumParams; + if (!IsSizeTTy(FTy.getParamType(NumParams))) + return false; + LLVM_FALLTHROUGH; + case LibFunc_strlcat: + case LibFunc_strlcpy: + return NumParams == 3 && IsSizeTTy(FTy.getReturnType()) && + FTy.getParamType(0)->isPointerTy() && + FTy.getParamType(1)->isPointerTy() && + IsSizeTTy(FTy.getParamType(2)); + case LibFunc_strncpy_chk: case LibFunc_stpncpy_chk: --NumParams; @@ -739,14 +804,32 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_stat: case LibFunc_statvfs: case LibFunc_siprintf: + case LibFunc_small_sprintf: case LibFunc_sprintf: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy() && FTy.getReturnType()->isIntegerTy(32)); + + case LibFunc_sprintf_chk: + return NumParams == 4 && FTy.getParamType(0)->isPointerTy() && + FTy.getParamType(1)->isIntegerTy(32) && + IsSizeTTy(FTy.getParamType(2)) && + FTy.getParamType(3)->isPointerTy() && + FTy.getReturnType()->isIntegerTy(32); + case LibFunc_snprintf: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(2)->isPointerTy() && FTy.getReturnType()->isIntegerTy(32)); + + case LibFunc_snprintf_chk: + return NumParams == 5 && FTy.getParamType(0)->isPointerTy() && + IsSizeTTy(FTy.getParamType(1)) && + FTy.getParamType(2)->isIntegerTy(32) && + IsSizeTTy(FTy.getParamType(3)) && + FTy.getParamType(4)->isPointerTy() && + FTy.getReturnType()->isIntegerTy(32); + case LibFunc_setitimer: return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() && FTy.getParamType(2)->isPointerTy()); @@ -795,6 +878,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, FTy.getParamType(1)->isIntegerTy() && IsSizeTTy(FTy.getParamType(2))); + case LibFunc_memccpy_chk: + --NumParams; + if (!IsSizeTTy(FTy.getParamType(NumParams))) + return false; + LLVM_FALLTHROUGH; case LibFunc_memccpy: return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy()); case LibFunc_memalign: @@ -836,6 +924,7 @@ bool 
TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_getenv: case LibFunc_getpwnam: case LibFunc_iprintf: + case LibFunc_small_printf: case LibFunc_pclose: case LibFunc_perror: case LibFunc_printf: @@ -915,6 +1004,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, FTy.getParamType(1)->isPointerTy()); case LibFunc_fscanf: case LibFunc_fiprintf: + case LibFunc_small_fprintf: case LibFunc_fprintf: return (NumParams >= 2 && FTy.getReturnType()->isIntegerTy() && FTy.getParamType(0)->isPointerTy() && @@ -961,9 +1051,17 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_vsprintf: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); + case LibFunc_vsprintf_chk: + return NumParams == 5 && FTy.getParamType(0)->isPointerTy() && + FTy.getParamType(1)->isIntegerTy(32) && + IsSizeTTy(FTy.getParamType(2)) && FTy.getParamType(3)->isPointerTy(); case LibFunc_vsnprintf: return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(2)->isPointerTy()); + case LibFunc_vsnprintf_chk: + return NumParams == 6 && FTy.getParamType(0)->isPointerTy() && + FTy.getParamType(2)->isIntegerTy(32) && + IsSizeTTy(FTy.getParamType(3)) && FTy.getParamType(4)->isPointerTy(); case LibFunc_open: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy()); case LibFunc_opendir: @@ -1391,6 +1489,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl, LibFunc &F) const { + // Intrinsics don't overlap w/libcalls; if our module has a large number of + // intrinsics, this ends up being an interesting compile time win since we + // avoid string normalization and comparison. + if (FDecl.isIntrinsic()) return false; + const DataLayout *DL = FDecl.getParent() ? 
&FDecl.getParent()->getDataLayout() : nullptr; return getLibFunc(FDecl.getName(), F) && @@ -1430,151 +1533,24 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( switch (VecLib) { case Accelerate: { const VecDesc VecFuncs[] = { - // Floating-Point Arithmetic and Auxiliary Functions - {"ceilf", "vceilf", 4}, - {"fabsf", "vfabsf", 4}, - {"llvm.fabs.f32", "vfabsf", 4}, - {"floorf", "vfloorf", 4}, - {"sqrtf", "vsqrtf", 4}, - {"llvm.sqrt.f32", "vsqrtf", 4}, - - // Exponential and Logarithmic Functions - {"expf", "vexpf", 4}, - {"llvm.exp.f32", "vexpf", 4}, - {"expm1f", "vexpm1f", 4}, - {"logf", "vlogf", 4}, - {"llvm.log.f32", "vlogf", 4}, - {"log1pf", "vlog1pf", 4}, - {"log10f", "vlog10f", 4}, - {"llvm.log10.f32", "vlog10f", 4}, - {"logbf", "vlogbf", 4}, - - // Trigonometric Functions - {"sinf", "vsinf", 4}, - {"llvm.sin.f32", "vsinf", 4}, - {"cosf", "vcosf", 4}, - {"llvm.cos.f32", "vcosf", 4}, - {"tanf", "vtanf", 4}, - {"asinf", "vasinf", 4}, - {"acosf", "vacosf", 4}, - {"atanf", "vatanf", 4}, - - // Hyperbolic Functions - {"sinhf", "vsinhf", 4}, - {"coshf", "vcoshf", 4}, - {"tanhf", "vtanhf", 4}, - {"asinhf", "vasinhf", 4}, - {"acoshf", "vacoshf", 4}, - {"atanhf", "vatanhf", 4}, + #define TLI_DEFINE_ACCELERATE_VECFUNCS + #include "llvm/Analysis/VecFuncs.def" + }; + addVectorizableFunctions(VecFuncs); + break; + } + case MASSV: { + const VecDesc VecFuncs[] = { + #define TLI_DEFINE_MASSV_VECFUNCS + #include "llvm/Analysis/VecFuncs.def" }; addVectorizableFunctions(VecFuncs); break; } case SVML: { const VecDesc VecFuncs[] = { - {"sin", "__svml_sin2", 2}, - {"sin", "__svml_sin4", 4}, - {"sin", "__svml_sin8", 8}, - - {"sinf", "__svml_sinf4", 4}, - {"sinf", "__svml_sinf8", 8}, - {"sinf", "__svml_sinf16", 16}, - - {"llvm.sin.f64", "__svml_sin2", 2}, - {"llvm.sin.f64", "__svml_sin4", 4}, - {"llvm.sin.f64", "__svml_sin8", 8}, - - {"llvm.sin.f32", "__svml_sinf4", 4}, - {"llvm.sin.f32", "__svml_sinf8", 8}, - {"llvm.sin.f32", "__svml_sinf16", 16}, - - {"cos", "__svml_cos2", 2}, - {"cos", "__svml_cos4", 4}, - {"cos", "__svml_cos8", 8}, - - {"cosf", "__svml_cosf4", 4}, - {"cosf", "__svml_cosf8", 8}, - {"cosf", "__svml_cosf16", 16}, - - {"llvm.cos.f64", "__svml_cos2", 2}, - {"llvm.cos.f64", "__svml_cos4", 4}, - {"llvm.cos.f64", "__svml_cos8", 8}, - - {"llvm.cos.f32", "__svml_cosf4", 4}, - {"llvm.cos.f32", "__svml_cosf8", 8}, - {"llvm.cos.f32", "__svml_cosf16", 16}, - - {"pow", "__svml_pow2", 2}, - {"pow", "__svml_pow4", 4}, - {"pow", "__svml_pow8", 8}, - - {"powf", "__svml_powf4", 4}, - {"powf", "__svml_powf8", 8}, - {"powf", "__svml_powf16", 16}, - - { "__pow_finite", "__svml_pow2", 2 }, - { "__pow_finite", "__svml_pow4", 4 }, - { "__pow_finite", "__svml_pow8", 8 }, - - { "__powf_finite", "__svml_powf4", 4 }, - { "__powf_finite", "__svml_powf8", 8 }, - { "__powf_finite", "__svml_powf16", 16 }, - - {"llvm.pow.f64", "__svml_pow2", 2}, - {"llvm.pow.f64", "__svml_pow4", 4}, - {"llvm.pow.f64", "__svml_pow8", 8}, - - {"llvm.pow.f32", "__svml_powf4", 4}, - {"llvm.pow.f32", "__svml_powf8", 8}, - {"llvm.pow.f32", "__svml_powf16", 16}, - - {"exp", "__svml_exp2", 2}, - {"exp", "__svml_exp4", 4}, - {"exp", "__svml_exp8", 8}, - - {"expf", "__svml_expf4", 4}, - {"expf", "__svml_expf8", 8}, - {"expf", "__svml_expf16", 16}, - - { "__exp_finite", "__svml_exp2", 2 }, - { "__exp_finite", "__svml_exp4", 4 }, - { "__exp_finite", "__svml_exp8", 8 }, - - { "__expf_finite", "__svml_expf4", 4 }, - { "__expf_finite", "__svml_expf8", 8 }, - { "__expf_finite", "__svml_expf16", 16 }, - - {"llvm.exp.f64", "__svml_exp2", 
2}, - {"llvm.exp.f64", "__svml_exp4", 4}, - {"llvm.exp.f64", "__svml_exp8", 8}, - - {"llvm.exp.f32", "__svml_expf4", 4}, - {"llvm.exp.f32", "__svml_expf8", 8}, - {"llvm.exp.f32", "__svml_expf16", 16}, - - {"log", "__svml_log2", 2}, - {"log", "__svml_log4", 4}, - {"log", "__svml_log8", 8}, - - {"logf", "__svml_logf4", 4}, - {"logf", "__svml_logf8", 8}, - {"logf", "__svml_logf16", 16}, - - { "__log_finite", "__svml_log2", 2 }, - { "__log_finite", "__svml_log4", 4 }, - { "__log_finite", "__svml_log8", 8 }, - - { "__logf_finite", "__svml_logf4", 4 }, - { "__logf_finite", "__svml_logf8", 8 }, - { "__logf_finite", "__svml_logf16", 16 }, - - {"llvm.log.f64", "__svml_log2", 2}, - {"llvm.log.f64", "__svml_log4", 4}, - {"llvm.log.f64", "__svml_log8", 8}, - - {"llvm.log.f32", "__svml_logf4", 4}, - {"llvm.log.f32", "__svml_logf8", 8}, - {"llvm.log.f32", "__svml_logf16", 16}, + #define TLI_DEFINE_SVML_VECFUNCS + #include "llvm/Analysis/VecFuncs.def" }; addVectorizableFunctions(VecFuncs); break; @@ -1589,9 +1565,8 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { if (funcName.empty()) return false; - std::vector<VecDesc>::const_iterator I = std::lower_bound( - VectorDescs.begin(), VectorDescs.end(), funcName, - compareWithScalarFnName); + std::vector<VecDesc>::const_iterator I = + llvm::lower_bound(VectorDescs, funcName, compareWithScalarFnName); return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName; } @@ -1600,8 +1575,8 @@ StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, F = sanitizeFunctionName(F); if (F.empty()) return F; - std::vector<VecDesc>::const_iterator I = std::lower_bound( - VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName); + std::vector<VecDesc>::const_iterator I = + llvm::lower_bound(VectorDescs, F, compareWithScalarFnName); while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) { if (I->VectorizationFactor == VF) return I->VectorFnName; @@ -1616,8 +1591,8 @@ StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F, if (F.empty()) return F; - std::vector<VecDesc>::const_iterator I = std::lower_bound( - ScalarDescs.begin(), ScalarDescs.end(), F, compareWithVectorFnName); + std::vector<VecDesc>::const_iterator I = + llvm::lower_bound(ScalarDescs, F, compareWithVectorFnName); if (I == VectorDescs.end() || StringRef(I->VectorFnName) != F) return StringRef(); VF = I->VectorizationFactor; diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 9151d46c6cce..eb04c34453fb 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -1,9 +1,8 @@ //===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
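// Illustrative sketch (not part of this patch): the X-macro layout the
// vector-library tables above now come from. VecFuncs.def expands
// TLI_DEFINE_ACCELERATE_VECFUNCS / TLI_DEFINE_MASSV_VECFUNCS /
// TLI_DEFINE_SVML_VECFUNCS into rows of {scalar name, vector name, VF}. The
// macro body here is a made-up three-row sample, not the real .def contents;
// the sample rows themselves are taken from the table this patch removes.
struct SampleVecDesc {
  const char *ScalarFnName;
  const char *VectorFnName;
  unsigned VectorizationFactor;
};

#define SAMPLE_ACCELERATE_VECFUNCS                                            \
  {"expf", "vexpf", 4}, {"logf", "vlogf", 4}, {"sinf", "vsinf", 4},

static const SampleVecDesc SampleVecFuncs[] = {
    SAMPLE_ACCELERATE_VECFUNCS
};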
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -19,6 +18,8 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LoopIterator.h" #include <utility> using namespace llvm; @@ -41,6 +42,101 @@ struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> { }; } +bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) { + // If the loop has irreducible control flow, it can not be converted to + // Hardware loop. + LoopBlocksRPO RPOT(L); + RPOT.perform(&LI); + if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI)) + return false; + return true; +} + +bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, + LoopInfo &LI, DominatorTree &DT, + bool ForceNestedLoop, + bool ForceHardwareLoopPHI) { + SmallVector<BasicBlock *, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), + IE = ExitingBlocks.end(); + I != IE; ++I) { + BasicBlock *BB = *I; + + // If we pass the updated counter back through a phi, we need to know + // which latch the updated value will be coming from. + if (!L->isLoopLatch(BB)) { + if (ForceHardwareLoopPHI || CounterInReg) + continue; + } + + const SCEV *EC = SE.getExitCount(L, BB); + if (isa<SCEVCouldNotCompute>(EC)) + continue; + if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { + if (ConstEC->getValue()->isZero()) + continue; + } else if (!SE.isLoopInvariant(EC, L)) + continue; + + if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth()) + continue; + + // If this exiting block is contained in a nested loop, it is not eligible + // for insertion of the branch-and-decrement since the inner loop would + // end up messing up the value in the CTR. + if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop) + continue; + + // We now have a loop-invariant count of loop iterations (which is not the + // constant zero) for which we know that this loop will not exit via this + // existing block. + + // We need to make sure that this block will run on every loop iteration. + // For this to be true, we must dominate all blocks with backedges. Such + // blocks are in-loop predecessors to the header block. + bool NotAlways = false; + for (pred_iterator PI = pred_begin(L->getHeader()), + PIE = pred_end(L->getHeader()); + PI != PIE; ++PI) { + if (!L->contains(*PI)) + continue; + + if (!DT.dominates(*I, *PI)) { + NotAlways = true; + break; + } + } + + if (NotAlways) + continue; + + // Make sure this blocks ends with a conditional branch. + Instruction *TI = BB->getTerminator(); + if (!TI) + continue; + + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (!BI->isConditional()) + continue; + + ExitBranch = BI; + } else + continue; + + // Note that this block may not be the loop latch block, even if the loop + // has a latch block. 
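// Illustrative sketch (not part of this patch): the "runs on every iteration"
// requirement applied above when picking an exiting block for a hardware loop,
// restated stand-alone. A usable exiting block must dominate every in-loop
// predecessor of the header (i.e. every latch). Dominates is a stand-in for
// DominatorTree::dominates.
#include <functional>
#include <vector>

template <typename BlockT>
bool dominatesAllLatches(BlockT Exiting, const std::vector<BlockT> &Latches,
                         const std::function<bool(BlockT, BlockT)> &Dominates) {
  for (BlockT Latch : Latches)
    if (!Dominates(Exiting, Latch))
      return false;
  return true;
}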
+ ExitBlock = *I; + ExitCount = EC; + break; + } + + if (!ExitBlock) + return false; + return true; +} + TargetTransformInfo::TargetTransformInfo(const DataLayout &DL) : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {} @@ -61,15 +157,17 @@ int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, return Cost; } -int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const { - int Cost = TTIImpl->getCallCost(FTy, NumArgs); +int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs, + const User *U) const { + int Cost = TTIImpl->getCallCost(FTy, NumArgs, U); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCallCost(const Function *F, - ArrayRef<const Value *> Arguments) const { - int Cost = TTIImpl->getCallCost(F, Arguments); + ArrayRef<const Value *> Arguments, + const User *U) const { + int Cost = TTIImpl->getCallCost(F, Arguments, U); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -78,6 +176,10 @@ unsigned TargetTransformInfo::getInliningThresholdMultiplier() const { return TTIImpl->getInliningThresholdMultiplier(); } +int TargetTransformInfo::getInlinerVectorBonusPercent() const { + return TTIImpl->getInlinerVectorBonusPercent(); +} + int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands) const { return TTIImpl->getGEPCost(PointeeType, Ptr, Operands); @@ -89,8 +191,9 @@ int TargetTransformInfo::getExtCost(const Instruction *I, } int TargetTransformInfo::getIntrinsicCost( - Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const { - int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); + Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments, + const User *U) const { + int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -128,6 +231,12 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } +bool TargetTransformInfo::isHardwareLoopProfitable( + Loop *L, ScalarEvolution &SE, AssumptionCache &AC, + TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const { + return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); +} + void TargetTransformInfo::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const { return TTIImpl->getUnrollingPreferences(L, SE, UP); @@ -159,10 +268,21 @@ bool TargetTransformInfo::canMacroFuseCmp() const { return TTIImpl->canMacroFuseCmp(); } +bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI, + ScalarEvolution *SE, LoopInfo *LI, + DominatorTree *DT, AssumptionCache *AC, + TargetLibraryInfo *LibInfo) const { + return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); +} + bool TargetTransformInfo::shouldFavorPostInc() const { return TTIImpl->shouldFavorPostInc(); } +bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const { + return TTIImpl->shouldFavorBackedgeIndex(L); +} + bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { return TTIImpl->isLegalMaskedStore(DataType); } @@ -171,6 +291,16 @@ bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { return TTIImpl->isLegalMaskedLoad(DataType); } +bool TargetTransformInfo::isLegalNTStore(Type *DataType, + unsigned Alignment) const { + return TTIImpl->isLegalNTStore(DataType, Alignment); +} + +bool TargetTransformInfo::isLegalNTLoad(Type *DataType, + unsigned 
Alignment) const { + return TTIImpl->isLegalNTLoad(DataType, Alignment); +} + bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const { return TTIImpl->isLegalMaskedGather(DataType); } @@ -179,6 +309,14 @@ bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { return TTIImpl->isLegalMaskedScatter(DataType); } +bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const { + return TTIImpl->isLegalMaskedCompressStore(DataType); +} + +bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const { + return TTIImpl->isLegalMaskedExpandLoad(DataType); +} + bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const { return TTIImpl->hasDivRemOp(DataType, IsSigned); } @@ -259,9 +397,9 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } -const TargetTransformInfo::MemCmpExpansionOptions * -TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const { - return TTIImpl->enableMemCmpExpansion(IsZeroCmp); +TargetTransformInfo::MemCmpExpansionOptions +TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { + return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp); } bool TargetTransformInfo::enableInterleavedAccessVectorization() const { @@ -570,6 +708,12 @@ int TargetTransformInfo::getAddressComputationCost(Type *Tp, return Cost; } +int TargetTransformInfo::getMemcpyCost(const Instruction *I) const { + int Cost = TTIImpl->getMemcpyCost(I); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const { int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm); @@ -688,6 +832,10 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { return TTIImpl->shouldExpandReduction(II); } +unsigned TargetTransformInfo::getGISelRematGlobalCost() const { + return TTIImpl->getGISelRematGlobalCost(); +} + int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { return TTIImpl->getInstructionLatency(I); } @@ -1023,6 +1171,16 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, Op1VP, Op2VP, Operands); } + case Instruction::FNeg: { + TargetTransformInfo::OperandValueKind Op1VK, Op2VK; + TargetTransformInfo::OperandValueProperties Op1VP, Op2VP; + Op1VK = getOperandInfo(I->getOperand(0), Op1VP); + Op2VK = OK_AnyValue; + Op2VP = OP_None; + SmallVector<const Value *, 2> Operands(I->operand_values()); + return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, + Op1VP, Op2VP, Operands); + } case Instruction::Select: { const SelectInst *SI = cast<SelectInst>(I); Type *CondTy = SI->getCondition()->getType(); diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp index 4dec53151ed6..879c7172d038 100644 --- a/lib/Analysis/Trace.cpp +++ b/lib/Analysis/Trace.cpp @@ -1,9 +1,8 @@ //===- Trace.cpp - Implementation of Trace class --------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 83974da30a54..3b9040aa0f52 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -1,9 +1,8 @@ //===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -368,26 +367,28 @@ static bool isStructPathTBAA(const MDNode *MD) { } AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { + const MemoryLocation &LocB, + AAQueryInfo &AAQI) { if (!EnableTBAA) - return AAResultBase::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB, AAQI); // If accesses may alias, chain to the next AliasAnalysis. if (Aliases(LocA.AATags.TBAA, LocB.AATags.TBAA)) - return AAResultBase::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB, AAQI); // Otherwise return a definitive result. return NoAlias; } bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc, + AAQueryInfo &AAQI, bool OrLocal) { if (!EnableTBAA) - return AAResultBase::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); const MDNode *M = Loc.AATags.TBAA; if (!M) - return AAResultBase::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); // If this is an "immutable" type, we can assume the pointer is pointing // to constant memory. 
@@ -395,7 +396,7 @@ bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc, (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable())) return true; - return AAResultBase::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); } FunctionModRefBehavior @@ -421,29 +422,31 @@ FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) { } ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call, - const MemoryLocation &Loc) { + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { if (!EnableTBAA) - return AAResultBase::getModRefInfo(Call, Loc); + return AAResultBase::getModRefInfo(Call, Loc, AAQI); if (const MDNode *L = Loc.AATags.TBAA) if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(L, M)) return ModRefInfo::NoModRef; - return AAResultBase::getModRefInfo(Call, Loc); + return AAResultBase::getModRefInfo(Call, Loc, AAQI); } ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1, - const CallBase *Call2) { + const CallBase *Call2, + AAQueryInfo &AAQI) { if (!EnableTBAA) - return AAResultBase::getModRefInfo(Call1, Call2); + return AAResultBase::getModRefInfo(Call1, Call2, AAQI); if (const MDNode *M1 = Call1->getMetadata(LLVMContext::MD_tbaa)) if (const MDNode *M2 = Call2->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(M1, M2)) return ModRefInfo::NoModRef; - return AAResultBase::getModRefInfo(Call1, Call2); + return AAResultBase::getModRefInfo(Call1, Call2, AAQI); } bool MDNode::isTBAAVtableAccess() const { diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp index bd13a43b8d46..9311dfbc6eba 100644 --- a/lib/Analysis/TypeMetadataUtils.cpp +++ b/lib/Analysis/TypeMetadataUtils.cpp @@ -1,9 +1,8 @@ //===- TypeMetadataUtils.cpp - Utilities related to type metadata ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/ValueLattice.cpp b/lib/Analysis/ValueLattice.cpp index 7de437ca480e..a0115a0eec36 100644 --- a/lib/Analysis/ValueLattice.cpp +++ b/lib/Analysis/ValueLattice.cpp @@ -1,9 +1,8 @@ //===- ValueLattice.cpp - Value constraint analysis -------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/ValueLatticeUtils.cpp b/lib/Analysis/ValueLatticeUtils.cpp index 22c9de4fe94d..3f9287e26ce7 100644 --- a/lib/Analysis/ValueLatticeUtils.cpp +++ b/lib/Analysis/ValueLatticeUtils.cpp @@ -1,9 +1,8 @@ //===-- ValueLatticeUtils.cpp - Utils for solving lattices ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 0446426c0e66..c70906dcc629 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1,9 +1,8 @@ //===- ValueTracking.cpp - Walk computations to compute properties --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -39,7 +38,6 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -617,237 +615,242 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, if (Depth == MaxDepth) continue; + ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg); + if (!Cmp) + continue; + Value *A, *B; - auto m_V = m_CombineOr(m_Specific(V), - m_CombineOr(m_PtrToInt(m_Specific(V)), - m_BitCast(m_Specific(V)))); + auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V))); CmpInst::Predicate Pred; uint64_t C; - // assume(v = a) - if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - Known.Zero |= RHSKnown.Zero; - Known.One |= RHSKnown.One; - // assume(v & b = a) - } else if (match(Arg, - m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits MaskKnown(BitWidth); - computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); - - // For those bits in the mask that are known to be one, we can propagate - // known bits from the RHS to V. - Known.Zero |= RHSKnown.Zero & MaskKnown.One; - Known.One |= RHSKnown.One & MaskKnown.One; - // assume(~(v & b) = a) - } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits MaskKnown(BitWidth); - computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); - - // For those bits in the mask that are known to be one, we can propagate - // inverted known bits from the RHS to V. 
- Known.Zero |= RHSKnown.One & MaskKnown.One; - Known.One |= RHSKnown.Zero & MaskKnown.One; - // assume(v | b = a) - } else if (match(Arg, - m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits BKnown(BitWidth); - computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); - - // For those bits in B that are known to be zero, we can propagate known - // bits from the RHS to V. - Known.Zero |= RHSKnown.Zero & BKnown.Zero; - Known.One |= RHSKnown.One & BKnown.Zero; - // assume(~(v | b) = a) - } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits BKnown(BitWidth); - computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); - - // For those bits in B that are known to be zero, we can propagate - // inverted known bits from the RHS to V. - Known.Zero |= RHSKnown.One & BKnown.Zero; - Known.One |= RHSKnown.Zero & BKnown.Zero; - // assume(v ^ b = a) - } else if (match(Arg, - m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits BKnown(BitWidth); - computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); - - // For those bits in B that are known to be zero, we can propagate known - // bits from the RHS to V. For those bits in B that are known to be one, - // we can propagate inverted known bits from the RHS to V. - Known.Zero |= RHSKnown.Zero & BKnown.Zero; - Known.One |= RHSKnown.One & BKnown.Zero; - Known.Zero |= RHSKnown.One & BKnown.One; - Known.One |= RHSKnown.Zero & BKnown.One; - // assume(~(v ^ b) = a) - } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - KnownBits BKnown(BitWidth); - computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); - - // For those bits in B that are known to be zero, we can propagate - // inverted known bits from the RHS to V. For those bits in B that are - // known to be one, we can propagate known bits from the RHS to V. - Known.Zero |= RHSKnown.One & BKnown.Zero; - Known.One |= RHSKnown.Zero & BKnown.Zero; - Known.Zero |= RHSKnown.Zero & BKnown.One; - Known.One |= RHSKnown.One & BKnown.One; - // assume(v << c = a) - } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT) && - C < BitWidth) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - // For those bits in RHS that are known, we can propagate them to known - // bits in V shifted to the right by C. 
- RHSKnown.Zero.lshrInPlace(C); - Known.Zero |= RHSKnown.Zero; - RHSKnown.One.lshrInPlace(C); - Known.One |= RHSKnown.One; - // assume(~(v << c) = a) - } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT) && - C < BitWidth) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - // For those bits in RHS that are known, we can propagate them inverted - // to known bits in V shifted to the right by C. - RHSKnown.One.lshrInPlace(C); - Known.Zero |= RHSKnown.One; - RHSKnown.Zero.lshrInPlace(C); - Known.One |= RHSKnown.Zero; - // assume(v >> c = a) - } else if (match(Arg, - m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT) && - C < BitWidth) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - // For those bits in RHS that are known, we can propagate them to known - // bits in V shifted to the right by C. - Known.Zero |= RHSKnown.Zero << C; - Known.One |= RHSKnown.One << C; - // assume(~(v >> c) = a) - } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))), - m_Value(A))) && - Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT) && - C < BitWidth) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - // For those bits in RHS that are known, we can propagate them inverted - // to known bits in V shifted to the right by C. - Known.Zero |= RHSKnown.One << C; - Known.One |= RHSKnown.Zero << C; - // assume(v >=_s c) where c is non-negative - } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SGE && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - - if (RHSKnown.isNonNegative()) { - // We know that the sign bit is zero. - Known.makeNonNegative(); + switch (Cmp->getPredicate()) { + default: + break; + case ICmpInst::ICMP_EQ: + // assume(v = a) + if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + Known.Zero |= RHSKnown.Zero; + Known.One |= RHSKnown.One; + // assume(v & b = a) + } else if (match(Cmp, + m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits MaskKnown(BitWidth); + computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); + + // For those bits in the mask that are known to be one, we can propagate + // known bits from the RHS to V. + Known.Zero |= RHSKnown.Zero & MaskKnown.One; + Known.One |= RHSKnown.One & MaskKnown.One; + // assume(~(v & b) = a) + } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), + m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits MaskKnown(BitWidth); + computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); + + // For those bits in the mask that are known to be one, we can propagate + // inverted known bits from the RHS to V. 
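// ---- Editor's note (worked example, not part of this commit) ----
// For i8 and assume(~(v & 0x0F) == 0xFA), i.e. v & 0x0F == 0x05:
//   MaskKnown.One = 0000'1111          (b = 0x0F)
//   RHSKnown      = 1111'1010, fully known   (a = 0xFA)
// The two statements below then compute
//   Known.Zero |= 1111'1010 & 0000'1111 = 0000'1010
//   Known.One  |= 0000'0101 & 0000'1111 = 0000'0101
// so the low nibble of v becomes a known 0101, while the high nibble,
// outside the mask, stays unknown.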
+ Known.Zero |= RHSKnown.One & MaskKnown.One; + Known.One |= RHSKnown.Zero & MaskKnown.One; + // assume(v | b = a) + } else if (match(Cmp, + m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); + + // For those bits in B that are known to be zero, we can propagate known + // bits from the RHS to V. + Known.Zero |= RHSKnown.Zero & BKnown.Zero; + Known.One |= RHSKnown.One & BKnown.Zero; + // assume(~(v | b) = a) + } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), + m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); + + // For those bits in B that are known to be zero, we can propagate + // inverted known bits from the RHS to V. + Known.Zero |= RHSKnown.One & BKnown.Zero; + Known.One |= RHSKnown.Zero & BKnown.Zero; + // assume(v ^ b = a) + } else if (match(Cmp, + m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); + + // For those bits in B that are known to be zero, we can propagate known + // bits from the RHS to V. For those bits in B that are known to be one, + // we can propagate inverted known bits from the RHS to V. + Known.Zero |= RHSKnown.Zero & BKnown.Zero; + Known.One |= RHSKnown.One & BKnown.Zero; + Known.Zero |= RHSKnown.One & BKnown.One; + Known.One |= RHSKnown.Zero & BKnown.One; + // assume(~(v ^ b) = a) + } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), + m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); + + // For those bits in B that are known to be zero, we can propagate + // inverted known bits from the RHS to V. For those bits in B that are + // known to be one, we can propagate known bits from the RHS to V. + Known.Zero |= RHSKnown.One & BKnown.Zero; + Known.One |= RHSKnown.Zero & BKnown.Zero; + Known.Zero |= RHSKnown.Zero & BKnown.One; + Known.One |= RHSKnown.One & BKnown.One; + // assume(v << c = a) + } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), + m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + // For those bits in RHS that are known, we can propagate them to known + // bits in V shifted to the right by C. + RHSKnown.Zero.lshrInPlace(C); + Known.Zero |= RHSKnown.Zero; + RHSKnown.One.lshrInPlace(C); + Known.One |= RHSKnown.One; + // assume(~(v << c) = a) + } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), + m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + // For those bits in RHS that are known, we can propagate them inverted + // to known bits in V shifted to the right by C. 
+ RHSKnown.One.lshrInPlace(C); + Known.Zero |= RHSKnown.One; + RHSKnown.Zero.lshrInPlace(C); + Known.One |= RHSKnown.Zero; + // assume(v >> c = a) + } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)), + m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + // For those bits in RHS that are known, we can propagate them to known + // bits in V shifted to the right by C. + Known.Zero |= RHSKnown.Zero << C; + Known.One |= RHSKnown.One << C; + // assume(~(v >> c) = a) + } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))), + m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + // For those bits in RHS that are known, we can propagate them inverted + // to known bits in V shifted to the right by C. + Known.Zero |= RHSKnown.One << C; + Known.One |= RHSKnown.Zero << C; } - // assume(v >_s c) where c is at least -1. - } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SGT && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - - if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) { - // We know that the sign bit is zero. - Known.makeNonNegative(); + break; + case ICmpInst::ICMP_SGE: + // assume(v >=_s c) where c is non-negative + if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I)); + + if (RHSKnown.isNonNegative()) { + // We know that the sign bit is zero. + Known.makeNonNegative(); + } } - // assume(v <=_s c) where c is negative - } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SLE && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - - if (RHSKnown.isNegative()) { - // We know that the sign bit is one. - Known.makeNegative(); + break; + case ICmpInst::ICMP_SGT: + // assume(v >_s c) where c is at least -1. + if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I)); + + if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) { + // We know that the sign bit is zero. + Known.makeNonNegative(); + } } - // assume(v <_s c) where c is non-positive - } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_SLT && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - - if (RHSKnown.isZero() || RHSKnown.isNegative()) { - // We know that the sign bit is one. - Known.makeNegative(); + break; + case ICmpInst::ICMP_SLE: + // assume(v <=_s c) where c is negative + if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I)); + + if (RHSKnown.isNegative()) { + // We know that the sign bit is one. 
+ Known.makeNegative(); + } } - // assume(v <=_u c) - } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_ULE && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - - // Whatever high bits in c are zero are known to be zero. - Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); - // assume(v <_u c) - } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && - Pred == ICmpInst::ICMP_ULT && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - - // If the RHS is known zero, then this assumption must be wrong (nothing - // is unsigned less than zero). Signal a conflict and get out of here. - if (RHSKnown.isZero()) { - Known.Zero.setAllBits(); - Known.One.setAllBits(); - break; + break; + case ICmpInst::ICMP_SLT: + // assume(v <_s c) where c is non-positive + if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + + if (RHSKnown.isZero() || RHSKnown.isNegative()) { + // We know that the sign bit is one. + Known.makeNegative(); + } } - - // Whatever high bits in c are zero are known to be zero (if c is a power - // of 2, then one more). - if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I))) - Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1); - else + break; + case ICmpInst::ICMP_ULE: + // assume(v <=_u c) + if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + + // Whatever high bits in c are zero are known to be zero. Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); + } + break; + case ICmpInst::ICMP_ULT: + // assume(v <_u c) + if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + + // If the RHS is known zero, then this assumption must be wrong (nothing + // is unsigned less than zero). Signal a conflict and get out of here. + if (RHSKnown.isZero()) { + Known.Zero.setAllBits(); + Known.One.setAllBits(); + break; + } + + // Whatever high bits in c are zero are known to be zero (if c is a power + // of 2, then one more). + if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I))) + Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1); + else + Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); + } + break; } } @@ -1129,12 +1132,9 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, Q.DL.getTypeSizeInBits(ScalarTy); assert(SrcBitWidth && "SrcBitWidth can't be zero"); - Known = Known.zextOrTrunc(SrcBitWidth); + Known = Known.zextOrTrunc(SrcBitWidth, false); computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); - Known = Known.zextOrTrunc(BitWidth); - // Any top bits are known to be zero. 
- if (BitWidth > SrcBitWidth) - Known.Zero.setBitsFrom(SrcBitWidth); + Known = Known.zextOrTrunc(BitWidth, true /* ExtendedBitsAreKnownZero */); break; } case Instruction::BitCast: { @@ -1527,6 +1527,37 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt); break; } + case Intrinsic::uadd_sat: + case Intrinsic::usub_sat: { + bool IsAdd = II->getIntrinsicID() == Intrinsic::uadd_sat; + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + + // Add: Leading ones of either operand are preserved. + // Sub: Leading zeros of LHS and leading ones of RHS are preserved + // as leading zeros in the result. + unsigned LeadingKnown; + if (IsAdd) + LeadingKnown = std::max(Known.countMinLeadingOnes(), + Known2.countMinLeadingOnes()); + else + LeadingKnown = std::max(Known.countMinLeadingZeros(), + Known2.countMinLeadingOnes()); + + Known = KnownBits::computeForAddSub( + IsAdd, /* NSW */ false, Known, Known2); + + // We select between the operation result and all-ones/zero + // respectively, so we can preserve known ones/zeros. + if (IsAdd) { + Known.One.setHighBits(LeadingKnown); + Known.Zero.clearAllBits(); + } else { + Known.Zero.setHighBits(LeadingKnown); + Known.One.clearAllBits(); + } + break; + } case Intrinsic::x86_sse42_crc32_64_64: Known.Zero.setBitsFrom(32); break; @@ -1967,6 +1998,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // Must be non-zero due to null test above. return true; + if (auto *CE = dyn_cast<ConstantExpr>(C)) { + // See the comment for IntToPtr/PtrToInt instructions below. + if (CE->getOpcode() == Instruction::IntToPtr || + CE->getOpcode() == Instruction::PtrToInt) + if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) <= + Q.DL.getTypeSizeInBits(CE->getType())) + return isKnownNonZero(CE->getOperand(0), Depth, Q); + } + // For constant vectors, check that all elements are undefined or known // non-zero to determine that the whole vector is known non-zero. if (auto *VecTy = dyn_cast<VectorType>(C->getType())) { @@ -2037,11 +2077,33 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) return true; + // Look through bitcast operations, GEPs, and int2ptr instructions as they + // do not alter the value, or at least not the nullness property of the + // value, e.g., int2ptr is allowed to zero/sign extend the value. + // + // Note that we have to take special care to avoid looking through + // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well + // as casts that can alter the value, e.g., AddrSpaceCasts. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) if (isGEPKnownNonNull(GEP, Depth, Q)) return true; + + if (auto *BCO = dyn_cast<BitCastOperator>(V)) + return isKnownNonZero(BCO->getOperand(0), Depth, Q); + + if (auto *I2P = dyn_cast<IntToPtrInst>(V)) + if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()) <= + Q.DL.getTypeSizeInBits(I2P->getDestTy())) + return isKnownNonZero(I2P->getOperand(0), Depth, Q); } + // Similar to int2ptr above, we can look through ptr2int here if the cast + // is a no-op or an extend and not a truncate. 
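// ---- Editor's note (illustrative, not part of this commit) ----
// Example the new look-through covers, assuming 64-bit pointers:
//   %p = alloca i32               ; known non-null
//   %i = ptrtoint i32* %p to i64  ; 64-bit source, 64-bit destination
// The cast neither truncates nor otherwise changes nullness, so the size
// check below lets isKnownNonZero(%i) recurse into %p and return true.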
+ if (auto *P2I = dyn_cast<PtrToIntInst>(V)) + if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()) <= + Q.DL.getTypeSizeInBits(P2I->getDestTy())) + return isKnownNonZero(P2I->getOperand(0), Depth, Q); + unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL); // X | Y != 0 if X != 0 or Y != 0. @@ -3082,6 +3144,11 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, case Intrinsic::sqrt: return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) && CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI); + case Intrinsic::minnum: + case Intrinsic::maxnum: + // If either operand is not NaN, the result is not NaN. + return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) || + isKnownNeverNaN(II->getArgOperand(1), TLI, Depth + 1); default: return false; } @@ -3107,7 +3174,7 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, return true; } -Value *llvm::isBytewiseValue(Value *V) { +Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { // All byte-wide stores are splatable, even of arbitrary variables. if (V->getType()->isIntegerTy(8)) @@ -3120,6 +3187,10 @@ Value *llvm::isBytewiseValue(Value *V) { if (isa<UndefValue>(V)) return UndefInt8; + const uint64_t Size = DL.getTypeStoreSize(V->getType()); + if (!Size) + return UndefInt8; + Constant *C = dyn_cast<Constant>(V); if (!C) { // Conceptually, we could handle things like: @@ -3146,7 +3217,8 @@ Value *llvm::isBytewiseValue(Value *V) { else if (CFP->getType()->isDoubleTy()) Ty = Type::getInt64Ty(Ctx); // Don't handle long double formats, which have strange constraints. - return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr; + return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL) + : nullptr; } // We can handle constant integers that are multiple of 8 bits. @@ -3159,6 +3231,17 @@ Value *llvm::isBytewiseValue(Value *V) { } } + if (auto *CE = dyn_cast<ConstantExpr>(C)) { + if (CE->getOpcode() == Instruction::IntToPtr) { + auto PS = DL.getPointerSizeInBits( + cast<PointerType>(CE->getType())->getAddressSpace()); + return isBytewiseValue( + ConstantExpr::getIntegerCast(CE->getOperand(0), + Type::getIntNTy(Ctx, PS), false), + DL); + } + } + auto Merge = [&](Value *LHS, Value *RHS) -> Value * { if (LHS == RHS) return LHS; @@ -3174,20 +3257,15 @@ Value *llvm::isBytewiseValue(Value *V) { if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) { Value *Val = UndefInt8; for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I) - if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I))))) + if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL)))) return nullptr; return Val; } - if (isa<ConstantVector>(C)) { - Constant *Splat = cast<ConstantVector>(C)->getSplatValue(); - return Splat ? isBytewiseValue(Splat) : nullptr; - } - - if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) { + if (isa<ConstantAggregate>(C)) { Value *Val = UndefInt8; for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) - if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I))))) + if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL)))) return nullptr; return Val; } @@ -3363,57 +3441,6 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, return nullptr; } -/// Analyze the specified pointer to see if it can be expressed as a base -/// pointer plus a constant offset. Return the base and offset to the caller. 
-Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout &DL) { - unsigned BitWidth = DL.getIndexTypeSizeInBits(Ptr->getType()); - APInt ByteOffset(BitWidth, 0); - - // We walk up the defs but use a visited set to handle unreachable code. In - // that case, we stop after accumulating the cycle once (not that it - // matters). - SmallPtrSet<Value *, 16> Visited; - while (Visited.insert(Ptr).second) { - if (Ptr->getType()->isVectorTy()) - break; - - if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { - // If one of the values we have visited is an addrspacecast, then - // the pointer type of this GEP may be different from the type - // of the Ptr parameter which was passed to this function. This - // means when we construct GEPOffset, we need to use the size - // of GEP's pointer type rather than the size of the original - // pointer type. - APInt GEPOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); - if (!GEP->accumulateConstantOffset(DL, GEPOffset)) - break; - - APInt OrigByteOffset(ByteOffset); - ByteOffset += GEPOffset.sextOrTrunc(ByteOffset.getBitWidth()); - if (ByteOffset.getMinSignedBits() > 64) { - // Stop traversal if the pointer offset wouldn't fit into int64_t - // (this should be removed if Offset is updated to an APInt) - ByteOffset = OrigByteOffset; - break; - } - - Ptr = GEP->getPointerOperand(); - } else if (Operator::getOpcode(Ptr) == Instruction::BitCast || - Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) { - Ptr = cast<Operator>(Ptr)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) { - if (GA->isInterposable()) - break; - Ptr = GA->getAliasee(); - } else { - break; - } - } - Offset = ByteOffset.getSExtValue(); - return Ptr; -} - bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP, unsigned CharSize) { // Make sure the GEP has exactly three arguments. @@ -3638,7 +3665,9 @@ const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) { bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( const CallBase *Call) { return Call->getIntrinsicID() == Intrinsic::launder_invariant_group || - Call->getIntrinsicID() == Intrinsic::strip_invariant_group; + Call->getIntrinsicID() == Intrinsic::strip_invariant_group || + Call->getIntrinsicID() == Intrinsic::aarch64_irg || + Call->getIntrinsicID() == Intrinsic::aarch64_tagp; } /// \p PN defines a loop-variant pointer to an object. Check if the @@ -3717,26 +3746,27 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, return V; } -void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects, +void llvm::GetUnderlyingObjects(const Value *V, + SmallVectorImpl<const Value *> &Objects, const DataLayout &DL, LoopInfo *LI, unsigned MaxLookup) { - SmallPtrSet<Value *, 4> Visited; - SmallVector<Value *, 4> Worklist; + SmallPtrSet<const Value *, 4> Visited; + SmallVector<const Value *, 4> Worklist; Worklist.push_back(V); do { - Value *P = Worklist.pop_back_val(); + const Value *P = Worklist.pop_back_val(); P = GetUnderlyingObject(P, DL, MaxLookup); if (!Visited.insert(P).second) continue; - if (SelectInst *SI = dyn_cast<SelectInst>(P)) { + if (auto *SI = dyn_cast<SelectInst>(P)) { Worklist.push_back(SI->getTrueValue()); Worklist.push_back(SI->getFalseValue()); continue; } - if (PHINode *PN = dyn_cast<PHINode>(P)) { + if (auto *PN = dyn_cast<PHINode>(P)) { // If this PHI changes the underlying object in every iteration of the // loop, don't look through it. 
Consider: // int **A; @@ -3797,10 +3827,10 @@ bool llvm::getUnderlyingObjectsForCodeGen(const Value *V, do { V = Working.pop_back_val(); - SmallVector<Value *, 4> Objs; - GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); + SmallVector<const Value *, 4> Objs; + GetUnderlyingObjects(V, Objs, DL); - for (Value *V : Objs) { + for (const Value *V : Objs) { if (!Visited.insert(V).second) continue; if (Operator::getOpcode(V) == Instruction::IntToPtr) { @@ -3888,7 +3918,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; const DataLayout &DL = LI->getModule()->getDataLayout(); return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), - LI->getAlignment(), DL, CtxI, DT); + LI->getType(), LI->getAlignment(), + DL, CtxI, DT); } case Instruction::Call: { auto *CI = cast<const CallInst>(Inst); @@ -3901,6 +3932,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Instruction::VAArg: case Instruction::Alloca: case Instruction::Invoke: + case Instruction::CallBr: case Instruction::PHI: case Instruction::Store: case Instruction::Ret: @@ -3926,51 +3958,46 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) { return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I); } +/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult. +static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) { + switch (OR) { + case ConstantRange::OverflowResult::MayOverflow: + return OverflowResult::MayOverflow; + case ConstantRange::OverflowResult::AlwaysOverflowsLow: + return OverflowResult::AlwaysOverflowsLow; + case ConstantRange::OverflowResult::AlwaysOverflowsHigh: + return OverflowResult::AlwaysOverflowsHigh; + case ConstantRange::OverflowResult::NeverOverflows: + return OverflowResult::NeverOverflows; + } + llvm_unreachable("Unknown OverflowResult"); +} + +/// Combine constant ranges from computeConstantRange() and computeKnownBits(). +static ConstantRange computeConstantRangeIncludingKnownBits( + const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, + OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) { + KnownBits Known = computeKnownBits( + V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo); + ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned); + ConstantRange CR2 = computeConstantRange(V, UseInstrInfo); + ConstantRange::PreferredRangeType RangeType = + ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned; + return CR1.intersectWith(CR2, RangeType); +} + OverflowResult llvm::computeOverflowForUnsignedMul( const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - // Multiplying n * m significant bits yields a result of n + m significant - // bits. If the total number of significant bits does not exceed the - // result bit width (minus 1), there is no overflow. - // This means if we have enough leading zero bits in the operands - // we can guarantee that the result does not overflow. 
- // Ref: "Hacker's Delight" by Henry Warren - unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - KnownBits LHSKnown(BitWidth); - KnownBits RHSKnown(BitWidth); - computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr, - UseInstrInfo); - computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr, - UseInstrInfo); - // Note that underestimating the number of zero bits gives a more - // conservative answer. - unsigned ZeroBits = LHSKnown.countMinLeadingZeros() + - RHSKnown.countMinLeadingZeros(); - // First handle the easy case: if we have enough zero bits there's - // definitely no overflow. - if (ZeroBits >= BitWidth) - return OverflowResult::NeverOverflows; - - // Get the largest possible values for each operand. - APInt LHSMax = ~LHSKnown.Zero; - APInt RHSMax = ~RHSKnown.Zero; - - // We know the multiply operation doesn't overflow if the maximum values for - // each operand will not overflow after we multiply them together. - bool MaxOverflow; - (void)LHSMax.umul_ov(RHSMax, MaxOverflow); - if (!MaxOverflow) - return OverflowResult::NeverOverflows; - - // We know it always overflows if multiplying the smallest possible values for - // the operands also results in overflow. - bool MinOverflow; - (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow); - if (MinOverflow) - return OverflowResult::AlwaysOverflows; - - return OverflowResult::MayOverflow; + KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, + nullptr, UseInstrInfo); + KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, + nullptr, UseInstrInfo); + ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false); + ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false); + return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange)); } OverflowResult @@ -4020,69 +4047,13 @@ OverflowResult llvm::computeOverflowForUnsignedAdd( const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); - if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) { - KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); - - if (LHSKnown.isNegative() && RHSKnown.isNegative()) { - // The sign bit is set in both cases: this MUST overflow. - return OverflowResult::AlwaysOverflows; - } - - if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) { - // The sign bit is clear in both cases: this CANNOT overflow. - return OverflowResult::NeverOverflows; - } - } - - return OverflowResult::MayOverflow; -} - -/// Return true if we can prove that adding the two values of the -/// knownbits will not overflow. -/// Otherwise return false. -static bool checkRippleForSignedAdd(const KnownBits &LHSKnown, - const KnownBits &RHSKnown) { - // Addition of two 2's complement numbers having opposite signs will never - // overflow. - if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) || - (LHSKnown.isNonNegative() && RHSKnown.isNegative())) - return true; - - // If either of the values is known to be non-negative, adding them can only - // overflow if the second is also non-negative, so we can assume that. - // Two non-negative numbers will only overflow if there is a carry to the - // sign bit, so we can check if even when the values are as big as possible - // there is no overflow to the sign bit. 
- if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) { - APInt MaxLHS = ~LHSKnown.Zero; - MaxLHS.clearSignBit(); - APInt MaxRHS = ~RHSKnown.Zero; - MaxRHS.clearSignBit(); - APInt Result = std::move(MaxLHS) + std::move(MaxRHS); - return Result.isSignBitClear(); - } - - // If either of the values is known to be negative, adding them can only - // overflow if the second is also negative, so we can assume that. - // Two negative number will only overflow if there is no carry to the sign - // bit, so we can check if even when the values are as small as possible - // there is overflow to the sign bit. - if (LHSKnown.isNegative() || RHSKnown.isNegative()) { - APInt MinLHS = LHSKnown.One; - MinLHS.clearSignBit(); - APInt MinRHS = RHSKnown.One; - MinRHS.clearSignBit(); - APInt Result = std::move(MinLHS) + std::move(MinRHS); - return Result.isSignBitSet(); - } - - // If we reached here it means that we know nothing about the sign bits. - // In this case we can't know if there will be an overflow, since by - // changing the sign bits any two values can be made to overflow. - return false; + ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( + LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, + nullptr, UseInstrInfo); + ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( + RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, + nullptr, UseInstrInfo); + return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange)); } static OverflowResult computeOverflowForSignedAdd(const Value *LHS, @@ -4114,30 +4085,35 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) return OverflowResult::NeverOverflows; - KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); - KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); - - if (checkRippleForSignedAdd(LHSKnown, RHSKnown)) - return OverflowResult::NeverOverflows; + ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( + LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( + RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + OverflowResult OR = + mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange)); + if (OR != OverflowResult::MayOverflow) + return OR; // The remaining code needs Add to be available. Early returns if not so. if (!Add) return OverflowResult::MayOverflow; // If the sign of Add is the same as at least one of the operands, this add - // CANNOT overflow. This is particularly useful when the sum is - // @llvm.assume'ed non-negative rather than proved so from analyzing its - // operands. + // CANNOT overflow. If this can be determined from the known bits of the + // operands the above signedAddMayOverflow() check will have already done so. + // The only other way to improve on the known bits is from an assumption, so + // call computeKnownBitsFromAssume() directly. 
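// ---- Editor's note (illustrative, not part of this commit) ----
// Scenario this path is meant to catch: with
//   %add = add i32 %x, %y
//   %c   = icmp sge i32 %add, 0
//   call void @llvm.assume(i1 %c)
// the ranges of %x and %y may leave the sign of the sum open, but the
// assume pins the sign bit of %add to zero; combined with %x (or %y)
// being known non-negative, the addition cannot wrap in the signed
// sense, so NeverOverflows is returned below.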
bool LHSOrRHSKnownNonNegative = - (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()); + (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative()); bool LHSOrRHSKnownNegative = - (LHSKnown.isNegative() || RHSKnown.isNegative()); + (LHSRange.isAllNegative() || RHSRange.isAllNegative()); if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { - KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT); + KnownBits AddKnown(LHSRange.getBitWidth()); + computeKnownBitsFromAssume( + Add, AddKnown, /*Depth=*/0, Query(DL, AC, CxtI, DT, true)); if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || - (AddKnown.isNegative() && LHSOrRHSKnownNegative)) { + (AddKnown.isNegative() && LHSOrRHSKnownNegative)) return OverflowResult::NeverOverflows; - } } return OverflowResult::MayOverflow; @@ -4149,20 +4125,11 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); - if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) { - KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); - - // If the LHS is negative and the RHS is non-negative, no unsigned wrap. - if (LHSKnown.isNegative() && RHSKnown.isNonNegative()) - return OverflowResult::NeverOverflows; - - // If the LHS is non-negative and the RHS negative, we always wrap. - if (LHSKnown.isNonNegative() && RHSKnown.isNegative()) - return OverflowResult::AlwaysOverflows; - } - - return OverflowResult::MayOverflow; + ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( + LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT); + ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( + RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT); + return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange)); } OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, @@ -4177,37 +4144,19 @@ OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) return OverflowResult::NeverOverflows; - KnownBits LHSKnown = computeKnownBits(LHS, DL, 0, AC, CxtI, DT); - - KnownBits RHSKnown = computeKnownBits(RHS, DL, 0, AC, CxtI, DT); - - // Subtraction of two 2's complement numbers having identical signs will - // never overflow. 
- if ((LHSKnown.isNegative() && RHSKnown.isNegative()) || - (LHSKnown.isNonNegative() && RHSKnown.isNonNegative())) - return OverflowResult::NeverOverflows; - - // TODO: implement logic similar to checkRippleForAdd - return OverflowResult::MayOverflow; + ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( + LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( + RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange)); } -bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II, +bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, const DominatorTree &DT) { -#ifndef NDEBUG - auto IID = II->getIntrinsicID(); - assert((IID == Intrinsic::sadd_with_overflow || - IID == Intrinsic::uadd_with_overflow || - IID == Intrinsic::ssub_with_overflow || - IID == Intrinsic::usub_with_overflow || - IID == Intrinsic::smul_with_overflow || - IID == Intrinsic::umul_with_overflow) && - "Not an overflow intrinsic!"); -#endif - SmallVector<const BranchInst *, 2> GuardingBranches; SmallVector<const ExtractValueInst *, 2> Results; - for (const User *U : II->users()) { + for (const User *U : WO->users()) { if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) { assert(EVI->getNumIndices() == 1 && "Obvious from CI's type"); @@ -4307,6 +4256,11 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { if (!CS.doesNotThrow()) return false; + // A function which doens't throw and has "willreturn" attribute will + // always return. + if (CS.hasFnAttr(Attribute::WillReturn)) + return true; + // Non-throwing call sites can loop infinitely, call exit/pthread_exit // etc. and thus not return. However, LLVM already assumes that // @@ -4325,7 +4279,8 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { // is guaranteed to return. return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() || match(I, m_Intrinsic<Intrinsic::assume>()) || - match(I, m_Intrinsic<Intrinsic::sideeffect>()); + match(I, m_Intrinsic<Intrinsic::sideeffect>()) || + match(I, m_Intrinsic<Intrinsic::experimental_widenable_condition>()); } // Other instructions return normally. @@ -4333,7 +4288,7 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) { - // TODO: This is slightly consdervative for invoke instruction since exiting + // TODO: This is slightly conservative for invoke instruction since exiting // via an exception *is* normal control for them. for (auto I = BB->begin(), E = BB->end(); I != E; ++I) if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) @@ -4357,6 +4312,8 @@ bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, } bool llvm::propagatesFullPoison(const Instruction *I) { + // TODO: This should include all instructions apart from phis, selects and + // call-like instructions. switch (I->getOpcode()) { case Instruction::Add: case Instruction::Sub: @@ -4409,10 +4366,21 @@ const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) { return I->getOperand(1); default: + // Note: It's really tempting to think that a conditional branch or + // switch should be listed here, but that's incorrect. It's not + // branching off of poison which is UB, it is executing a side effecting + // instruction which follows the branch. 
return nullptr; } } +bool llvm::mustTriggerUB(const Instruction *I, + const SmallSet<const Value *, 16>& KnownPoison) { + auto *NotPoison = getGuaranteedNonFullPoisonOp(I); + return (NotPoison && KnownPoison.count(NotPoison)); +} + + bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) { // We currently only look for uses of poison values within the same basic // block, as that makes it easier to guarantee that the uses will be @@ -4436,8 +4404,7 @@ bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) { while (Iter++ < MaxDepth) { for (auto &I : make_range(Begin, End)) { if (&I != PoisonI) { - const Value *NotPoison = getGuaranteedNonFullPoisonOp(&I); - if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) + if (mustTriggerUB(&I, YieldsPoison)) return true; if (!isGuaranteedToTransferExecutionToSuccessor(&I)) return false; @@ -4926,6 +4893,10 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) return {SPF_ABS, SPNB_NA, false}; + // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X) + if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne)) + return {SPF_ABS, SPNB_NA, false}; + // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X) // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X) if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) @@ -5084,11 +5055,19 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition()); if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false}; + Value *TrueVal = SI->getTrueValue(); + Value *FalseVal = SI->getFalseValue(); + + return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS, + CastOp, Depth); +} + +SelectPatternResult llvm::matchDecomposedSelectPattern( + CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS, + Instruction::CastOps *CastOp, unsigned Depth) { CmpInst::Predicate Pred = CmpI->getPredicate(); Value *CmpLHS = CmpI->getOperand(0); Value *CmpRHS = CmpI->getOperand(1); - Value *TrueVal = SI->getTrueValue(); - Value *FalseVal = SI->getFalseValue(); FastMathFlags FMF; if (isa<FPMathOperator>(CmpI)) FMF = CmpI->getFastMathFlags(); @@ -5430,3 +5409,298 @@ Optional<bool> llvm::isImpliedByDomCondition(const Value *Cond, bool CondIsTrue = TrueBB == ContextBB; return isImpliedCondition(PredCond, Cond, DL, CondIsTrue); } + +static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, + APInt &Upper, const InstrInfoQuery &IIQ) { + unsigned Width = Lower.getBitWidth(); + const APInt *C; + switch (BO.getOpcode()) { + case Instruction::Add: + if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { + // FIXME: If we have both nuw and nsw, we should reduce the range further. + if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) { + // 'add nuw x, C' produces [C, UINT_MAX]. + Lower = *C; + } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) { + if (C->isNegative()) { + // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C]. + Lower = APInt::getSignedMinValue(Width); + Upper = APInt::getSignedMaxValue(Width) + *C + 1; + } else { + // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX]. + Lower = APInt::getSignedMinValue(Width) + *C; + Upper = APInt::getSignedMaxValue(Width) + 1; + } + } + } + break; + + case Instruction::And: + if (match(BO.getOperand(1), m_APInt(C))) + // 'and x, C' produces [0, C]. 
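// ---- Editor's note (illustrative, not part of this commit) ----
// e.g. for 'and i8 %x, 12' the result is always in [0, 12]; the half-open
// upper bound set below is therefore C + 1 = 13.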
+ Upper = *C + 1; + break; + + case Instruction::Or: + if (match(BO.getOperand(1), m_APInt(C))) + // 'or x, C' produces [C, UINT_MAX]. + Lower = *C; + break; + + case Instruction::AShr: + if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { + // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C]. + Lower = APInt::getSignedMinValue(Width).ashr(*C); + Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1; + } else if (match(BO.getOperand(0), m_APInt(C))) { + unsigned ShiftAmount = Width - 1; + if (!C->isNullValue() && IIQ.isExact(&BO)) + ShiftAmount = C->countTrailingZeros(); + if (C->isNegative()) { + // 'ashr C, x' produces [C, C >> (Width-1)] + Lower = *C; + Upper = C->ashr(ShiftAmount) + 1; + } else { + // 'ashr C, x' produces [C >> (Width-1), C] + Lower = C->ashr(ShiftAmount); + Upper = *C + 1; + } + } + break; + + case Instruction::LShr: + if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { + // 'lshr x, C' produces [0, UINT_MAX >> C]. + Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1; + } else if (match(BO.getOperand(0), m_APInt(C))) { + // 'lshr C, x' produces [C >> (Width-1), C]. + unsigned ShiftAmount = Width - 1; + if (!C->isNullValue() && IIQ.isExact(&BO)) + ShiftAmount = C->countTrailingZeros(); + Lower = C->lshr(ShiftAmount); + Upper = *C + 1; + } + break; + + case Instruction::Shl: + if (match(BO.getOperand(0), m_APInt(C))) { + if (IIQ.hasNoUnsignedWrap(&BO)) { + // 'shl nuw C, x' produces [C, C << CLZ(C)] + Lower = *C; + Upper = Lower.shl(Lower.countLeadingZeros()) + 1; + } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw? + if (C->isNegative()) { + // 'shl nsw C, x' produces [C << CLO(C)-1, C] + unsigned ShiftAmount = C->countLeadingOnes() - 1; + Lower = C->shl(ShiftAmount); + Upper = *C + 1; + } else { + // 'shl nsw C, x' produces [C, C << CLZ(C)-1] + unsigned ShiftAmount = C->countLeadingZeros() - 1; + Lower = *C; + Upper = C->shl(ShiftAmount) + 1; + } + } + } + break; + + case Instruction::SDiv: + if (match(BO.getOperand(1), m_APInt(C))) { + APInt IntMin = APInt::getSignedMinValue(Width); + APInt IntMax = APInt::getSignedMaxValue(Width); + if (C->isAllOnesValue()) { + // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] + // where C != -1 and C != 0 and C != 1 + Lower = IntMin + 1; + Upper = IntMax + 1; + } else if (C->countLeadingZeros() < Width - 1) { + // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C] + // where C != -1 and C != 0 and C != 1 + Lower = IntMin.sdiv(*C); + Upper = IntMax.sdiv(*C); + if (Lower.sgt(Upper)) + std::swap(Lower, Upper); + Upper = Upper + 1; + assert(Upper != Lower && "Upper part of range has wrapped!"); + } + } else if (match(BO.getOperand(0), m_APInt(C))) { + if (C->isMinSignedValue()) { + // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2]. + Lower = *C; + Upper = Lower.lshr(1) + 1; + } else { + // 'sdiv C, x' produces [-|C|, |C|]. + Upper = C->abs() + 1; + Lower = (-Upper) + 1; + } + } + break; + + case Instruction::UDiv: + if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { + // 'udiv x, C' produces [0, UINT_MAX / C]. + Upper = APInt::getMaxValue(Width).udiv(*C) + 1; + } else if (match(BO.getOperand(0), m_APInt(C))) { + // 'udiv C, x' produces [0, C]. + Upper = *C + 1; + } + break; + + case Instruction::SRem: + if (match(BO.getOperand(1), m_APInt(C))) { + // 'srem x, C' produces (-|C|, |C|). + Upper = C->abs(); + Lower = (-Upper) + 1; + } + break; + + case Instruction::URem: + if (match(BO.getOperand(1), m_APInt(C))) + // 'urem x, C' produces [0, C). 
+ Upper = *C; + break; + + default: + break; + } +} + +static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower, + APInt &Upper) { + unsigned Width = Lower.getBitWidth(); + const APInt *C; + switch (II.getIntrinsicID()) { + case Intrinsic::uadd_sat: + // uadd.sat(x, C) produces [C, UINT_MAX]. + if (match(II.getOperand(0), m_APInt(C)) || + match(II.getOperand(1), m_APInt(C))) + Lower = *C; + break; + case Intrinsic::sadd_sat: + if (match(II.getOperand(0), m_APInt(C)) || + match(II.getOperand(1), m_APInt(C))) { + if (C->isNegative()) { + // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)]. + Lower = APInt::getSignedMinValue(Width); + Upper = APInt::getSignedMaxValue(Width) + *C + 1; + } else { + // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX]. + Lower = APInt::getSignedMinValue(Width) + *C; + Upper = APInt::getSignedMaxValue(Width) + 1; + } + } + break; + case Intrinsic::usub_sat: + // usub.sat(C, x) produces [0, C]. + if (match(II.getOperand(0), m_APInt(C))) + Upper = *C + 1; + // usub.sat(x, C) produces [0, UINT_MAX - C]. + else if (match(II.getOperand(1), m_APInt(C))) + Upper = APInt::getMaxValue(Width) - *C + 1; + break; + case Intrinsic::ssub_sat: + if (match(II.getOperand(0), m_APInt(C))) { + if (C->isNegative()) { + // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)]. + Lower = APInt::getSignedMinValue(Width); + Upper = *C - APInt::getSignedMinValue(Width) + 1; + } else { + // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX]. + Lower = *C - APInt::getSignedMaxValue(Width); + Upper = APInt::getSignedMaxValue(Width) + 1; + } + } else if (match(II.getOperand(1), m_APInt(C))) { + if (C->isNegative()) { + // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]: + Lower = APInt::getSignedMinValue(Width) - *C; + Upper = APInt::getSignedMaxValue(Width) + 1; + } else { + // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C]. + Lower = APInt::getSignedMinValue(Width); + Upper = APInt::getSignedMaxValue(Width) - *C + 1; + } + } + break; + default: + break; + } +} + +static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower, + APInt &Upper) { + const Value *LHS, *RHS; + SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS); + if (R.Flavor == SPF_UNKNOWN) + return; + + unsigned BitWidth = SI.getType()->getScalarSizeInBits(); + + if (R.Flavor == SelectPatternFlavor::SPF_ABS) { + // If the negation part of the abs (in RHS) has the NSW flag, + // then the result of abs(X) is [0..SIGNED_MAX], + // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. + Lower = APInt::getNullValue(BitWidth); + if (cast<Instruction>(RHS)->hasNoSignedWrap()) + Upper = APInt::getSignedMaxValue(BitWidth) + 1; + else + Upper = APInt::getSignedMinValue(BitWidth) + 1; + return; + } + + if (R.Flavor == SelectPatternFlavor::SPF_NABS) { + // The result of -abs(X) is <= 0. 
+ Lower = APInt::getSignedMinValue(BitWidth); + Upper = APInt(BitWidth, 1); + return; + } + + const APInt *C; + if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C))) + return; + + switch (R.Flavor) { + case SPF_UMIN: + Upper = *C + 1; + break; + case SPF_UMAX: + Lower = *C; + break; + case SPF_SMIN: + Lower = APInt::getSignedMinValue(BitWidth); + Upper = *C + 1; + break; + case SPF_SMAX: + Lower = *C; + Upper = APInt::getSignedMaxValue(BitWidth) + 1; + break; + default: + break; + } +} + +ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { + assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction"); + + const APInt *C; + if (match(V, m_APInt(C))) + return ConstantRange(*C); + + InstrInfoQuery IIQ(UseInstrInfo); + unsigned BitWidth = V->getType()->getScalarSizeInBits(); + APInt Lower = APInt(BitWidth, 0); + APInt Upper = APInt(BitWidth, 0); + if (auto *BO = dyn_cast<BinaryOperator>(V)) + setLimitsForBinOp(*BO, Lower, Upper, IIQ); + else if (auto *II = dyn_cast<IntrinsicInst>(V)) + setLimitsForIntrinsic(*II, Lower, Upper); + else if (auto *SI = dyn_cast<SelectInst>(V)) + setLimitsForSelectPattern(*SI, Lower, Upper); + + ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper); + + if (auto *I = dyn_cast<Instruction>(V)) + if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range)) + CR = CR.intersectWith(getConstantRangeFromMetadata(*Range)); + + return CR; +} diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 5656a19d7e0d..986756eb2627 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -1,9 +1,8 @@ //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -38,8 +37,9 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor( cl::init(8)); /// Return true if all of the intrinsic's arguments and return type are scalars -/// for the scalar form of the intrinsic and vectors for the vector form of the -/// intrinsic. +/// for the scalar form of the intrinsic, and vectors for the vector form of the +/// intrinsic (except operands that are marked as always being scalar by +/// hasVectorInstrinsicScalarOpd). bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { switch (ID) { case Intrinsic::bswap: // Begin integer bit-manipulation. @@ -49,6 +49,13 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::cttz: case Intrinsic::fshl: case Intrinsic::fshr: + case Intrinsic::sadd_sat: + case Intrinsic::ssub_sat: + case Intrinsic::uadd_sat: + case Intrinsic::usub_sat: + case Intrinsic::smul_fix: + case Intrinsic::smul_fix_sat: + case Intrinsic::umul_fix: case Intrinsic::sqrt: // Begin floating-point. case Intrinsic::sin: case Intrinsic::cos: @@ -74,18 +81,13 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::fmuladd: case Intrinsic::powi: case Intrinsic::canonicalize: - case Intrinsic::sadd_sat: - case Intrinsic::ssub_sat: - case Intrinsic::uadd_sat: - case Intrinsic::usub_sat: return true; default: return false; } } -/// Identifies if the intrinsic has a scalar operand. 
It check for -/// ctlz,cttz and powi special intrinsics whose argument is scalar. +/// Identifies if the vector form of the intrinsic has a scalar operand. bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx) { switch (ID) { @@ -93,6 +95,10 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, case Intrinsic::cttz: case Intrinsic::powi: return (ScalarOpdIdx == 1); + case Intrinsic::smul_fix: + case Intrinsic::smul_fix_sat: + case Intrinsic::umul_fix: + return (ScalarOpdIdx == 2); default: return false; } @@ -300,30 +306,60 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) { /// Get splat value if the input is a splat vector or return nullptr. /// This function is not fully general. It checks only 2 cases: -/// the input value is (1) a splat constants vector or (2) a sequence -/// of instructions that broadcast a single value into a vector. -/// +/// the input value is (1) a splat constant vector or (2) a sequence +/// of instructions that broadcasts a scalar at element 0. const llvm::Value *llvm::getSplatValue(const Value *V) { - - if (auto *C = dyn_cast<Constant>(V)) - if (isa<VectorType>(V->getType())) + if (isa<VectorType>(V->getType())) + if (auto *C = dyn_cast<Constant>(V)) return C->getSplatValue(); - auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V); - if (!ShuffleInst) - return nullptr; - // All-zero (or undef) shuffle mask elements. - for (int MaskElt : ShuffleInst->getShuffleMask()) - if (MaskElt != 0 && MaskElt != -1) - return nullptr; - // The first shuffle source is 'insertelement' with index 0. - auto *InsertEltInst = - dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0)); - if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) || - !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero()) - return nullptr; + // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...> + Value *Splat; + if (match(V, m_ShuffleVector(m_InsertElement(m_Value(), m_Value(Splat), + m_ZeroInt()), + m_Value(), m_ZeroInt()))) + return Splat; - return InsertEltInst->getOperand(1); + return nullptr; +} + +// This setting is based on its counterpart in value tracking, but it could be +// adjusted if needed. +const unsigned MaxDepth = 6; + +bool llvm::isSplatValue(const Value *V, unsigned Depth) { + assert(Depth <= MaxDepth && "Limit Search Depth"); + + if (isa<VectorType>(V->getType())) { + if (isa<UndefValue>(V)) + return true; + // FIXME: Constant splat analysis does not allow undef elements. + if (auto *C = dyn_cast<Constant>(V)) + return C->getSplatValue() != nullptr; + } + + // FIXME: Constant splat analysis does not allow undef elements. + Constant *Mask; + if (match(V, m_ShuffleVector(m_Value(), m_Value(), m_Constant(Mask)))) + return Mask->getSplatValue() != nullptr; + + // The remaining tests are all recursive, so bail out if we hit the limit. + if (Depth++ == MaxDepth) + return false; + + // If both operands of a binop are splats, the result is a splat. + Value *X, *Y, *Z; + if (match(V, m_BinOp(m_Value(X), m_Value(Y)))) + return isSplatValue(X, Depth) && isSplatValue(Y, Depth); + + // If all operands of a select are splats, the result is a splat. + if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z)))) + return isSplatValue(X, Depth) && isSplatValue(Y, Depth) && + isSplatValue(Z, Depth); + + // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops). 
+ + return false; } MapVector<Instruction *, uint64_t> @@ -711,6 +747,52 @@ Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) { return ResList[0]; } +bool llvm::maskIsAllZeroOrUndef(Value *Mask) { + auto *ConstMask = dyn_cast<Constant>(Mask); + if (!ConstMask) + return false; + if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask)) + return true; + for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E; + ++I) { + if (auto *MaskElt = ConstMask->getAggregateElement(I)) + if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt)) + continue; + return false; + } + return true; +} + + +bool llvm::maskIsAllOneOrUndef(Value *Mask) { + auto *ConstMask = dyn_cast<Constant>(Mask); + if (!ConstMask) + return false; + if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask)) + return true; + for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E; + ++I) { + if (auto *MaskElt = ConstMask->getAggregateElement(I)) + if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt)) + continue; + return false; + } + return true; +} + +/// TODO: This is a lot like known bits, but for +/// vectors. Is there something we can common this with? +APInt llvm::possiblyDemandedEltsInMask(Value *Mask) { + + const unsigned VWidth = cast<VectorType>(Mask->getType())->getNumElements(); + APInt DemandedElts = APInt::getAllOnesValue(VWidth); + if (auto *CV = dyn_cast<ConstantVector>(Mask)) + for (unsigned i = 0; i < VWidth; i++) + if (CV->getAggregateElement(i)->isNullValue()) + DemandedElts.clearBit(i); + return DemandedElts; +} + bool InterleavedAccessInfo::isStrided(int Stride) { unsigned Factor = std::abs(Stride); return Factor >= 2 && Factor <= MaxInterleaveGroupFactor; @@ -992,7 +1074,7 @@ void InterleavedAccessInfo::analyzeInterleaving( // that all the pointers in the group don't wrap. // So we check only group member 0 (which is always guaranteed to exist), // and group member Factor - 1; If the latter doesn't exist we rely on - // peeling (if it is a non-reveresed accsess -- see Case 3). + // peeling (if it is a non-reversed accsess -- see Case 3). Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0)); if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { |
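As a usage sketch (not part of the patch above), the snippet below shows how a client might consume two of the helpers this change introduces: computeConstantRange() from ValueTracking.cpp and isSplatValue() from VectorUtils.cpp, whose definitions appear in the hunks above. The header paths, the wrapper names, and the surrounding code are illustrative assumptions, not code from this commit.

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ValueTracking.h" // assumed declaration site of computeConstantRange()
#include "llvm/Analysis/VectorUtils.h"   // assumed declaration site of isSplatValue()
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Illustrative helper (hypothetical, not from this commit): V is provably
// non-negative if the flag/metadata-driven range built above by
// setLimitsForBinOp, setLimitsForIntrinsic, setLimitsForSelectPattern and
// !range metadata cannot reach a negative value, e.g. 'lshr x, C',
// 'urem x, C' or an abs() select pattern.
// Note: computeConstantRange() asserts an integer or integer-vector type.
static bool isNonNegativeViaConstantRange(const Value *V) {
  ConstantRange CR = computeConstantRange(V, /*UseInstrInfo=*/true);
  // The signed minimum of the range bounds every value V can take, so a
  // non-negative minimum rules out negative values entirely.
  return CR.getSignedMin().isNonNegative();
}

// Illustrative helper (hypothetical): treat a vector operand as a scalar
// broadcast when isSplatValue() can prove it, including through binops and
// selects whose operands are themselves splats (see the recursion above).
static bool operandIsBroadcast(const Value *VecOp) {
  return isSplatValue(VecOp, /*Depth=*/0);
}

Both calls take plain Value pointers and need no analysis-manager state, which matches how the definitions above consult only instruction flags, metadata and operand patterns.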
