Diffstat (limited to 'lib/Transforms/IPO')
22 files changed, 5331 insertions, 1179 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 95a9f31cced3..dd9f74a881ee 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -304,7 +304,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // of the previous load. LoadInst *newLoad = IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val"); - newLoad->setAlignment(OrigLoad->getAlignment()); + newLoad->setAlignment(MaybeAlign(OrigLoad->getAlignment())); // Transfer the AA info too. AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); diff --git a/lib/Transforms/IPO/Attributor.cpp b/lib/Transforms/IPO/Attributor.cpp index 2a52c6b9b4ad..95f47345d8fd 100644 --- a/lib/Transforms/IPO/Attributor.cpp +++ b/lib/Transforms/IPO/Attributor.cpp @@ -16,11 +16,15 @@ #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -30,6 +34,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" + #include <cassert> using namespace llvm; @@ -46,19 +53,50 @@ STATISTIC(NumAttributesValidFixpoint, "Number of abstract attributes in a valid fixpoint state"); STATISTIC(NumAttributesManifested, "Number of abstract attributes manifested in IR"); -STATISTIC(NumFnNoUnwind, "Number of functions marked nounwind"); - -STATISTIC(NumFnUniqueReturned, "Number of function with unique return"); -STATISTIC(NumFnKnownReturns, "Number of function with known return values"); -STATISTIC(NumFnArgumentReturned, - "Number of function arguments marked returned"); -STATISTIC(NumFnNoSync, "Number of functions marked nosync"); -STATISTIC(NumFnNoFree, "Number of functions marked nofree"); -STATISTIC(NumFnReturnedNonNull, - "Number of function return values marked nonnull"); -STATISTIC(NumFnArgumentNonNull, "Number of function arguments marked nonnull"); -STATISTIC(NumCSArgumentNonNull, "Number of call site arguments marked nonnull"); -STATISTIC(NumFnWillReturn, "Number of functions marked willreturn"); + +// Some helper macros to deal with statistics tracking. +// +// Usage: +// For simple IR attribute tracking overload trackStatistics in the abstract +// attribute and choose the right STATS_DECLTRACK_********* macro, +// e.g.,: +// void trackStatistics() const override { +// STATS_DECLTRACK_ARG_ATTR(returned) +// } +// If there is a single "increment" side one can use the macro +// STATS_DECLTRACK with a custom message. If there are multiple increment +// sides, STATS_DECL and STATS_TRACK can also be used separately. 
+// +#define BUILD_STAT_MSG_IR_ATTR(TYPE, NAME) \ + ("Number of " #TYPE " marked '" #NAME "'") +#define BUILD_STAT_NAME(NAME, TYPE) NumIR##TYPE##_##NAME +#define STATS_DECL_(NAME, MSG) STATISTIC(NAME, MSG); +#define STATS_DECL(NAME, TYPE, MSG) \ + STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG); +#define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE)); +#define STATS_DECLTRACK(NAME, TYPE, MSG) \ + { \ + STATS_DECL(NAME, TYPE, MSG) \ + STATS_TRACK(NAME, TYPE) \ + } +#define STATS_DECLTRACK_ARG_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME)) +#define STATS_DECLTRACK_CSARG_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CSArguments, \ + BUILD_STAT_MSG_IR_ATTR(call site arguments, NAME)) +#define STATS_DECLTRACK_FN_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Function, BUILD_STAT_MSG_IR_ATTR(functions, NAME)) +#define STATS_DECLTRACK_CS_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CS, BUILD_STAT_MSG_IR_ATTR(call site, NAME)) +#define STATS_DECLTRACK_FNRET_ATTR(NAME) \ + STATS_DECLTRACK(NAME, FunctionReturn, \ + BUILD_STAT_MSG_IR_ATTR(function returns, NAME)) +#define STATS_DECLTRACK_CSRET_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CSReturn, \ + BUILD_STAT_MSG_IR_ATTR(call site returns, NAME)) +#define STATS_DECLTRACK_FLOATING_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Floating, \ + ("Number of floating values known to be '" #NAME "'")) // TODO: Determine a good default value. // @@ -72,18 +110,32 @@ static cl::opt<unsigned> MaxFixpointIterations("attributor-max-iterations", cl::Hidden, cl::desc("Maximal number of fixpoint iterations."), cl::init(32)); +static cl::opt<bool> VerifyMaxFixpointIterations( + "attributor-max-iterations-verify", cl::Hidden, + cl::desc("Verify that max-iterations is a tight bound for a fixpoint"), + cl::init(false)); static cl::opt<bool> DisableAttributor( "attributor-disable", cl::Hidden, cl::desc("Disable the attributor inter-procedural deduction pass."), cl::init(true)); -static cl::opt<bool> VerifyAttributor( - "attributor-verify", cl::Hidden, - cl::desc("Verify the Attributor deduction and " - "manifestation of attributes -- may issue false-positive errors"), +static cl::opt<bool> ManifestInternal( + "attributor-manifest-internal", cl::Hidden, + cl::desc("Manifest Attributor internal string attributes."), cl::init(false)); +static cl::opt<unsigned> DepRecInterval( + "attributor-dependence-recompute-interval", cl::Hidden, + cl::desc("Number of iterations until dependences are recomputed."), + cl::init(4)); + +static cl::opt<bool> EnableHeapToStack("enable-heap-to-stack-conversion", + cl::init(true), cl::Hidden); + +static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128), + cl::Hidden); + /// Logic operators for the change status enum class. /// ///{ @@ -95,78 +147,30 @@ ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) { } ///} -/// Helper to adjust the statistics. 
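As an aside (not part of the patch): expanding one of these macros by hand shows the machinery. Following the #define chain above, STATS_DECLTRACK_ARG_ATTR(returned) becomes, roughly:

{
  STATISTIC(NumIRArguments_returned, "Number of arguments marked 'returned'");
  ++(NumIRArguments_returned);
}

i.e., a STATISTIC counter named via BUILD_STAT_NAME plus an increment, so each trackStatistics() override both declares and bumps its own counter.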
-static void bookkeeping(AbstractAttribute::ManifestPosition MP, - const Attribute &Attr) { - if (!AreStatisticsEnabled()) - return; - - if (!Attr.isEnumAttribute()) - return; - switch (Attr.getKindAsEnum()) { - case Attribute::NoUnwind: - NumFnNoUnwind++; - return; - case Attribute::Returned: - NumFnArgumentReturned++; - return; - case Attribute::NoSync: - NumFnNoSync++; - break; - case Attribute::NoFree: - NumFnNoFree++; - break; - case Attribute::NonNull: - switch (MP) { - case AbstractAttribute::MP_RETURNED: - NumFnReturnedNonNull++; - break; - case AbstractAttribute::MP_ARGUMENT: - NumFnArgumentNonNull++; - break; - case AbstractAttribute::MP_CALL_SITE_ARGUMENT: - NumCSArgumentNonNull++; - break; - default: - break; - } - break; - case Attribute::WillReturn: - NumFnWillReturn++; - break; - default: - return; - } -} - -template <typename StateTy> -using followValueCB_t = std::function<bool(Value *, StateTy &State)>; -template <typename StateTy> -using visitValueCB_t = std::function<void(Value *, StateTy &State)>; - -/// Recursively visit all values that might become \p InitV at some point. This +/// Recursively visit all values that might become \p IRP at some point. This /// will be done by looking through cast instructions, selects, phis, and calls -/// with the "returned" attribute. The callback \p FollowValueCB is asked before -/// a potential origin value is looked at. If no \p FollowValueCB is passed, a -/// default one is used that will make sure we visit every value only once. Once -/// we cannot look through the value any further, the callback \p VisitValueCB -/// is invoked and passed the current value and the \p State. To limit how much -/// effort is invested, we will never visit more than \p MaxValues values. -template <typename StateTy> +/// with the "returned" attribute. Once we cannot look through the value any +/// further, the callback \p VisitValueCB is invoked and passed the current +/// value, the \p State, and a flag to indicate if we stripped anything. To +/// limit how much effort is invested, we will never visit more values than +/// specified by \p MaxValues. +template <typename AAType, typename StateTy> static bool genericValueTraversal( - Value *InitV, StateTy &State, visitValueCB_t<StateTy> &VisitValueCB, - followValueCB_t<StateTy> *FollowValueCB = nullptr, int MaxValues = 8) { - + Attributor &A, IRPosition IRP, const AAType &QueryingAA, StateTy &State, + const function_ref<bool(Value &, StateTy &, bool)> &VisitValueCB, + int MaxValues = 8) { + + const AAIsDead *LivenessAA = nullptr; + if (IRP.getAnchorScope()) + LivenessAA = &A.getAAFor<AAIsDead>( + QueryingAA, IRPosition::function(*IRP.getAnchorScope()), + /* TrackDependence */ false); + bool AnyDead = false; + + // TODO: Use Positions here to allow context sensitivity in VisitValueCB SmallPtrSet<Value *, 16> Visited; - followValueCB_t<bool> DefaultFollowValueCB = [&](Value *Val, bool &) { - return Visited.insert(Val).second; - }; - - if (!FollowValueCB) - FollowValueCB = &DefaultFollowValueCB; - SmallVector<Value *, 16> Worklist; - Worklist.push_back(InitV); + Worklist.push_back(&IRP.getAssociatedValue()); int Iteration = 0; do { @@ -174,7 +178,7 @@ static bool genericValueTraversal( // Check if we should process the current value. To prevent endless // recursion keep a record of the values we followed! - if (!(*FollowValueCB)(V, State)) + if (!Visited.insert(V).second) continue; // Make sure we limit the compile time for complex expressions. 
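To illustrate the new traversal interface (a sketch, not part of the patch; `A`, `QueryingAA`, `V`, and the attribute type `AAValueExample` are hypothetical stand-ins), a caller provides the state object and the leaf callback explicitly:

// Collect every leaf value the position may resolve to.
SmallPtrSet<Value *, 8> Leaves;
auto VisitValueCB = [](Value &V, SmallPtrSet<Value *, 8> &S,
                       bool /*Stripped*/) -> bool {
  S.insert(&V); // Casts, selects, and live PHI operands were looked through.
  return true;  // Keep processing the remaining worklist entries.
};
bool Complete = genericValueTraversal<AAValueExample, SmallPtrSet<Value *, 8>>(
    A, IRPosition::value(V), QueryingAA, Leaves, VisitValueCB);
// 'false' means the MaxValues budget was hit or the callback aborted, so the
// collected set may be incomplete and callers typically give up optimism.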
@@ -183,23 +187,23 @@ static bool genericValueTraversal( // Explicitly look through calls with a "returned" attribute if we do // not have a pointer as stripPointerCasts only works on them. + Value *NewV = nullptr; if (V->getType()->isPointerTy()) { - V = V->stripPointerCasts(); + NewV = V->stripPointerCasts(); } else { CallSite CS(V); if (CS && CS.getCalledFunction()) { - Value *NewV = nullptr; for (Argument &Arg : CS.getCalledFunction()->args()) if (Arg.hasReturnedAttr()) { NewV = CS.getArgOperand(Arg.getArgNo()); break; } - if (NewV) { - Worklist.push_back(NewV); - continue; - } } } + if (NewV && NewV != V) { + Worklist.push_back(NewV); + continue; + } // Look through select instructions, visit both potential values. if (auto *SI = dyn_cast<SelectInst>(V)) { @@ -208,35 +212,34 @@ static bool genericValueTraversal( continue; } - // Look through phi nodes, visit all operands. + // Look through phi nodes, visit all live operands. if (auto *PHI = dyn_cast<PHINode>(V)) { - Worklist.append(PHI->op_begin(), PHI->op_end()); + assert(LivenessAA && + "Expected liveness in the presence of instructions!"); + for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { + const BasicBlock *IncomingBB = PHI->getIncomingBlock(u); + if (LivenessAA->isAssumedDead(IncomingBB->getTerminator())) { + AnyDead = true; + continue; + } + Worklist.push_back(PHI->getIncomingValue(u)); + } continue; } // Once a leaf is reached we inform the user through the callback. - VisitValueCB(V, State); + if (!VisitValueCB(*V, State, Iteration > 1)) + return false; } while (!Worklist.empty()); + // If we actually used liveness information we have to record a dependence. + if (AnyDead) + A.recordDependence(*LivenessAA, QueryingAA); + // All values have been visited. return true; } -/// Helper to identify the correct offset into an attribute list. -static unsigned getAttrIndex(AbstractAttribute::ManifestPosition MP, - unsigned ArgNo = 0) { - switch (MP) { - case AbstractAttribute::MP_ARGUMENT: - case AbstractAttribute::MP_CALL_SITE_ARGUMENT: - return ArgNo + AttributeList::FirstArgIndex; - case AbstractAttribute::MP_FUNCTION: - return AttributeList::FunctionIndex; - case AbstractAttribute::MP_RETURNED: - return AttributeList::ReturnIndex; - } - llvm_unreachable("Unknown manifest position!"); -} - /// Return true if \p New is equal or worse than \p Old. static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { if (!Old.isIntAttribute()) @@ -247,12 +250,9 @@ static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { /// Return true if the information provided by \p Attr was added to the /// attribute list \p Attrs. This is only the case if it was not already present -/// in \p Attrs at the position describe by \p MP and \p ArgNo. +/// in \p Attrs at the position described by \p PK and \p AttrIdx. 
static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr, - AttributeList &Attrs, - AbstractAttribute::ManifestPosition MP, - unsigned ArgNo = 0) { - unsigned AttrIdx = getAttrIndex(MP, ArgNo); + AttributeList &Attrs, int AttrIdx) { if (Attr.isEnumAttribute()) { Attribute::AttrKind Kind = Attr.getKindAsEnum(); @@ -270,9 +270,47 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr, Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); return true; } + if (Attr.isIntAttribute()) { + Attribute::AttrKind Kind = Attr.getKindAsEnum(); + if (Attrs.hasAttribute(AttrIdx, Kind)) + if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind))) + return false; + Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind); + Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); + return true; + } llvm_unreachable("Expected enum or string attribute!"); } +static const Value *getPointerOperand(const Instruction *I) { + if (auto *LI = dyn_cast<LoadInst>(I)) + if (!LI->isVolatile()) + return LI->getPointerOperand(); + + if (auto *SI = dyn_cast<StoreInst>(I)) + if (!SI->isVolatile()) + return SI->getPointerOperand(); + + if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(I)) + if (!CXI->isVolatile()) + return CXI->getPointerOperand(); + + if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) + if (!RMWI->isVolatile()) + return RMWI->getPointerOperand(); + + return nullptr; +} +static const Value *getBasePointerOfAccessPointerOperand(const Instruction *I, + int64_t &BytesOffset, + const DataLayout &DL) { + const Value *Ptr = getPointerOperand(I); + if (!Ptr) + return nullptr; + + return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL, + /*AllowNonInbounds*/ false); +} ChangeStatus AbstractAttribute::update(Attributor &A) { ChangeStatus HasChanged = ChangeStatus::UNCHANGED; @@ -289,143 +327,527 @@ ChangeStatus AbstractAttribute::update(Attributor &A) { return HasChanged; } -ChangeStatus AbstractAttribute::manifest(Attributor &A) { - assert(getState().isValidState() && - "Attempted to manifest an invalid state!"); - assert(getAssociatedValue() && - "Attempted to manifest an attribute without associated value!"); - - ChangeStatus HasChanged = ChangeStatus::UNCHANGED; - SmallVector<Attribute, 4> DeducedAttrs; - getDeducedAttributes(DeducedAttrs); - - Function &ScopeFn = getAnchorScope(); - LLVMContext &Ctx = ScopeFn.getContext(); - ManifestPosition MP = getManifestPosition(); - - AttributeList Attrs; - SmallVector<unsigned, 4> ArgNos; +ChangeStatus +IRAttributeManifest::manifestAttrs(Attributor &A, IRPosition &IRP, + const ArrayRef<Attribute> &DeducedAttrs) { + Function *ScopeFn = IRP.getAssociatedFunction(); + IRPosition::Kind PK = IRP.getPositionKind(); // In the following some generic code that will manifest attributes in // DeducedAttrs if they improve the current IR. Due to the different // annotation positions we use the underlying AttributeList interface. - // Note that MP_CALL_SITE_ARGUMENT can annotate multiple locations. 
- switch (MP) { - case MP_ARGUMENT: - ArgNos.push_back(cast<Argument>(getAssociatedValue())->getArgNo()); - Attrs = ScopeFn.getAttributes(); + AttributeList Attrs; + switch (PK) { + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: + return ChangeStatus::UNCHANGED; + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_FUNCTION: + case IRPosition::IRP_RETURNED: + Attrs = ScopeFn->getAttributes(); break; - case MP_FUNCTION: - case MP_RETURNED: - ArgNos.push_back(0); - Attrs = ScopeFn.getAttributes(); + case IRPosition::IRP_CALL_SITE: + case IRPosition::IRP_CALL_SITE_RETURNED: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + Attrs = ImmutableCallSite(&IRP.getAnchorValue()).getAttributes(); break; - case MP_CALL_SITE_ARGUMENT: { - CallSite CS(&getAnchoredValue()); - for (unsigned u = 0, e = CS.getNumArgOperands(); u != e; u++) - if (CS.getArgOperand(u) == getAssociatedValue()) - ArgNos.push_back(u); - Attrs = CS.getAttributes(); - } } + ChangeStatus HasChanged = ChangeStatus::UNCHANGED; + LLVMContext &Ctx = IRP.getAnchorValue().getContext(); for (const Attribute &Attr : DeducedAttrs) { - for (unsigned ArgNo : ArgNos) { - if (!addIfNotExistent(Ctx, Attr, Attrs, MP, ArgNo)) - continue; + if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx())) + continue; - HasChanged = ChangeStatus::CHANGED; - bookkeeping(MP, Attr); - } + HasChanged = ChangeStatus::CHANGED; } if (HasChanged == ChangeStatus::UNCHANGED) return HasChanged; - switch (MP) { - case MP_ARGUMENT: - case MP_FUNCTION: - case MP_RETURNED: - ScopeFn.setAttributes(Attrs); + switch (PK) { + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_FUNCTION: + case IRPosition::IRP_RETURNED: + ScopeFn->setAttributes(Attrs); + break; + case IRPosition::IRP_CALL_SITE: + case IRPosition::IRP_CALL_SITE_RETURNED: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + CallSite(&IRP.getAnchorValue()).setAttributes(Attrs); + break; + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: break; - case MP_CALL_SITE_ARGUMENT: - CallSite(&getAnchoredValue()).setAttributes(Attrs); } return HasChanged; } -Function &AbstractAttribute::getAnchorScope() { - Value &V = getAnchoredValue(); - if (isa<Function>(V)) - return cast<Function>(V); - if (isa<Argument>(V)) - return *cast<Argument>(V).getParent(); - if (isa<Instruction>(V)) - return *cast<Instruction>(V).getFunction(); - llvm_unreachable("No scope for anchored value found!"); +const IRPosition IRPosition::EmptyKey(255); +const IRPosition IRPosition::TombstoneKey(256); + +SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { + IRPositions.emplace_back(IRP); + + ImmutableCallSite ICS(&IRP.getAnchorValue()); + switch (IRP.getPositionKind()) { + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: + case IRPosition::IRP_FUNCTION: + return; + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_RETURNED: + IRPositions.emplace_back( + IRPosition::function(*IRP.getAssociatedFunction())); + return; + case IRPosition::IRP_CALL_SITE: + assert(ICS && "Expected call site!"); + // TODO: We need to look at the operand bundles similar to the redirection + // in CallBase. + if (!ICS.hasOperandBundles()) + if (const Function *Callee = ICS.getCalledFunction()) + IRPositions.emplace_back(IRPosition::function(*Callee)); + return; + case IRPosition::IRP_CALL_SITE_RETURNED: + assert(ICS && "Expected call site!"); + // TODO: We need to look at the operand bundles similar to the redirection + // in CallBase. 
+ if (!ICS.hasOperandBundles()) { + if (const Function *Callee = ICS.getCalledFunction()) { + IRPositions.emplace_back(IRPosition::returned(*Callee)); + IRPositions.emplace_back(IRPosition::function(*Callee)); + } + } + IRPositions.emplace_back( + IRPosition::callsite_function(cast<CallBase>(*ICS.getInstruction()))); + return; + case IRPosition::IRP_CALL_SITE_ARGUMENT: { + int ArgNo = IRP.getArgNo(); + assert(ICS && ArgNo >= 0 && "Expected call site!"); + // TODO: We need to look at the operand bundles similar to the redirection + // in CallBase. + if (!ICS.hasOperandBundles()) { + const Function *Callee = ICS.getCalledFunction(); + if (Callee && Callee->arg_size() > unsigned(ArgNo)) + IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo))); + if (Callee) + IRPositions.emplace_back(IRPosition::function(*Callee)); + } + IRPositions.emplace_back(IRPosition::value(IRP.getAssociatedValue())); + return; + } + } +} + +bool IRPosition::hasAttr(ArrayRef<Attribute::AttrKind> AKs, + bool IgnoreSubsumingPositions) const { + for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) { + for (Attribute::AttrKind AK : AKs) + if (EquivIRP.getAttr(AK).getKindAsEnum() == AK) + return true; + // The first position returned by the SubsumingPositionIterator is + // always the position itself. If we ignore subsuming positions we + // are done after the first iteration. + if (IgnoreSubsumingPositions) + break; + } + return false; } -const Function &AbstractAttribute::getAnchorScope() const { - return const_cast<AbstractAttribute *>(this)->getAnchorScope(); +void IRPosition::getAttrs(ArrayRef<Attribute::AttrKind> AKs, + SmallVectorImpl<Attribute> &Attrs) const { + for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) + for (Attribute::AttrKind AK : AKs) { + const Attribute &Attr = EquivIRP.getAttr(AK); + if (Attr.getKindAsEnum() == AK) + Attrs.push_back(Attr); + } } -/// -----------------------NoUnwind Function Attribute-------------------------- +void IRPosition::verify() { + switch (KindOrArgNo) { + default: + assert(KindOrArgNo >= 0 && "Expected argument or call site argument!"); + assert((isa<CallBase>(AnchorVal) || isa<Argument>(AnchorVal)) && + "Expected call base or argument for positive attribute index!"); + if (isa<Argument>(AnchorVal)) { + assert(cast<Argument>(AnchorVal)->getArgNo() == unsigned(getArgNo()) && + "Argument number mismatch!"); + assert(cast<Argument>(AnchorVal) == &getAssociatedValue() && + "Associated value mismatch!"); + } else { + assert(cast<CallBase>(*AnchorVal).arg_size() > unsigned(getArgNo()) && + "Call site argument number mismatch!"); + assert(cast<CallBase>(*AnchorVal).getArgOperand(getArgNo()) == + &getAssociatedValue() && + "Associated value mismatch!"); + } + break; + case IRP_INVALID: + assert(!AnchorVal && "Expected no value for an invalid position!"); + break; + case IRP_FLOAT: + assert((!isa<CallBase>(&getAssociatedValue()) && + !isa<Argument>(&getAssociatedValue())) && + "Expected specialized kind for call base and argument values!"); + break; + case IRP_RETURNED: + assert(isa<Function>(AnchorVal) && + "Expected function for a 'returned' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + case IRP_CALL_SITE_RETURNED: + assert((isa<CallBase>(AnchorVal)) && + "Expected call base for 'call site returned' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + case IRP_CALL_SITE: + assert((isa<CallBase>(AnchorVal)) && + "Expected call base 
for 'call site function' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + case IRP_FUNCTION: + assert(isa<Function>(AnchorVal) && + "Expected function for a 'function' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + } +} + +namespace { +/// Helper functions to clamp a state \p S of type \p StateType with the +/// information in \p R and indicate/return if \p S did change (as-in update is +/// required to be run again). +/// +///{ +template <typename StateType> +ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R); + +template <> +ChangeStatus clampStateAndIndicateChange<IntegerState>(IntegerState &S, + const IntegerState &R) { + auto Assumed = S.getAssumed(); + S ^= R; + return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; +} -struct AANoUnwindFunction : AANoUnwind, BooleanState { +template <> +ChangeStatus clampStateAndIndicateChange<BooleanState>(BooleanState &S, + const BooleanState &R) { + return clampStateAndIndicateChange<IntegerState>(S, R); +} +///} - AANoUnwindFunction(Function &F, InformationCache &InfoCache) - : AANoUnwind(F, InfoCache) {} +/// Clamp the information known for all returned values of a function +/// (identified by \p QueryingAA) into \p S. +template <typename AAType, typename StateType = typename AAType::StateType> +static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA, + StateType &S) { + LLVM_DEBUG(dbgs() << "[Attributor] Clamp return value states for " + << static_cast<const AbstractAttribute &>(QueryingAA) + << " into " << S << "\n"); + + assert((QueryingAA.getIRPosition().getPositionKind() == + IRPosition::IRP_RETURNED || + QueryingAA.getIRPosition().getPositionKind() == + IRPosition::IRP_CALL_SITE_RETURNED) && + "Can only clamp returned value states for a function returned or call " + "site returned position!"); + + // Use an optional state as there might not be any return values and we want + // to join (IntegerState::operator&) the state of all there are. + Optional<StateType> T; + + // Callback for each possibly returned value. + auto CheckReturnValue = [&](Value &RV) -> bool { + const IRPosition &RVPos = IRPosition::value(RV); + const AAType &AA = A.getAAFor<AAType>(QueryingAA, RVPos); + LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr() + << " @ " << RVPos << "\n"); + const StateType &AAS = static_cast<const StateType &>(AA.getState()); + if (T.hasValue()) + *T &= AAS; + else + T = AAS; + LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T + << "\n"); + return T->isValidState(); + }; - /// See AbstractAttribute::getState() - /// { - AbstractState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } - /// } + if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA)) + S.indicatePessimisticFixpoint(); + else if (T.hasValue()) + S ^= *T; +} - /// See AbstractAttribute::getManifestPosition(). 
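To see concretely what the clamp helpers just defined do (illustrative values; this assumes the optimistic default-constructed states from Attributor.h):

BooleanState S; // Fresh state: the property is optimistically assumed.
BooleanState R;
R.indicatePessimisticFixpoint(); // R no longer assumes the property.
ChangeStatus CS = clampStateAndIndicateChange(S, R);
// The '^=' join intersects the assumed information, S loses its optimism,
// and CS == ChangeStatus::CHANGED tells the caller another round is needed.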
- ManifestPosition getManifestPosition() const override { return MP_FUNCTION; } +/// Helper class to compose two generic deductions. +template <typename AAType, typename Base, typename StateType, + template <typename...> class F, template <typename...> class G> +struct AAComposeTwoGenericDeduction + : public F<AAType, G<AAType, Base, StateType>, StateType> { + AAComposeTwoGenericDeduction(const IRPosition &IRP) + : F<AAType, G<AAType, Base, StateType>, StateType>(IRP) {} - const std::string getAsStr() const override { - return getAssumed() ? "nounwind" : "may-unwind"; + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus ChangedF = F<AAType, G<AAType, Base, StateType>, StateType>::updateImpl(A); + ChangeStatus ChangedG = G<AAType, Base, StateType>::updateImpl(A); + return ChangedF | ChangedG; } +}; + +/// Helper class for generic deduction: return value -> returned position. +template <typename AAType, typename Base, + typename StateType = typename AAType::StateType> +struct AAReturnedFromReturnedValues : public Base { + AAReturnedFromReturnedValues(const IRPosition &IRP) : Base(IRP) {} /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; + ChangeStatus updateImpl(Attributor &A) override { + StateType S; + clampReturnedValueStates<AAType, StateType>(A, *this, S); + // TODO: If we know we visited all returned values, thus none are assumed + // dead, we can take the known information from the state T. + return clampStateAndIndicateChange<StateType>(this->getState(), S); + } +}; - /// See AANoUnwind::isAssumedNoUnwind(). - bool isAssumedNoUnwind() const override { return getAssumed(); } +/// Clamp the information known at all call sites for a given argument +/// (identified by \p QueryingAA) into \p S. +template <typename AAType, typename StateType = typename AAType::StateType> +static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA, + StateType &S) { + LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for " + << static_cast<const AbstractAttribute &>(QueryingAA) + << " into " << S << "\n"); + + assert(QueryingAA.getIRPosition().getPositionKind() == + IRPosition::IRP_ARGUMENT && + "Can only clamp call site argument states for an argument position!"); + + // Use an optional state as there might not be any return values and we want + // to join (IntegerState::operator&) the state of all there are. + Optional<StateType> T; + + // The argument number which is also the call site argument number. + unsigned ArgNo = QueryingAA.getIRPosition().getArgNo(); + + auto CallSiteCheck = [&](AbstractCallSite ACS) { + const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); + // Check if a corresponding argument was found or if it is not associated + // (which can happen for callback calls). + if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) + return false; 
- bool isKnownNoUnwind() const override { return getKnown(); } + const AAType &AA = A.getAAFor<AAType>(QueryingAA, ACSArgPos); + LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction() + << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n"); + const StateType &AAS = static_cast<const StateType &>(AA.getState()); + if (T.hasValue()) + *T &= AAS; + else + T = AAS; + LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T + << "\n"); + return T->isValidState(); + }; + + if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true)) + S.indicatePessimisticFixpoint(); + else if (T.hasValue()) + S ^= *T; +} + +/// Helper class for generic deduction: call site argument -> argument position. +template <typename AAType, typename Base, + typename StateType = typename AAType::StateType> +struct AAArgumentFromCallSiteArguments : public Base { + AAArgumentFromCallSiteArguments(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + StateType S; + clampCallSiteArgumentStates<AAType, StateType>(A, *this, S); + // TODO: If we know we visited all incoming values, thus none are assumed + // dead, we can take the known information from the state T. + return clampStateAndIndicateChange<StateType>(this->getState(), S); + } }; -ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); +/// Helper class for generic replication: function returned -> cs returned. +template <typename AAType, typename Base, + typename StateType = typename AAType::StateType> +struct AACallSiteReturnedFromReturned : public Base { + AACallSiteReturnedFromReturned(const IRPosition &IRP) : Base(IRP) {} - // The map from instruction opcodes to those instructions in the function. - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); - auto Opcodes = { - (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, - (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet, - (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume}; + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + assert(this->getIRPosition().getPositionKind() == + IRPosition::IRP_CALL_SITE_RETURNED && + "Can only wrap function returned positions for call site returned " + "positions!"); + auto &S = this->getState(); + + const Function *AssociatedFunction = + this->getIRPosition().getAssociatedFunction(); + if (!AssociatedFunction) + return S.indicatePessimisticFixpoint(); + + IRPosition FnPos = IRPosition::returned(*AssociatedFunction); + const AAType &AA = A.getAAFor<AAType>(*this, FnPos); + return clampStateAndIndicateChange( + S, static_cast<const typename AAType::StateType &>(AA.getState())); + } }; - for (unsigned Opcode : Opcodes) { - for (Instruction *I : OpcodeInstMap[Opcode]) { - if (!I->mayThrow()) - continue; +/// Helper class for generic deduction using the must-be-executed-context. +/// The base class is required to have a `followUse` method. - auto *NoUnwindAA = A.getAAFor<AANoUnwind>(*this, *I); +/// bool followUse(Attributor &A, const Use *U, const Instruction *I) +/// U - Underlying use. +/// I - The user of \p U. +/// `followUse` returns true if the value should be tracked transitively. 
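A hypothetical `followUse` satisfying this contract, for a nonnull-style deduction (the names and the use case are illustrative, assuming the attribute's state is an IntegerState):

bool followUse(Attributor &A, const Use *U, const Instruction *I) {
  // A load through the pointer inside the must-be-executed context proves it
  // dereferenceable, hence nonnull; record that and stop at this leaf.
  if (const auto *LI = dyn_cast<LoadInst>(I))
    if (U->get() == LI->getPointerOperand()) {
      takeKnownMaximum(1); // Mark "known" in the underlying IntegerState.
      return false;
    }
  // Pointer-preserving users (casts, GEPs) are worth following transitively.
  return isa<BitCastInst>(I) || isa<GetElementPtrInst>(I);
}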
- if (!NoUnwindAA || !NoUnwindAA->isAssumedNoUnwind()) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; +template <typename AAType, typename Base, + typename StateType = typename AAType::StateType> +struct AAFromMustBeExecutedContext : public Base { + AAFromMustBeExecutedContext(const IRPosition &IRP) : Base(IRP) {} + + void initialize(Attributor &A) override { + Base::initialize(A); + IRPosition &IRP = this->getIRPosition(); + Instruction *CtxI = IRP.getCtxI(); + + if (!CtxI) + return; + + for (const Use &U : IRP.getAssociatedValue().uses()) + Uses.insert(&U); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto BeforeState = this->getState(); + auto &S = this->getState(); + Instruction *CtxI = this->getIRPosition().getCtxI(); + if (!CtxI) + return ChangeStatus::UNCHANGED; + + MustBeExecutedContextExplorer &Explorer = + A.getInfoCache().getMustBeExecutedContextExplorer(); + + SetVector<const Use *> NextUses; + + for (const Use *U : Uses) { + if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) { + auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI); + bool Found = EIt.count(UserI); + while (!Found && ++EIt != EEnd) + Found = EIt.getCurrentInst() == UserI; + if (Found && Base::followUse(A, U, UserI)) + for (const Use &Us : UserI->uses()) + NextUses.insert(&Us); } } + for (const Use *U : NextUses) + Uses.insert(U); + + return BeforeState == S ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; } - return ChangeStatus::UNCHANGED; -} + +private: + /// Container for (transitive) uses of the associated value. + SetVector<const Use *> Uses; +}; + +template <typename AAType, typename Base, + typename StateType = typename AAType::StateType> +using AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext = + AAComposeTwoGenericDeduction<AAType, Base, StateType, + AAFromMustBeExecutedContext, + AAArgumentFromCallSiteArguments>; + +template <typename AAType, typename Base, + typename StateType = typename AAType::StateType> +using AACallSiteReturnedFromReturnedAndMustBeExecutedContext = + AAComposeTwoGenericDeduction<AAType, Base, StateType, + AAFromMustBeExecutedContext, + AACallSiteReturnedFromReturned>; + +/// -----------------------NoUnwind Function Attribute-------------------------- + +struct AANoUnwindImpl : AANoUnwind { + AANoUnwindImpl(const IRPosition &IRP) : AANoUnwind(IRP) {} + + const std::string getAsStr() const override { + return getAssumed() ? "nounwind" : "may-unwind"; + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + auto Opcodes = { + (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, + (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet, + (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume}; + + auto CheckForNoUnwind = [&](Instruction &I) { + if (!I.mayThrow()) + return true; + + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + const auto &NoUnwindAA = + A.getAAFor<AANoUnwind>(*this, IRPosition::callsite_function(ICS)); + return NoUnwindAA.isAssumedNoUnwind(); + } + return false; + }; + + if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } +}; + +struct AANoUnwindFunction final : public AANoUnwindImpl { + AANoUnwindFunction(const IRPosition &IRP) : AANoUnwindImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nounwind) } +}; + +/// NoUnwind attribute deduction for a call sites. +struct AANoUnwindCallSite final : AANoUnwindImpl { + AANoUnwindCallSite(const IRPosition &IRP) : AANoUnwindImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoUnwindImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast<const AANoUnwind::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); } +}; /// --------------------- Function Return Values ------------------------------- @@ -434,68 +856,48 @@ ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) { /// /// If there is a unique returned value R, the manifest method will: /// - mark R with the "returned" attribute, if R is an argument. -class AAReturnedValuesImpl final : public AAReturnedValues, AbstractState { +class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState { /// Mapping of values potentially returned by the associated function to the /// return instructions that might return them. - DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> ReturnedValues; + MapVector<Value *, SmallSetVector<ReturnInst *, 4>> ReturnedValues; + + /// Mapping to remember the number of returned values for a call site such + /// that we can avoid updates if nothing changed. + DenseMap<const CallBase *, unsigned> NumReturnedValuesPerKnownAA; + + /// Set of unresolved calls returned by the associated function. + SmallSetVector<CallBase *, 4> UnresolvedCalls; /// State flags /// ///{ - bool IsFixed; - bool IsValidState; - bool HasOverdefinedReturnedCalls; + bool IsFixed = false; + bool IsValidState = true; ///} - /// Collect values that could become \p V in the set \p Values, each mapped to - /// \p ReturnInsts. 
- void collectValuesRecursively( - Attributor &A, Value *V, SmallPtrSetImpl<ReturnInst *> &ReturnInsts, - DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> &Values) { - - visitValueCB_t<bool> VisitValueCB = [&](Value *Val, bool &) { - assert(!isa<Instruction>(Val) || - &getAnchorScope() == cast<Instruction>(Val)->getFunction()); - Values[Val].insert(ReturnInsts.begin(), ReturnInsts.end()); - }; - - bool UnusedBool; - bool Success = genericValueTraversal(V, UnusedBool, VisitValueCB); - - // If we did abort the above traversal we haven't see all the values. - // Consequently, we cannot know if the information we would derive is - // accurate so we give up early. - if (!Success) - indicatePessimisticFixpoint(); - } - public: - /// See AbstractAttribute::AbstractAttribute(...). - AAReturnedValuesImpl(Function &F, InformationCache &InfoCache) - : AAReturnedValues(F, InfoCache) { - // We do not have an associated argument yet. - AssociatedVal = nullptr; - } + AAReturnedValuesImpl(const IRPosition &IRP) : AAReturnedValues(IRP) {} /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { // Reset the state. - AssociatedVal = nullptr; IsFixed = false; IsValidState = true; - HasOverdefinedReturnedCalls = false; ReturnedValues.clear(); - Function &F = cast<Function>(getAnchoredValue()); + Function *F = getAssociatedFunction(); + if (!F) { + indicatePessimisticFixpoint(); + return; + } // The map from instruction opcodes to those instructions in the function. - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); + auto &OpcodeInstMap = A.getInfoCache().getOpcodeInstMapForFunction(*F); // Look through all arguments, if one is marked as returned we are done. - for (Argument &Arg : F.args()) { + for (Argument &Arg : F->args()) { if (Arg.hasReturnedAttr()) { - auto &ReturnInstSet = ReturnedValues[&Arg]; for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) ReturnInstSet.insert(cast<ReturnInst>(RI)); @@ -505,13 +907,8 @@ public: } } - // If no argument was marked as returned we look at all return instructions - // and collect potentially returned values. - for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) { - SmallPtrSet<ReturnInst *, 1> RISet({cast<ReturnInst>(RI)}); - collectValuesRecursively(A, cast<ReturnInst>(RI)->getReturnValue(), RISet, - ReturnedValues); - } + if (!F->hasExactDefinition()) + indicatePessimisticFixpoint(); } /// See AbstractAttribute::manifest(...). @@ -523,25 +920,35 @@ public: /// See AbstractAttribute::getState(...). const AbstractState &getState() const override { return *this; } - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; } - /// See AbstractAttribute::updateImpl(Attributor &A). ChangeStatus updateImpl(Attributor &A) override; + llvm::iterator_range<iterator> returned_values() override { + return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end()); + } + + llvm::iterator_range<const_iterator> returned_values() const override { + return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end()); + } + + const SmallSetVector<CallBase *, 4> &getUnresolvedCalls() const override { + return UnresolvedCalls; + } + /// Return the number of potential return values, -1 if unknown. - size_t getNumReturnValues() const { + size_t getNumReturnValues() const override { return isValidState() ? ReturnedValues.size() : -1; } /// Return an assumed unique return value if a single candidate is found. If /// there cannot be one, return a nullptr. 
If it is not clear yet, return the /// Optional::NoneType. - Optional<Value *> getAssumedUniqueReturnValue() const; + Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const; - /// See AbstractState::checkForallReturnedValues(...). - bool - checkForallReturnedValues(std::function<bool(Value &)> &Pred) const override; + /// See AbstractState::checkForAllReturnedValues(...). + bool checkForAllReturnedValuesAndReturnInsts( + const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> + &Pred) const override; /// Pretty print the attribute similar to the IR representation. const std::string getAsStr() const override; @@ -553,13 +960,15 @@ public: bool isValidState() const override { return IsValidState; } /// See AbstractState::indicateOptimisticFixpoint(...). - void indicateOptimisticFixpoint() override { + ChangeStatus indicateOptimisticFixpoint() override { IsFixed = true; - IsValidState &= true; + return ChangeStatus::UNCHANGED; } - void indicatePessimisticFixpoint() override { + + ChangeStatus indicatePessimisticFixpoint() override { IsFixed = true; IsValidState = false; + return ChangeStatus::CHANGED; } }; @@ -568,21 +977,52 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { // Bookkeeping. assert(isValidState()); - NumFnKnownReturns++; + STATS_DECLTRACK(KnownReturnValues, FunctionReturn, + "Number of functions with known return values"); // Check if we have an assumed unique return value that we could manifest. - Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(); + Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A); if (!UniqueRV.hasValue() || !UniqueRV.getValue()) return Changed; // Bookkeeping. - NumFnUniqueReturned++; + STATS_DECLTRACK(UniqueReturnValue, FunctionReturn, + "Number of functions with unique return"); + + // Callback to replace the uses of CB with the constant C. + auto ReplaceCallSiteUsersWith = [](CallBase &CB, Constant &C) { + if (CB.getNumUses() == 0 || CB.isMustTailCall()) + return ChangeStatus::UNCHANGED; + CB.replaceAllUsesWith(&C); + return ChangeStatus::CHANGED; + }; // If the assumed unique return value is an argument, annotate it. if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) { - AssociatedVal = UniqueRVArg; - Changed = AbstractAttribute::manifest(A) | Changed; + getIRPosition() = IRPosition::argument(*UniqueRVArg); + Changed = IRAttribute::manifest(A); + } else if (auto *RVC = dyn_cast<Constant>(UniqueRV.getValue())) { + // We can replace the returned value with the unique returned constant. + Value &AnchorValue = getAnchorValue(); + if (Function *F = dyn_cast<Function>(&AnchorValue)) { + for (const Use &U : F->uses()) + if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) + if (CB->isCallee(&U)) { + Constant *RVCCast = + ConstantExpr::getTruncOrBitCast(RVC, CB->getType()); + Changed = ReplaceCallSiteUsersWith(*CB, *RVCCast) | Changed; + } + } else { + assert(isa<CallBase>(AnchorValue) && + "Expected a function or call base anchor!"); + Constant *RVCCast = + ConstantExpr::getTruncOrBitCast(RVC, AnchorValue.getType()); + Changed = ReplaceCallSiteUsersWith(cast<CallBase>(AnchorValue), *RVCCast); + } + if (Changed == ChangeStatus::CHANGED) + STATS_DECLTRACK(UniqueConstantReturnValue, FunctionReturn, + "Number of function returns replaced by constant return"); } return Changed; @@ -590,18 +1030,20 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { const std::string AAReturnedValuesImpl::getAsStr() const { return (isAtFixpoint() ? 
"returns(#" : "may-return(#") + - (isValidState() ? std::to_string(getNumReturnValues()) : "?") + ")"; + (isValidState() ? std::to_string(getNumReturnValues()) : "?") + + ")[#UC: " + std::to_string(UnresolvedCalls.size()) + "]"; } -Optional<Value *> AAReturnedValuesImpl::getAssumedUniqueReturnValue() const { - // If checkForallReturnedValues provides a unique value, ignoring potential +Optional<Value *> +AAReturnedValuesImpl::getAssumedUniqueReturnValue(Attributor &A) const { + // If checkForAllReturnedValues provides a unique value, ignoring potential // undef values that can also be present, it is assumed to be the actual // return value and forwarded to the caller of this method. If there are // multiple, a nullptr is returned indicating there cannot be a unique // returned value. Optional<Value *> UniqueRV; - std::function<bool(Value &)> Pred = [&](Value &RV) -> bool { + auto Pred = [&](Value &RV) -> bool { // If we found a second returned value and neither the current nor the saved // one is an undef, there is no unique returned value. Undefs are special // since we can pretend they have any value. @@ -618,14 +1060,15 @@ Optional<Value *> AAReturnedValuesImpl::getAssumedUniqueReturnValue() const { return true; }; - if (!checkForallReturnedValues(Pred)) + if (!A.checkForAllReturnedValues(Pred, *this)) UniqueRV = nullptr; return UniqueRV; } -bool AAReturnedValuesImpl::checkForallReturnedValues( - std::function<bool(Value &)> &Pred) const { +bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts( + const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> + &Pred) const { if (!isValidState()) return false; @@ -634,11 +1077,11 @@ bool AAReturnedValuesImpl::checkForallReturnedValues( for (auto &It : ReturnedValues) { Value *RV = It.first; - ImmutableCallSite ICS(RV); - if (ICS && !HasOverdefinedReturnedCalls) + CallBase *CB = dyn_cast<CallBase>(RV); + if (CB && !UnresolvedCalls.count(CB)) continue; - if (!Pred(*RV)) + if (!Pred(*RV, It.second)) return false; } @@ -646,125 +1089,196 @@ bool AAReturnedValuesImpl::checkForallReturnedValues( } ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { + size_t NumUnresolvedCalls = UnresolvedCalls.size(); + bool Changed = false; + + // State used in the value traversals starting in returned values. + struct RVState { + // The map in which we collect return values -> return instrs. + decltype(ReturnedValues) &RetValsMap; + // The flag to indicate a change. + bool &Changed; + // The return instrs we come from. + SmallSetVector<ReturnInst *, 4> RetInsts; + }; - // Check if we know of any values returned by the associated function, - // if not, we are done. - if (getNumReturnValues() == 0) { - indicateOptimisticFixpoint(); - return ChangeStatus::UNCHANGED; - } + // Callback for a leaf value returned by the associated function. + auto VisitValueCB = [](Value &Val, RVState &RVS, bool) -> bool { + auto Size = RVS.RetValsMap[&Val].size(); + RVS.RetValsMap[&Val].insert(RVS.RetInsts.begin(), RVS.RetInsts.end()); + bool Inserted = RVS.RetValsMap[&Val].size() != Size; + RVS.Changed |= Inserted; + LLVM_DEBUG({ + if (Inserted) + dbgs() << "[AAReturnedValues] 1 Add new returned value " << Val + << " => " << RVS.RetInsts.size() << "\n"; + }); + return true; + }; - // Check if any of the returned values is a call site we can refine. - decltype(ReturnedValues) AddRVs; - bool HasCallSite = false; + // Helper method to invoke the generic value traversal. 
+ auto VisitReturnedValue = [&](Value &RV, RVState &RVS) { + IRPosition RetValPos = IRPosition::value(RV); + return genericValueTraversal<AAReturnedValues, RVState>(A, RetValPos, *this, + RVS, VisitValueCB); + }; - // Look at all returned call sites. - for (auto &It : ReturnedValues) { - SmallPtrSet<ReturnInst *, 2> &ReturnInsts = It.second; - Value *RV = It.first; - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Potentially returned value " << *RV - << "\n"); + // Callback for all "return intructions" live in the associated function. + auto CheckReturnInst = [this, &VisitReturnedValue, &Changed](Instruction &I) { + ReturnInst &Ret = cast<ReturnInst>(I); + RVState RVS({ReturnedValues, Changed, {}}); + RVS.RetInsts.insert(&Ret); + return VisitReturnedValue(*Ret.getReturnValue(), RVS); + }; - // Only call sites can change during an update, ignore the rest. - CallSite RetCS(RV); - if (!RetCS) + // Start by discovering returned values from all live returned instructions in + // the associated function. + if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret})) + return indicatePessimisticFixpoint(); + + // Once returned values "directly" present in the code are handled we try to + // resolve returned calls. + decltype(ReturnedValues) NewRVsMap; + for (auto &It : ReturnedValues) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *It.first + << " by #" << It.second.size() << " RIs\n"); + CallBase *CB = dyn_cast<CallBase>(It.first); + if (!CB || UnresolvedCalls.count(CB)) continue; - // For now, any call site we see will prevent us from directly fixing the - // state. However, if the information on the callees is fixed, the call - // sites will be removed and we will fix the information for this state. - HasCallSite = true; - - // Try to find a assumed unique return value for the called function. - auto *RetCSAA = A.getAAFor<AAReturnedValuesImpl>(*this, *RV); - if (!RetCSAA) { - HasOverdefinedReturnedCalls = true; - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site (" << *RV - << ") with " << (RetCSAA ? "invalid" : "no") - << " associated state\n"); + if (!CB->getCalledFunction()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB + << "\n"); + UnresolvedCalls.insert(CB); continue; } - // Try to find a assumed unique return value for the called function. - Optional<Value *> AssumedUniqueRV = RetCSAA->getAssumedUniqueReturnValue(); + // TODO: use the function scope once we have call site AAReturnedValues. + const auto &RetValAA = A.getAAFor<AAReturnedValues>( + *this, IRPosition::function(*CB->getCalledFunction())); + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: " + << static_cast<const AbstractAttribute &>(RetValAA) + << "\n"); - // If no assumed unique return value was found due to the lack of - // candidates, we may need to resolve more calls (through more update - // iterations) or the called function will not return. Either way, we simply - // stick with the call sites as return values. Because there were not - // multiple possibilities, we do not treat it as overdefined. - if (!AssumedUniqueRV.hasValue()) + // Skip dead ends, thus if we do not know anything about the returned + // call we mark it as unresolved and it will stay that way. 
+ if (!RetValAA.getState().isValidState()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB + << "\n"); + UnresolvedCalls.insert(CB); continue; + } - // If multiple, non-refinable values were found, there cannot be a unique - // return value for the called function. The returned call is overdefined! - if (!AssumedUniqueRV.getValue()) { - HasOverdefinedReturnedCalls = true; - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site has multiple " - "potentially returned values\n"); + // Do not try to learn partial information. If the callee has unresolved + // return values we will treat the call as unresolved/opaque. + auto &RetValAAUnresolvedCalls = RetValAA.getUnresolvedCalls(); + if (!RetValAAUnresolvedCalls.empty()) { + UnresolvedCalls.insert(CB); continue; } - LLVM_DEBUG({ - bool UniqueRVIsKnown = RetCSAA->isAtFixpoint(); - dbgs() << "[AAReturnedValues] Returned call site " - << (UniqueRVIsKnown ? "known" : "assumed") - << " unique return value: " << *AssumedUniqueRV << "\n"; - }); + // Now check if we can track transitively returned values. If possible, thus + // if all return value can be represented in the current scope, do so. + bool Unresolved = false; + for (auto &RetValAAIt : RetValAA.returned_values()) { + Value *RetVal = RetValAAIt.first; + if (isa<Argument>(RetVal) || isa<CallBase>(RetVal) || + isa<Constant>(RetVal)) + continue; + // Anything that did not fit in the above categories cannot be resolved, + // mark the call as unresolved. + LLVM_DEBUG(dbgs() << "[AAReturnedValues] transitively returned value " + "cannot be translated: " + << *RetVal << "\n"); + UnresolvedCalls.insert(CB); + Unresolved = true; + break; + } - // The assumed unique return value. - Value *AssumedRetVal = AssumedUniqueRV.getValue(); - - // If the assumed unique return value is an argument, lookup the matching - // call site operand and recursively collect new returned values. - // If it is not an argument, it is just put into the set of returned values - // as we would have already looked through casts, phis, and similar values. - if (Argument *AssumedRetArg = dyn_cast<Argument>(AssumedRetVal)) - collectValuesRecursively(A, - RetCS.getArgOperand(AssumedRetArg->getArgNo()), - ReturnInsts, AddRVs); - else - AddRVs[AssumedRetVal].insert(ReturnInsts.begin(), ReturnInsts.end()); - } + if (Unresolved) + continue; - // Keep track of any change to trigger updates on dependent attributes. - ChangeStatus Changed = ChangeStatus::UNCHANGED; + // Now track transitively returned values. + unsigned &NumRetAA = NumReturnedValuesPerKnownAA[CB]; + if (NumRetAA == RetValAA.getNumReturnValues()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Skip call as it has not " + "changed since it was seen last\n"); + continue; + } + NumRetAA = RetValAA.getNumReturnValues(); + + for (auto &RetValAAIt : RetValAA.returned_values()) { + Value *RetVal = RetValAAIt.first; + if (Argument *Arg = dyn_cast<Argument>(RetVal)) { + // Arguments are mapped to call site operands and we begin the traversal + // again. + bool Unused = false; + RVState RVS({NewRVsMap, Unused, RetValAAIt.second}); + VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS); + continue; + } else if (isa<CallBase>(RetVal)) { + // Call sites are resolved by the callee attribute over time, no need to + // do anything for us. + continue; + } else if (isa<Constant>(RetVal)) { + // Constants are valid everywhere, we can simply take them. 
+ NewRVsMap[RetVal].insert(It.second.begin(), It.second.end()); + continue; + } + } + } - for (auto &It : AddRVs) { + // To avoid modifications to the ReturnedValues map while we iterate over it + // we kept record of potential new entries in a copy map, NewRVsMap. + for (auto &It : NewRVsMap) { assert(!It.second.empty() && "Entry does not add anything."); auto &ReturnInsts = ReturnedValues[It.first]; for (ReturnInst *RI : It.second) - if (ReturnInsts.insert(RI).second) { + if (ReturnInsts.insert(RI)) { LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value " << *It.first << " => " << *RI << "\n"); - Changed = ChangeStatus::CHANGED; + Changed = true; } } - // If there is no call site in the returned values we are done. - if (!HasCallSite) { - indicateOptimisticFixpoint(); - return ChangeStatus::CHANGED; - } - - return Changed; + Changed |= (NumUnresolvedCalls != UnresolvedCalls.size()); + return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } -/// ------------------------ NoSync Function Attribute ------------------------- +struct AAReturnedValuesFunction final : public AAReturnedValuesImpl { + AAReturnedValuesFunction(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {} -struct AANoSyncFunction : AANoSync, BooleanState { + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(returned) } +}; - AANoSyncFunction(Function &F, InformationCache &InfoCache) - : AANoSync(F, InfoCache) {} +/// Returned values information for a call sites. +struct AAReturnedValuesCallSite final : AAReturnedValuesImpl { + AAReturnedValuesCallSite(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {} - /// See AbstractAttribute::getState() - /// { - AbstractState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } - /// } + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites instead of + // redirecting requests to the callee. + llvm_unreachable("Abstract attributes for returned values are not " + "supported for call sites yet!"); + } - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_FUNCTION; } + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// ------------------------ NoSync Function Attribute ------------------------- + +struct AANoSyncImpl : AANoSync { + AANoSyncImpl(const IRPosition &IRP) : AANoSync(IRP) {} const std::string getAsStr() const override { return getAssumed() ? "nosync" : "may-sync"; @@ -773,12 +1287,6 @@ struct AANoSyncFunction : AANoSync, BooleanState { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override; - /// See AANoSync::isAssumedNoSync() - bool isAssumedNoSync() const override { return getAssumed(); } - - /// See AANoSync::isKnownNoSync() - bool isKnownNoSync() const override { return getKnown(); } - /// Helper function used to determine whether an instruction is non-relaxed /// atomic. 
In other words, if an atomic instruction does not have unordered
   /// or monotonic ordering
@@ -792,7 +1300,7 @@ struct AANoSyncFunction : AANoSync, BooleanState {
   static bool isNoSyncIntrinsic(Instruction *I);
 };

-bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) {
+bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
   if (!I->isAtomic())
     return false;

@@ -841,7 +1349,7 @@ bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) {
 /// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics.
 /// FIXME: We should improve the handling of intrinsics.
-bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) {
+bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
   if (auto *II = dyn_cast<IntrinsicInst>(I)) {
     switch (II->getIntrinsicID()) {
     /// Element wise atomic memory intrinsics can only be unordered,
@@ -863,7 +1371,7 @@ bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) {
   return false;
 }

-bool AANoSyncFunction::isVolatile(Instruction *I) {
+bool AANoSyncImpl::isVolatile(Instruction *I) {
   assert(!ImmutableCallSite(I) && !isa<CallBase>(I) &&
          "Calls should not be checked here");

@@ -881,482 +1389,3074 @@ bool AANoSyncFunction::isVolatile(Instruction *I) {
   }
 }

-ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) {
-  Function &F = getAnchorScope();
+ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {

-  /// We are looking for volatile instructions or Non-Relaxed atomics.
-  /// FIXME: We should improve the handling of intrinsics.
-  for (Instruction *I : InfoCache.getReadOrWriteInstsForFunction(F)) {
-    ImmutableCallSite ICS(I);
-    auto *NoSyncAA = A.getAAFor<AANoSyncFunction>(*this, *I);
+  auto CheckRWInstForNoSync = [&](Instruction &I) {
+    /// We are looking for volatile instructions or Non-Relaxed atomics.
+    /// FIXME: We should improve the handling of intrinsics.

-    if (isa<IntrinsicInst>(I) && isNoSyncIntrinsic(I))
-      continue;
+    if (isa<IntrinsicInst>(&I) && isNoSyncIntrinsic(&I))
+      return true;
+
+    if (ImmutableCallSite ICS = ImmutableCallSite(&I)) {
+      if (ICS.hasFnAttr(Attribute::NoSync))
+        return true;
+
+      const auto &NoSyncAA =
+          A.getAAFor<AANoSync>(*this, IRPosition::callsite_function(ICS));
+      if (NoSyncAA.isAssumedNoSync())
+        return true;
+      return false;
+    }
+
+    if (!isVolatile(&I) && !isNonRelaxedAtomic(&I))
+      return true;
+
+    return false;
+  };

-    if (ICS && (!NoSyncAA || !NoSyncAA->isAssumedNoSync()) &&
-        !ICS.hasFnAttr(Attribute::NoSync)) {
+  auto CheckForNoSync = [&](Instruction &I) {
+    // At this point we handled all read/write effects and they are all
+    // nosync, so they can be skipped.
+    if (I.mayReadOrWriteMemory())
+      return true;
+
+    // non-convergent and readnone imply nosync.
+    return !ImmutableCallSite(&I).isConvergent();
+  };
+
+  if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) ||
+      !A.checkForAllCallLikeInstructions(CheckForNoSync, *this))
+    return indicatePessimisticFixpoint();
+
+  return ChangeStatus::UNCHANGED;
+}
+
+struct AANoSyncFunction final : public AANoSyncImpl {
+  AANoSyncFunction(const IRPosition &IRP) : AANoSyncImpl(IRP) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nosync) }
+};
+
+/// NoSync attribute deduction for a call site.
+struct AANoSyncCallSite final : AANoSyncImpl {
+  AANoSyncCallSite(const IRPosition &IRP) : AANoSyncImpl(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override { + AANoSyncImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast<const AANoSync::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); } +}; + +/// ------------------------ No-Free Attributes ---------------------------- + +struct AANoFreeImpl : public AANoFree { + AANoFreeImpl(const IRPosition &IRP) : AANoFree(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto CheckForNoFree = [&](Instruction &I) { + ImmutableCallSite ICS(&I); + if (ICS.hasFnAttr(Attribute::NoFree)) + return true; + + const auto &NoFreeAA = + A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(ICS)); + return NoFreeAA.isAssumedNoFree(); + }; + + if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this)) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumed() ? "nofree" : "may-free"; + } +}; + +struct AANoFreeFunction final : public AANoFreeImpl { + AANoFreeFunction(const IRPosition &IRP) : AANoFreeImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nofree) } +}; + +/// NoFree attribute deduction for a call sites. +struct AANoFreeCallSite final : AANoFreeImpl { + AANoFreeCallSite(const IRPosition &IRP) : AANoFreeImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoFreeImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. 
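    // For orientation (a simplified sketch of the assumed helper semantics,
    // not the actual definition): clampStateAndIndicateChange narrows the
    // caller's state by the callee's state and reports whether anything
    // changed, along the lines of
    //
    //   template <typename StateType>
    //   ChangeStatus clampStateAndIndicateChange(StateType &S,
    //                                            const StateType &R) {
    //     auto Before = S.getAssumed();
    //     S ^= R; // clamp by what R assumes/knows
    //     return Before == S.getAssumed() ? ChangeStatus::UNCHANGED
    //                                     : ChangeStatus::CHANGED;
    //   }
    //
    // The CHANGED result is what keeps the fixpoint iteration going.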
+ Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast<const AANoFree::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); } +}; + +/// ------------------------ NonNull Argument Attribute ------------------------ +static int64_t getKnownNonNullAndDerefBytesForUse( + Attributor &A, AbstractAttribute &QueryingAA, Value &AssociatedValue, + const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) { + TrackUse = false; + + const Value *UseV = U->get(); + if (!UseV->getType()->isPointerTy()) + return 0; + + Type *PtrTy = UseV->getType(); + const Function *F = I->getFunction(); + bool NullPointerIsDefined = + F ? llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true; + const DataLayout &DL = A.getInfoCache().getDL(); + if (ImmutableCallSite ICS = ImmutableCallSite(I)) { + if (ICS.isBundleOperand(U)) + return 0; + + if (ICS.isCallee(U)) { + IsNonNull |= !NullPointerIsDefined; + return 0; } - if (ICS) - continue; + unsigned ArgNo = ICS.getArgumentNo(U); + IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo); + auto &DerefAA = A.getAAFor<AADereferenceable>(QueryingAA, IRP); + IsNonNull |= DerefAA.isKnownNonNull(); + return DerefAA.getKnownDereferenceableBytes(); + } - if (!isVolatile(I) && !isNonRelaxedAtomic(I)) - continue; + int64_t Offset; + if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) { + if (Base == &AssociatedValue && getPointerOperand(I) == UseV) { + int64_t DerefBytes = + Offset + (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()); + + IsNonNull |= !NullPointerIsDefined; + return DerefBytes; + } + } + if (const Value *Base = + GetPointerBaseWithConstantOffset(UseV, Offset, DL, + /*AllowNonInbounds*/ false)) { + auto &DerefAA = + A.getAAFor<AADereferenceable>(QueryingAA, IRPosition::value(*Base)); + IsNonNull |= (!NullPointerIsDefined && DerefAA.isKnownNonNull()); + IsNonNull |= (!NullPointerIsDefined && (Offset != 0)); + int64_t DerefBytes = DerefAA.getKnownDereferenceableBytes(); + return std::max(int64_t(0), DerefBytes - Offset); + } + + return 0; +} + +struct AANonNullImpl : AANonNull { + AANonNullImpl(const IRPosition &IRP) + : AANonNull(IRP), + NullIsDefined(NullPointerIsDefined( + getAnchorScope(), + getAssociatedValue().getType()->getPointerAddressSpace())) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (!NullIsDefined && + hasAttr({Attribute::NonNull, Attribute::Dereferenceable})) + indicateOptimisticFixpoint(); + else + AANonNull::initialize(A); + } + + /// See AAFromMustBeExecutedContext + bool followUse(Attributor &A, const Use *U, const Instruction *I) { + bool IsNonNull = false; + bool TrackUse = false; + getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I, + IsNonNull, TrackUse); + takeKnownMaximum(IsNonNull); + return TrackUse; + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumed() ? "nonnull" : "may-null"; + } + + /// Flag to determine if the underlying value can be null and still allow + /// valid accesses. + const bool NullIsDefined; +}; + +/// NonNull attribute for a floating value. 
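// An illustrative example of the use-based reasoning above (assumed outcome,
// not a test from the patch): in
//
//   define void @f(i32* %p) {
//     %v = load i32, i32* %p    ; access at offset 0 in address space 0
//     ret void
//   }
//
// the unconditional load lets the helper derive dereferenceable(4) for %p,
// and, since null is not a defined pointer in address space 0 here, nonnull
// as well. The floating-value deduction below builds on the same helper.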
+struct AANonNullFloating + : AAFromMustBeExecutedContext<AANonNull, AANonNullImpl> { + using Base = AAFromMustBeExecutedContext<AANonNull, AANonNullImpl>; + AANonNullFloating(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Base::initialize(A); + + if (isAtFixpoint()) + return; + + const IRPosition &IRP = getIRPosition(); + const Value &V = IRP.getAssociatedValue(); + const DataLayout &DL = A.getDataLayout(); + + // TODO: This context sensitive query should be removed once we can do + // context sensitive queries in the genericValueTraversal below. + if (isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, IRP.getCtxI(), + /* TODO: DT */ nullptr)) + indicateOptimisticFixpoint(); + } + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus Change = Base::updateImpl(A); + if (isKnownNonNull()) + return Change; + + if (!NullIsDefined) { + const auto &DerefAA = A.getAAFor<AADereferenceable>(*this, getIRPosition()); + if (DerefAA.getAssumedDereferenceableBytes()) + return Change; + } + + const DataLayout &DL = A.getDataLayout(); + + auto VisitValueCB = [&](Value &V, AAAlign::StateType &T, + bool Stripped) -> bool { + const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V)); + if (!Stripped && this == &AA) { + if (!isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, + /* CtxI */ getCtxI(), + /* TODO: DT */ nullptr)) + T.indicatePessimisticFixpoint(); + } else { + // Use abstract attribute information. + const AANonNull::StateType &NS = + static_cast<const AANonNull::StateType &>(AA.getState()); + T ^= NS; + } + return T.isValidState(); + }; + + StateType T; + if (!genericValueTraversal<AANonNull, StateType>(A, getIRPosition(), *this, + T, VisitValueCB)) + return indicatePessimisticFixpoint(); + + return clampStateAndIndicateChange(getState(), T); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) } +}; + +/// NonNull attribute for function return value. +struct AANonNullReturned final + : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl> { + AANonNullReturned(const IRPosition &IRP) + : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl>(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) } +}; + +/// NonNull attribute for function argument. +struct AANonNullArgument final + : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull, + AANonNullImpl> { + AANonNullArgument(const IRPosition &IRP) + : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull, + AANonNullImpl>( + IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) } +}; + +struct AANonNullCallSiteArgument final : AANonNullFloating { + AANonNullCallSiteArgument(const IRPosition &IRP) : AANonNullFloating(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnull) } +}; + +/// NonNull attribute for a call site return position. 
+struct AANonNullCallSiteReturned final + : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull, + AANonNullImpl> { + AANonNullCallSiteReturned(const IRPosition &IRP) + : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull, + AANonNullImpl>( + IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) } +}; + +/// ------------------------ No-Recurse Attributes ---------------------------- + +struct AANoRecurseImpl : public AANoRecurse { + AANoRecurseImpl(const IRPosition &IRP) : AANoRecurse(IRP) {} + + /// See AbstractAttribute::getAsStr() + const std::string getAsStr() const override { + return getAssumed() ? "norecurse" : "may-recurse"; + } +}; + +struct AANoRecurseFunction final : AANoRecurseImpl { + AANoRecurseFunction(const IRPosition &IRP) : AANoRecurseImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoRecurseImpl::initialize(A); + if (const Function *F = getAnchorScope()) + if (A.getInfoCache().getSccSize(*F) == 1) + return; indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; } - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); - auto Opcodes = {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, - (unsigned)Instruction::Call}; + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { - for (unsigned Opcode : Opcodes) { - for (Instruction *I : OpcodeInstMap[Opcode]) { - // At this point we handled all read/write effects and they are all - // nosync, so they can be skipped. - if (I->mayReadOrWriteMemory()) - continue; + auto CheckForNoRecurse = [&](Instruction &I) { + ImmutableCallSite ICS(&I); + if (ICS.hasFnAttr(Attribute::NoRecurse)) + return true; - ImmutableCallSite ICS(I); + const auto &NoRecurseAA = + A.getAAFor<AANoRecurse>(*this, IRPosition::callsite_function(ICS)); + if (!NoRecurseAA.isAssumedNoRecurse()) + return false; - // non-convergent and readnone imply nosync. - if (!ICS.isConvergent()) - continue; + // Recursion to the same function + if (ICS.getCalledFunction() == getAnchorScope()) + return false; + + return true; + }; + + if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this)) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } + + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(norecurse) } +}; + +/// NoRecurse attribute deduction for a call sites. +struct AANoRecurseCallSite final : AANoRecurseImpl { + AANoRecurseCallSite(const IRPosition &IRP) : AANoRecurseImpl(IRP) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoRecurseImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. 
+    Function *F = getAssociatedFunction();
+    const IRPosition &FnPos = IRPosition::function(*F);
+    auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos);
+    return clampStateAndIndicateChange(
+        getState(),
+        static_cast<const AANoRecurse::StateType &>(FnAA.getState()));
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); }
+};
+
+/// ------------------------ Will-Return Attributes ----------------------------
+
+// Helper function that checks whether a function has any cycle.
+// TODO: Replace with more efficient code.
+static bool containsCycle(Function &F) {
+  SmallPtrSet<BasicBlock *, 32> Visited;
+
+  // Traverse BB by dfs and check whether a successor is already visited. Note
+  // that this is conservative: a successor reached through another, already
+  // explored path also triggers, so acyclic CFGs with merge points can be
+  // classified as cyclic.
+  for (BasicBlock *BB : depth_first(&F)) {
+    Visited.insert(BB);
+    for (auto *SuccBB : successors(BB)) {
+      if (Visited.count(SuccBB))
+        return true;
+    }
+  }
+  return false;
+}

-  return ChangeStatus::UNCHANGED;
+// Helper function that checks whether the function has a loop which might
+// become an endless loop.
+// FIXME: Any cycle is regarded as endless loop for now.
+//        We have to allow some patterns.
+static bool containsPossiblyEndlessLoop(Function *F) {
+  return !F || !F->hasExactDefinition() || containsCycle(*F);
 }

-/// ------------------------ No-Free Attributes ----------------------------
+struct AAWillReturnImpl : public AAWillReturn {
+  AAWillReturnImpl(const IRPosition &IRP) : AAWillReturn(IRP) {}

-struct AANoFreeFunction : AbstractAttribute, BooleanState {
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    AAWillReturn::initialize(A);

-  /// See AbstractAttribute::AbstractAttribute(...).
-  AANoFreeFunction(Function &F, InformationCache &InfoCache)
-      : AbstractAttribute(F, InfoCache) {}
+    Function *F = getAssociatedFunction();
+    if (containsPossiblyEndlessLoop(F))
+      indicatePessimisticFixpoint();
+  }

-  /// See AbstractAttribute::getState()
-  ///{
-  AbstractState &getState() override { return *this; }
-  const AbstractState &getState() const override { return *this; }
-  ///}
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    auto CheckForWillReturn = [&](Instruction &I) {
+      IRPosition IPos = IRPosition::callsite_function(ImmutableCallSite(&I));
+      const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, IPos);
+      if (WillReturnAA.isKnownWillReturn())
+        return true;
+      if (!WillReturnAA.isAssumedWillReturn())
+        return false;
+      const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(*this, IPos);
+      return NoRecurseAA.isAssumedNoRecurse();
+    };
+
+    if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this))
+      return indicatePessimisticFixpoint();
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractAttribute::getAsStr()
+  const std::string getAsStr() const override {
+    return getAssumed() ? "willreturn" : "may-noreturn";
+  }
+};
+
+struct AAWillReturnFunction final : AAWillReturnImpl {
+  AAWillReturnFunction(const IRPosition &IRP) : AAWillReturnImpl(IRP) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(willreturn) }
+};
+
+/// WillReturn attribute deduction for a call site.
+struct AAWillReturnCallSite final : AAWillReturnImpl {
+  AAWillReturnCallSite(const IRPosition &IRP) : AAWillReturnImpl(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
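  // Why the NoRecurse query in the update above is needed (illustrative
  // reasoning, not from the patch): "assumed willreturn" is optimistic. For a
  // hypothetical
  //
  //   void f(int n) { if (n) f(n - 1); }
  //
  // the recursive call site would assume its callee returns, so willreturn
  // could justify itself circularly; the CFG-cycle guard does not catch this
  // since the recursion is through a call, not a loop. Requiring the call to
  // also be (assumed) norecurse breaks that cycle and keeps the fixpoint
  // sound.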
+ void initialize(Attributor &A) override { + AAWillReturnImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast<const AAWillReturn::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); } +}; + +/// ------------------------ NoAlias Argument Attribute ------------------------ + +struct AANoAliasImpl : AANoAlias { + AANoAliasImpl(const IRPosition &IRP) : AANoAlias(IRP) {} + + const std::string getAsStr() const override { + return getAssumed() ? "noalias" : "may-alias"; + } +}; + +/// NoAlias attribute for a floating value. +struct AANoAliasFloating final : AANoAliasImpl { + AANoAliasFloating(const IRPosition &IRP) : AANoAliasImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoAliasImpl::initialize(A); + Value &Val = getAssociatedValue(); + if (isa<AllocaInst>(Val)) + indicateOptimisticFixpoint(); + if (isa<ConstantPointerNull>(Val) && + Val.getType()->getPointerAddressSpace() == 0) + indicateOptimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Implement this. + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(noalias) + } +}; + +/// NoAlias attribute for an argument. +struct AANoAliasArgument final + : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> { + AANoAliasArgument(const IRPosition &IRP) + : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) } +}; + +struct AANoAliasCallSiteArgument final : AANoAliasImpl { + AANoAliasCallSiteArgument(const IRPosition &IRP) : AANoAliasImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // See callsite argument attribute and callee argument attribute. + ImmutableCallSite ICS(&getAnchorValue()); + if (ICS.paramHasAttr(getArgNo(), Attribute::NoAlias)) + indicateOptimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // We can deduce "noalias" if the following conditions hold. + // (i) Associated value is assumed to be noalias in the definition. + // (ii) Associated value is assumed to be no-capture in all the uses + // possibly executed before this callsite. + // (iii) There is no other pointer argument which could alias with the + // value. + + const Value &V = getAssociatedValue(); + const IRPosition IRP = IRPosition::value(V); + + // (i) Check whether noalias holds in the definition. 
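    // A hypothetical illustration of why all three conditions matter:
    //
    //   char *q = strdup(s); // q is a noalias (malloc-like) definition
    //   g(q, q);             // same pointer passed in two argument slots
    //
    // Condition (i) holds for q, yet neither argument of g may become
    // noalias because the other operand aliases it; that is what check (iii)
    // rejects. Check (ii) rules out pointers whose earlier uses may have
    // been captured and could thus alias through memory.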
+ + auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP); + + if (!NoAliasAA.isAssumedNoAlias()) + return indicatePessimisticFixpoint(); + + LLVM_DEBUG(dbgs() << "[Attributor][AANoAliasCSArg] " << V + << " is assumed NoAlias in the definition\n"); + + // (ii) Check whether the value is captured in the scope using AANoCapture. + // FIXME: This is conservative though, it is better to look at CFG and + // check only uses possibly executed before this callsite. - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_FUNCTION; } + auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP); + if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) { + LLVM_DEBUG( + dbgs() << "[Attributor][AANoAliasCSArg] " << V + << " cannot be noalias as it is potentially captured\n"); + return indicatePessimisticFixpoint(); + } + + // (iii) Check there is no other pointer argument which could alias with the + // value. + ImmutableCallSite ICS(&getAnchorValue()); + for (unsigned i = 0; i < ICS.getNumArgOperands(); i++) { + if (getArgNo() == (int)i) + continue; + const Value *ArgOp = ICS.getArgOperand(i); + if (!ArgOp->getType()->isPointerTy()) + continue; + + if (const Function *F = getAnchorScope()) { + if (AAResults *AAR = A.getInfoCache().getAAResultsForFunction(*F)) { + bool IsAliasing = AAR->isNoAlias(&getAssociatedValue(), ArgOp); + LLVM_DEBUG(dbgs() + << "[Attributor][NoAliasCSArg] Check alias between " + "callsite arguments " + << AAR->isNoAlias(&getAssociatedValue(), ArgOp) << " " + << getAssociatedValue() << " " << *ArgOp << " => " + << (IsAliasing ? "" : "no-") << "alias \n"); + + if (IsAliasing) + continue; + } + } + return indicatePessimisticFixpoint(); + } + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noalias) } +}; + +/// NoAlias attribute for function return value. +struct AANoAliasReturned final : AANoAliasImpl { + AANoAliasReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + virtual ChangeStatus updateImpl(Attributor &A) override { + + auto CheckReturnValue = [&](Value &RV) -> bool { + if (Constant *C = dyn_cast<Constant>(&RV)) + if (C->isNullValue() || isa<UndefValue>(C)) + return true; + + /// For now, we can only deduce noalias if we have call sites. + /// FIXME: add more support. + ImmutableCallSite ICS(&RV); + if (!ICS) + return false; + + const IRPosition &RVPos = IRPosition::value(RV); + const auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, RVPos); + if (!NoAliasAA.isAssumedNoAlias()) + return false; + + const auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, RVPos); + return NoCaptureAA.isAssumedNoCaptureMaybeReturned(); + }; + + if (!A.checkForAllReturnedValues(CheckReturnValue, *this)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noalias) } +}; + +/// NoAlias attribute deduction for a call site return value. +struct AANoAliasCallSiteReturned final : AANoAliasImpl { + AANoAliasCallSiteReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoAliasImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). 
+  ChangeStatus updateImpl(Attributor &A) override {
+    // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call sites arguments instead of
+    //       redirecting requests to the callee argument.
+    Function *F = getAssociatedFunction();
+    const IRPosition &FnPos = IRPosition::returned(*F);
+    auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos);
+    return clampStateAndIndicateChange(
+        getState(), static_cast<const AANoAlias::StateType &>(FnAA.getState()));
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); }
+};
+
+/// -------------------AAIsDead Function Attribute-----------------------
+
+struct AAIsDeadImpl : public AAIsDead {
+  AAIsDeadImpl(const IRPosition &IRP) : AAIsDead(IRP) {}
+
+  void initialize(Attributor &A) override {
+    const Function *F = getAssociatedFunction();
+    if (F && !F->isDeclaration())
+      exploreFromEntry(A, F);
+  }
+
+  void exploreFromEntry(Attributor &A, const Function *F) {
+    ToBeExploredPaths.insert(&(F->getEntryBlock().front()));
+
+    for (size_t i = 0; i < ToBeExploredPaths.size(); ++i)
+      if (const Instruction *NextNoReturnI =
+              findNextNoReturn(A, ToBeExploredPaths[i]))
+        NoReturnCalls.insert(NextNoReturnI);
+
+    // Mark the block live after we looked for no-return instructions.
+    assumeLive(A, F->getEntryBlock());
+  }
+
+  /// Find the next assumed noreturn instruction in the block of \p I starting
+  /// from, thus including, \p I.
+  ///
+  /// The caller is responsible for monitoring the ToBeExploredPaths set, as
+  /// new instructions discovered in other basic blocks will be placed there.
+  ///
+  /// \returns The next assumed noreturn instruction in the block of \p I
+  ///          starting from, thus including, \p I.
+  const Instruction *findNextNoReturn(Attributor &A, const Instruction *I);

   /// See AbstractAttribute::getAsStr().
   const std::string getAsStr() const override {
-    return getAssumed() ? "nofree" : "may-free";
+    return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" +
+           std::to_string(getAssociatedFunction()->size()) + "][#NRI " +
+           std::to_string(NoReturnCalls.size()) + "]";
+  }
+
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    assert(getState().isValidState() &&
+           "Attempted to manifest an invalid state!");
+
+    ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+    Function &F = *getAssociatedFunction();
+
+    if (AssumedLiveBlocks.empty()) {
+      A.deleteAfterManifest(F);
+      return ChangeStatus::CHANGED;
+    }
+
+    // Flag to determine if we can change an invoke to a call assuming the
+    // callee is nounwind. This is not possible if the personality of the
+    // function allows catching asynchronous exceptions.
+    bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F);
+
+    for (const Instruction *NRC : NoReturnCalls) {
+      Instruction *I = const_cast<Instruction *>(NRC);
+      BasicBlock *BB = I->getParent();
+      Instruction *SplitPos = I->getNextNode();
+      // TODO: mark stuff before unreachable instructions as dead.
+
+      if (auto *II = dyn_cast<InvokeInst>(I)) {
+        // If we keep the invoke, the split position is at the beginning of the
+        // normal destination block (it invokes a noreturn function after all).
+        BasicBlock *NormalDestBB = II->getNormalDest();
+        SplitPos = &NormalDestBB->front();
+
+        /// Invoke is replaced with a call and unreachable is placed after it if
+        /// the callee is nounwind and noreturn. Otherwise, we keep the invoke
+        /// and only place an unreachable in the normal successor.
+        if (Invoke2CallAllowed) {
+          if (II->getCalledFunction()) {
+            const IRPosition &IPos = IRPosition::callsite_function(*II);
+            const auto &AANoUnw = A.getAAFor<AANoUnwind>(*this, IPos);
+            if (AANoUnw.isAssumedNoUnwind()) {
+              LLVM_DEBUG(dbgs()
+                         << "[AAIsDead] Replace invoke with call inst\n");
+              // We do not need an invoke (II) but instead want a call followed
+              // by an unreachable. However, we do not remove II as other
+              // abstract attributes might have it cached as part of their
+              // results. Given that we modify the CFG anyway, we simply keep II
+              // around but in a new dead block. To avoid II being live through
+              // a different edge we have to ensure the block we place it in is
+              // only reached from the current block of II and then not reached
+              // at all when we insert the unreachable.
+              SplitBlockPredecessors(NormalDestBB, {BB}, ".i2c");
+              CallInst *CI = createCallMatchingInvoke(II);
+              CI->insertBefore(II);
+              CI->takeName(II);
+              II->replaceAllUsesWith(CI);
+              SplitPos = CI->getNextNode();
+            }
+          }
+        }
+
+        if (SplitPos == &NormalDestBB->front()) {
+          // If this is an invoke of a noreturn function the edge to the normal
+          // destination block is dead but not necessarily the block itself.
+          // TODO: We need to move to an edge based system during deduction and
+          //       also manifest.
+          assert(!NormalDestBB->isLandingPad() &&
+                 "Expected the normal destination not to be a landingpad!");
+          if (NormalDestBB->getUniquePredecessor() == BB) {
+            assumeLive(A, *NormalDestBB);
+          } else {
+            BasicBlock *SplitBB =
+                SplitBlockPredecessors(NormalDestBB, {BB}, ".dead");
+            // The split block is live even if it contains only an unreachable
+            // instruction at the end.
+            assumeLive(A, *SplitBB);
+            SplitPos = SplitBB->getTerminator();
+            HasChanged = ChangeStatus::CHANGED;
+          }
+        }
+      }
+
+      if (isa_and_nonnull<UnreachableInst>(SplitPos))
+        continue;
+
+      BB = SplitPos->getParent();
+      SplitBlock(BB, SplitPos);
+      changeToUnreachable(BB->getTerminator(), /* UseLLVMTrap */ false);
+      HasChanged = ChangeStatus::CHANGED;
+    }
+
+    for (BasicBlock &BB : F)
+      if (!AssumedLiveBlocks.count(&BB))
+        A.deleteAfterManifest(BB);
+
+    return HasChanged;
   }

   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override;

-  /// See AbstractAttribute::getAttrKind().
-  Attribute::AttrKind getAttrKind() const override { return ID; }
+  /// See AAIsDead::isAssumedDead(BasicBlock *).
+  bool isAssumedDead(const BasicBlock *BB) const override {
+    assert(BB->getParent() == getAssociatedFunction() &&
+           "BB must be in the same anchor scope function.");
+
+    if (!getAssumed())
+      return false;
+    return !AssumedLiveBlocks.count(BB);
+  }
+
+  /// See AAIsDead::isKnownDead(BasicBlock *).
+  bool isKnownDead(const BasicBlock *BB) const override {
+    return getKnown() && isAssumedDead(BB);
+  }
+
+  /// See AAIsDead::isAssumedDead(Instruction *I).
+  bool isAssumedDead(const Instruction *I) const override {
+    assert(I->getParent()->getParent() == getAssociatedFunction() &&
+           "Instruction must be in the same anchor scope function.");
+
+    if (!getAssumed())
+      return false;
+
+    // If it is not in AssumedLiveBlocks then it is for sure dead.
+    // Otherwise, it can still be after a noreturn call in a live block.
+    if (!AssumedLiveBlocks.count(I->getParent()))
+      return true;
+
+    // If it is not after a noreturn call, then it is live.
+    return isAfterNoReturn(I);
+  }
+
+  /// See AAIsDead::isKnownDead(Instruction *I).
+  bool isKnownDead(const Instruction *I) const override {
+    return getKnown() && isAssumedDead(I);
+  }
+
+  /// Check if the instruction is after a noreturn call; in other words,
+  /// assumed dead.
+  bool isAfterNoReturn(const Instruction *I) const;

-  /// Return true if "nofree" is assumed.
-  bool isAssumedNoFree() const { return getAssumed(); }
+  /// Determine if \p F might catch asynchronous exceptions.
+  static bool mayCatchAsynchronousExceptions(const Function &F) {
+    return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F);
+  }
+
+  /// Assume \p BB is (partially) live now and indicate to the Attributor \p A
+  /// that internal functions called from \p BB should now be looked at.
+  void assumeLive(Attributor &A, const BasicBlock &BB) {
+    if (!AssumedLiveBlocks.insert(&BB).second)
+      return;
+
+    // We assume that all of BB is (probably) live now and if there are calls
+    // to internal functions we will assume that those are now live as well.
+    // This is a performance optimization for blocks with calls to a lot of
+    // internal functions. It can however cause dead functions to be treated
+    // as live.
+    for (const Instruction &I : BB)
+      if (ImmutableCallSite ICS = ImmutableCallSite(&I))
+        if (const Function *F = ICS.getCalledFunction())
+          if (F->hasLocalLinkage())
+            A.markLiveInternalFunction(*F);
+  }

-  /// Return true if "nofree" is known.
-  bool isKnownNoFree() const { return getKnown(); }
+  /// Collection of to-be-explored paths.
+  SmallSetVector<const Instruction *, 8> ToBeExploredPaths;

-  /// The identifier used by the Attributor for this class of attributes.
-  static constexpr Attribute::AttrKind ID = Attribute::NoFree;
+  /// Collection of all assumed live BasicBlocks.
+  DenseSet<const BasicBlock *> AssumedLiveBlocks;
+
+  /// Collection of calls with noreturn attribute, assumed or known.
+  SmallSetVector<const Instruction *, 4> NoReturnCalls;
 };

-ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) {
-  Function &F = getAnchorScope();
+struct AAIsDeadFunction final : public AAIsDeadImpl {
+  AAIsDeadFunction(const IRPosition &IRP) : AAIsDeadImpl(IRP) {}

-  // The map from instruction opcodes to those instructions in the function.
-  auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECL(PartiallyDeadBlocks, Function,
+               "Number of basic blocks classified as partially dead");
+    BUILD_STAT_NAME(PartiallyDeadBlocks, Function) += NoReturnCalls.size();
+  }
+};

-  for (unsigned Opcode :
-       {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
-        (unsigned)Instruction::Call}) {
-    for (Instruction *I : OpcodeInstMap[Opcode]) {
+bool AAIsDeadImpl::isAfterNoReturn(const Instruction *I) const {
+  const Instruction *PrevI = I->getPrevNode();
+  while (PrevI) {
+    if (NoReturnCalls.count(PrevI))
+      return true;
+    PrevI = PrevI->getPrevNode();
+  }
+  return false;
+}

-      auto ICS = ImmutableCallSite(I);
-      auto *NoFreeAA = A.getAAFor<AANoFreeFunction>(*this, *I);
+const Instruction *AAIsDeadImpl::findNextNoReturn(Attributor &A,
+                                                  const Instruction *I) {
+  const BasicBlock *BB = I->getParent();
+  const Function &F = *BB->getParent();

-      if ((!NoFreeAA || !NoFreeAA->isAssumedNoFree()) &&
-          !ICS.hasFnAttr(Attribute::NoFree)) {
-        indicatePessimisticFixpoint();
-        return ChangeStatus::CHANGED;
+  // Flag to determine if we can change an invoke to a call assuming the callee
+  // is nounwind. This is not possible if the personality of the function
+  // allows catching asynchronous exceptions.
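  // Sketch of the exploration scheme this function implements (illustrative
  // pseudo-C++, not part of the patch): a forward scan per block, driven by
  // the ToBeExploredPaths worklist, that stops at the first assumed-noreturn
  // call so everything behind it is never marked live.
  //
  //   while (Idx < ToBeExploredPaths.size()) {
  //     const Instruction *Cur = ToBeExploredPaths[Idx++];
  //     if (const Instruction *NR = findNextNoReturn(A, Cur))
  //       NoReturnCalls.insert(NR);   // successors stay unexplored
  //     // otherwise findNextNoReturn enqueued the successor blocks
  //   }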
+  bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F);
+
+  // TODO: We should have a function that determines if an "edge" is dead.
+  //       Edges could be from an instruction to the next or from a terminator
+  //       to the successor. For now, we need to special case the unwind block
+  //       of InvokeInst below.
+
+  while (I) {
+    ImmutableCallSite ICS(I);
+
+    if (ICS) {
+      const IRPosition &IPos = IRPosition::callsite_function(ICS);
+      // Regardless of the no-return property of an invoke instruction we only
+      // learn that the regular successor is not reachable through this
+      // instruction but the unwind block might still be.
+      if (auto *Invoke = dyn_cast<InvokeInst>(I)) {
+        // Use nounwind to justify that the unwind block is dead as well.
+        const auto &AANoUnw = A.getAAFor<AANoUnwind>(*this, IPos);
+        if (!Invoke2CallAllowed || !AANoUnw.isAssumedNoUnwind()) {
+          assumeLive(A, *Invoke->getUnwindDest());
+          ToBeExploredPaths.insert(&Invoke->getUnwindDest()->front());
+        }
+      }
+
+      const auto &NoReturnAA = A.getAAFor<AANoReturn>(*this, IPos);
+      if (NoReturnAA.isAssumedNoReturn())
+        return I;
+    }
+
+    I = I->getNextNode();
+  }
+
+  // Get new paths (reachable blocks).
+  for (const BasicBlock *SuccBB : successors(BB)) {
+    assumeLive(A, *SuccBB);
+    ToBeExploredPaths.insert(&SuccBB->front());
+  }
+
+  // No noreturn instruction found.
+  return nullptr;
+}

-/// ------------------------ NonNull Argument Attribute ------------------------
-struct AANonNullImpl : AANonNull, BooleanState {
+ChangeStatus AAIsDeadImpl::updateImpl(Attributor &A) {
+  ChangeStatus Status = ChangeStatus::UNCHANGED;
+
+  // Temporary collection to iterate over existing noreturn instructions. This
+  // allows easier modification of the NoReturnCalls collection.
+  SmallVector<const Instruction *, 8> NoReturnChanged;
+
+  for (const Instruction *I : NoReturnCalls)
+    NoReturnChanged.push_back(I);
+
+  for (const Instruction *I : NoReturnChanged) {
+    size_t Size = ToBeExploredPaths.size();
+
+    const Instruction *NextNoReturnI = findNextNoReturn(A, I);
+    if (NextNoReturnI != I) {
+      Status = ChangeStatus::CHANGED;
+      NoReturnCalls.remove(I);
+      if (NextNoReturnI)
+        NoReturnCalls.insert(NextNoReturnI);
+    }

-  AANonNullImpl(Value &V, InformationCache &InfoCache)
-      : AANonNull(V, InfoCache) {}
+    // Explore new paths.
+    while (Size != ToBeExploredPaths.size()) {
+      Status = ChangeStatus::CHANGED;
+      if (const Instruction *NextNoReturnI =
+              findNextNoReturn(A, ToBeExploredPaths[Size++]))
+        NoReturnCalls.insert(NextNoReturnI);
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "[AAIsDead] AssumedLiveBlocks: "
+                    << AssumedLiveBlocks.size() << " Total number of blocks: "
+                    << getAssociatedFunction()->size() << "\n");

-  AANonNullImpl(Value *AssociatedVal, Value &AnchoredValue,
-                InformationCache &InfoCache)
-      : AANonNull(AssociatedVal, AnchoredValue, InfoCache) {}
+  // If we know everything is live there is no need to query for liveness.
+  if (NoReturnCalls.empty() &&
+      getAssociatedFunction()->size() == AssumedLiveBlocks.size()) {
+    // Indicating a pessimistic fixpoint will cause the state to be "invalid"
+    // which will cause the Attributor to not return the AAIsDead on request,
+    // which will prevent us from querying isAssumedDead().
+    indicatePessimisticFixpoint();
+    assert(!isValidState() && "Expected an invalid state!");
+    Status = ChangeStatus::CHANGED;
+  }
+
+  return Status;
+}
+
+/// Liveness information for a call site.
+struct AAIsDeadCallSite final : AAIsDeadImpl { + AAIsDeadCallSite(const IRPosition &IRP) : AAIsDeadImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites instead of + // redirecting requests to the callee. + llvm_unreachable("Abstract attributes for liveness are not " + "supported for call sites yet!"); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// -------------------- Dereferenceable Argument Attribute -------------------- + +template <> +ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S, + const DerefState &R) { + ChangeStatus CS0 = clampStateAndIndicateChange<IntegerState>( + S.DerefBytesState, R.DerefBytesState); + ChangeStatus CS1 = + clampStateAndIndicateChange<IntegerState>(S.GlobalState, R.GlobalState); + return CS0 | CS1; +} + +struct AADereferenceableImpl : AADereferenceable { + AADereferenceableImpl(const IRPosition &IRP) : AADereferenceable(IRP) {} + using StateType = DerefState; + + void initialize(Attributor &A) override { + SmallVector<Attribute, 4> Attrs; + getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull}, + Attrs); + for (const Attribute &Attr : Attrs) + takeKnownDerefBytesMaximum(Attr.getValueAsInt()); + + NonNullAA = &A.getAAFor<AANonNull>(*this, getIRPosition()); + + const IRPosition &IRP = this->getIRPosition(); + bool IsFnInterface = IRP.isFnInterfaceKind(); + const Function *FnScope = IRP.getAnchorScope(); + if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition())) + indicatePessimisticFixpoint(); + } /// See AbstractAttribute::getState() /// { - AbstractState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } + StateType &getState() override { return *this; } + const StateType &getState() const override { return *this; } /// } + /// See AAFromMustBeExecutedContext + bool followUse(Attributor &A, const Use *U, const Instruction *I) { + bool IsNonNull = false; + bool TrackUse = false; + int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse( + A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse); + takeKnownDerefBytesMaximum(DerefBytes); + return TrackUse; + } + + void getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl<Attribute> &Attrs) const override { + // TODO: Add *_globally support + if (isAssumedNonNull()) + Attrs.emplace_back(Attribute::getWithDereferenceableBytes( + Ctx, getAssumedDereferenceableBytes())); + else + Attrs.emplace_back(Attribute::getWithDereferenceableOrNullBytes( + Ctx, getAssumedDereferenceableBytes())); + } + /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { - return getAssumed() ? "nonnull" : "may-null"; + if (!getAssumedDereferenceableBytes()) + return "unknown-dereferenceable"; + return std::string("dereferenceable") + + (isAssumedNonNull() ? "" : "_or_null") + + (isAssumedGlobal() ? "_globally" : "") + "<" + + std::to_string(getKnownDereferenceableBytes()) + "-" + + std::to_string(getAssumedDereferenceableBytes()) + ">"; } +}; + +/// Dereferenceable attribute for a floating value. 
+struct AADereferenceableFloating
+    : AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl> {
+  using Base =
+      AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl>;
+  AADereferenceableFloating(const IRPosition &IRP) : Base(IRP) {}
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    ChangeStatus Change = Base::updateImpl(A);
+
+    const DataLayout &DL = A.getDataLayout();
+
+    auto VisitValueCB = [&](Value &V, DerefState &T, bool Stripped) -> bool {
+      unsigned IdxWidth =
+          DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
+      APInt Offset(IdxWidth, 0);
+      const Value *Base =
+          V.stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+      const auto &AA =
+          A.getAAFor<AADereferenceable>(*this, IRPosition::value(*Base));
+      int64_t DerefBytes = 0;
+      if (!Stripped && this == &AA) {
+        // Use IR information if we did not strip anything.
+        // TODO: track globally.
+        bool CanBeNull;
+        DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull);
+        T.GlobalState.indicatePessimisticFixpoint();
+      } else {
+        const DerefState &DS = static_cast<const DerefState &>(AA.getState());
+        DerefBytes = DS.DerefBytesState.getAssumed();
+        T.GlobalState &= DS.GlobalState;
+      }
+
+      // For now we do not try to "increase" dereferenceability due to negative
+      // indices as we first have to come up with code to deal with loops and
+      // with overflows of the dereferenceable bytes.
+      int64_t OffsetSExt = Offset.getSExtValue();
+      if (OffsetSExt < 0)
+        OffsetSExt = 0;
+
+      T.takeAssumedDerefBytesMinimum(
+          std::max(int64_t(0), DerefBytes - OffsetSExt));
+
+      if (this == &AA) {
+        if (!Stripped) {
+          // If nothing was stripped IR information is all we got.
+          T.takeKnownDerefBytesMaximum(
+              std::max(int64_t(0), DerefBytes - OffsetSExt));
+          T.indicatePessimisticFixpoint();
+        } else if (OffsetSExt > 0) {
+          // If something was stripped but there is circular reasoning we look
+          // for the offset. If it is positive we basically decrease the
+          // dereferenceable bytes in a circular loop now, which will simply
+          // drive them down to the known value in a very slow way which we
+          // can accelerate.
+          T.indicatePessimisticFixpoint();
+        }
+      }
+
+      return T.isValidState();
+    };

-  /// See AANonNull::isAssumedNonNull().
-  bool isAssumedNonNull() const override { return getAssumed(); }
+    DerefState T;
+    if (!genericValueTraversal<AADereferenceable, DerefState>(
+            A, getIRPosition(), *this, T, VisitValueCB))
+      return indicatePessimisticFixpoint();

-  /// See AANonNull::isKnownNonNull().
-  bool isKnownNonNull() const override { return getKnown(); }
+    return Change | clampStateAndIndicateChange(getState(), T);
+  }

-  /// Generate a predicate that checks if a given value is assumed nonnull.
-  /// The generated function returns true if a value satisfies any of
-  /// following conditions.
-  /// (i) A value is known nonZero(=nonnull).
-  /// (ii) A value is associated with AANonNull and its isAssumedNonNull() is
-  ///      true.
-  std::function<bool(Value &)> generatePredicate(Attributor &);
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FLOATING_ATTR(dereferenceable)
+  }
};

-std::function<bool(Value &)> AANonNullImpl::generatePredicate(Attributor &A) {
-  // FIXME: The `AAReturnedValues` should provide the predicate with the
-  // `ReturnInst` vector as well such that we can use the control flow sensitive
-  // version of `isKnownNonZero`.
This should fix `test11` in - // `test/Transforms/FunctionAttrs/nonnull.ll` +/// Dereferenceable attribute for a return value. +struct AADereferenceableReturned final + : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl, + DerefState> { + AADereferenceableReturned(const IRPosition &IRP) + : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl, + DerefState>(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(dereferenceable) + } +}; - std::function<bool(Value &)> Pred = [&](Value &RV) -> bool { - if (isKnownNonZero(&RV, getAnchorScope().getParent()->getDataLayout())) - return true; +/// Dereferenceable attribute for an argument +struct AADereferenceableArgument final + : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl, DerefState> { + using Base = AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl, DerefState>; + AADereferenceableArgument(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(dereferenceable) + } +}; - auto *NonNullAA = A.getAAFor<AANonNull>(*this, RV); +/// Dereferenceable attribute for a call site argument. +struct AADereferenceableCallSiteArgument final : AADereferenceableFloating { + AADereferenceableCallSiteArgument(const IRPosition &IRP) + : AADereferenceableFloating(IRP) {} - ImmutableCallSite ICS(&RV); + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(dereferenceable) + } +}; - if ((!NonNullAA || !NonNullAA->isAssumedNonNull()) && - (!ICS || !ICS.hasRetAttr(Attribute::NonNull))) - return false; +/// Dereferenceable attribute deduction for a call site return value. +struct AADereferenceableCallSiteReturned final + : AACallSiteReturnedFromReturnedAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl> { + using Base = AACallSiteReturnedFromReturnedAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl>; + AADereferenceableCallSiteReturned(const IRPosition &IRP) : Base(IRP) {} - return true; - }; + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Base::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } - return Pred; -} + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + + ChangeStatus Change = Base::updateImpl(A); + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::returned(*F); + auto &FnAA = A.getAAFor<AADereferenceable>(*this, FnPos); + return Change | + clampStateAndIndicateChange( + getState(), static_cast<const DerefState &>(FnAA.getState())); + } -/// NonNull attribute for function return value. 
-struct AANonNullReturned : AANonNullImpl {
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CS_ATTR(dereferenceable);
+  }
+};

-  AANonNullReturned(Function &F, InformationCache &InfoCache)
-      : AANonNullImpl(F, InfoCache) {}
+// ------------------------ Align Argument Attribute ------------------------

-  /// See AbstractAttribute::getManifestPosition().
-  ManifestPosition getManifestPosition() const override { return MP_RETURNED; }
+struct AAAlignImpl : AAAlign {
+  AAAlignImpl(const IRPosition &IRP) : AAAlign(IRP) {}

-  /// See AbstractAttriubute::initialize(...).
+  // Max alignment value allowed in IR
+  static const unsigned MAX_ALIGN = 1U << 29;
+
+  /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
-    Function &F = getAnchorScope();
+    takeAssumedMinimum(MAX_ALIGN);

-    // Already nonnull.
-    if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
-                                       Attribute::NonNull))
-      indicateOptimisticFixpoint();
+    SmallVector<Attribute, 4> Attrs;
+    getAttrs({Attribute::Alignment}, Attrs);
+    for (const Attribute &Attr : Attrs)
+      takeKnownMaximum(Attr.getValueAsInt());
+
+    if (getIRPosition().isFnInterfaceKind() &&
+        (!getAssociatedFunction() ||
+         !getAssociatedFunction()->hasExactDefinition()))
+      indicatePessimisticFixpoint();
   }

+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+    // Check for users that allow alignment annotations.
+    Value &AnchorVal = getIRPosition().getAnchorValue();
+    for (const Use &U : AnchorVal.uses()) {
+      if (auto *SI = dyn_cast<StoreInst>(U.getUser())) {
+        if (SI->getPointerOperand() == &AnchorVal)
+          if (SI->getAlignment() < getAssumedAlign()) {
+            STATS_DECLTRACK(AAAlign, Store,
+                            "Number of times alignment added to a store");
+            SI->setAlignment(Align(getAssumedAlign()));
+            Changed = ChangeStatus::CHANGED;
+          }
+      } else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) {
+        if (LI->getPointerOperand() == &AnchorVal)
+          if (LI->getAlignment() < getAssumedAlign()) {
+            LI->setAlignment(Align(getAssumedAlign()));
+            STATS_DECLTRACK(AAAlign, Load,
+                            "Number of times alignment added to a load");
+            Changed = ChangeStatus::CHANGED;
+          }
+      }
+    }
+
+    return AAAlign::manifest(A) | Changed;
+  }
+
+  // TODO: Provide a helper to determine the implied ABI alignment and check
+  //       that value in the existing manifest method and a new one for
+  //       AAAlignImpl, to avoid making the alignment explicit if it does not
+  //       improve on the implied alignment.
+
+  /// See AbstractAttribute::getDeducedAttributes
+  virtual void
+  getDeducedAttributes(LLVMContext &Ctx,
+                       SmallVectorImpl<Attribute> &Attrs) const override {
+    if (getAssumedAlign() > 1)
+      Attrs.emplace_back(
+          Attribute::getWithAlignment(Ctx, Align(getAssumedAlign())));
+  }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr() const override {
+    return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) +
+                                "-" + std::to_string(getAssumedAlign()) + ">")
+                             : "unknown-align";
+  }
+};
+
+/// Align attribute for a floating value.
+struct AAAlignFloating : AAAlignImpl {
+  AAAlignFloating(const IRPosition &IRP) : AAAlignImpl(IRP) {}
+
+  /// See AbstractAttribute::updateImpl(...).
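  // An illustrative example of the deduction this section enables (assumed
  // outcome, not a test from the patch):
  //
  //   @G = internal global i64 0, align 16
  //   define internal void @callee(i64* %p) { ... }
  //   ...
  //   call void @callee(i64* @G)
  //
  // The call-site argument inherits align 16 from @G, AAAlignArgument clamps
  // over all call sites, and %p can be annotated align 16, which the
  // manifest() above also propagates onto loads and stores through %p.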
-  ChangeStatus updateImpl(Attributor &A) override;
+  ChangeStatus updateImpl(Attributor &A) override {
+    const DataLayout &DL = A.getDataLayout();
+
+    auto VisitValueCB = [&](Value &V, AAAlign::StateType &T,
+                            bool Stripped) -> bool {
+      const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V));
+      if (!Stripped && this == &AA) {
+        // Use only IR information if we did not strip anything.
+        const MaybeAlign PA = V.getPointerAlignment(DL);
+        T.takeKnownMaximum(PA ? PA->value() : 0);
+        T.indicatePessimisticFixpoint();
+      } else {
+        // Use abstract attribute information.
+        const AAAlign::StateType &DS =
+            static_cast<const AAAlign::StateType &>(AA.getState());
+        T ^= DS;
+      }
+      return T.isValidState();
+    };
+
+    StateType T;
+    if (!genericValueTraversal<AAAlign, StateType>(A, getIRPosition(), *this, T,
+                                                   VisitValueCB))
+      return indicatePessimisticFixpoint();
+
+    // TODO: If we know we visited all incoming values, thus none are assumed
+    //       dead, we can take the known information from the state T.
+    return clampStateAndIndicateChange(getState(), T);
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(align) }
};

-ChangeStatus AANonNullReturned::updateImpl(Attributor &A) {
-  Function &F = getAnchorScope();
+/// Align attribute for function return value.
+struct AAAlignReturned final
+    : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl> {
+  AAAlignReturned(const IRPosition &IRP)
+      : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>(IRP) {}

-  auto *AARetVal = A.getAAFor<AAReturnedValues>(*this, F);
-  if (!AARetVal) {
-    indicatePessimisticFixpoint();
-    return ChangeStatus::CHANGED;
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) }
+};
+
+/// Align attribute for function argument.
+struct AAAlignArgument final
+    : AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl> {
+  AAAlignArgument(const IRPosition &IRP)
+      : AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl>(IRP) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(aligned) }
+};
+
+struct AAAlignCallSiteArgument final : AAAlignFloating {
+  AAAlignCallSiteArgument(const IRPosition &IRP) : AAAlignFloating(IRP) {}
+
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    return AAAlignImpl::manifest(A);
   }

-  std::function<bool(Value &)> Pred = this->generatePredicate(A);
-  if (!AARetVal->checkForallReturnedValues(Pred)) {
-    indicatePessimisticFixpoint();
-    return ChangeStatus::CHANGED;
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(aligned) }
+};
+
+/// Align attribute deduction for a call site return value.
+struct AAAlignCallSiteReturned final : AAAlignImpl {
+  AAAlignCallSiteReturned(const IRPosition &IRP) : AAAlignImpl(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    AAAlignImpl::initialize(A);
+    Function *F = getAssociatedFunction();
+    if (!F)
+      indicatePessimisticFixpoint();
   }

-  return ChangeStatus::UNCHANGED;
-}
-/// NonNull attribute for function argument.
-struct AANonNullArgument : AANonNullImpl {
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+    //       redirecting requests to the callee argument.
+    Function *F = getAssociatedFunction();
+    const IRPosition &FnPos = IRPosition::returned(*F);
+    auto &FnAA = A.getAAFor<AAAlign>(*this, FnPos);
+    return clampStateAndIndicateChange(
+        getState(), static_cast<const AAAlign::StateType &>(FnAA.getState()));
+  }

-  AANonNullArgument(Argument &A, InformationCache &InfoCache)
-      : AANonNullImpl(A, InfoCache) {}
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
+};

-  /// See AbstractAttribute::getManifestPosition().
-  ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
+/// ------------------ Function No-Return Attribute ----------------------------
+struct AANoReturnImpl : public AANoReturn {
+  AANoReturnImpl(const IRPosition &IRP) : AANoReturn(IRP) {}

-  /// See AbstractAttriubute::initialize(...).
+  /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
-    Argument *Arg = cast<Argument>(getAssociatedValue());
-    if (Arg->hasNonNullAttr())
-      indicateOptimisticFixpoint();
+    AANoReturn::initialize(A);
+    Function *F = getAssociatedFunction();
+    if (!F || F->hasFnAttribute(Attribute::WillReturn))
+      indicatePessimisticFixpoint();
   }

+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr() const override {
+    return getAssumed() ? "noreturn" : "may-return";
+  }
+
+  /// See AbstractAttribute::updateImpl(Attributor &A).
+  virtual ChangeStatus updateImpl(Attributor &A) override {
+    const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, getIRPosition());
+    if (WillReturnAA.isKnownWillReturn())
+      return indicatePessimisticFixpoint();
+    auto CheckForNoReturn = [](Instruction &) { return false; };
+    if (!A.checkForAllInstructions(CheckForNoReturn, *this,
+                                   {(unsigned)Instruction::Ret}))
+      return indicatePessimisticFixpoint();
+    return ChangeStatus::UNCHANGED;
+  }
+};
+
+struct AANoReturnFunction final : AANoReturnImpl {
+  AANoReturnFunction(const IRPosition &IRP) : AANoReturnImpl(IRP) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) }
+};
+
+/// NoReturn attribute deduction for a call site.
+struct AANoReturnCallSite final : AANoReturnImpl {
+  AANoReturnCallSite(const IRPosition &IRP) : AANoReturnImpl(IRP) {}

   /// See AbstractAttribute::updateImpl(...).
-  ChangeStatus updateImpl(Attributor &A) override;
+  ChangeStatus updateImpl(Attributor &A) override {
+    // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+    //       redirecting requests to the callee argument.
+    Function *F = getAssociatedFunction();
+    const IRPosition &FnPos = IRPosition::function(*F);
+    auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
+    return clampStateAndIndicateChange(
+        getState(),
+        static_cast<const AANoReturn::StateType &>(FnAA.getState()));
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); }
 };
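A note on the CheckForNoReturn predicate above: it returns false for every instruction, so the checkForAllInstructions query can only succeed when the set of live ret instructions it runs over is empty; "noreturn" is thus derived as a vacuously true forall. A standalone sketch of that idiom, with hypothetical names:

    #include <vector>

    // Hypothetical stand-in for checkForAllInstructions over one opcode class.
    template <typename InstT, typename PredT>
    bool checkForAll(const std::vector<InstT> &LiveInsts, PredT Pred) {
      for (const InstT &I : LiveInsts)
        if (!Pred(I))
          return false; // A single counterexample refutes the query.
      return true;      // Vacuously true when the instruction set is empty.
    }

    // "No live returns" holds exactly when the always-false predicate survives.
    bool hasNoLiveReturns(const std::vector<int> &LiveRetInstructions) {
      return checkForAll(LiveRetInstructions, [](const int &) { return false; });
    }

Because the Attributor filters out instructions it assumes dead, a function whose only ret lies in dead code can still be flagged noreturn.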
-/// NonNull attribute for a call site argument.
-struct AANonNullCallSiteArgument : AANonNullImpl {
+/// ----------------------- Variable Capturing ---------------------------------

-  /// See AANonNullImpl::AANonNullImpl(...).
-  AANonNullCallSiteArgument(CallSite CS, unsigned ArgNo,
-                            InformationCache &InfoCache)
-      : AANonNullImpl(CS.getArgOperand(ArgNo), *CS.getInstruction(), InfoCache),
-        ArgNo(ArgNo) {}
+/// A class to hold the state for no-capture attributes.
+struct AANoCaptureImpl : public AANoCapture {
+  AANoCaptureImpl(const IRPosition &IRP) : AANoCapture(IRP) {}

   /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
-    CallSite CS(&getAnchoredValue());
-    if (isKnownNonZero(getAssociatedValue(),
-                       getAnchorScope().getParent()->getDataLayout()) ||
-        CS.paramHasAttr(ArgNo, getAttrKind()))
+    AANoCapture::initialize(A);
+
+    // You cannot "capture" null in the default address space.
+    if (isa<ConstantPointerNull>(getAssociatedValue()) &&
+        getAssociatedValue().getType()->getPointerAddressSpace() == 0) {
       indicateOptimisticFixpoint();
+      return;
+    }
+
+    const IRPosition &IRP = getIRPosition();
+    const Function *F =
+        getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
+
+    // Check what state the associated function can actually capture.
+    if (F)
+      determineFunctionCaptureCapabilities(IRP, *F, *this);
+    else
+      indicatePessimisticFixpoint();
   }

-  /// See AbstractAttribute::updateImpl(Attributor &A).
+  /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override;

-  /// See AbstractAttribute::getManifestPosition().
-  ManifestPosition getManifestPosition() const override {
-    return MP_CALL_SITE_ARGUMENT;
-  };
+  /// See AbstractAttribute::getDeducedAttributes(...).
+  virtual void
+  getDeducedAttributes(LLVMContext &Ctx,
+                       SmallVectorImpl<Attribute> &Attrs) const override {
+    if (!isAssumedNoCaptureMaybeReturned())
+      return;
+
+    if (getArgNo() >= 0) {
+      if (isAssumedNoCapture())
+        Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture));
+      else if (ManifestInternal)
+        Attrs.emplace_back(Attribute::get(Ctx, "no-capture-maybe-returned"));
+    }
+  }
+
+  /// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p State
+  /// depending on the ability of the function associated with \p IRP to capture
+  /// state in memory and through "returning/throwing", respectively.
+  static void determineFunctionCaptureCapabilities(const IRPosition &IRP,
+                                                   const Function &F,
+                                                   IntegerState &State) {
+    // TODO: Once we have memory behavior attributes we should use them here.
+
+    // If we know we cannot communicate or write to memory, we do not care about
+    // ptr2int anymore.
+    if (F.onlyReadsMemory() && F.doesNotThrow() &&
+        F.getReturnType()->isVoidTy()) {
+      State.addKnownBits(NO_CAPTURE);
+      return;
+    }
+
+    // A function cannot capture state in memory if it only reads memory, it can
+    // however return/throw state and the state might be influenced by the
+    // pointer value, e.g., loading from a returned pointer might reveal a bit.
+    if (F.onlyReadsMemory())
+      State.addKnownBits(NOT_CAPTURED_IN_MEM);
+
+    // A function cannot communicate state back if it does not throw
+    // exceptions and does not return values.
+    if (F.doesNotThrow() && F.getReturnType()->isVoidTy())
+      State.addKnownBits(NOT_CAPTURED_IN_RET);
+
+    // Check existing "returned" attributes.
+    int ArgNo = IRP.getArgNo();
+    if (F.doesNotThrow() && ArgNo >= 0) {
+      for (unsigned u = 0, e = F.arg_size(); u < e; ++u)
+        if (F.hasParamAttribute(u, Attribute::Returned)) {
+          if (u == unsigned(ArgNo))
+            State.removeAssumedBits(NOT_CAPTURED_IN_RET);
+          else if (F.onlyReadsMemory())
+            State.addKnownBits(NO_CAPTURE);
+          else
+            State.addKnownBits(NOT_CAPTURED_IN_RET);
+          break;
+        }
+    }
+  }

-  // Return argument index of associated value.
-  int getArgNo() const { return ArgNo; }
+  /// See AbstractState::getAsStr().
+  const std::string getAsStr() const override {
+    if (isKnownNoCapture())
+      return "known not-captured";
+    if (isAssumedNoCapture())
+      return "assumed not-captured";
+    if (isKnownNoCaptureMaybeReturned())
+      return "known not-captured-maybe-returned";
+    if (isAssumedNoCaptureMaybeReturned())
+      return "assumed not-captured-maybe-returned";
+    return "assumed-captured";
+  }
+};
+
+/// Attributor-aware capture tracker.
+struct AACaptureUseTracker final : public CaptureTracker {
+
+  /// Create a capture tracker that can look up in-flight abstract attributes
+  /// through the Attributor \p A.
+  ///
+  /// If a use leads to a potential capture, \p CapturedInMemory is set and the
+  /// search is stopped. If a use leads to a return instruction,
+  /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed.
+  /// If a use leads to a ptr2int which may capture the value,
+  /// \p CapturedInInteger is set. If a use is found that is currently assumed
+  /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies
+  /// set. All values in \p PotentialCopies are later tracked as well. For every
+  /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0,
+  /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger
+  /// conservatively set to true.
+  AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA,
+                      const AAIsDead &IsDeadAA, IntegerState &State,
+                      SmallVectorImpl<const Value *> &PotentialCopies,
+                      unsigned &RemainingUsesToExplore)
+      : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State),
+        PotentialCopies(PotentialCopies),
+        RemainingUsesToExplore(RemainingUsesToExplore) {}
+
+  /// Determine if \p V may be captured. *Also updates the state!*
+  bool valueMayBeCaptured(const Value *V) {
+    if (V->getType()->isPointerTy()) {
+      PointerMayBeCaptured(V, this);
+    } else {
+      State.indicatePessimisticFixpoint();
+    }
+    return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
+  }
+
+  /// See CaptureTracker::tooManyUses().
+  void tooManyUses() override {
+    State.removeAssumedBits(AANoCapture::NO_CAPTURE);
+  }
+
+  bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override {
+    if (CaptureTracker::isDereferenceableOrNull(O, DL))
+      return true;
+    const auto &DerefAA =
+        A.getAAFor<AADereferenceable>(NoCaptureAA, IRPosition::value(*O));
+    return DerefAA.getAssumedDereferenceableBytes();
+  }
+
+  /// See CaptureTracker::captured(...).
+  bool captured(const Use *U) override {
+    Instruction *UInst = cast<Instruction>(U->getUser());
+    LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst
+                      << "\n");
+
+    // Because we may reuse the tracker multiple times we keep track of the
+    // number of explored uses ourselves as well.
+    if (RemainingUsesToExplore-- == 0) {
+      LLVM_DEBUG(dbgs() << " - too many uses to explore!\n");
+      return isCapturedIn(/* Memory */ true, /* Integer */ true,
+                          /* Return */ true);
+    }
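The isCapturedIn updates used throughout captured() operate on the three-bit lattice described above: a value is fully nocapture only if it escapes neither through memory, nor through integers (ptr2int), nor through returns/unwinding. A standalone sketch of the encoding, with hypothetical names mirroring the NOT_CAPTURED_* bits:

    #include <cstdint>

    // Hypothetical mirror of the AANoCapture bit lattice.
    enum CaptureBits : uint8_t {
      NOT_CAPTURED_IN_MEM = 1 << 0,
      NOT_CAPTURED_IN_INT = 1 << 1,
      NOT_CAPTURED_IN_RET = 1 << 2,
      // Tolerates losing only the "return" bit.
      NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT,
      NO_CAPTURE =
          NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET,
    };

    struct CaptureStateSketch {
      uint8_t Assumed = NO_CAPTURE; // Optimistic start; bits only get removed.
      void removeAssumedBits(uint8_t Bits) { Assumed &= ~Bits; }
      bool isAssumed(uint8_t Bits) const { return (Assumed & Bits) == Bits; }
    };

For example, a store of the pointer removes NOT_CAPTURED_IN_MEM, while returning it removes only NOT_CAPTURED_IN_RET, leaving "no-capture-maybe-returned" intact.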
+    // Deal with ptr2int by following uses.
+    if (isa<PtrToIntInst>(UInst)) {
+      LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n");
+      return valueMayBeCaptured(UInst);
+    }
+
+    // Explicitly catch return instructions.
+    if (isa<ReturnInst>(UInst))
+      return isCapturedIn(/* Memory */ false, /* Integer */ false,
+                          /* Return */ true);
+
+    // For now we only use special logic for call sites. However, the tracker
+    // itself knows about a lot of other non-capturing cases already.
+    CallSite CS(UInst);
+    if (!CS || !CS.isArgOperand(U))
+      return isCapturedIn(/* Memory */ true, /* Integer */ true,
+                          /* Return */ true);
+
+    unsigned ArgNo = CS.getArgumentNo(U);
+    const IRPosition &CSArgPos = IRPosition::callsite_argument(CS, ArgNo);
+    // If we have an abstract no-capture attribute for the argument we can use
+    // it to justify a non-capture attribute here. This allows recursion!
+    auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos);
+    if (ArgNoCaptureAA.isAssumedNoCapture())
+      return isCapturedIn(/* Memory */ false, /* Integer */ false,
+                          /* Return */ false);
+    if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+      addPotentialCopy(CS);
+      return isCapturedIn(/* Memory */ false, /* Integer */ false,
+                          /* Return */ false);
+    }
+
+    // Lastly, we could not find a reason no-capture can be assumed, so we
+    // don't.
+    return isCapturedIn(/* Memory */ true, /* Integer */ true,
+                        /* Return */ true);
+  }
+
+  /// Register \p CS as a potential copy of the value we are checking.
+  void addPotentialCopy(CallSite CS) {
+    PotentialCopies.push_back(CS.getInstruction());
+  }
+
+  /// See CaptureTracker::shouldExplore(...).
+  bool shouldExplore(const Use *U) override {
+    // Check liveness.
+    return !IsDeadAA.isAssumedDead(cast<Instruction>(U->getUser()));
+  }
+
+  /// Update the state according to \p CapturedInMem, \p CapturedInInt, and
+  /// \p CapturedInRet, then return the appropriate value for use in the
+  /// CaptureTracker::captured() interface.
+  bool isCapturedIn(bool CapturedInMem, bool CapturedInInt,
+                    bool CapturedInRet) {
+    LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int "
+                      << CapturedInInt << "|Ret " << CapturedInRet << "]\n");
+    if (CapturedInMem)
+      State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_MEM);
+    if (CapturedInInt)
+      State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT);
+    if (CapturedInRet)
+      State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET);
+    return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
+  }

 private:
-  unsigned ArgNo;
+  /// The attributor providing in-flight abstract attributes.
+  Attributor &A;
+
+  /// The abstract attribute currently updated.
+  AANoCapture &NoCaptureAA;
+
+  /// The abstract liveness state.
+  const AAIsDead &IsDeadAA;
+
+  /// The state currently updated.
+  IntegerState &State;
+
+  /// Set of potential copies of the tracked value.
+  SmallVectorImpl<const Value *> &PotentialCopies;
+
+  /// Global counter to limit the number of explored uses.
+  unsigned &RemainingUsesToExplore;
+};
+
+ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
+  const IRPosition &IRP = getIRPosition();
+  const Value *V =
+      getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue();
+  if (!V)
+    return indicatePessimisticFixpoint();
+
+  const Function *F =
+      getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
+  assert(F && "Expected a function!");
+  const IRPosition &FnPos = IRPosition::function(*F);
+  const auto &IsDeadAA = A.getAAFor<AAIsDead>(*this, FnPos);
+
+  AANoCapture::StateType T;
+
+  // Readonly means we cannot capture through memory.
+  const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+  if (FnMemAA.isAssumedReadOnly()) {
+    T.addKnownBits(NOT_CAPTURED_IN_MEM);
+    if (FnMemAA.isKnownReadOnly())
+      addKnownBits(NOT_CAPTURED_IN_MEM);
+  }
+
+  // Make sure all returned values are different from the underlying value.
+  // TODO: we could do this in a more sophisticated way inside
+  //       AAReturnedValues, e.g., track all values that escape through returns
+  //       directly somehow.
+  auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) {
+    bool SeenConstant = false;
+    for (auto &It : RVAA.returned_values()) {
+      if (isa<Constant>(It.first)) {
+        if (SeenConstant)
+          return false;
+        SeenConstant = true;
+      } else if (!isa<Argument>(It.first) ||
+                 It.first == getAssociatedArgument())
+        return false;
+    }
+    return true;
+  };
+
+  const auto &NoUnwindAA = A.getAAFor<AANoUnwind>(*this, FnPos);
+  if (NoUnwindAA.isAssumedNoUnwind()) {
+    bool IsVoidTy = F->getReturnType()->isVoidTy();
+    const AAReturnedValues *RVAA =
+        IsVoidTy ? nullptr : &A.getAAFor<AAReturnedValues>(*this, FnPos);
+    if (IsVoidTy || CheckReturnedArgs(*RVAA)) {
+      T.addKnownBits(NOT_CAPTURED_IN_RET);
+      if (T.isKnown(NOT_CAPTURED_IN_MEM))
+        return ChangeStatus::UNCHANGED;
+      if (NoUnwindAA.isKnownNoUnwind() &&
+          (IsVoidTy || RVAA->getState().isAtFixpoint())) {
+        addKnownBits(NOT_CAPTURED_IN_RET);
+        if (isKnown(NOT_CAPTURED_IN_MEM))
+          return indicateOptimisticFixpoint();
+      }
+    }
+  }
+
+  // Use the CaptureTracker interface and logic with the specialized tracker,
+  // defined in AACaptureUseTracker, that can look at in-flight abstract
+  // attributes and directly update the assumed state.
+  SmallVector<const Value *, 4> PotentialCopies;
+  unsigned RemainingUsesToExplore = DefaultMaxUsesToExplore;
+  AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies,
+                              RemainingUsesToExplore);
+
+  // Check all potential copies of the associated value until we can assume
+  // none will be captured or we have to assume at least one might be.
+  unsigned Idx = 0;
+  PotentialCopies.push_back(V);
+  while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size())
+    Tracker.valueMayBeCaptured(PotentialCopies[Idx++]);
+
+  AANoCapture::StateType &S = getState();
+  auto Assumed = S.getAssumed();
+  S.intersectAssumedBits(T.getAssumed());
+  return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
+                                   : ChangeStatus::CHANGED;
+}
+
+/// NoCapture attribute for function arguments.
+struct AANoCaptureArgument final : AANoCaptureImpl {
+  AANoCaptureArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nocapture) }
+};
+
+/// NoCapture attribute for call site arguments.
+struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
+  AANoCaptureCallSiteArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
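The call-site attributes in this section mostly clamp their own state against the corresponding callee state, as in the updateImpl that follows. A standalone sketch of what clampStateAndIndicateChange amounts to for such bit lattices (hypothetical types, not the LLVM template):

    #include <cstdint>

    enum class ChangeStatus { UNCHANGED, CHANGED };

    struct BitStateSketch {
      uint32_t Known = 0;     // Proven bits; may only grow.
      uint32_t Assumed = ~0u; // Optimistic bits; may only shrink.
    };

    // Clamp the local (call-site) state against the remote (callee) state and
    // report whether another fixpoint round is needed.
    ChangeStatus clampStateSketch(BitStateSketch &Local,
                                  const BitStateSketch &Remote) {
      uint32_t Before = Local.Assumed;
      Local.Known |= Remote.Known;     // Adopt what the callee proved.
      Local.Assumed &= Remote.Assumed; // Never assume more than the callee.
      Local.Assumed |= Local.Known;    // Keep the invariant assumed >= known.
      return Before == Local.Assumed ? ChangeStatus::UNCHANGED
                                     : ChangeStatus::CHANGED;
    }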
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+    //       redirecting requests to the callee argument.
+    Argument *Arg = getAssociatedArgument();
+    if (!Arg)
+      return indicatePessimisticFixpoint();
+    const IRPosition &ArgPos = IRPosition::argument(*Arg);
+    auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos);
+    return clampStateAndIndicateChange(
+        getState(),
+        static_cast<const AANoCapture::StateType &>(ArgAA.getState()));
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nocapture) }
+};
+
+/// NoCapture attribute for floating values.
+struct AANoCaptureFloating final : AANoCaptureImpl {
+  AANoCaptureFloating(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FLOATING_ATTR(nocapture)
+  }
+};
+
+/// NoCapture attribute for function return value.
+struct AANoCaptureReturned final : AANoCaptureImpl {
+  AANoCaptureReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) {
+    llvm_unreachable("NoCapture is not applicable to function returns!");
+  }
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    llvm_unreachable("NoCapture is not applicable to function returns!");
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    llvm_unreachable("NoCapture is not applicable to function returns!");
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {}
+};
+
+/// NoCapture attribute deduction for a call site return value.
+struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
+  AANoCaptureCallSiteReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSRET_ATTR(nocapture)
+  }
 };

-ChangeStatus AANonNullArgument::updateImpl(Attributor &A) {
-  Function &F = getAnchorScope();
-  Argument &Arg = cast<Argument>(getAnchoredValue());
-  unsigned ArgNo = Arg.getArgNo();
+/// ------------------ Value Simplify Attribute ----------------------------
+struct AAValueSimplifyImpl : AAValueSimplify {
+  AAValueSimplifyImpl(const IRPosition &IRP) : AAValueSimplify(IRP) {}
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr() const override {
+    return getAssumed() ? (getKnown() ? "simplified" : "maybe-simple")
+                        : "not-simple";
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {}
+
+  /// See AAValueSimplify::getAssumedSimplifiedValue()
+  Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const override {
+    if (!getAssumed())
+      return const_cast<Value *>(&getAssociatedValue());
+    return SimplifiedAssociatedValue;
+  }
+  void initialize(Attributor &A) override {}
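SimplifiedAssociatedValue is deliberately three-valued: empty means "still optimistic, no candidate seen", a non-null value is the single simplification candidate so far, and disagreement between candidates invalidates the state. A standalone sketch of the unification step performed by checkAndUpdate below, using std::optional<int> in place of llvm::Optional<Value *>:

    #include <optional>

    using Candidate = std::optional<int>;

    // Returns false when two incompatible candidates meet; true otherwise.
    bool meetCandidate(Candidate &Accumulated, const Candidate &Incoming) {
      if (!Incoming)
        return true;            // Nothing learned yet; stay optimistic.
      if (!Accumulated) {
        Accumulated = Incoming; // First concrete candidate is adopted.
        return true;
      }
      return *Accumulated == *Incoming; // Distinct candidates cannot unify.
    }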
+
+  /// Helper function for querying AAValueSimplify and updating the candidate.
+  /// \param QueryingValue Value trying to unify with SimplifiedValue
+  /// \param AccumulatedSimplifiedValue Current simplification result.
+  static bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA,
+                             Value &QueryingValue,
+                             Optional<Value *> &AccumulatedSimplifiedValue) {
+    // FIXME: Add typecast support.
+
+    auto &ValueSimplifyAA = A.getAAFor<AAValueSimplify>(
+        QueryingAA, IRPosition::value(QueryingValue));

-  // Callback function
-  std::function<bool(CallSite)> CallSiteCheck = [&](CallSite CS) {
-    assert(CS && "Sanity check: Call site was not initialized properly!");
+    Optional<Value *> QueryingValueSimplified =
+        ValueSimplifyAA.getAssumedSimplifiedValue(A);

-    auto *NonNullAA = A.getAAFor<AANonNull>(*this, *CS.getInstruction(), ArgNo);
+    if (!QueryingValueSimplified.hasValue())
+      return true;

-    // Check that NonNullAA is AANonNullCallSiteArgument.
-    if (NonNullAA) {
-      ImmutableCallSite ICS(&NonNullAA->getAnchoredValue());
-      if (ICS && CS.getInstruction() == ICS.getInstruction())
-        return NonNullAA->isAssumedNonNull();
+    if (!QueryingValueSimplified.getValue())
       return false;
+
+    Value &QueryingValueSimplifiedUnwrapped =
+        *QueryingValueSimplified.getValue();
+
+    if (isa<UndefValue>(QueryingValueSimplifiedUnwrapped))
+      return true;
+
+    if (AccumulatedSimplifiedValue.hasValue())
+      return AccumulatedSimplifiedValue == QueryingValueSimplified;
+
+    LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << QueryingValue
+                      << " is assumed to be "
+                      << QueryingValueSimplifiedUnwrapped << "\n");
+
+    AccumulatedSimplifiedValue = QueryingValueSimplified;
+    return true;
+  }
+
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+    if (!SimplifiedAssociatedValue.hasValue() ||
+        !SimplifiedAssociatedValue.getValue())
+      return Changed;
+
+    if (auto *C = dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())) {
+      // We can replace the AssociatedValue with the constant.
+      Value &V = getAssociatedValue();
+      if (!V.user_empty() && &V != C && V.getType() == C->getType()) {
+        LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << V << " -> " << *C
+                          << "\n");
+        V.replaceAllUsesWith(C);
+        Changed = ChangeStatus::CHANGED;
+      }
+    }
+
+    return Changed | AAValueSimplify::manifest(A);
+  }
+
+protected:
+  // An assumed simplified value. Initially, it is set to Optional::None, which
+  // means that the value is not clear under current assumption. If in the
+  // pessimistic state, getAssumedSimplifiedValue doesn't return this value but
+  // returns the original associated value.
+  Optional<Value *> SimplifiedAssociatedValue;
+};
+
+struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
+  AAValueSimplifyArgument(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+    auto PredForCallSite = [&](AbstractCallSite ACS) {
+      // Check if we have an associated argument or not (which can happen for
+      // callback calls).
+      if (Value *ArgOp = ACS.getCallArgOperand(getArgNo()))
+        return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue);
+      return false;
+    };
+
+    if (!A.checkForAllCallSites(PredForCallSite, *this, true))
+      return indicatePessimisticFixpoint();
+
+    // If a candidate was found in this update, return CHANGED.
+    return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+               ? ChangeStatus::UNCHANGED
+               : ChangeStatus::CHANGED;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_ARG_ATTR(value_simplify)
+  }
+};
+
+struct AAValueSimplifyReturned : AAValueSimplifyImpl {
+  AAValueSimplifyReturned(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+    auto PredForReturned = [&](Value &V) {
+      return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue);
+    };
+
+    if (!A.checkForAllReturnedValues(PredForReturned, *this))
+      return indicatePessimisticFixpoint();
+
+    // If a candidate was found in this update, return CHANGED.
+    return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+               ? ChangeStatus::UNCHANGED
+               : ChangeStatus::CHANGED;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FNRET_ATTR(value_simplify)
+  }
+};
+
+struct AAValueSimplifyFloating : AAValueSimplifyImpl {
+  AAValueSimplifyFloating(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    Value &V = getAnchorValue();
+
+    // TODO: Add other cases.
+    if (isa<Constant>(V) || isa<UndefValue>(V))
+      indicatePessimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+    auto VisitValueCB = [&](Value &V, BooleanState, bool Stripped) -> bool {
+      auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V));
+      if (!Stripped && this == &AA) {
+        // TODO: Look at the instruction and check recursively.
+        LLVM_DEBUG(
+            dbgs() << "[Attributor][ValueSimplify] Can't be stripped more: "
+                   << V << "\n");
+        indicatePessimisticFixpoint();
+        return false;
+      }
+      return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue);
+    };
+
+    if (!genericValueTraversal<AAValueSimplify, BooleanState>(
+            A, getIRPosition(), *this, static_cast<BooleanState &>(*this),
+            VisitValueCB))
+      return indicatePessimisticFixpoint();
+
+    // If a candidate was found in this update, return CHANGED.
+    return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+               ? ChangeStatus::UNCHANGED
+               : ChangeStatus::CHANGED;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FLOATING_ATTR(value_simplify)
+  }
+};
+
+struct AAValueSimplifyFunction : AAValueSimplifyImpl {
+  AAValueSimplifyFunction(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    SimplifiedAssociatedValue = &getAnchorValue();
+    indicateOptimisticFixpoint();
+  }
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    llvm_unreachable(
+        "AAValueSimplify(Function|CallSite)::updateImpl will not be called");
+  }
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FN_ATTR(value_simplify)
+  }
+};
+
+struct AAValueSimplifyCallSite : AAValueSimplifyFunction {
+  AAValueSimplifyCallSite(const IRPosition &IRP)
+      : AAValueSimplifyFunction(IRP) {}
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CS_ATTR(value_simplify)
+  }
+};
+
+struct AAValueSimplifyCallSiteReturned : AAValueSimplifyReturned {
+  AAValueSimplifyCallSiteReturned(const IRPosition &IRP)
+      : AAValueSimplifyReturned(IRP) {}
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSRET_ATTR(value_simplify)
+  }
+};
+
+struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
+  AAValueSimplifyCallSiteArgument(const IRPosition &IRP)
+      : AAValueSimplifyFloating(IRP) {}
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSARG_ATTR(value_simplify)
+  }
+};
+
+/// ----------------------- Heap-To-Stack Conversion ---------------------------
+struct AAHeapToStackImpl : public AAHeapToStack {
+  AAHeapToStackImpl(const IRPosition &IRP) : AAHeapToStack(IRP) {}
+
+  const std::string getAsStr() const override {
+    return "[H2S] Mallocs: " + std::to_string(MallocCalls.size());
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    assert(getState().isValidState() &&
+           "Attempted to manifest an invalid state!");
+
+    ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+    Function *F = getAssociatedFunction();
+    const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+    for (Instruction *MallocCall : MallocCalls) {
+      // This malloc cannot be replaced.
+      if (BadMallocCalls.count(MallocCall))
+        continue;
+
+      for (Instruction *FreeCall : FreesForMalloc[MallocCall]) {
+        LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n");
+        A.deleteAfterManifest(*FreeCall);
+        HasChanged = ChangeStatus::CHANGED;
+      }
+
+      LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
+                        << "\n");
+
+      Constant *Size;
+      if (isCallocLikeFn(MallocCall, TLI)) {
+        // Both operands were verified to be constants in updateImpl.
+        auto *Num = cast<ConstantInt>(MallocCall->getOperand(0));
+        auto *SizeT = cast<ConstantInt>(MallocCall->getOperand(1));
+        APInt TotalSize = SizeT->getValue() * Num->getValue();
+        Size =
+            ConstantInt::get(MallocCall->getOperand(0)->getType(), TotalSize);
+      } else {
+        Size = cast<ConstantInt>(MallocCall->getOperand(0));
+      }
+
+      unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace();
+      Instruction *AI = new AllocaInst(Type::getInt8Ty(F->getContext()), AS,
+                                       Size, "", MallocCall->getNextNode());
+
+      if (AI->getType() != MallocCall->getType())
+        AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc",
+                             AI->getNextNode());
+
+      MallocCall->replaceAllUsesWith(AI);
+
+      if (auto *II = dyn_cast<InvokeInst>(MallocCall)) {
+        auto *NBB = II->getNormalDest();
+        BranchInst::Create(NBB, MallocCall->getParent());
+        A.deleteAfterManifest(*MallocCall);
+      } else {
+        A.deleteAfterManifest(*MallocCall);
+      }
+
+      if (isCallocLikeFn(MallocCall, TLI)) {
+        auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc",
+                                   AI->getNextNode());
+        Value *Ops[] = {
+            BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size,
+            ConstantInt::get(Type::getInt1Ty(F->getContext()), false)};
+
+        Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()};
+        Module *M = F->getParent();
+        Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
+        CallInst::Create(Fn, Ops, "", BI->getNextNode());
+      }
+      HasChanged = ChangeStatus::CHANGED;
     }

-    if (CS.paramHasAttr(ArgNo, Attribute::NonNull))
+    return HasChanged;
+  }
+
+  /// Collection of all malloc calls in a function.
+  SmallSetVector<Instruction *, 4> MallocCalls;
+
+  /// Collection of malloc calls that cannot be converted.
+  DenseSet<const Instruction *> BadMallocCalls;
+
+  /// A map for each malloc call to the set of associated free calls.
+  DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> FreesForMalloc;
+
+  ChangeStatus updateImpl(Attributor &A) override;
+};
+
+ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
+  const Function *F = getAssociatedFunction();
+  const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+  auto UsesCheck = [&](Instruction &I) {
+    SmallPtrSet<const Use *, 8> Visited;
+    SmallVector<const Use *, 8> Worklist;
+
+    for (Use &U : I.uses())
+      Worklist.push_back(&U);
+
+    while (!Worklist.empty()) {
+      const Use *U = Worklist.pop_back_val();
+      if (!Visited.insert(U).second)
+        continue;
+
+      auto *UserI = U->getUser();
+
+      if (isa<LoadInst>(UserI))
+        continue;
+      if (auto *SI = dyn_cast<StoreInst>(UserI)) {
+        if (SI->getValueOperand() == U->get()) {
+          LLVM_DEBUG(dbgs() << "[H2S] escaping store to memory: " << *UserI
+                            << "\n");
+          return false;
+        }
+        // A store into the malloc'ed memory is fine.
+        continue;
+      }
+
+      // NOTE: Right now, if a function that has the malloc pointer as an
+      //       argument frees memory, we assume that the malloc pointer is
+      //       freed.
+
+      // TODO: Add a nofree callsite argument attribute to indicate that the
+      //       pointer argument is not freed.
+      if (auto *CB = dyn_cast<CallBase>(UserI)) {
+        if (!CB->isArgOperand(U))
+          continue;
+
+        if (CB->isLifetimeStartOrEnd())
+          continue;
+
+        // Record free calls for this malloc.
+        if (isFreeCall(UserI, TLI)) {
+          FreesForMalloc[&I].insert(
+              cast<Instruction>(const_cast<User *>(UserI)));
+          continue;
+        }
+
+        // If a function does not free memory, we are fine.
+        const auto &NoFreeAA =
+            A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(*CB));
+
+        unsigned ArgNo = U - CB->arg_begin();
+        const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
+            *this, IRPosition::callsite_argument(*CB, ArgNo));
+
+        if (!NoCaptureAA.isAssumedNoCapture() || !NoFreeAA.isAssumedNoFree()) {
+          LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
+          return false;
+        }
+        continue;
+      }
+
+      if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI)) {
+        for (Use &U : UserI->uses())
+          Worklist.push_back(&U);
+        continue;
+      }
+
+      // Unknown user.
+      LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n");
+      return false;
+    }
+    return true;
+  };
+
+  auto MallocCallocCheck = [&](Instruction &I) {
+    if (BadMallocCalls.count(&I))
       return true;

-    Value *V = CS.getArgOperand(ArgNo);
-    if (isKnownNonZero(V, getAnchorScope().getParent()->getDataLayout()))
+    bool IsMalloc = isMallocLikeFn(&I, TLI);
+    bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI);
+    if (!IsMalloc && !IsCalloc) {
+      BadMallocCalls.insert(&I);
       return true;
+    }

-    return false;
-  };
-
-  if (!A.checkForAllCallSites(F, CallSiteCheck, true)) {
-    indicatePessimisticFixpoint();
-    return ChangeStatus::CHANGED;
-  }
-  return ChangeStatus::UNCHANGED;
-}
+    if (IsMalloc) {
+      if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0)))
+        if (Size->getValue().sle(MaxHeapToStackSize))
+          if (UsesCheck(I)) {
+            MallocCalls.insert(&I);
+            return true;
+          }
+    } else if (IsCalloc) {
+      bool Overflow = false;
+      if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0)))
+        if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
+          if ((Size->getValue().umul_ov(Num->getValue(), Overflow))
+                  .sle(MaxHeapToStackSize))
+            if (!Overflow && UsesCheck(I)) {
+              MallocCalls.insert(&I);
+              return true;
+            }
+    }

-ChangeStatus AANonNullCallSiteArgument::updateImpl(Attributor &A) {
-  // NOTE: Never look at the argument of the callee in this method.
-  //       If we do this, "nonnull" is always deduced because of the assumption.
+    BadMallocCalls.insert(&I);
+    return true;
+  };

-  Value &V = *getAssociatedValue();
+  size_t NumBadMallocs = BadMallocCalls.size();

-  auto *NonNullAA = A.getAAFor<AANonNull>(*this, V);
+  A.checkForAllCallLikeInstructions(MallocCallocCheck, *this);

-  if (!NonNullAA || !NonNullAA->isAssumedNonNull()) {
-    indicatePessimisticFixpoint();
+  if (NumBadMallocs != BadMallocCalls.size())
     return ChangeStatus::CHANGED;
-  }

   return ChangeStatus::UNCHANGED;
 }

-/// ------------------------ Will-Return Attributes ----------------------------
+struct AAHeapToStackFunction final : public AAHeapToStackImpl {
+  AAHeapToStackFunction(const IRPosition &IRP) : AAHeapToStackImpl(IRP) {}

-struct AAWillReturnImpl : public AAWillReturn, BooleanState {
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECL(MallocCalls, Function,
+               "Number of MallocCalls converted to allocas");
+    BUILD_STAT_NAME(MallocCalls, Function) += MallocCalls.size();
+  }
+};
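At the source level, the conversion implemented above replaces a small, provably non-escaping heap allocation with a stack slot, deletes the matching free calls, and preserves calloc's zeroing with a memset. A hedged before/after illustration, not the pass's actual output:

    #include <cstdlib>
    #include <cstring>

    // Before: small, never-escaping, always-freed heap allocation.
    int sumBefore() {
      int *P = static_cast<int *>(calloc(4, sizeof(int)));
      int S = P[0] + P[3];
      free(P);
      return S;
    }

    // After heap-to-stack conversion, conceptually:
    int sumAfter() {
      int P[4];                     // stack storage replaces the calloc
      std::memset(P, 0, sizeof(P)); // zero-fill keeps calloc semantics
      int S = P[0] + P[3];
      return S;                     // the matching free() was deleted
    }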
+/// -------------------- Memory Behavior Attributes ----------------------------
+/// Includes read-none, read-only, and write-only.
+/// ----------------------------------------------------------------------------
+struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
+  AAMemoryBehaviorImpl(const IRPosition &IRP) : AAMemoryBehavior(IRP) {}

-  /// See AbstractAttribute::AbstractAttribute(...).
-  AAWillReturnImpl(Function &F, InformationCache &InfoCache)
-      : AAWillReturn(F, InfoCache) {}
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    intersectAssumedBits(BEST_STATE);
+    getKnownStateFromValue(getIRPosition(), getState());
+    IRAttribute::initialize(A);
+  }

-  /// See AAWillReturn::isKnownWillReturn().
-  bool isKnownWillReturn() const override { return getKnown(); }
+  /// Return the memory behavior information encoded in the IR for \p IRP.
+  static void getKnownStateFromValue(const IRPosition &IRP,
+                                     IntegerState &State) {
+    SmallVector<Attribute, 2> Attrs;
+    IRP.getAttrs(AttrKinds, Attrs);
+    for (const Attribute &Attr : Attrs) {
+      switch (Attr.getKindAsEnum()) {
+      case Attribute::ReadNone:
+        State.addKnownBits(NO_ACCESSES);
+        break;
+      case Attribute::ReadOnly:
+        State.addKnownBits(NO_WRITES);
+        break;
+      case Attribute::WriteOnly:
+        State.addKnownBits(NO_READS);
+        break;
+      default:
+        llvm_unreachable("Unexpected attribute!");
+      }
+    }

-  /// See AAWillReturn::isAssumedWillReturn().
-  bool isAssumedWillReturn() const override { return getAssumed(); }
+    if (auto *I = dyn_cast<Instruction>(&IRP.getAnchorValue())) {
+      if (!I->mayReadFromMemory())
+        State.addKnownBits(NO_READS);
+      if (!I->mayWriteToMemory())
+        State.addKnownBits(NO_WRITES);
+    }
+  }

-  /// See AbstractAttribute::getState(...).
-  AbstractState &getState() override { return *this; }
+  /// See AbstractAttribute::getDeducedAttributes(...).
+  void getDeducedAttributes(LLVMContext &Ctx,
+                            SmallVectorImpl<Attribute> &Attrs) const override {
+    assert(Attrs.size() == 0);
+    if (isAssumedReadNone())
+      Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone));
+    else if (isAssumedReadOnly())
+      Attrs.push_back(Attribute::get(Ctx, Attribute::ReadOnly));
+    else if (isAssumedWriteOnly())
+      Attrs.push_back(Attribute::get(Ctx, Attribute::WriteOnly));
+    assert(Attrs.size() <= 1);
+  }

-  /// See AbstractAttribute::getState(...).
-  const AbstractState &getState() const override { return *this; }
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    IRPosition &IRP = getIRPosition();
+
+    // Check if we would improve the existing attributes first.
+    SmallVector<Attribute, 4> DeducedAttrs;
+    getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs);
+    if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) {
+          return IRP.hasAttr(Attr.getKindAsEnum(),
+                             /* IgnoreSubsumingPositions */ true);
+        }))
+      return ChangeStatus::UNCHANGED;
+
+    // Clear existing attributes.
+    IRP.removeAttrs(AttrKinds);
+
+    // Use the generic manifest method.
+    return IRAttribute::manifest(A);
+  }

-  /// See AbstractAttribute::getAsStr()
+  /// See AbstractState::getAsStr().
   const std::string getAsStr() const override {
-    return getAssumed() ? "willreturn" : "may-noreturn";
+    if (isAssumedReadNone())
+      return "readnone";
+    if (isAssumedReadOnly())
+      return "readonly";
+    if (isAssumedWriteOnly())
+      return "writeonly";
+    return "may-read/write";
   }
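The readnone/readonly/writeonly triple is encoded with two independent bits, "no reads" and "no writes"; readnone is simply both at once, which is why getAsStr above can test the states from strongest to weakest. A standalone sketch of the encoding, with hypothetical names:

    #include <cstdint>

    // Hypothetical mirror of the AAMemoryBehavior bit encoding.
    enum MemBehaviorBits : uint8_t {
      NO_READS = 1 << 0,
      NO_WRITES = 1 << 1,
      NO_ACCESSES = NO_READS | NO_WRITES, // readnone == readonly && writeonly
    };

    struct MemBehaviorSketch {
      uint8_t Assumed = NO_ACCESSES; // Optimistic start: readnone.
      bool isAssumedReadNone() const { return Assumed == NO_ACCESSES; }
      bool isAssumedReadOnly() const { return Assumed & NO_WRITES; }
      bool isAssumedWriteOnly() const { return Assumed & NO_READS; }
      // Observing an actual read or write removes the matching bit.
      void observeRead() { Assumed &= ~uint8_t(NO_READS); }
      void observeWrite() { Assumed &= ~uint8_t(NO_WRITES); }
    };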
+
+  /// The set of IR attributes AAMemoryBehavior deals with.
+  static const Attribute::AttrKind AttrKinds[3];
 };

-struct AAWillReturnFunction final : AAWillReturnImpl {
+const Attribute::AttrKind AAMemoryBehaviorImpl::AttrKinds[] = {
+    Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly};

-  /// See AbstractAttribute::AbstractAttribute(...).
-  AAWillReturnFunction(Function &F, InformationCache &InfoCache)
-      : AAWillReturnImpl(F, InfoCache) {}
+/// Memory behavior attribute for a floating value.
+struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl {
+  AAMemoryBehaviorFloating(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}

-  /// See AbstractAttribute::getManifestPosition().
-  ManifestPosition getManifestPosition() const override {
-    return MP_FUNCTION;
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    AAMemoryBehaviorImpl::initialize(A);
+    // Initialize the use vector with all direct uses of the associated value.
+    for (const Use &U : getAssociatedValue().uses())
+      Uses.insert(&U);
   }

+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override;
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    if (isAssumedReadNone())
+      STATS_DECLTRACK_FLOATING_ATTR(readnone)
+    else if (isAssumedReadOnly())
+      STATS_DECLTRACK_FLOATING_ATTR(readonly)
+    else if (isAssumedWriteOnly())
+      STATS_DECLTRACK_FLOATING_ATTR(writeonly)
+  }
+
+private:
+  /// Return true if users of \p UserI might access the underlying
+  /// variable/location described by \p U and should therefore be analyzed.
+  bool followUsersOfUseIn(Attributor &A, const Use *U,
+                          const Instruction *UserI);
+
+  /// Update the state according to the effect of use \p U in \p UserI.
+  void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI);
+
+protected:
+  /// Container for (transitive) uses of the associated argument.
+  SetVector<const Use *> Uses;
+};
+
+/// Memory behavior attribute for function argument.
+struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
+  AAMemoryBehaviorArgument(const IRPosition &IRP)
+      : AAMemoryBehaviorFloating(IRP) {}
+
   /// See AbstractAttribute::initialize(...).
-  void initialize(Attributor &A) override;
+  void initialize(Attributor &A) override {
+    AAMemoryBehaviorFloating::initialize(A);
+
+    // Make sure we only deduce for arguments of functions with an exact
+    // definition.
+    Argument *Arg = getAssociatedArgument();
+    if (!Arg || !Arg->getParent()->hasExactDefinition())
+      indicatePessimisticFixpoint();
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    // TODO: From readattrs.ll: "inalloca parameters are always
+    //       considered written"
+    if (hasAttr({Attribute::InAlloca})) {
+      removeKnownBits(NO_WRITES);
+      removeAssumedBits(NO_WRITES);
+    }
+    return AAMemoryBehaviorFloating::manifest(A);
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    if (isAssumedReadNone())
+      STATS_DECLTRACK_ARG_ATTR(readnone)
+    else if (isAssumedReadOnly())
+      STATS_DECLTRACK_ARG_ATTR(readonly)
+    else if (isAssumedWriteOnly())
+      STATS_DECLTRACK_ARG_ATTR(writeonly)
+  }
+};
+
+struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
+  AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP)
+      : AAMemoryBehaviorArgument(IRP) {}

   /// See AbstractAttribute::updateImpl(...).
-  ChangeStatus updateImpl(Attributor &A) override;
+  ChangeStatus updateImpl(Attributor &A) override {
+    // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+    //       redirecting requests to the callee argument.
+    Argument *Arg = getAssociatedArgument();
+    const IRPosition &ArgPos = IRPosition::argument(*Arg);
+    auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
+    return clampStateAndIndicateChange(
+        getState(),
+        static_cast<const AAMemoryBehavior::StateType &>(ArgAA.getState()));
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    if (isAssumedReadNone())
+      STATS_DECLTRACK_CSARG_ATTR(readnone)
+    else if (isAssumedReadOnly())
+      STATS_DECLTRACK_CSARG_ATTR(readonly)
+    else if (isAssumedWriteOnly())
+      STATS_DECLTRACK_CSARG_ATTR(writeonly)
+  }
 };

-// Helper function that checks whether a function has any cycle.
-// TODO: Replace with more efficent code
-bool containsCycle(Function &F) {
-  SmallPtrSet<BasicBlock *, 32> Visited;
+/// Memory behavior attribute for a call site return position.
+struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating {
+  AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP)
+      : AAMemoryBehaviorFloating(IRP) {}

-  // Traverse BB by dfs and check whether successor is already visited.
-  for (BasicBlock *BB : depth_first(&F)) {
-    Visited.insert(BB);
-    for (auto *SuccBB : successors(BB)) {
-      if (Visited.count(SuccBB))
-        return true;
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    // We do not annotate returned values.
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {}
+};
+
+/// An AA to represent the memory behavior function attributes.
+struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl {
+  AAMemoryBehaviorFunction(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}
+
+  /// See AbstractAttribute::updateImpl(Attributor &A).
+  virtual ChangeStatus updateImpl(Attributor &A) override;
+
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    Function &F = cast<Function>(getAnchorValue());
+    if (isAssumedReadNone()) {
+      F.removeFnAttr(Attribute::ArgMemOnly);
+      F.removeFnAttr(Attribute::InaccessibleMemOnly);
+      F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
     }
+    return AAMemoryBehaviorImpl::manifest(A);
   }
-  return false;
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    if (isAssumedReadNone())
+      STATS_DECLTRACK_FN_ATTR(readnone)
+    else if (isAssumedReadOnly())
+      STATS_DECLTRACK_FN_ATTR(readonly)
+    else if (isAssumedWriteOnly())
+      STATS_DECLTRACK_FN_ATTR(writeonly)
+  }
+};
+
+/// AAMemoryBehavior attribute for call sites.
+struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
+  AAMemoryBehaviorCallSite(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    AAMemoryBehaviorImpl::initialize(A);
+    Function *F = getAssociatedFunction();
+    if (!F || !F->hasExactDefinition())
+      indicatePessimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+    //       redirecting requests to the callee argument.
+    Function *F = getAssociatedFunction();
+    const IRPosition &FnPos = IRPosition::function(*F);
+    auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+    return clampStateAndIndicateChange(
+        getState(),
+        static_cast<const AAMemoryBehavior::StateType &>(FnAA.getState()));
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    if (isAssumedReadNone())
+      STATS_DECLTRACK_CS_ATTR(readnone)
+    else if (isAssumedReadOnly())
+      STATS_DECLTRACK_CS_ATTR(readonly)
+    else if (isAssumedWriteOnly())
+      STATS_DECLTRACK_CS_ATTR(writeonly)
+  }
+};
+} // namespace
+
+ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) {
+
+  // The current assumed state used to determine a change.
+  auto AssumedState = getAssumed();
+
+  auto CheckRWInst = [&](Instruction &I) {
+    // If the instruction has its own memory behavior state, use it to restrict
+    // the local state. No further analysis is required as the other memory
+    // state is as optimistic as it gets.
+    if (ImmutableCallSite ICS = ImmutableCallSite(&I)) {
+      const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
+          *this, IRPosition::callsite_function(ICS));
+      intersectAssumedBits(MemBehaviorAA.getAssumed());
+      return !isAtFixpoint();
+    }
+
+    // Remove access kind modifiers if necessary.
+    if (I.mayReadFromMemory())
+      removeAssumedBits(NO_READS);
+    if (I.mayWriteToMemory())
+      removeAssumedBits(NO_WRITES);
+    return !isAtFixpoint();
+  };
+
+  if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
+    return indicatePessimisticFixpoint();
+
+  return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
+                                        : ChangeStatus::UNCHANGED;
 }

-// Helper function that checks the function have a loop which might become an
-// endless loop
-// FIXME: Any cycle is regarded as endless loop for now.
-//        We have to allow some patterns.
-bool containsPossiblyEndlessLoop(Function &F) { return containsCycle(F); }
+ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {

-void AAWillReturnFunction::initialize(Attributor &A) {
-  Function &F = getAnchorScope();
+  const IRPosition &IRP = getIRPosition();
+  const IRPosition &FnPos = IRPosition::function_scope(IRP);
+  AAMemoryBehavior::StateType &S = getState();

-  if (containsPossiblyEndlessLoop(F))
-    indicatePessimisticFixpoint();
+  // First, check the function scope. We take the known information and we
+  // avoid work if the assumed information implies the current assumed
+  // information for this attribute.
+  const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+  S.addKnownBits(FnMemAA.getKnown());
+  if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed())
+    return ChangeStatus::UNCHANGED;
+
+  // Make sure the value is not captured (except through "return"); if it is,
+  // any information derived would be irrelevant anyway as we cannot check the
+  // potential aliases introduced by the capture. However, no need to fall back
+  // to anything less optimistic than the function state.
+  const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
+  if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+    S.intersectAssumedBits(FnMemAA.getAssumed());
+    return ChangeStatus::CHANGED;
+  }
+
+  // The current assumed state used to determine a change.
+  auto AssumedState = S.getAssumed();
+
+  // Liveness information to exclude dead users.
+  // TODO: Take the FnPos once we have call site specific liveness information.
+  const auto &LivenessAA = A.getAAFor<AAIsDead>(
+      *this, IRPosition::function(*IRP.getAssociatedFunction()));
+
+  // Visit and expand uses until all are analyzed or a fixpoint is reached.
+  for (unsigned i = 0; i < Uses.size() && !isAtFixpoint(); i++) {
+    const Use *U = Uses[i];
+    Instruction *UserI = cast<Instruction>(U->getUser());
+    LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << **U << " in " << *UserI
+                      << " [Dead: " << (LivenessAA.isAssumedDead(UserI))
+                      << "]\n");
+    if (LivenessAA.isAssumedDead(UserI))
+      continue;
+
+    // Check if the users of UserI should also be visited.
+    if (followUsersOfUseIn(A, U, UserI))
+      for (const Use &UserIUse : UserI->uses())
+        Uses.insert(&UserIUse);
+
+    // If UserI might touch memory we analyze the use in detail.
+    if (UserI->mayReadOrWriteMemory())
+      analyzeUseIn(A, U, UserI);
+  }
+
+  return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
+                                        : ChangeStatus::UNCHANGED;
 }

-ChangeStatus AAWillReturnFunction::updateImpl(Attributor &A) {
-  Function &F = getAnchorScope();
+bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U,
+                                                  const Instruction *UserI) {
+  // The loaded value is unrelated to the pointer argument, no need to
+  // follow the users of the load.
+  if (isa<LoadInst>(UserI))
+    return false;

-  // The map from instruction opcodes to those instructions in the function.
-  auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+  // By default we follow all uses assuming UserI might leak information on U,
+  // we have special handling for call site operands though.
+  ImmutableCallSite ICS(UserI);
+  if (!ICS || !ICS.isArgOperand(U))
+    return true;

-  for (unsigned Opcode :
-       {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
-        (unsigned)Instruction::Call}) {
-    for (Instruction *I : OpcodeInstMap[Opcode]) {
-      auto ICS = ImmutableCallSite(I);
+  // If the use is a call argument known not to be captured, the users of
+  // the call do not need to be visited because they have to be unrelated to
+  // the input. Note that this check is not trivial even though we disallow
+  // general capturing of the underlying argument. The reason is that the
+  // call might return the argument "through return", which we allow and for
+  // which we need to check call users.
+  unsigned ArgNo = ICS.getArgumentNo(U);
+  const auto &ArgNoCaptureAA =
+      A.getAAFor<AANoCapture>(*this, IRPosition::callsite_argument(ICS, ArgNo));
+  return !ArgNoCaptureAA.isAssumedNoCapture();
+}

-      if (ICS.hasFnAttr(Attribute::WillReturn))
-        continue;
+void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U,
+                                            const Instruction *UserI) {
+  assert(UserI->mayReadOrWriteMemory());

-      auto *WillReturnAA = A.getAAFor<AAWillReturn>(*this, *I);
-      if (!WillReturnAA || !WillReturnAA->isAssumedWillReturn()) {
-        indicatePessimisticFixpoint();
-        return ChangeStatus::CHANGED;
-      }
+  switch (UserI->getOpcode()) {
+  default:
+    // TODO: Handle all atomics and other side-effect operations we know of.
+    break;
+  case Instruction::Load:
+    // Loads cause the NO_READS property to disappear.
+    removeAssumedBits(NO_READS);
+    return;

-      auto *NoRecurseAA = A.getAAFor<AANoRecurse>(*this, *I);
+  case Instruction::Store:
+    // Stores cause the NO_WRITES property to disappear if the use is the
+    // pointer operand. Note that we do assume that capturing was taken care of
+    // somewhere else.
+    if (cast<StoreInst>(UserI)->getPointerOperand() == U->get())
+      removeAssumedBits(NO_WRITES);
+    return;

-      // FIXME: (i) Prohibit any recursion for now.
-      //        (ii) AANoRecurse isn't implemented yet so currently any call is
-      //             regarded as having recursion.
-      //       Code below should be
-      //         if ((!NoRecurseAA || !NoRecurseAA->isAssumedNoRecurse()) &&
-      if (!NoRecurseAA && !ICS.hasFnAttr(Attribute::NoRecurse)) {
-        indicatePessimisticFixpoint();
-        return ChangeStatus::CHANGED;
-      }
+  case Instruction::Call:
+  case Instruction::CallBr:
+  case Instruction::Invoke: {
+    // For call sites we look at the argument memory behavior attribute (this
+    // could be recursive!) in order to restrict our own state.
+    ImmutableCallSite ICS(UserI);
+
+    // Give up on operand bundles.
+    if (ICS.isBundleOperand(U)) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+
+    // Calling a function does read the function pointer, maybe write it if the
+    // function is self-modifying.
+    if (ICS.isCallee(U)) {
+      removeAssumedBits(NO_READS);
+      break;
     }
+
+    // Adjust the possible access behavior based on the information on the
+    // argument.
+    unsigned ArgNo = ICS.getArgumentNo(U);
+    const IRPosition &ArgPos = IRPosition::callsite_argument(ICS, ArgNo);
+    const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
+    // "assumed" has at most the same bits as the MemBehaviorAA assumed
+    // and at least "known".
+    intersectAssumedBits(MemBehaviorAA.getAssumed());
+    return;
   }
+  }

-  return ChangeStatus::UNCHANGED;
+  // Generally, look at the "may-properties" and adjust the assumed state if we
+  // did not trigger special handling before.
+  if (UserI->mayReadFromMemory())
+    removeAssumedBits(NO_READS);
+  if (UserI->mayWriteToMemory())
+    removeAssumedBits(NO_WRITES);
 }

 /// ----------------------------------------------------------------------------
 /// Attributor
 /// ----------------------------------------------------------------------------

-bool Attributor::checkForAllCallSites(Function &F,
-                                      std::function<bool(CallSite)> &Pred,
-                                      bool RequireAllCallSites) {
+bool Attributor::isAssumedDead(const AbstractAttribute &AA,
+                               const AAIsDead *LivenessAA) {
+  const Instruction *CtxI = AA.getIRPosition().getCtxI();
+  if (!CtxI)
+    return false;
+
+  if (!LivenessAA)
+    LivenessAA =
+        &getAAFor<AAIsDead>(AA, IRPosition::function(*CtxI->getFunction()),
+                            /* TrackDependence */ false);
+
+  // Don't check liveness for AAIsDead.
+  if (&AA == LivenessAA)
+    return false;
+
+  if (!LivenessAA->isAssumedDead(CtxI))
+    return false;
+
+  // We actually used liveness information so we have to record a dependence.
+  recordDependence(*LivenessAA, AA);
+
+  return true;
+}
+
+bool Attributor::checkForAllCallSites(
+    const function_ref<bool(AbstractCallSite)> &Pred,
+    const AbstractAttribute &QueryingAA, bool RequireAllCallSites) {
   // We can try to determine information from
   // the call sites. However, this is only possible all call sites are known,
   // hence the function has internal linkage.
+  const IRPosition &IRP = QueryingAA.getIRPosition();
+  const Function *AssociatedFunction = IRP.getAssociatedFunction();
+  if (!AssociatedFunction) {
+    LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP
+                      << "\n");
+    return false;
+  }
+
+  return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites,
+                              &QueryingAA);
+}
+
+bool Attributor::checkForAllCallSites(
+    const function_ref<bool(AbstractCallSite)> &Pred, const Function &Fn,
+    bool RequireAllCallSites, const AbstractAttribute *QueryingAA) {
+  if (RequireAllCallSites && !Fn.hasLocalLinkage()) {
-  if (RequireAllCallSites && !F.hasInternalLinkage()) {
     LLVM_DEBUG(
         dbgs()
-        << "Attributor: Function " << F.getName()
+        << "[Attributor] Function " << Fn.getName()
         << " has no internal linkage, hence not all call sites are known\n");
     return false;
   }

-  for (const Use &U : F.uses()) {
+  for (const Use &U : Fn.uses()) {
+    AbstractCallSite ACS(&U);
+    if (!ACS) {
+      LLVM_DEBUG(dbgs() << "[Attributor] Function "
+                        << Fn.getName()
+                        << " has a non-call-site use " << *U.get() << " in "
+                        << *U.getUser() << "\n");
+      return false;
+    }
+
+    Instruction *I = ACS.getInstruction();
+    Function *Caller = I->getFunction();
+
+    const auto *LivenessAA =
+        lookupAAFor<AAIsDead>(IRPosition::function(*Caller), QueryingAA,
+                              /* TrackDependence */ false);
+
+    // Skip dead calls.
+    if (LivenessAA && LivenessAA->isAssumedDead(I)) {
+      // We actually used liveness information so we have to record a
+      // dependence.
+      if (QueryingAA)
+        recordDependence(*LivenessAA, *QueryingAA);
+      continue;
+    }

-    CallSite CS(U.getUser());
-    if (!CS || !CS.isCallee(&U) || !CS.getCaller()->hasExactDefinition()) {
+    const Use *EffectiveUse =
+        ACS.isCallbackCall() ? &ACS.getCalleeUseForCallback() : &U;
+    if (!ACS.isCallee(EffectiveUse)) {
       if (!RequireAllCallSites)
         continue;
-
-      LLVM_DEBUG(dbgs() << "Attributor: User " << *U.getUser()
-                        << " is an invalid use of " << F.getName() << "\n");
+      LLVM_DEBUG(dbgs() << "[Attributor] User " << EffectiveUse->getUser()
+                        << " is an invalid use of "
+                        << Fn.getName() << "\n");
       return false;
     }

-    if (Pred(CS))
+    if (Pred(ACS))
       continue;

-    LLVM_DEBUG(dbgs() << "Attributor: Call site callback failed for "
-                      << *CS.getInstruction() << "\n");
+    LLVM_DEBUG(dbgs() << "[Attributor] Call site callback failed for "
+                      << *ACS.getInstruction() << "\n");
     return false;
   }

   return true;
 }

-ChangeStatus Attributor::run() {
-  // Initialize all abstract attributes.
-  for (AbstractAttribute *AA : AllAbstractAttributes)
-    AA->initialize(*this);
+bool Attributor::checkForAllReturnedValuesAndReturnInsts(
+    const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
+        &Pred,
+    const AbstractAttribute &QueryingAA) {
+
+  const IRPosition &IRP = QueryingAA.getIRPosition();
+  // Since we need to provide return instructions we have to have an exact
+  // definition.
+  const Function *AssociatedFunction = IRP.getAssociatedFunction();
+  if (!AssociatedFunction)
+    return false;
+
+  // If this is a call site query we use the call site specific return values
+  // and liveness information.
+  // TODO: use the function scope once we have call site AAReturnedValues.
+  const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+  const auto &AARetVal = getAAFor<AAReturnedValues>(QueryingAA, QueryIRP);
+  if (!AARetVal.getState().isValidState())
+    return false;
+
+  return AARetVal.checkForAllReturnedValuesAndReturnInsts(Pred);
+}
+
+bool Attributor::checkForAllReturnedValues(
+    const function_ref<bool(Value &)> &Pred,
+    const AbstractAttribute &QueryingAA) {
+
+  const IRPosition &IRP = QueryingAA.getIRPosition();
+  const Function *AssociatedFunction = IRP.getAssociatedFunction();
+  if (!AssociatedFunction)
+    return false;
+
+  // TODO: use the function scope once we have call site AAReturnedValues.
+  const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+  const auto &AARetVal = getAAFor<AAReturnedValues>(QueryingAA, QueryIRP);
+  if (!AARetVal.getState().isValidState())
+    return false;
+
+  return AARetVal.checkForAllReturnedValuesAndReturnInsts(
+      [&](Value &RV, const SmallSetVector<ReturnInst *, 4> &) {
+        return Pred(RV);
+      });
+}
+
+static bool
+checkForAllInstructionsImpl(InformationCache::OpcodeInstMapTy &OpcodeInstMap,
+                            const function_ref<bool(Instruction &)> &Pred,
+                            const AAIsDead *LivenessAA, bool &AnyDead,
+                            const ArrayRef<unsigned> &Opcodes) {
+  for (unsigned Opcode : Opcodes) {
+    for (Instruction *I : OpcodeInstMap[Opcode]) {
+      // Skip dead instructions.
+      if (LivenessAA && LivenessAA->isAssumedDead(I)) {
+        AnyDead = true;
+        continue;
+      }
+
+      if (!Pred(*I))
+        return false;
+    }
+  }
+  return true;
+}
+
+bool Attributor::checkForAllInstructions(
+    const llvm::function_ref<bool(Instruction &)> &Pred,
+    const AbstractAttribute &QueryingAA, const ArrayRef<unsigned> &Opcodes) {
+
+  const IRPosition &IRP = QueryingAA.getIRPosition();
+  // Since we need to provide instructions we have to have an exact definition.
+  const Function *AssociatedFunction = IRP.getAssociatedFunction();
+  if (!AssociatedFunction)
+    return false;
+
+  // TODO: use the function scope once we have call site AAReturnedValues.
+  const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+  const auto &LivenessAA =
+      getAAFor<AAIsDead>(QueryingAA, QueryIRP, /* TrackDependence */ false);
+  bool AnyDead = false;
+
+  auto &OpcodeInstMap =
+      InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
+  if (!checkForAllInstructionsImpl(OpcodeInstMap, Pred, &LivenessAA, AnyDead,
+                                   Opcodes))
+    return false;
+
+  // If we actually used liveness information, we have to record a dependence.
+  if (AnyDead)
+    recordDependence(LivenessAA, QueryingAA);
+
+  return true;
+}
+
+bool Attributor::checkForAllReadWriteInstructions(
+    const llvm::function_ref<bool(Instruction &)> &Pred,
+    AbstractAttribute &QueryingAA) {
+
+  const Function *AssociatedFunction =
+      QueryingAA.getIRPosition().getAssociatedFunction();
+  if (!AssociatedFunction)
+    return false;
+
+  // TODO: use the function scope once we have call site AAReturnedValues.
+  const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+  const auto &LivenessAA =
+      getAAFor<AAIsDead>(QueryingAA, QueryIRP, /* TrackDependence */ false);
+  bool AnyDead = false;
+
+  for (Instruction *I :
+       InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) {
+    // Skip dead instructions.
+    if (LivenessAA.isAssumedDead(I)) {
+      AnyDead = true;
+      continue;
+    }
+
+    if (!Pred(*I))
+      return false;
+  }
+
+  // If we actually used liveness information, we have to record a dependence.
+ if (AnyDead) + recordDependence(LivenessAA, QueryingAA); + + return true; +} + +ChangeStatus Attributor::run(Module &M) { LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized " << AllAbstractAttributes.size() << " abstract attributes.\n"); @@ -1370,10 +4470,25 @@ ChangeStatus Attributor::run() { SetVector<AbstractAttribute *> Worklist; Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end()); + bool RecomputeDependences = false; + do { + // Remember the size to determine new attributes. + size_t NumAAs = AllAbstractAttributes.size(); LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter << ", Worklist size: " << Worklist.size() << "\n"); + // If dependences (=QueryMap) are recomputed we have to look at all abstract + // attributes again, regardless of what changed in the last iteration. + if (RecomputeDependences) { + LLVM_DEBUG( + dbgs() << "[Attributor] Run all AAs to recompute dependences\n"); + QueryMap.clear(); + ChangedAAs.clear(); + Worklist.insert(AllAbstractAttributes.begin(), + AllAbstractAttributes.end()); + } + // Add all abstract attributes that are potentially dependent on one that // changed to the work list. for (AbstractAttribute *ChangedAA : ChangedAAs) { @@ -1381,27 +4496,42 @@ ChangeStatus Attributor::run() { Worklist.insert(QuerriedAAs.begin(), QuerriedAAs.end()); } + LLVM_DEBUG(dbgs() << "[Attributor] #Iteration: " << IterationCounter + << ", Worklist+Dependent size: " << Worklist.size() + << "\n"); + // Reset the changed set. ChangedAAs.clear(); // Update all abstract attribute in the work list and record the ones that // changed. for (AbstractAttribute *AA : Worklist) - if (AA->update(*this) == ChangeStatus::CHANGED) - ChangedAAs.push_back(AA); + if (!isAssumedDead(*AA, nullptr)) + if (AA->update(*this) == ChangeStatus::CHANGED) + ChangedAAs.push_back(AA); + + // Check if we recompute the dependences in the next iteration. + RecomputeDependences = (DepRecomputeInterval > 0 && + IterationCounter % DepRecomputeInterval == 0); + + // Add attributes to the changed set if they have been created in the last + // iteration. + ChangedAAs.append(AllAbstractAttributes.begin() + NumAAs, + AllAbstractAttributes.end()); // Reset the work list and repopulate with the changed abstract attributes. // Note that dependent ones are added above. Worklist.clear(); Worklist.insert(ChangedAAs.begin(), ChangedAAs.end()); - } while (!Worklist.empty() && ++IterationCounter < MaxFixpointIterations); + } while (!Worklist.empty() && (IterationCounter++ < MaxFixpointIterations || + VerifyMaxFixpointIterations)); LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: " << IterationCounter << "/" << MaxFixpointIterations << " iterations\n"); - bool FinishedAtFixpoint = Worklist.empty(); + size_t NumFinalAAs = AllAbstractAttributes.size(); // Reset abstract arguments not settled in a sound fixpoint by now. This // happens when we stopped the fixpoint iteration early. Note that only the @@ -1448,8 +4578,14 @@ ChangeStatus Attributor::run() { if (!State.isValidState()) continue; + // Skip dead code. + if (isAssumedDead(*AA, nullptr)) + continue; // Manifest the state and record if we changed the IR. 
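
Before the manifestation code resumes below, a compact standalone model of the driver loop above: elements are revisited whenever something they queried has changed, until the worklist empties or the iteration budget is spent (toy types, not the Attributor's):

#include <map>
#include <set>
#include <vector>

template <typename T>
unsigned runFixpoint(const std::vector<T *> &All,
                     const std::multimap<T *, T *> &Dependents,
                     bool (*Update)(T &), unsigned MaxIters) {
  std::set<T *> Worklist(All.begin(), All.end());
  unsigned Iter = 0;
  while (!Worklist.empty() && Iter++ < MaxIters) {
    std::set<T *> Changed;
    for (T *Item : Worklist)
      if (Update(*Item))
        Changed.insert(Item);
    // Next round: every changed element plus everything that queried one.
    Worklist.clear();
    for (T *C : Changed) {
      Worklist.insert(C);
      auto Range = Dependents.equal_range(C);
      for (auto It = Range.first; It != Range.second; ++It)
        Worklist.insert(It->second);
    }
  }
  return Iter;
}

The real loop differs in the details this hunk adds: dead attributes are skipped, attributes created during an iteration join the changed set, and the query map is periodically rebuilt from scratch.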
     ChangeStatus LocalChange = AA->manifest(*this);
+    if (LocalChange == ChangeStatus::CHANGED && AreStatisticsEnabled())
+      AA->trackStatistics();
+
     ManifestChange = ManifestChange | LocalChange;

     NumAtFixpoint++;
@@ -1462,69 +4598,92 @@ ChangeStatus Attributor::run() {
                     << " arguments while " << NumAtFixpoint
                     << " were in a valid fixpoint state\n");

-  // If verification is requested, we finished this run at a fixpoint, and the
-  // IR was changed, we re-run the whole fixpoint analysis, starting at
-  // re-initialization of the arguments. This re-run should not result in an IR
-  // change. Though, the (virtual) state of attributes at the end of the re-run
-  // might be more optimistic than the known state or the IR state if the better
-  // state cannot be manifested.
-  if (VerifyAttributor && FinishedAtFixpoint &&
-      ManifestChange == ChangeStatus::CHANGED) {
-    VerifyAttributor = false;
-    ChangeStatus VerifyStatus = run();
-    if (VerifyStatus != ChangeStatus::UNCHANGED)
-      llvm_unreachable(
-          "Attributor verification failed, re-run did result in an IR change "
-          "even after a fixpoint was reached in the original run. (False "
-          "positives possible!)");
-    VerifyAttributor = true;
-  }
-
   NumAttributesManifested += NumManifested;
   NumAttributesValidFixpoint += NumAtFixpoint;

-  return ManifestChange;
-}
-
-void Attributor::identifyDefaultAbstractAttributes(
-    Function &F, InformationCache &InfoCache,
-    DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist) {
+  (void)NumFinalAAs;
+  assert(
+      NumFinalAAs == AllAbstractAttributes.size() &&
+      "Expected the final number of abstract attributes to remain unchanged!");
+
+  // Delete stuff at the end to avoid invalid references and to get a nice order.
+  {
+    LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least "
+                      << ToBeDeletedFunctions.size() << " functions and "
+                      << ToBeDeletedBlocks.size() << " blocks and "
+                      << ToBeDeletedInsts.size() << " instructions\n");
+    for (Instruction *I : ToBeDeletedInsts) {
+      if (!I->use_empty())
+        I->replaceAllUsesWith(UndefValue::get(I->getType()));
+      I->eraseFromParent();
+    }

-  // Every function can be nounwind.
-  registerAA(*new AANoUnwindFunction(F, InfoCache));
+    if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) {
+      SmallVector<BasicBlock *, 8> ToBeDeletedBBs;
+      ToBeDeletedBBs.reserve(NumDeadBlocks);
+      ToBeDeletedBBs.append(ToBeDeletedBlocks.begin(), ToBeDeletedBlocks.end());
+      DeleteDeadBlocks(ToBeDeletedBBs);
+      STATS_DECLTRACK(AAIsDead, BasicBlock,
+                      "Number of dead basic blocks deleted.");
+    }

-  // Every function might be marked "nosync"
-  registerAA(*new AANoSyncFunction(F, InfoCache));
+    STATS_DECL(AAIsDead, Function, "Number of dead functions deleted.");
+    for (Function *Fn : ToBeDeletedFunctions) {
+      Fn->replaceAllUsesWith(UndefValue::get(Fn->getType()));
+      Fn->eraseFromParent();
+      STATS_TRACK(AAIsDead, Function);
+    }

-  // Every function might be "no-free".
-  registerAA(*new AANoFreeFunction(F, InfoCache));
+    // Identify dead internal functions and delete them. This happens outside
+    // the other fixpoint analysis as we might treat potentially dead functions
+    // as live to lower the number of iterations. If they happen to be dead, the
+    // below fixpoint loop will identify and eliminate them.
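
The loop below implements exactly this cleanup. Note the trick of passing an always-false predicate to checkForAllCallSites: the walk then succeeds only if every use was skipped as dead, i.e. nothing live calls the function. Structurally it is a repeat-until-stable sweep; a toy rendering with hypothetical helper names:

#include <vector>

// Null out entries a predicate proves dead and rescan, since deleting one
// function can remove the last live caller of another.
template <typename T, typename DeadFn, typename DeleteFn>
void sweepUntilStable(std::vector<T *> &Work, DeadFn IsDead, DeleteFn Del) {
  bool FoundDead = true;
  while (FoundDead) {
    FoundDead = false;
    for (T *&Entry : Work) {
      if (!Entry || !IsDead(Entry))
        continue;
      Del(Entry);
      Entry = nullptr; // Keep indices stable across rescans.
      FoundDead = true;
    }
  }
}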
+ SmallVector<Function *, 8> InternalFns; + for (Function &F : M) + if (F.hasLocalLinkage()) + InternalFns.push_back(&F); + + bool FoundDeadFn = true; + while (FoundDeadFn) { + FoundDeadFn = false; + for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) { + Function *F = InternalFns[u]; + if (!F) + continue; - // Return attributes are only appropriate if the return type is non void. - Type *ReturnType = F.getReturnType(); - if (!ReturnType->isVoidTy()) { - // Argument attribute "returned" --- Create only one per function even - // though it is an argument attribute. - if (!Whitelist || Whitelist->count(AAReturnedValues::ID)) - registerAA(*new AAReturnedValuesImpl(F, InfoCache)); + const auto *LivenessAA = + lookupAAFor<AAIsDead>(IRPosition::function(*F)); + if (LivenessAA && + !checkForAllCallSites([](AbstractCallSite ACS) { return false; }, + *LivenessAA, true)) + continue; - // Every function with pointer return type might be marked nonnull. - if (ReturnType->isPointerTy() && - (!Whitelist || Whitelist->count(AANonNullReturned::ID))) - registerAA(*new AANonNullReturned(F, InfoCache)); + STATS_TRACK(AAIsDead, Function); + F->replaceAllUsesWith(UndefValue::get(F->getType())); + F->eraseFromParent(); + InternalFns[u] = nullptr; + FoundDeadFn = true; + } + } } - // Every argument with pointer type might be marked nonnull. - for (Argument &Arg : F.args()) { - if (Arg.getType()->isPointerTy()) - registerAA(*new AANonNullArgument(Arg, InfoCache)); + if (VerifyMaxFixpointIterations && + IterationCounter != MaxFixpointIterations) { + errs() << "\n[Attributor] Fixpoint iteration done after: " + << IterationCounter << "/" << MaxFixpointIterations + << " iterations\n"; + llvm_unreachable("The fixpoint was not reached with exactly the number of " + "specified iterations!"); } - // Every function might be "will-return". - registerAA(*new AAWillReturnFunction(F, InfoCache)); + return ManifestChange; +} + +void Attributor::initializeInformationCache(Function &F) { - // Walk all instructions to find more attribute opportunities and also - // interesting instructions that might be queried by abstract attributes - // during their initialization or update. + // Walk all instructions to find interesting instructions that might be + // queried by abstract attributes during their initialization or update. + // This has to happen before we create attributes. auto &ReadOrWriteInsts = InfoCache.FuncRWInstsMap[&F]; auto &InstOpcodeMap = InfoCache.FuncInstOpcodeMap[&F]; @@ -1540,8 +4699,12 @@ void Attributor::identifyDefaultAbstractAttributes( default: assert((!ImmutableCallSite(&I)) && (!isa<CallBase>(&I)) && "New call site/base instruction type needs to be known int the " - "attributor."); + "Attributor."); break; + case Instruction::Load: + // The alignment of a pointer is interesting for loads. + case Instruction::Store: + // The alignment of a pointer is interesting for stores. case Instruction::Call: case Instruction::CallBr: case Instruction::Invoke: @@ -1555,18 +4718,154 @@ void Attributor::identifyDefaultAbstractAttributes( InstOpcodeMap[I.getOpcode()].push_back(&I); if (I.mayReadOrWriteMemory()) ReadOrWriteInsts.push_back(&I); + } +} + +void Attributor::identifyDefaultAbstractAttributes(Function &F) { + if (!VisitedFunctions.insert(&F).second) + return; + + IRPosition FPos = IRPosition::function(F); + + // Check for dead BasicBlocks in every function. + // We need dead instruction detection because we do not want to deal with + // broken IR in which SSA rules do not apply. 
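
The long block of getOrCreateAAFor<> calls that follows seeds the default attributes for the function, starting with AAIsDead per the comment above. Each call is an idempotent lookup-or-create; a heavily simplified sketch of its conceptual shape (the real version also handles dependence tracking and whitelisting):

template <typename AAType>
const AAType &getOrCreateAAFor(const IRPosition &IRP) {
  if (const AAType *Existing = lookupAAFor<AAType>(IRP))
    return *Existing;
  // createForPosition is generated by the CREATE_*_FOR_POSITION macros
  // near the end of this file's diff.
  AAType &AA = AAType::createForPosition(IRP, *this);
  registerAA(AA);
  AA.initialize(*this);
  return AA;
}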
+ getOrCreateAAFor<AAIsDead>(FPos); + + // Every function might be "will-return". + getOrCreateAAFor<AAWillReturn>(FPos); + // Every function can be nounwind. + getOrCreateAAFor<AANoUnwind>(FPos); + + // Every function might be marked "nosync" + getOrCreateAAFor<AANoSync>(FPos); + + // Every function might be "no-free". + getOrCreateAAFor<AANoFree>(FPos); + + // Every function might be "no-return". + getOrCreateAAFor<AANoReturn>(FPos); + + // Every function might be "no-recurse". + getOrCreateAAFor<AANoRecurse>(FPos); + + // Every function might be "readnone/readonly/writeonly/...". + getOrCreateAAFor<AAMemoryBehavior>(FPos); + + // Every function might be applicable for Heap-To-Stack conversion. + if (EnableHeapToStack) + getOrCreateAAFor<AAHeapToStack>(FPos); + + // Return attributes are only appropriate if the return type is non void. + Type *ReturnType = F.getReturnType(); + if (!ReturnType->isVoidTy()) { + // Argument attribute "returned" --- Create only one per function even + // though it is an argument attribute. + getOrCreateAAFor<AAReturnedValues>(FPos); + + IRPosition RetPos = IRPosition::returned(F); + + // Every function might be simplified. + getOrCreateAAFor<AAValueSimplify>(RetPos); + + if (ReturnType->isPointerTy()) { + + // Every function with pointer return type might be marked align. + getOrCreateAAFor<AAAlign>(RetPos); + + // Every function with pointer return type might be marked nonnull. + getOrCreateAAFor<AANonNull>(RetPos); + + // Every function with pointer return type might be marked noalias. + getOrCreateAAFor<AANoAlias>(RetPos); + + // Every function with pointer return type might be marked + // dereferenceable. + getOrCreateAAFor<AADereferenceable>(RetPos); + } + } + + for (Argument &Arg : F.args()) { + IRPosition ArgPos = IRPosition::argument(Arg); + + // Every argument might be simplified. + getOrCreateAAFor<AAValueSimplify>(ArgPos); + + if (Arg.getType()->isPointerTy()) { + // Every argument with pointer type might be marked nonnull. + getOrCreateAAFor<AANonNull>(ArgPos); + + // Every argument with pointer type might be marked noalias. + getOrCreateAAFor<AANoAlias>(ArgPos); + + // Every argument with pointer type might be marked dereferenceable. + getOrCreateAAFor<AADereferenceable>(ArgPos); + + // Every argument with pointer type might be marked align. + getOrCreateAAFor<AAAlign>(ArgPos); + + // Every argument with pointer type might be marked nocapture. + getOrCreateAAFor<AANoCapture>(ArgPos); + + // Every argument with pointer type might be marked + // "readnone/readonly/writeonly/..." + getOrCreateAAFor<AAMemoryBehavior>(ArgPos); + } + } + + auto CallSitePred = [&](Instruction &I) -> bool { CallSite CS(&I); - if (CS && CS.getCalledFunction()) { + if (CS.getCalledFunction()) { for (int i = 0, e = CS.getCalledFunction()->arg_size(); i < e; i++) { + + IRPosition CSArgPos = IRPosition::callsite_argument(CS, i); + + // Call site argument might be simplified. + getOrCreateAAFor<AAValueSimplify>(CSArgPos); + if (!CS.getArgument(i)->getType()->isPointerTy()) continue; // Call site argument attribute "non-null". - registerAA(*new AANonNullCallSiteArgument(CS, i, InfoCache), i); + getOrCreateAAFor<AANonNull>(CSArgPos); + + // Call site argument attribute "no-alias". + getOrCreateAAFor<AANoAlias>(CSArgPos); + + // Call site argument attribute "dereferenceable". + getOrCreateAAFor<AADereferenceable>(CSArgPos); + + // Call site argument attribute "align". 
+ getOrCreateAAFor<AAAlign>(CSArgPos); } } - } + return true; + }; + + auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); + bool Success, AnyDead = false; + Success = checkForAllInstructionsImpl( + OpcodeInstMap, CallSitePred, nullptr, AnyDead, + {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, + (unsigned)Instruction::Call}); + (void)Success; + assert(Success && !AnyDead && "Expected the check call to be successful!"); + + auto LoadStorePred = [&](Instruction &I) -> bool { + if (isa<LoadInst>(I)) + getOrCreateAAFor<AAAlign>( + IRPosition::value(*cast<LoadInst>(I).getPointerOperand())); + else + getOrCreateAAFor<AAAlign>( + IRPosition::value(*cast<StoreInst>(I).getPointerOperand())); + return true; + }; + Success = checkForAllInstructionsImpl( + OpcodeInstMap, LoadStorePred, nullptr, AnyDead, + {(unsigned)Instruction::Load, (unsigned)Instruction::Store}); + (void)Success; + assert(Success && !AnyDead && "Expected the check call to be successful!"); } /// Helpers to ease debugging through output streams and print calls. @@ -1576,21 +4875,39 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, ChangeStatus S) { return OS << (S == ChangeStatus::CHANGED ? "changed" : "unchanged"); } -raw_ostream &llvm::operator<<(raw_ostream &OS, - AbstractAttribute::ManifestPosition AP) { +raw_ostream &llvm::operator<<(raw_ostream &OS, IRPosition::Kind AP) { switch (AP) { - case AbstractAttribute::MP_ARGUMENT: + case IRPosition::IRP_INVALID: + return OS << "inv"; + case IRPosition::IRP_FLOAT: + return OS << "flt"; + case IRPosition::IRP_RETURNED: + return OS << "fn_ret"; + case IRPosition::IRP_CALL_SITE_RETURNED: + return OS << "cs_ret"; + case IRPosition::IRP_FUNCTION: + return OS << "fn"; + case IRPosition::IRP_CALL_SITE: + return OS << "cs"; + case IRPosition::IRP_ARGUMENT: return OS << "arg"; - case AbstractAttribute::MP_CALL_SITE_ARGUMENT: + case IRPosition::IRP_CALL_SITE_ARGUMENT: return OS << "cs_arg"; - case AbstractAttribute::MP_FUNCTION: - return OS << "fn"; - case AbstractAttribute::MP_RETURNED: - return OS << "fn_ret"; } llvm_unreachable("Unknown attribute position!"); } +raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) { + const Value &AV = Pos.getAssociatedValue(); + return OS << "{" << Pos.getPositionKind() << ":" << AV.getName() << " [" + << Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}"; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerState &S) { + return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")" + << static_cast<const AbstractState &>(S); +} + raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractState &S) { return OS << (!S.isValidState() ? "top" : (S.isAtFixpoint() ? 
"fix" : "")); } @@ -1601,8 +4918,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) { } void AbstractAttribute::print(raw_ostream &OS) const { - OS << "[" << getManifestPosition() << "][" << getAsStr() << "][" - << AnchoredVal.getName() << "]"; + OS << "[P: " << getIRPosition() << "][" << getAsStr() << "][S: " << getState() + << "]"; } ///} @@ -1610,7 +4927,7 @@ void AbstractAttribute::print(raw_ostream &OS) const { /// Pass (Manager) Boilerplate /// ---------------------------------------------------------------------------- -static bool runAttributorOnModule(Module &M) { +static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) { if (DisableAttributor) return false; @@ -1619,39 +4936,39 @@ static bool runAttributorOnModule(Module &M) { // Create an Attributor and initially empty information cache that is filled // while we identify default attribute opportunities. - Attributor A; - InformationCache InfoCache; + InformationCache InfoCache(M, AG); + Attributor A(InfoCache, DepRecInterval); + + for (Function &F : M) + A.initializeInformationCache(F); for (Function &F : M) { - // TODO: Not all attributes require an exact definition. Find a way to - // enable deduction for some but not all attributes in case the - // definition might be changed at runtime, see also - // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html. - // TODO: We could always determine abstract attributes and if sufficient - // information was found we could duplicate the functions that do not - // have an exact definition. - if (!F.hasExactDefinition()) { + if (F.hasExactDefinition()) + NumFnWithExactDefinition++; + else NumFnWithoutExactDefinition++; - continue; - } - // For now we ignore naked and optnone functions. - if (F.hasFnAttribute(Attribute::Naked) || - F.hasFnAttribute(Attribute::OptimizeNone)) - continue; - - NumFnWithExactDefinition++; + // We look at internal functions only on-demand but if any use is not a + // direct call, we have to do it eagerly. + if (F.hasLocalLinkage()) { + if (llvm::all_of(F.uses(), [](const Use &U) { + return ImmutableCallSite(U.getUser()) && + ImmutableCallSite(U.getUser()).isCallee(&U); + })) + continue; + } // Populate the Attributor with abstract attribute opportunities in the // function and the information cache with IR information. - A.identifyDefaultAbstractAttributes(F, InfoCache); + A.identifyDefaultAbstractAttributes(F); } - return A.run() == ChangeStatus::CHANGED; + return A.run(M) == ChangeStatus::CHANGED; } PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) { - if (runAttributorOnModule(M)) { + AnalysisGetter AG(AM); + if (runAttributorOnModule(M, AG)) { // FIXME: Think about passes we will preserve and add them here. return PreservedAnalyses::none(); } @@ -1670,12 +4987,14 @@ struct AttributorLegacyPass : public ModulePass { bool runOnModule(Module &M) override { if (skipModule(M)) return false; - return runAttributorOnModule(M); + + AnalysisGetter AG; + return runAttributorOnModule(M, AG); } void getAnalysisUsage(AnalysisUsage &AU) const override { // FIXME: Think about passes we will preserve and add them here. 
- AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } }; @@ -1684,7 +5003,147 @@ struct AttributorLegacyPass : public ModulePass { Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); } char AttributorLegacyPass::ID = 0; + +const char AAReturnedValues::ID = 0; +const char AANoUnwind::ID = 0; +const char AANoSync::ID = 0; +const char AANoFree::ID = 0; +const char AANonNull::ID = 0; +const char AANoRecurse::ID = 0; +const char AAWillReturn::ID = 0; +const char AANoAlias::ID = 0; +const char AANoReturn::ID = 0; +const char AAIsDead::ID = 0; +const char AADereferenceable::ID = 0; +const char AAAlign::ID = 0; +const char AANoCapture::ID = 0; +const char AAValueSimplify::ID = 0; +const char AAHeapToStack::ID = 0; +const char AAMemoryBehavior::ID = 0; + +// Macro magic to create the static generator function for attributes that +// follow the naming scheme. + +#define SWITCH_PK_INV(CLASS, PK, POS_NAME) \ + case IRPosition::PK: \ + llvm_unreachable("Cannot create " #CLASS " for a " POS_NAME " position!"); + +#define SWITCH_PK_CREATE(CLASS, IRP, PK, SUFFIX) \ + case IRPosition::PK: \ + AA = new CLASS##SUFFIX(IRP); \ + break; + +#define CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \ + SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \ + SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + } \ + return *AA; \ + } + +#define CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_FUNCTION, "function") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + return *AA; \ + } + +#define CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + return *AA; \ + } + +#define CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + 
SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument")                                 \
+    SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating")                               \
+    SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned")                            \
+    SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned")        \
+    SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument")        \
+    SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site")                          \
+    SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function)                      \
+    }                                                                         \
+    return *AA;                                                               \
+  }
+
+#define CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS)                 \
+  CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) {     \
+    CLASS *AA = nullptr;                                                      \
+    switch (IRP.getPositionKind()) {                                          \
+      SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid")                            \
+      SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned")                          \
+      SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function)                    \
+      SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite)                   \
+      SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating)                       \
+      SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument)                    \
+      SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned)  \
+      SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument)  \
+    }                                                                         \
+    return *AA;                                                               \
+  }
+
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
+
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
+
+CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
+
+CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
+
+CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
+
+#undef CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef SWITCH_PK_CREATE
+#undef SWITCH_PK_INV
+
 INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor",
                       "Deduce and propagate attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(AttributorLegacyPass, "attributor",
                     "Deduce and propagate attributes", false, false)
diff --git a/lib/Transforms/IPO/BlockExtractor.cpp b/lib/Transforms/IPO/BlockExtractor.cpp
index 6c365f3f3cbe..de80c88c1591 100644
--- a/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/lib/Transforms/IPO/BlockExtractor.cpp
@@ -119,6 +119,8 @@ void BlockExtractor::loadFile() {
                         /*KeepEmpty=*/false);
     if (LineSplit.empty())
       continue;
+    if (LineSplit.size() != 2)
+      report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'");
     SmallVector<StringRef, 4> BBNames;
     LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
                        /*KeepEmpty=*/false);
@@ -204,7 +206,8 @@ bool BlockExtractor::runOnModule(Module &M) {
       ++NumExtracted;
       Changed = true;
     }
-    Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion();
+    CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent());
+    Function *F =
CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index ad877ae1786c..3cf839e397f8 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -48,7 +48,7 @@ static void FindUsedValues(GlobalVariable *LLVMUsed, ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { - Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases(); + Value *Operand = Inits->getOperand(i)->stripPointerCasts(); GlobalValue *GV = cast<GlobalValue>(Operand); UsedValues.insert(GV); } @@ -120,7 +120,7 @@ static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) { // Bump the alignment if necessary. if (Old->getAlignment() || New->getAlignment()) - New->setAlignment(std::max(getAlignment(Old), getAlignment(New))); + New->setAlignment(Align(std::max(getAlignment(Old), getAlignment(New)))); copyDebugLocMetadata(Old, New); Old->replaceAllUsesWith(NewConstant); diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp index e30b33aa4872..e20159ba0db5 100644 --- a/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -84,13 +84,9 @@ void CrossDSOCFI::buildCFICheck(Module &M) { for (GlobalObject &GO : M.global_objects()) { Types.clear(); GO.getMetadata(LLVMContext::MD_type, Types); - for (MDNode *Type : Types) { - // Sanity check. GO must not be a function declaration. - assert(!isa<Function>(&GO) || !cast<Function>(&GO)->isDeclaration()); - + for (MDNode *Type : Types) if (ConstantInt *TypeId = extractNumericTypeId(Type)) TypeIds.insert(TypeId->getZExtValue()); - } } NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); @@ -108,11 +104,11 @@ void CrossDSOCFI::buildCFICheck(Module &M) { FunctionCallee C = M.getOrInsertFunction( "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx), Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx)); - Function *F = dyn_cast<Function>(C.getCallee()); + Function *F = cast<Function>(C.getCallee()); // Take over the existing function. The frontend emits a weak stub so that the // linker knows about the symbol; this pass replaces the function body. 
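
A recurring mechanical change in this patch, visible in ConstantMerge above, in the setAlignment call just below, and earlier in ArgumentPromotion, is the migration from raw unsigned alignments to the typed Align/MaybeAlign wrappers, where an unsigned value of 0 becomes "no alignment specified". A small sketch of the idiom, assuming llvm/Support/Alignment.h:

#include "llvm/Support/Alignment.h"
using namespace llvm;

Align fromLegacy(unsigned OldAlignment) {
  MaybeAlign MA(OldAlignment); // Zero maps to an empty MaybeAlign.
  return MA.valueOrOne();      // Collapse to a definite power of two.
}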
F->deleteBody(); - F->setAlignment(4096); + F->setAlignment(Align(4096)); Triple T(M.getTargetTriple()); if (T.isARM() || T.isThumb()) diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 5ccd8bc4b0fb..b174c63a577b 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -78,11 +78,8 @@ STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); STATISTIC(NumNoFree, "Number of functions marked as nofree"); -// FIXME: This is disabled by default to avoid exposing security vulnerabilities -// in C/C++ code compiled by clang: -// http://lists.llvm.org/pipermail/cfe-dev/2017-January/052066.html static cl::opt<bool> EnableNonnullArgPropagation( - "enable-nonnull-arg-prop", cl::Hidden, + "enable-nonnull-arg-prop", cl::init(true), cl::Hidden, cl::desc("Try to propagate nonnull argument attributes from callsites to " "caller functions.")); @@ -664,6 +661,25 @@ static bool addArgumentAttrsFromCallsites(Function &F) { return Changed; } +static bool addReadAttr(Argument *A, Attribute::AttrKind R) { + assert((R == Attribute::ReadOnly || R == Attribute::ReadNone) + && "Must be a Read attribute."); + assert(A && "Argument must not be null."); + + // If the argument already has the attribute, nothing needs to be done. + if (A->hasAttribute(R)) + return false; + + // Otherwise, remove potentially conflicting attribute, add the new one, + // and update statistics. + A->removeAttr(Attribute::WriteOnly); + A->removeAttr(Attribute::ReadOnly); + A->removeAttr(Attribute::ReadNone); + A->addAttr(R); + R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; + return true; +} + /// Deduce nocapture attributes for the SCC. static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { bool Changed = false; @@ -732,11 +748,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { SmallPtrSet<Argument *, 8> Self; Self.insert(&*A); Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); - if (R != Attribute::None) { - A->addAttr(R); - Changed = true; - R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; - } + if (R != Attribute::None) + Changed = addReadAttr(A, R); } } } @@ -833,12 +846,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { if (ReadAttr != Attribute::None) { for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; - // Clear out existing readonly/readnone attributes - A->removeAttr(Attribute::ReadOnly); - A->removeAttr(Attribute::ReadNone); - A->addAttr(ReadAttr); - ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; - Changed = true; + Changed = addReadAttr(A, ReadAttr); } } } diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 62c7fbd07223..3f5cc078d75f 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -450,7 +450,7 @@ static void computeImportForFunction( } else if (PrintImportFailures) { assert(!FailureInfo && "Expected no FailureInfo for newly rejected candidate"); - FailureInfo = llvm::make_unique<FunctionImporter::ImportFailureInfo>( + FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>( VI, Edge.second.getHotness(), Reason, 1); } LLVM_DEBUG( @@ -764,7 +764,7 @@ void llvm::computeDeadSymbols( } // Make value live and add it to the worklist if it was not live before. 
- auto visit = [&](ValueInfo VI) { + auto visit = [&](ValueInfo VI, bool IsAliasee) { // FIXME: If we knew which edges were created for indirect call profiles, // we could skip them here. Any that are live should be reached via // other edges, e.g. reference edges. Otherwise, using a profile collected @@ -800,12 +800,15 @@ void llvm::computeDeadSymbols( Interposable = true; } - if (!KeepAliveLinkage) - return; + if (!IsAliasee) { + if (!KeepAliveLinkage) + return; - if (Interposable) - report_fatal_error( - "Interposable and available_externally/linkonce_odr/weak_odr symbol"); + if (Interposable) + report_fatal_error( + "Interposable and available_externally/linkonce_odr/weak_odr " + "symbol"); + } } for (auto &S : VI.getSummaryList()) @@ -821,16 +824,16 @@ void llvm::computeDeadSymbols( // If this is an alias, visit the aliasee VI to ensure that all copies // are marked live and it is added to the worklist for further // processing of its references. - visit(AS->getAliaseeVI()); + visit(AS->getAliaseeVI(), true); continue; } Summary->setLive(true); for (auto Ref : Summary->refs()) - visit(Ref); + visit(Ref, false); if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) for (auto Call : FS->calls()) - visit(Call.first); + visit(Call.first, false); } } Index.setWithGlobalValueDeadStripping(); @@ -892,7 +895,7 @@ std::error_code llvm::EmitImportsFiles( StringRef ModulePath, StringRef OutputFilename, const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) { std::error_code EC; - raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_None); if (EC) return EC; for (auto &ILI : ModuleToSummariesForIndex) @@ -948,23 +951,15 @@ void llvm::thinLTOResolvePrevailingInModule( auto NewLinkage = GS->second->linkage(); if (NewLinkage == GV.getLinkage()) return; - - // Switch the linkage to weakany if asked for, e.g. we do this for - // linker redefined symbols (via --wrap or --defsym). - // We record that the visibility should be changed here in `addThinLTO` - // as we need access to the resolution vectors for each input file in - // order to find which symbols have been redefined. - // We may consider reorganizing this code and moving the linkage recording - // somewhere else, e.g. in thinLTOResolvePrevailingInIndex. - if (NewLinkage == GlobalValue::WeakAnyLinkage) { - GV.setLinkage(NewLinkage); - return; - } - if (GlobalValue::isLocalLinkage(GV.getLinkage()) || + // Don't internalize anything here, because the code below + // lacks necessary correctness checks. Leave this job to + // LLVM 'internalize' pass. + GlobalValue::isLocalLinkage(NewLinkage) || // In case it was dead and already converted to declaration. GV.isDeclaration()) return; + // Check for a non-prevailing def that has interposable linkage // (e.g. non-odr weak or linkonce). 
In that case we can't simply
 // convert to available_externally, since it would lose the
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 86b7f3e49ee6..f010f7b703a6 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -17,9 +17,11 @@
 #include "llvm/Transforms/IPO/GlobalDCE.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Utils/CtorUtils.h"
@@ -29,10 +31,15 @@ using namespace llvm;

 #define DEBUG_TYPE "globaldce"

+static cl::opt<bool>
+    ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+                cl::desc("Enable virtual function elimination"));
+
 STATISTIC(NumAliases  , "Number of global aliases removed");
 STATISTIC(NumFunctions, "Number of functions removed");
 STATISTIC(NumIFuncs,    "Number of indirect functions removed");
 STATISTIC(NumVariables, "Number of global variables removed");
+STATISTIC(NumVFuncs,    "Number of virtual functions removed");

 namespace {
 class GlobalDCELegacyPass : public ModulePass {
@@ -118,6 +125,15 @@ void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) {
   ComputeDependencies(User, Deps);
   Deps.erase(&GV); // Remove self-reference.
   for (GlobalValue *GVU : Deps) {
+    // If this is a dep from a vtable to a virtual function, and we have
+    // complete information about all virtual call sites which could call
+    // through this vtable, then skip it, because the call site information will
+    // be more precise.
+    if (VFESafeVTables.count(GVU) && isa<Function>(&GV)) {
+      LLVM_DEBUG(dbgs() << "Ignoring dep " << GVU->getName() << " -> "
+                        << GV.getName() << "\n");
+      continue;
+    }
     GVDependencies[GVU].insert(&GV);
   }
 }
@@ -132,12 +148,133 @@ void GlobalDCEPass::MarkLive(GlobalValue &GV,
   if (Updates)
     Updates->push_back(&GV);
   if (Comdat *C = GV.getComdat()) {
-    for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+    for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
       MarkLive(*CM.second, Updates); // Recursion depth is only two because only
                                      // globals in the same comdat are visited.
+    }
+  }
+}
+
+void GlobalDCEPass::ScanVTables(Module &M) {
+  SmallVector<MDNode *, 2> Types;
+  LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n");
+
+  auto *LTOPostLinkMD =
+      cast_or_null<ConstantAsMetadata>(M.getModuleFlag("LTOPostLink"));
+  bool LTOPostLink =
+      LTOPostLinkMD &&
+      (cast<ConstantInt>(LTOPostLinkMD->getValue())->getZExtValue() != 0);
+
+  for (GlobalVariable &GV : M.globals()) {
+    Types.clear();
+    GV.getMetadata(LLVMContext::MD_type, Types);
+    if (GV.isDeclaration() || Types.empty())
+      continue;
+
+    // Use the typeid metadata on the vtable to build a mapping from typeids to
+    // the list of (GV, offset) pairs which are the possible vtables for that
+    // typeid.
+    for (MDNode *Type : Types) {
+      Metadata *TypeID = Type->getOperand(1).get();
+
+      uint64_t Offset =
+          cast<ConstantInt>(
+              cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+              ->getZExtValue();
+
+      TypeIdMap[TypeID].insert(std::make_pair(&GV, Offset));
+    }
+
+    // If the type corresponding to the vtable is private to this translation
+    // unit, we know that we can see all virtual functions which might use it,
+    // so VFE is safe.
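
The check that follows encodes this rule; restated compactly (same enum values as the code below; a sketch only):

static bool isVFESafe(GlobalObject::VCallVisibility TypeVis, bool LTOPostLink) {
  // Translation-unit visibility is always safe; linkage-unit visibility
  // becomes safe once all linkage-unit code has been merged, i.e. in the
  // LTO post-link phase.
  return TypeVis == GlobalObject::VCallVisibilityTranslationUnit ||
         (LTOPostLink && TypeVis == GlobalObject::VCallVisibilityLinkageUnit);
}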
+ if (auto GO = dyn_cast<GlobalObject>(&GV)) { + GlobalObject::VCallVisibility TypeVis = GO->getVCallVisibility(); + if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit || + (LTOPostLink && + TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) { + LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n"); + VFESafeVTables.insert(&GV); + } + } + } +} + +void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId, + uint64_t CallOffset) { + for (auto &VTableInfo : TypeIdMap[TypeId]) { + GlobalVariable *VTable = VTableInfo.first; + uint64_t VTableOffset = VTableInfo.second; + + Constant *Ptr = + getPointerAtOffset(VTable->getInitializer(), VTableOffset + CallOffset, + *Caller->getParent()); + if (!Ptr) { + LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n"); + VFESafeVTables.erase(VTable); + return; + } + + auto Callee = dyn_cast<Function>(Ptr->stripPointerCasts()); + if (!Callee) { + LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n"); + VFESafeVTables.erase(VTable); + return; + } + + LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> " + << Callee->getName() << "\n"); + GVDependencies[Caller].insert(Callee); } } +void GlobalDCEPass::ScanTypeCheckedLoadIntrinsics(Module &M) { + LLVM_DEBUG(dbgs() << "Scanning type.checked.load intrinsics\n"); + Function *TypeCheckedLoadFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); + + if (!TypeCheckedLoadFunc) + return; + + for (auto U : TypeCheckedLoadFunc->users()) { + auto CI = dyn_cast<CallInst>(U); + if (!CI) + continue; + + auto *Offset = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + Value *TypeIdValue = CI->getArgOperand(2); + auto *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata(); + + if (Offset) { + ScanVTableLoad(CI->getFunction(), TypeId, Offset->getZExtValue()); + } else { + // type.checked.load with a non-constant offset, so assume every entry in + // every matching vtable is used. + for (auto &VTableInfo : TypeIdMap[TypeId]) { + VFESafeVTables.erase(VTableInfo.first); + } + } + } +} + +void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) { + if (!ClEnableVFE) + return; + + ScanVTables(M); + + if (VFESafeVTables.empty()) + return; + + ScanTypeCheckedLoadIntrinsics(M); + + LLVM_DEBUG( + dbgs() << "VFE safe vtables:\n"; + for (auto *VTable : VFESafeVTables) + dbgs() << " " << VTable->getName() << "\n"; + ); +} + PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { bool Changed = false; @@ -163,6 +300,10 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { if (Comdat *C = GA.getComdat()) ComdatMembers.insert(std::make_pair(C, &GA)); + // Add dependencies between virtual call sites and the virtual functions they + // might call, if we have that information. + AddVirtualFunctionDependencies(M); + // Loop over the module, adding globals which are obviously necessary. for (GlobalObject &GO : M.global_objects()) { Changed |= RemoveUnusedGlobalValue(GO); @@ -257,8 +398,17 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { }; NumFunctions += DeadFunctions.size(); - for (Function *F : DeadFunctions) + for (Function *F : DeadFunctions) { + if (!F->use_empty()) { + // Virtual functions might still be referenced by one or more vtables, + // but if we've proven them to be unused then it's safe to replace the + // virtual function pointers with null, allowing us to remove the + // function itself. 
+ ++NumVFuncs; + F->replaceNonMetadataUsesWith(ConstantPointerNull::get(F->getType())); + } EraseUnusedGlobalValue(F); + } NumVariables += DeadGlobalVars.size(); for (GlobalVariable *GV : DeadGlobalVars) @@ -277,6 +427,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { ConstantDependenciesCache.clear(); GVDependencies.clear(); ComdatMembers.clear(); + TypeIdMap.clear(); + VFESafeVTables.clear(); if (Changed) return PreservedAnalyses::none(); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index c4fb3ce77f6e..819715b9f8da 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -155,7 +155,8 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) { /// Given a value that is stored to a global but never read, determine whether /// it's safe to remove the store and the chain of computation that feeds the /// store. -static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { +static bool IsSafeComputationToRemove( + Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { do { if (isa<Constant>(V)) return true; @@ -164,7 +165,7 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || isa<GlobalValue>(V)) return false; - if (isAllocationFn(V, TLI)) + if (isAllocationFn(V, GetTLI)) return true; Instruction *I = cast<Instruction>(V); @@ -184,8 +185,9 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { /// This GV is a pointer root. Loop over all users of the global and clean up /// any that obviously don't assign the global a value that isn't dynamically /// allocated. -static bool CleanupPointerRootUsers(GlobalVariable *GV, - const TargetLibraryInfo *TLI) { +static bool +CleanupPointerRootUsers(GlobalVariable *GV, + function_ref<TargetLibraryInfo &(Function &)> GetTLI) { // A brief explanation of leak checkers. The goal is to find bugs where // pointers are forgotten, causing an accumulating growth in memory // usage over time. The common strategy for leak checkers is to whitelist the @@ -241,18 +243,18 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, C->destroyConstant(); // This could have invalidated UI, start over from scratch. Dead.clear(); - CleanupPointerRootUsers(GV, TLI); + CleanupPointerRootUsers(GV, GetTLI); return true; } } } for (int i = 0, e = Dead.size(); i != e; ++i) { - if (IsSafeComputationToRemove(Dead[i].first, TLI)) { + if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) { Dead[i].second->eraseFromParent(); Instruction *I = Dead[i].first; do { - if (isAllocationFn(I, TLI)) + if (isAllocationFn(I, GetTLI)) break; Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); if (!J) @@ -270,9 +272,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, /// We just marked GV constant. Loop over all users of the global, cleaning up /// the obvious ones. This is largely just a quick scan over the use list to /// clean up the easy and obvious cruft. This returns true if it made a change. -static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, - const DataLayout &DL, - TargetLibraryInfo *TLI) { +static bool CleanupConstantGlobalUsers( + Value *V, Constant *Init, const DataLayout &DL, + function_ref<TargetLibraryInfo &(Function &)> GetTLI) { bool Changed = false; // Note that we need to use a weak value handle for the worklist items. 
When // we delete a constant array, we may also be holding pointer to one of its @@ -302,12 +304,12 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Constant *SubInit = nullptr; if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); - Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI); + Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI); } else if ((CE->getOpcode() == Instruction::BitCast && CE->getType()->isPointerTy()) || CE->getOpcode() == Instruction::AddrSpaceCast) { // Pointer cast, delete any stores and memsets to the global. - Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI); + Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI); } if (CE->use_empty()) { @@ -321,7 +323,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Constant *SubInit = nullptr; if (!isa<ConstantExpr>(GEP->getOperand(0))) { ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>( - ConstantFoldInstruction(GEP, DL, TLI)); + ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction()))); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); @@ -331,7 +333,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds()) SubInit = Constant::getNullValue(GEP->getResultElementType()); } - Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI); + Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI); if (GEP->use_empty()) { GEP->eraseFromParent(); @@ -348,7 +350,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // us, and if they are all dead, nuke them without remorse. if (isSafeToDestroyConstant(C)) { C->destroyConstant(); - CleanupConstantGlobalUsers(V, Init, DL, TLI); + CleanupConstantGlobalUsers(V, Init, DL, GetTLI); return true; } } @@ -495,8 +497,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // had 256 byte alignment for example, something might depend on that: // propagate info to each field. uint64_t FieldOffset = Layout.getElementOffset(i); - unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); - if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i))) + Align NewAlign(MinAlign(StartAlignment, FieldOffset)); + if (NewAlign > Align(DL.getABITypeAlignment(STy->getElementType(i)))) NGV->setAlignment(NewAlign); // Copy over the debug info for the variable. @@ -511,7 +513,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { NewGlobals.reserve(NumElements); auto ElTy = STy->getElementType(); uint64_t EltSize = DL.getTypeAllocSize(ElTy); - unsigned EltAlign = DL.getABITypeAlignment(ElTy); + Align EltAlign(DL.getABITypeAlignment(ElTy)); uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy); for (unsigned i = 0, e = NumElements; i != e; ++i) { Constant *In = Init->getAggregateElement(i); @@ -530,7 +532,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. 
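
The MinAlign computation below makes the comment's example concrete: a field's guaranteed alignment is the largest power of two dividing both the aggregate's alignment and the field's offset. A standalone check of the arithmetic (same formula as llvm::MinAlign):

#include <cstdint>

constexpr uint64_t minAlign(uint64_t A, uint64_t B) {
  // Lowest set bit of (A | B): the largest power of two dividing both.
  return (A | B) & (1 + ~(A | B));
}

static_assert(minAlign(256, 0) == 256, "offset 0 keeps the full alignment");
static_assert(minAlign(256, 40) == 8, "a field at offset 40 only gets 8");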
- unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); + Align NewAlign(MinAlign(StartAlignment, EltSize * i)); if (NewAlign > EltAlign) NGV->setAlignment(NewAlign); transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits, @@ -745,9 +747,9 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { /// are uses of the loaded value that would trap if the loaded value is /// dynamically null, then we know that they cannot be reachable with a null /// optimize away the load. -static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, - const DataLayout &DL, - TargetLibraryInfo *TLI) { +static bool OptimizeAwayTrappingUsesOfLoads( + GlobalVariable *GV, Constant *LV, const DataLayout &DL, + function_ref<TargetLibraryInfo &(Function &)> GetTLI) { bool Changed = false; // Keep track of whether we are able to remove all the uses of the global @@ -793,10 +795,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // nor is the global. if (AllNonStoreUsesGone) { if (isLeakCheckerRoot(GV)) { - Changed |= CleanupPointerRootUsers(GV, TLI); + Changed |= CleanupPointerRootUsers(GV, GetTLI); } else { Changed = true; - CleanupConstantGlobalUsers(GV, nullptr, DL, TLI); + CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI); } if (GV->use_empty()) { LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); @@ -889,8 +891,8 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, while (!GV->use_empty()) { if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) { // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0, - SI->getOrdering(), SI->getSyncScopeID(), SI); + new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, + None, SI->getOrdering(), SI->getSyncScopeID(), SI); SI->eraseFromParent(); continue; } @@ -907,7 +909,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, // Replace the cmp X, 0 with a use of the bool value. // Sink the load to where the compare was, if atomic rules allow us to. Value *LV = new LoadInst(InitBool->getValueType(), InitBool, - InitBool->getName() + ".val", false, 0, + InitBool->getName() + ".val", false, None, LI->getOrdering(), LI->getSyncScopeID(), LI->isUnordered() ? (Instruction *)ICI : LI); InitBoolUsed = true; @@ -1562,10 +1564,10 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, // Try to optimize globals based on the knowledge that only one value (besides // its initializer) is ever stored to the global. -static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, - AtomicOrdering Ordering, - const DataLayout &DL, - TargetLibraryInfo *TLI) { +static bool +optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, + AtomicOrdering Ordering, const DataLayout &DL, + function_ref<TargetLibraryInfo &(Function &)> GetTLI) { // Ignore no-op GEPs and bitcasts. StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1583,9 +1585,10 @@ static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. 
- if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI)) + if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI)) return true; - } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) { + } else if (CallInst *CI = extractMallocCall(StoredOnceVal, GetTLI)) { + auto *TLI = &GetTLI(*CI->getFunction()); Type *MallocType = getMallocAllocatedType(CI, TLI); if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, DL, TLI)) @@ -1643,10 +1646,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // instead of a select to synthesize the desired value. bool IsOneZero = false; bool EmitOneOrZero = true; - if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)){ + auto *CI = dyn_cast<ConstantInt>(OtherVal); + if (CI && CI->getValue().getActiveBits() <= 64) { IsOneZero = InitVal->isNullValue() && CI->isOne(); - if (ConstantInt *CIInit = dyn_cast<ConstantInt>(GV->getInitializer())){ + auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer()); + if (CIInit && CIInit->getValue().getActiveBits() <= 64) { uint64_t ValInit = CIInit->getZExtValue(); uint64_t ValOther = CI->getZExtValue(); uint64_t ValMinus = ValOther - ValInit; @@ -1711,7 +1716,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { assert(LI->getOperand(0) == GV && "Not a copy!"); // Insert a new load, to preserve the saved value. StoreVal = new LoadInst(NewGV->getValueType(), NewGV, - LI->getName() + ".b", false, 0, + LI->getName() + ".b", false, None, LI->getOrdering(), LI->getSyncScopeID(), LI); } else { assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && @@ -1721,15 +1726,15 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { } } StoreInst *NSI = - new StoreInst(StoreVal, NewGV, false, 0, SI->getOrdering(), + new StoreInst(StoreVal, NewGV, false, None, SI->getOrdering(), SI->getSyncScopeID(), SI); NSI->setDebugLoc(SI->getDebugLoc()); } else { // Change the load into a load of bool then a select. LoadInst *LI = cast<LoadInst>(UI); - LoadInst *NLI = - new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b", - false, 0, LI->getOrdering(), LI->getSyncScopeID(), LI); + LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV, + LI->getName() + ".b", false, None, + LI->getOrdering(), LI->getSyncScopeID(), LI); Instruction *NSI; if (IsOneZero) NSI = new ZExtInst(NLI, LI->getType(), "", LI); @@ -1914,9 +1919,10 @@ static void makeAllConstantUsesInstructions(Constant *C) { /// Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. -static bool processInternalGlobal( - GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI, - function_ref<DominatorTree &(Function &)> LookupDomTree) { +static bool +processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, + function_ref<TargetLibraryInfo &(Function &)> GetTLI, + function_ref<DominatorTree &(Function &)> LookupDomTree) { auto &DL = GV->getParent()->getDataLayout(); // If this is a first class global and has only one accessing function and // this function is non-recursive, we replace the global with a local alloca @@ -1963,11 +1969,12 @@ static bool processInternalGlobal( bool Changed; if (isLeakCheckerRoot(GV)) { // Delete any constant stores to the global. - Changed = CleanupPointerRootUsers(GV, TLI); + Changed = CleanupPointerRootUsers(GV, GetTLI); } else { // Delete any stores we can find to the global. We may not be able to // make it completely dead though. 
- Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + Changed = + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); } // If the global is dead now, delete it. @@ -1989,7 +1996,7 @@ static bool processInternalGlobal( GV->setConstant(true); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); // If the global is dead now, just nuke it. if (GV->use_empty()) { @@ -2019,7 +2026,7 @@ static bool processInternalGlobal( GV->setInitializer(SOVConstant); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); if (GV->use_empty()) { LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to " @@ -2033,7 +2040,8 @@ static bool processInternalGlobal( // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. - if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI)) + if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, + GetTLI)) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a @@ -2054,7 +2062,8 @@ static bool processInternalGlobal( /// Analyze the specified global variable and optimize it if possible. If we /// make a change, return true. static bool -processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI, +processGlobal(GlobalValue &GV, + function_ref<TargetLibraryInfo &(Function &)> GetTLI, function_ref<DominatorTree &(Function &)> LookupDomTree) { if (GV.getName().startswith("llvm.")) return false; @@ -2086,7 +2095,7 @@ processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI, if (GVar->isConstant() || !GVar->hasInitializer()) return Changed; - return processInternalGlobal(GVar, GS, TLI, LookupDomTree) || Changed; + return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed; } /// Walk all of the direct calls of the specified function, changing them to @@ -2234,7 +2243,8 @@ hasOnlyColdCalls(Function &F, } static bool -OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, +OptimizeFunctions(Module &M, + function_ref<TargetLibraryInfo &(Function &)> GetTLI, function_ref<TargetTransformInfo &(Function &)> GetTTI, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, function_ref<DominatorTree &(Function &)> LookupDomTree, @@ -2275,17 +2285,13 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, // So, remove unreachable blocks from the function, because a) there's // no point in analyzing them and b) GlobalOpt should otherwise grow // some more complicated logic to break these cycles. - // Removing unreachable blocks might invalidate the dominator so we - // recalculate it. 
if (!F->isDeclaration()) { - if (removeUnreachableBlocks(*F)) { - auto &DT = LookupDomTree(*F); - DT.recalculate(*F); - Changed = true; - } + auto &DT = LookupDomTree(*F); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + Changed |= removeUnreachableBlocks(*F, &DTU); } - Changed |= processGlobal(*F, TLI, LookupDomTree); + Changed |= processGlobal(*F, GetTLI, LookupDomTree); if (!F->hasLocalLinkage()) continue; @@ -2342,7 +2348,8 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, } static bool -OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, +OptimizeGlobalVars(Module &M, + function_ref<TargetLibraryInfo &(Function &)> GetTLI, function_ref<DominatorTree &(Function &)> LookupDomTree, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { bool Changed = false; @@ -2357,7 +2364,10 @@ OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, if (GV->hasInitializer()) if (auto *C = dyn_cast<Constant>(GV->getInitializer())) { auto &DL = M.getDataLayout(); - Constant *New = ConstantFoldConstant(C, DL, TLI); + // TLI is not used in the case of a Constant, so use default nullptr + // for that optional parameter, since we don't have a Function to + // provide GetTLI anyway. + Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr); if (New && New != C) GV->setInitializer(New); } @@ -2367,7 +2377,7 @@ OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, continue; } - Changed |= processGlobal(*GV, TLI, LookupDomTree); + Changed |= processGlobal(*GV, GetTLI, LookupDomTree); } return Changed; } @@ -2581,8 +2591,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, } static int compareNames(Constant *const *A, Constant *const *B) { - Value *AStripped = (*A)->stripPointerCastsNoFollowAliases(); - Value *BStripped = (*B)->stripPointerCastsNoFollowAliases(); + Value *AStripped = (*A)->stripPointerCasts(); + Value *BStripped = (*B)->stripPointerCasts(); return AStripped->getName().compare(BStripped->getName()); } @@ -2809,7 +2819,14 @@ OptimizeGlobalAliases(Module &M, return Changed; } -static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { +static Function * +FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { + // Hack to get a default TLI before we have an actual Function. + auto FuncIter = M.begin(); + if (FuncIter == M.end()) + return nullptr; + auto *TLI = &GetTLI(*FuncIter); + LibFunc F = LibFunc_cxa_atexit; if (!TLI->has(F)) return nullptr; @@ -2818,6 +2835,9 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { if (!Fn) return nullptr; + // Now get the actual TLI for Fn. + TLI = &GetTLI(*Fn); + // Make sure that the function has the correct prototype. 
if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit) return nullptr; @@ -2889,7 +2909,8 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { } static bool optimizeGlobalsInModule( - Module &M, const DataLayout &DL, TargetLibraryInfo *TLI, + Module &M, const DataLayout &DL, + function_ref<TargetLibraryInfo &(Function &)> GetTLI, function_ref<TargetTransformInfo &(Function &)> GetTTI, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, function_ref<DominatorTree &(Function &)> LookupDomTree) { @@ -2914,24 +2935,24 @@ static bool optimizeGlobalsInModule( NotDiscardableComdats.insert(C); // Delete functions that are trivially dead, ccc -> fastcc - LocalChange |= OptimizeFunctions(M, TLI, GetTTI, GetBFI, LookupDomTree, + LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree, NotDiscardableComdats); // Optimize global_ctors list. LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { - return EvaluateStaticConstructor(F, DL, TLI); + return EvaluateStaticConstructor(F, DL, &GetTLI(*F)); }); // Optimize non-address-taken globals. - LocalChange |= OptimizeGlobalVars(M, TLI, LookupDomTree, - NotDiscardableComdats); + LocalChange |= + OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats); // Resolve aliases, when possible. LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats); // Try to remove trivial global destructors if they are not removed // already. - Function *CXAAtExitFn = FindCXAAtExit(M, TLI); + Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI); if (CXAAtExitFn) LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); @@ -2946,12 +2967,14 @@ static bool optimizeGlobalsInModule( PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { auto &DL = M.getDataLayout(); - auto &TLI = AM.getResult<TargetLibraryAnalysis>(M); auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{ return FAM.getResult<DominatorTreeAnalysis>(F); }; + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { return FAM.getResult<TargetIRAnalysis>(F); }; @@ -2960,7 +2983,7 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { return FAM.getResult<BlockFrequencyAnalysis>(F); }; - if (!optimizeGlobalsInModule(M, DL, &TLI, GetTTI, GetBFI, LookupDomTree)) + if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } @@ -2979,10 +3002,12 @@ struct GlobalOptLegacyPass : public ModulePass { return false; auto &DL = M.getDataLayout(); - auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); auto LookupDomTree = [this](Function &F) -> DominatorTree & { return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); }; + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; auto GetTTI = [this](Function &F) -> TargetTransformInfo & { return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); }; @@ -2991,7 +3016,8 @@ struct GlobalOptLegacyPass : public ModulePass { return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); }; - return optimizeGlobalsInModule(M, DL, TLI, GetTTI, GetBFI, LookupDomTree); + return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, + LookupDomTree); } void getAnalysisUsage(AnalysisUsage 
&AU) const override { diff --git a/lib/Transforms/IPO/HotColdSplitting.cpp b/lib/Transforms/IPO/HotColdSplitting.cpp index ab1a9a79cad6..cfdcc8db7f50 100644 --- a/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/lib/Transforms/IPO/HotColdSplitting.cpp @@ -85,12 +85,6 @@ static cl::opt<int> "multiple of TCC_Basic)")); namespace { - -/// A sequence of basic blocks. -/// -/// A 0-sized SmallVector is slightly cheaper to move than a std::vector. -using BlockSequence = SmallVector<BasicBlock *, 0>; - // Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify // this function unless you modify the MBB version as well. // @@ -169,31 +163,6 @@ static bool markFunctionCold(Function &F, bool UpdateEntryCount = false) { return Changed; } -class HotColdSplitting { -public: - HotColdSplitting(ProfileSummaryInfo *ProfSI, - function_ref<BlockFrequencyInfo *(Function &)> GBFI, - function_ref<TargetTransformInfo &(Function &)> GTTI, - std::function<OptimizationRemarkEmitter &(Function &)> *GORE, - function_ref<AssumptionCache *(Function &)> LAC) - : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {} - bool run(Module &M); - -private: - bool isFunctionCold(const Function &F) const; - bool shouldOutlineFrom(const Function &F) const; - bool outlineColdRegions(Function &F, bool HasProfileSummary); - Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT, - BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, - OptimizationRemarkEmitter &ORE, - AssumptionCache *AC, unsigned Count); - ProfileSummaryInfo *PSI; - function_ref<BlockFrequencyInfo *(Function &)> GetBFI; - function_ref<TargetTransformInfo &(Function &)> GetTTI; - std::function<OptimizationRemarkEmitter &(Function &)> *GetORE; - function_ref<AssumptionCache *(Function &)> LookupAC; -}; - class HotColdSplittingLegacyPass : public ModulePass { public: static char ID; @@ -321,13 +290,10 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region, return Penalty; } -Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, - DominatorTree &DT, - BlockFrequencyInfo *BFI, - TargetTransformInfo &TTI, - OptimizationRemarkEmitter &ORE, - AssumptionCache *AC, - unsigned Count) { +Function *HotColdSplitting::extractColdRegion( + const BlockSequence &Region, const CodeExtractorAnalysisCache &CEAC, + DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, + OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned Count) { assert(!Region.empty()); // TODO: Pass BFI and BPI to update profile information. @@ -349,7 +315,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, return nullptr; Function *OrigF = Region[0]->getParent(); - if (Function *OutF = CE.extractCodeRegion()) { + if (Function *OutF = CE.extractCodeRegion(CEAC)) { User *U = *OutF->user_begin(); CallInst *CI = cast<CallInst>(U); CallSite CS(CI); @@ -607,9 +573,9 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { }); if (!DT) - DT = make_unique<DominatorTree>(F); + DT = std::make_unique<DominatorTree>(F); if (!PDT) - PDT = make_unique<PostDominatorTree>(F); + PDT = std::make_unique<PostDominatorTree>(F); auto Regions = OutliningRegion::create(*BB, *DT, *PDT); for (OutliningRegion &Region : Regions) { @@ -637,9 +603,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { } } + if (OutliningWorklist.empty()) + return Changed; + // Outline single-entry cold regions, splitting up larger regions as needed. 
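The loop that follows hinges on CodeExtractorAnalysisCache: the per-function analysis that CodeExtractor previously recomputed for every region is now built once and threaded into each extractCodeRegion call, which is what removes the quadratic compile-time behavior. A reduced sketch of the reuse pattern, with outlineAll as an invented wrapper and the region list assumed to be precomputed:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;

static unsigned outlineAll(Function &F, DominatorTree &DT,
                           ArrayRef<SmallVector<BasicBlock *, 8>> Regions) {
  // One cache per function, shared by every extraction below.
  CodeExtractorAnalysisCache CEAC(F);
  unsigned NumOutlined = 0;
  for (const SmallVector<BasicBlock *, 8> &Region : Regions) {
    CodeExtractor CE(Region, &DT);
    if (CE.extractCodeRegion(CEAC)) // reuses CEAC instead of re-analyzing F
      ++NumOutlined;
  }
  return NumOutlined;
}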
unsigned OutlinedFunctionID = 1; - while (!OutliningWorklist.empty()) { + // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time. + CodeExtractorAnalysisCache CEAC(F); + do { OutliningRegion Region = OutliningWorklist.pop_back_val(); assert(!Region.empty() && "Empty outlining region in worklist"); do { @@ -650,14 +621,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { BB->dump(); }); - Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC, - OutlinedFunctionID); + Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI, TTI, + ORE, AC, OutlinedFunctionID); if (Outlined) { ++OutlinedFunctionID; Changed = true; } } while (!Region.empty()); - } + } while (!OutliningWorklist.empty()); return Changed; } diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 34db75dd8b03..bddf75211599 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -114,6 +114,10 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createIPSCCPPass()); } +void LLVMAddMergeFunctionsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createMergeFunctionsPass()); +} + void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { auto PreserveMain = [=](const GlobalValue &GV) { return AllButMain && GV.getName() == "main"; @@ -121,6 +125,15 @@ void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { unwrap(PM)->add(createInternalizePass(PreserveMain)); } +void LLVMAddInternalizePassWithMustPreservePredicate( + LLVMPassManagerRef PM, + void *Context, + LLVMBool (*Pred)(LLVMValueRef, void *)) { + unwrap(PM)->add(createInternalizePass([=](const GlobalValue &GV) { + return Pred(wrap(&GV), Context) == 0 ? false : true; + })); +} + void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createStripDeadPrototypesPass()); } diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp index 7f5511e008e1..d1a68b28bd33 100644 --- a/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -18,24 +18,28 @@ using namespace llvm; #define DEBUG_TYPE "inferattrs" -static bool inferAllPrototypeAttributes(Module &M, - const TargetLibraryInfo &TLI) { +static bool inferAllPrototypeAttributes( + Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { bool Changed = false; for (Function &F : M.functions()) // We only infer things using the prototype and the name; we don't need // definitions. if (F.isDeclaration() && !F.hasOptNone()) - Changed |= inferLibFuncAttributes(F, TLI); + Changed |= inferLibFuncAttributes(F, GetTLI(F)); return Changed; } PreservedAnalyses InferFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { - auto &TLI = AM.getResult<TargetLibraryAnalysis>(M); + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; - if (!inferAllPrototypeAttributes(M, TLI)) + if (!inferAllPrototypeAttributes(M, GetTLI)) // If we didn't infer anything, preserve all analyses. 
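Looking back at the IPO.cpp hunk above, the commit also grows the C API: LLVMAddInternalizePassWithMustPreservePredicate lets bindings supply an arbitrary must-preserve callback plus a context pointer, instead of the all-but-main boolean of LLVMAddInternalizePass. A hypothetical C++ client; keepSymbol and KeepList are invented names for the example:

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/IPO.h"
#include <cstring>

// Return nonzero for globals that must stay externally visible.
extern "C" LLVMBool keepSymbol(LLVMValueRef GV, void *Ctx) {
  const char **KeepList = static_cast<const char **>(Ctx);
  size_t Len = 0;
  const char *Name = LLVMGetValueName2(GV, &Len);
  for (const char **P = KeepList; *P; ++P)
    if (std::strcmp(Name, *P) == 0)
      return 1;
  return 0;
}

static void addInternalize(LLVMPassManagerRef PM) {
  static const char *KeepList[] = {"main", "my_exported_entry", nullptr};
  LLVMAddInternalizePassWithMustPreservePredicate(PM, KeepList, keepSymbol);
}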
return PreservedAnalyses::all(); @@ -60,8 +64,10 @@ struct InferFunctionAttrsLegacyPass : public ModulePass { if (skipModule(M)) return false; - auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - return inferAllPrototypeAttributes(M, TLI); + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; + return inferAllPrototypeAttributes(M, GetTLI); } }; } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 945f8affae6e..4b72261131c1 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -239,7 +239,7 @@ static void mergeInlinedArrayAllocas( } if (Align1 > Align2) - AvailableAlloca->setAlignment(AI->getAlignment()); + AvailableAlloca->setAlignment(MaybeAlign(AI->getAlignment())); } AI->eraseFromParent(); @@ -527,7 +527,8 @@ static void setInlineRemark(CallSite &CS, StringRef message) { static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function<AssumptionCache &(Function &)> GetAssumptionCache, - ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI, + ProfileSummaryInfo *PSI, + std::function<TargetLibraryInfo &(Function &)> GetTLI, bool InsertLifetime, function_ref<InlineCost(CallSite CS)> GetInlineCost, function_ref<AAResults &(Function &)> AARGetter, @@ -626,7 +627,8 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, Instruction *Instr = CS.getInstruction(); - bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI); + bool IsTriviallyDead = + isInstructionTriviallyDead(Instr, &GetTLI(*Caller)); int InlineHistoryID; if (!IsTriviallyDead) { @@ -757,13 +759,16 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); ACT = &getAnalysis<AssumptionCacheTracker>(); PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + auto GetTLI = [&](Function &F) -> TargetLibraryInfo & { + return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; - return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime, - [this](CallSite CS) { return getInlineCost(CS); }, - LegacyAARGetter(*this), ImportedFunctionsStats); + return inlineCallsImpl( + SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime, + [this](CallSite CS) { return getInlineCost(CS); }, LegacyAARGetter(*this), + ImportedFunctionsStats); } /// Remove now-dead linkonce functions at the end of @@ -879,7 +884,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (!ImportedFunctionsStats && InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { ImportedFunctionsStats = - llvm::make_unique<ImportedFunctionsInliningStatistics>(); + std::make_unique<ImportedFunctionsInliningStatistics>(); ImportedFunctionsStats->setModuleInfo(M); } diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 91c7b5f5f135..add2ae053735 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -141,10 +141,12 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { if (NumLoops == 0) return Changed; --NumLoops; AssumptionCache *AC = nullptr; + Function &Func = *L->getHeader()->getParent(); if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>()) - AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent()); + AC = 
ACT->lookupAssumptionCache(Func); + CodeExtractorAnalysisCache CEAC(Func); CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC); - if (Extractor.extractCodeRegion() != nullptr) { + if (Extractor.extractCodeRegion(CEAC) != nullptr) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index f7371284f47e..2dec366d70e2 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -230,6 +230,16 @@ void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits, Bytes[AllocByteOffset + B] |= AllocMask; } +bool lowertypetests::isJumpTableCanonical(Function *F) { + if (F->isDeclarationForLinker()) + return false; + auto *CI = mdconst::extract_or_null<ConstantInt>( + F->getParent()->getModuleFlag("CFI Canonical Jump Tables")); + if (!CI || CI->getZExtValue() != 0) + return true; + return F->hasFnAttribute("cfi-canonical-jump-table"); +} + namespace { struct ByteArrayInfo { @@ -251,9 +261,12 @@ class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> { GlobalObject *GO; size_t NTypes; - // For functions: true if this is a definition (either in the merged module or - // in one of the thinlto modules). - bool IsDefinition; + // For functions: true if the jump table is canonical. This essentially means + // whether the canonical address (i.e. the symbol table entry) of the function + // is provided by the local jump table. This is normally the same as whether + // the function is defined locally, but if canonical jump tables are disabled + // by the user then the jump table never provides a canonical definition. + bool IsJumpTableCanonical; // For functions: true if this function is either defined or used in a thinlto // module and its jumptable entry needs to be exported to thinlto backends. @@ -263,13 +276,13 @@ class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> { public: static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO, - bool IsDefinition, bool IsExported, + bool IsJumpTableCanonical, bool IsExported, ArrayRef<MDNode *> Types) { auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate( totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember))); GTM->GO = GO; GTM->NTypes = Types.size(); - GTM->IsDefinition = IsDefinition; + GTM->IsJumpTableCanonical = IsJumpTableCanonical; GTM->IsExported = IsExported; std::uninitialized_copy(Types.begin(), Types.end(), GTM->getTrailingObjects<MDNode *>()); @@ -280,8 +293,8 @@ public: return GO; } - bool isDefinition() const { - return IsDefinition; + bool isJumpTableCanonical() const { + return IsJumpTableCanonical; } bool isExported() const { @@ -320,6 +333,49 @@ private: size_t NTargets; }; +struct ScopedSaveAliaseesAndUsed { + Module &M; + SmallPtrSet<GlobalValue *, 16> Used, CompilerUsed; + std::vector<std::pair<GlobalIndirectSymbol *, Function *>> FunctionAliases; + + ScopedSaveAliaseesAndUsed(Module &M) : M(M) { + // The users of this class want to replace all function references except + // for aliases and llvm.used/llvm.compiler.used with references to a jump + // table. 
We avoid replacing aliases in order to avoid introducing a double + indirection (or an alias pointing to a declaration in ThinLTO mode), and + we avoid replacing llvm.used/llvm.compiler.used because these global + variables describe properties of the global, not the jump table (besides, + offset references to the jump table in llvm.used are invalid). + Unfortunately, LLVM doesn't have a "RAUW except for these (possibly + indirect) users", so what we do is save the list of globals referenced by + llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW + replace the aliasees and then set them back to their original values at + the end. + if (GlobalVariable *GV = collectUsedGlobalVariables(M, Used, false)) + GV->eraseFromParent(); + if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true)) + GV->eraseFromParent(); + + for (auto &GIS : concat<GlobalIndirectSymbol>(M.aliases(), M.ifuncs())) { + // FIXME: This should look past all aliases not just interposable ones, + // see discussion on D65118. + if (auto *F = + dyn_cast<Function>(GIS.getIndirectSymbol()->stripPointerCasts())) + FunctionAliases.push_back({&GIS, F}); + } + } + + ~ScopedSaveAliaseesAndUsed() { + appendToUsed(M, std::vector<GlobalValue *>(Used.begin(), Used.end())); + appendToCompilerUsed(M, std::vector<GlobalValue *>(CompilerUsed.begin(), + CompilerUsed.end())); + + for (auto P : FunctionAliases) + P.first->setIndirectSymbol( + ConstantExpr::getBitCast(P.second, P.first->getType())); + } +}; + class LowerTypeTestsModule { Module &M; @@ -387,7 +443,8 @@ class LowerTypeTestsModule { uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL); TypeIdLowering importTypeId(StringRef TypeId); void importTypeTest(CallInst *CI); - void importFunction(Function *F, bool isDefinition); + void importFunction(Function *F, bool isJumpTableCanonical, + std::vector<GlobalAlias *> &AliasesToErase); BitSetInfo buildBitSet(Metadata *TypeId, @@ -421,7 +478,8 @@ class LowerTypeTestsModule { ArrayRef<GlobalTypeMember *> Globals, ArrayRef<ICallBranchFunnel *> ICallBranchFunnels); - void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT, bool IsDefinition); + void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT, + bool IsJumpTableCanonical); void moveInitializerToModuleConstructor(GlobalVariable *GV); void findGlobalVariableUsersOf(Constant *C, SmallSetVector<GlobalVariable *, 8> &Out); @@ -433,7 +491,7 @@ class LowerTypeTestsModule { /// the block. 'This's use list is expected to have at least one element. /// Unlike replaceAllUsesWith this function skips blockaddr and direct call /// uses. - void replaceCfiUses(Function *Old, Value *New, bool IsDefinition); + void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical); /// replaceDirectCalls - Go through the uses list for this definition and /// replace each use, which is a direct function call. @@ -759,43 +817,50 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables( // Build a new global with the combined contents of the referenced globals. // This global is a struct whose even-indexed elements contain the original // contents of the referenced globals and whose odd-indexed elements contain - // any padding required to align the next element to the next power of 2. + // any padding required to align the next element to the next power of 2 plus + // any additional padding required to meet its alignment requirements. 
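That padding rule is easiest to see with concrete numbers: pad each member up to the next power of two of its size, cap the padding at a 32-byte boundary, then realign for the next member. A standalone arithmetic sketch of the same computation, assuming a uniform 8-byte ABI alignment for simplicity:

#include <cstdint>
#include <cstdio>

// Next power of two strictly greater than A (same contract as LLVM's
// NextPowerOf2).
static uint64_t nextPowerOf2(uint64_t A) {
  A |= A >> 1;  A |= A >> 2;  A |= A >> 4;
  A |= A >> 8;  A |= A >> 16; A |= A >> 32;
  return A + 1;
}
static uint64_t alignTo(uint64_t V, uint64_t A) { return (V + A - 1) / A * A; }

int main() {
  const uint64_t Sizes[] = {5, 40, 65};   // byte sizes of three globals
  const uint64_t ABIAlign = 8;            // assumed, for illustration only
  uint64_t CurOffset = 0, DesiredPadding = 0;
  for (uint64_t InitSize : Sizes) {
    uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, ABIAlign);
    std::printf("size %3llu -> offset %llu\n",
                (unsigned long long)InitSize, (unsigned long long)GVOffset);
    CurOffset = GVOffset + InitSize;
    DesiredPadding = nextPowerOf2(InitSize - 1) - InitSize;
    if (DesiredPadding > 32)              // cap: pad only to a 32-byte boundary
      DesiredPadding = alignTo(InitSize, 32) - InitSize;
  }
  return 0;
}

This prints offsets 0, 8 and 72: the 5-byte global requests 3 bytes of padding, the 40-byte global requests 24, and the 65-byte global would request 63, which the cap reduces to 31.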
std::vector<Constant *> GlobalInits; const DataLayout &DL = M.getDataLayout(); + DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout; + Align MaxAlign; + uint64_t CurOffset = 0; + uint64_t DesiredPadding = 0; for (GlobalTypeMember *G : Globals) { - GlobalVariable *GV = cast<GlobalVariable>(G->getGlobal()); + auto *GV = cast<GlobalVariable>(G->getGlobal()); + MaybeAlign Alignment(GV->getAlignment()); + if (!Alignment) + Alignment = Align(DL.getABITypeAlignment(GV->getValueType())); + MaxAlign = std::max(MaxAlign, *Alignment); + uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, *Alignment); + GlobalLayout[G] = GVOffset; + if (GVOffset != 0) { + uint64_t Padding = GVOffset - CurOffset; + GlobalInits.push_back( + ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding))); + } + GlobalInits.push_back(GV->getInitializer()); uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType()); + CurOffset = GVOffset + InitSize; - // Compute the amount of padding required. - uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize; + // Compute the amount of padding that we'd like for the next element. + DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize; // Experiments of different caps with Chromium on both x64 and ARM64 // have shown that the 32-byte cap generates the smallest binary on // both platforms while different caps yield similar performance. // (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html) - if (Padding > 32) - Padding = alignTo(InitSize, 32) - InitSize; - - GlobalInits.push_back( - ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding))); + if (DesiredPadding > 32) + DesiredPadding = alignTo(InitSize, 32) - InitSize; } - if (!GlobalInits.empty()) - GlobalInits.pop_back(); + Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits); auto *CombinedGlobal = new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, NewInit); + CombinedGlobal->setAlignment(MaxAlign); StructType *NewTy = cast<StructType>(NewInit->getType()); - const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy); - - // Compute the offsets of the original globals within the new global. - DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout; - for (unsigned I = 0; I != Globals.size(); ++I) - // Multiply by 2 to account for padding elements. - GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2); - lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout); // Build aliases pointing to offsets into the combined global for each @@ -975,14 +1040,16 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) { } // ThinLTO backend: the function F has a jump table entry; update this module -// accordingly. isDefinition describes the type of the jump table entry. -void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { +// accordingly. isJumpTableCanonical describes the type of the jump table entry. 
+void LowerTypeTestsModule::importFunction( + Function *F, bool isJumpTableCanonical, + std::vector<GlobalAlias *> &AliasesToErase) { assert(F->getType()->getAddressSpace() == 0); GlobalValue::VisibilityTypes Visibility = F->getVisibility(); std::string Name = F->getName(); - if (F->isDeclarationForLinker() && isDefinition) { + if (F->isDeclarationForLinker() && isJumpTableCanonical) { // Non-dso_local functions may be overridden at run time, // don't short-circuit them if (F->isDSOLocal()) { @@ -997,12 +1064,13 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { } Function *FDecl; - if (F->isDeclarationForLinker() && !isDefinition) { - // Declaration of an external function. + if (!isJumpTableCanonical) { + // Either a declaration of an external function or a reference to a locally + // defined jump table. FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, F->getAddressSpace(), Name + ".cfi_jt", &M); FDecl->setVisibility(GlobalValue::HiddenVisibility); - } else if (isDefinition) { + } else { F->setName(Name + ".cfi"); F->setLinkage(GlobalValue::ExternalLinkage); FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, @@ -1011,8 +1079,8 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { Visibility = GlobalValue::HiddenVisibility; // Delete aliases pointing to this function, they'll be re-created in the - // merged output - SmallVector<GlobalAlias*, 4> ToErase; + // merged output. Don't do it yet though because ScopedSaveAliaseesAndUsed + // will want to reset the aliasees first. for (auto &U : F->uses()) { if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) { Function *AliasDecl = Function::Create( F->getFunctionType(), GlobalValue::ExternalWeakLinkage, F->getAddressSpace(), "", &M); AliasDecl->takeName(A); A->replaceAllUsesWith(AliasDecl); - ToErase.push_back(A); + AliasesToErase.push_back(A); } } - for (auto *A : ToErase) - A->eraseFromParent(); - } else { - // Function definition without type metadata, where some other translation - // unit contained a declaration with type metadata. This normally happens - // during mixed CFI + non-CFI compilation. We do nothing with the function - // so that it is treated the same way as a function defined outside of the - // LTO unit. - return; } - if (F->isWeakForLinker()) - replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isDefinition); + if (F->hasExternalWeakLinkage()) + replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical); else - replaceCfiUses(F, FDecl, isDefinition); + replaceCfiUses(F, FDecl, isJumpTableCanonical); // Set visibility late because it's used in replaceCfiUses() to determine // whether uses need to be replaced. @@ -1225,7 +1284,7 @@ void LowerTypeTestsModule::findGlobalVariableUsersOf( // Replace all uses of F with (F ? JT : 0). void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( - Function *F, Constant *JT, bool IsDefinition) { + Function *F, Constant *JT, bool IsJumpTableCanonical) { // The target expression can not appear in a constant initializer on most // (all?) targets. Switch to a runtime initializer. 
SmallSetVector<GlobalVariable *, 8> GlobalVarUsers; @@ -1239,7 +1298,7 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( Function::Create(cast<FunctionType>(F->getValueType()), GlobalValue::ExternalWeakLinkage, F->getAddressSpace(), "", &M); - replaceCfiUses(F, PlaceholderFn, IsDefinition); + replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical); Constant *Target = ConstantExpr::getSelect( ConstantExpr::getICmp(CmpInst::ICMP_NE, F, @@ -1276,8 +1335,9 @@ selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions, unsigned ArmCount = 0, ThumbCount = 0; for (const auto GTM : Functions) { - if (!GTM->isDefinition()) { + if (!GTM->isJumpTableCanonical()) { // PLT stubs are always ARM. + // FIXME: This is the wrong heuristic for non-canonical jump tables. ++ArmCount; continue; } @@ -1303,7 +1363,7 @@ void LowerTypeTestsModule::createJumpTable( cast<Function>(Functions[I]->getGlobal())); // Align the whole table by entry size. - F->setAlignment(getJumpTableEntrySize()); + F->setAlignment(Align(getJumpTableEntrySize())); // Skip prologue. // Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3. // Luckily, this function does not get any prologue even without the @@ -1438,47 +1498,53 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout); - // Build aliases pointing to offsets into the jump table, and replace - // references to the original functions with references to the aliases. - for (unsigned I = 0; I != Functions.size(); ++I) { - Function *F = cast<Function>(Functions[I]->getGlobal()); - bool IsDefinition = Functions[I]->isDefinition(); - - Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( - ConstantExpr::getInBoundsGetElementPtr( - JumpTableType, JumpTable, - ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0), - ConstantInt::get(IntPtrTy, I)}), - F->getType()); - if (Functions[I]->isExported()) { - if (IsDefinition) { - ExportSummary->cfiFunctionDefs().insert(F->getName()); + { + ScopedSaveAliaseesAndUsed S(M); + + // Build aliases pointing to offsets into the jump table, and replace + // references to the original functions with references to the aliases. 
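The block that follows runs under ScopedSaveAliaseesAndUsed, the RAII helper defined earlier: llvm.used/llvm.compiler.used and alias targets are detached up front so the RAUW calls inside the loop cannot rewrite them, and the destructor rebuilds them on scope exit. A reduced sketch of just the used-list half of that idea; ScopedUsedSaver is an invented name, not the class from the patch:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <vector>
using namespace llvm;

struct ScopedUsedSaver {
  Module &M;
  SmallPtrSet<GlobalValue *, 16> Used;
  ScopedUsedSaver(Module &M) : M(M) {
    // Record and erase llvm.used, so a following RAUW cannot rewrite the
    // entries it holds.
    if (GlobalVariable *GV = collectUsedGlobalVariables(M, Used, false))
      GV->eraseFromParent();
  }
  ~ScopedUsedSaver() {
    // Recreate llvm.used with the original members on scope exit.
    appendToUsed(M, std::vector<GlobalValue *>(Used.begin(), Used.end()));
  }
};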
+ for (unsigned I = 0; I != Functions.size(); ++I) { + Function *F = cast<Function>(Functions[I]->getGlobal()); + bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical(); + + Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( + ConstantExpr::getInBoundsGetElementPtr( + JumpTableType, JumpTable, + ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, I)}), + F->getType()); + if (Functions[I]->isExported()) { + if (IsJumpTableCanonical) { + ExportSummary->cfiFunctionDefs().insert(F->getName()); + } else { + GlobalAlias *JtAlias = GlobalAlias::create( + F->getValueType(), 0, GlobalValue::ExternalLinkage, + F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); + JtAlias->setVisibility(GlobalValue::HiddenVisibility); + ExportSummary->cfiFunctionDecls().insert(F->getName()); + } + } + if (!IsJumpTableCanonical) { + if (F->hasExternalWeakLinkage()) + replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr, + IsJumpTableCanonical); + else + replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical); } else { - GlobalAlias *JtAlias = GlobalAlias::create( - F->getValueType(), 0, GlobalValue::ExternalLinkage, - F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); - JtAlias->setVisibility(GlobalValue::HiddenVisibility); - ExportSummary->cfiFunctionDecls().insert(F->getName()); + assert(F->getType()->getAddressSpace() == 0); + + GlobalAlias *FAlias = + GlobalAlias::create(F->getValueType(), 0, F->getLinkage(), "", + CombinedGlobalElemPtr, &M); + FAlias->setVisibility(F->getVisibility()); + FAlias->takeName(F); + if (FAlias->hasName()) + F->setName(FAlias->getName() + ".cfi"); + replaceCfiUses(F, FAlias, IsJumpTableCanonical); + if (!F->hasLocalLinkage()) + F->setVisibility(GlobalVariable::HiddenVisibility); } } - if (!IsDefinition) { - if (F->isWeakForLinker()) - replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr, IsDefinition); - else - replaceCfiUses(F, CombinedGlobalElemPtr, IsDefinition); - } else { - assert(F->getType()->getAddressSpace() == 0); - - GlobalAlias *FAlias = GlobalAlias::create( - F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M); - FAlias->setVisibility(F->getVisibility()); - FAlias->takeName(F); - if (FAlias->hasName()) - F->setName(FAlias->getName() + ".cfi"); - replaceCfiUses(F, FAlias, IsDefinition); - if (!F->hasLocalLinkage()) - F->setVisibility(GlobalVariable::HiddenVisibility); - } } createJumpTable(JumpTableFn, Functions); @@ -1623,7 +1689,7 @@ bool LowerTypeTestsModule::runForTesting(Module &M) { ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary + ": "); std::error_code EC; - raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text); + raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text); ExitOnErr(errorCodeToError(EC)); yaml::Output Out(OS); @@ -1643,7 +1709,8 @@ static bool isDirectCall(Use& U) { return false; } -void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefinition) { +void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, + bool IsJumpTableCanonical) { SmallSetVector<Constant *, 4> Constants; auto UI = Old->use_begin(), E = Old->use_end(); for (; UI != E;) { @@ -1655,7 +1722,7 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefi continue; // Skip direct calls to externally defined or non-dso_local functions - if (isDirectCall(U) && (Old->isDSOLocal() || !IsDefinition)) + if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical)) continue; // Must handle Constants 
specially, we cannot call replaceUsesOfWith on a @@ -1678,16 +1745,7 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefi } void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) { - auto UI = Old->use_begin(), E = Old->use_end(); - for (; UI != E;) { - Use &U = *UI; - ++UI; - - if (!isDirectCall(U)) - continue; - - U.set(New); - } + Old->replaceUsesWithIf(New, [](Use &U) { return isDirectCall(U); }); } bool LowerTypeTestsModule::lower() { @@ -1734,10 +1792,16 @@ bool LowerTypeTestsModule::lower() { Decls.push_back(&F); } - for (auto F : Defs) - importFunction(F, /*isDefinition*/ true); - for (auto F : Decls) - importFunction(F, /*isDefinition*/ false); + std::vector<GlobalAlias *> AliasesToErase; + { + ScopedSaveAliaseesAndUsed S(M); + for (auto F : Defs) + importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase); + for (auto F : Decls) + importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase); + } + for (GlobalAlias *GA : AliasesToErase) + GA->eraseFromParent(); return true; } @@ -1823,6 +1887,17 @@ bool LowerTypeTestsModule::lower() { CfiFunctionLinkage Linkage = P.second.Linkage; MDNode *FuncMD = P.second.FuncMD; Function *F = M.getFunction(FunctionName); + if (F && F->hasLocalLinkage()) { + // Locally defined function that happens to have the same name as a + // function defined in a ThinLTO module. Rename it to move it out of + // the way of the external reference that we're about to create. + // Note that setName will find a unique name for the function, so even + // if there is an existing function with the suffix there won't be a + // name collision. + F->setName(F->getName() + ".1"); + F = nullptr; + } + if (!F) F = Function::Create( FunctionType::get(Type::getVoidTy(M.getContext()), false), @@ -1871,24 +1946,26 @@ bool LowerTypeTestsModule::lower() { Types.clear(); GO.getMetadata(LLVMContext::MD_type, Types); - bool IsDefinition = !GO.isDeclarationForLinker(); + bool IsJumpTableCanonical = false; bool IsExported = false; if (Function *F = dyn_cast<Function>(&GO)) { + IsJumpTableCanonical = isJumpTableCanonical(F); if (ExportedFunctions.count(F->getName())) { - IsDefinition |= ExportedFunctions[F->getName()].Linkage == CFL_Definition; + IsJumpTableCanonical |= + ExportedFunctions[F->getName()].Linkage == CFL_Definition; IsExported = true; // TODO: The logic here checks only that the function is address taken, // not that the address takers are live. This can be updated to check // their liveness and emit fewer jumptable entries once monolithic LTO // builds also emit summaries. 
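The replaceDirectCalls rewrite above is a compact illustration of Value::replaceUsesWithIf, which folds a manual use-list walk into a single call taking a per-Use predicate. A minimal usage sketch; here the direct-call test is written inline with CallBase::isCallee rather than the patch's file-local isDirectCall helper:

#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"   // CallBase
using namespace llvm;

static void redirectDirectCalls(Function *Old, Function *New) {
  Old->replaceUsesWithIf(New, [](Use &U) {
    // Rewrite a use only when it is the callee operand of a call site;
    // address-taking uses of Old are deliberately left alone.
    if (auto *CB = dyn_cast<CallBase>(U.getUser()))
      return CB->isCallee(&U);
    return false;
  });
}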
} else if (!F->hasAddressTaken()) { - if (!CrossDsoCfi || !IsDefinition || F->hasLocalLinkage()) + if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage()) continue; } } - auto *GTM = - GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types); + auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical, + IsExported, Types); GlobalTypeMembers[&GO] = GTM; for (MDNode *Type : Types) { verifyTypeMDNode(&GO, Type); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 3a08069dcd4a..8b9abaddc84c 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -769,7 +769,7 @@ void MergeFunctions::writeAlias(Function *F, Function *G) { PtrType->getElementType(), PtrType->getAddressSpace(), G->getLinkage(), "", BitcastF, G->getParent()); - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + F->setAlignment(MaybeAlign(std::max(F->getAlignment(), G->getAlignment()))); GA->takeName(G); GA->setVisibility(G->getVisibility()); GA->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); @@ -816,7 +816,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { removeUsers(F); F->replaceAllUsesWith(NewF); - unsigned MaxAlignment = std::max(G->getAlignment(), NewF->getAlignment()); + MaybeAlign MaxAlignment(std::max(G->getAlignment(), NewF->getAlignment())); writeThunkOrAlias(F, G); writeThunkOrAlias(F, NewF); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 733782e8764d..e193074884af 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -409,7 +409,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F, return std::unique_ptr<FunctionOutliningMultiRegionInfo>(); std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo = - llvm::make_unique<FunctionOutliningMultiRegionInfo>(); + std::make_unique<FunctionOutliningMultiRegionInfo>(); auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) { BasicBlock *Dom = BlockList.front(); @@ -589,7 +589,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) { }; std::unique_ptr<FunctionOutliningInfo> OutliningInfo = - llvm::make_unique<FunctionOutliningInfo>(); + std::make_unique<FunctionOutliningInfo>(); BasicBlock *CurrEntry = EntryBlock; bool CandidateFound = false; @@ -966,7 +966,7 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner( Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE, function_ref<AssumptionCache *(Function &)> LookupAC) : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) { - ClonedOI = llvm::make_unique<FunctionOutliningInfo>(); + ClonedOI = std::make_unique<FunctionOutliningInfo>(); // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; @@ -991,7 +991,7 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner( OptimizationRemarkEmitter &ORE, function_ref<AssumptionCache *(Function &)> LookupAC) : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) { - ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>(); + ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>(); // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; @@ -1122,6 +1122,9 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { BranchProbabilityInfo BPI(*ClonedFunc, LI); ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); + // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time. 
+ CodeExtractorAnalysisCache CEAC(*ClonedFunc); + SetVector<Value *> Inputs, Outputs, Sinks; for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo : ClonedOMRI->ORI) { @@ -1148,7 +1151,7 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { if (Outputs.size() > 0 && !ForceLiveExit) continue; - Function *OutlinedFunc = CE.extractCodeRegion(); + Function *OutlinedFunc = CE.extractCodeRegion(CEAC); if (OutlinedFunc) { CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); @@ -1210,11 +1213,12 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() { } // Extract the body of the if. + CodeExtractorAnalysisCache CEAC(*ClonedFunc); Function *OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc), /* AllowVarargs */ true) - .extractCodeRegion(); + .extractCodeRegion(CEAC); if (OutlinedFunc) { BasicBlock *OutliningCallBB = @@ -1264,7 +1268,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) { if (PSI->isFunctionEntryCold(F)) return {false, nullptr}; - if (empty(F->users())) + if (F->users().empty()) return {false, nullptr}; OptimizationRemarkEmitter ORE(F); @@ -1370,7 +1374,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { return false; } - assert(empty(Cloner.OrigFunc->users()) && + assert(Cloner.OrigFunc->users().empty() && "F's users should all be replaced!"); std::vector<User *> Users(Cloner.ClonedFunc->user_begin(), diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 3ea77f08fd3c..5314a8219b1e 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -654,6 +654,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createGlobalsAAWrapperPass()); MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); addExtensionsToPM(EP_VectorizerStart, MPM); diff --git a/lib/Transforms/IPO/SCCP.cpp b/lib/Transforms/IPO/SCCP.cpp index 7be3608bd2ec..307690729b14 100644 --- a/lib/Transforms/IPO/SCCP.cpp +++ b/lib/Transforms/IPO/SCCP.cpp @@ -9,16 +9,18 @@ using namespace llvm; PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) { const DataLayout &DL = M.getDataLayout(); - auto &TLI = AM.getResult<TargetLibraryAnalysis>(M); auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn { DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F); return { - make_unique<PredicateInfo>(F, DT, FAM.getResult<AssumptionAnalysis>(F)), + std::make_unique<PredicateInfo>(F, DT, FAM.getResult<AssumptionAnalysis>(F)), &DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F)}; }; - if (!runIPSCCP(M, DL, &TLI, getAnalysis)) + if (!runIPSCCP(M, DL, GetTLI, getAnalysis)) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -47,14 +49,14 @@ public: if (skipModule(M)) return false; const DataLayout &DL = M.getDataLayout(); - const TargetLibraryInfo *TLI = - &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - + auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; auto getAnalysis = [this](Function &F) -> AnalysisResultsForFn { DominatorTree &DT = 
this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); return { - make_unique<PredicateInfo>( + std::make_unique<PredicateInfo>( F, DT, this->getAnalysis<AssumptionCacheTracker>().getAssumptionCache( F)), @@ -62,7 +64,7 @@ public: nullptr}; // manager, so set them to nullptr. }; - return runIPSCCP(M, DL, TLI, getAnalysis); + return runIPSCCP(M, DL, GetTLI, getAnalysis); } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 877d20e72ffc..6184681db8a2 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -72,6 +72,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/MisExpect.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -79,6 +80,7 @@ #include <limits> #include <map> #include <memory> +#include <queue> #include <string> #include <system_error> #include <utility> @@ -128,6 +130,12 @@ static cl::opt<bool> ProfileSampleAccurate( "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. ")); +static cl::opt<bool> ProfileAccurateForSymsInList( + "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, + cl::init(true), + cl::desc("For symbols in profile symbol list, regard their profiles to " + "be accurate. It may be overriden by profile-sample-accurate. ")); + namespace { using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>; @@ -137,9 +145,11 @@ using EdgeWeightMap = DenseMap<Edge, uint64_t>; using BlockEdgeMap = DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>; +class SampleProfileLoader; + class SampleCoverageTracker { public: - SampleCoverageTracker() = default; + SampleCoverageTracker(SampleProfileLoader &SPL) : SPLoader(SPL){}; bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset, uint32_t Discriminator, uint64_t Samples); @@ -185,6 +195,76 @@ private: /// keyed by FunctionSamples pointers, but these stats are cleared after /// every function, so we just need to keep a single counter. uint64_t TotalUsedSamples = 0; + + SampleProfileLoader &SPLoader; +}; + +class GUIDToFuncNameMapper { +public: + GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader, + DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap) + : CurrentReader(Reader), CurrentModule(M), + CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) { + if (CurrentReader.getFormat() != SPF_Compact_Binary) + return; + + for (const auto &F : CurrentModule) { + StringRef OrigName = F.getName(); + CurrentGUIDToFuncNameMap.insert( + {Function::getGUID(OrigName), OrigName}); + + // Local to global var promotion used by optimization like thinlto + // will rename the var and add suffix like ".llvm.xxx" to the + // original local name. In sample profile, the suffixes of function + // names are all stripped. Since it is possible that the mapper is + // built in post-thin-link phase and var promotion has been done, + // we need to add the substring of function name without the suffix + // into the GUIDToFuncNameMap. + StringRef CanonName = FunctionSamples::getCanonicalFnName(F); + if (CanonName != OrigName) + CurrentGUIDToFuncNameMap.insert( + {Function::getGUID(CanonName), CanonName}); + } + + // Update GUIDToFuncNameMap for each function including inlinees. 
+ SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap); + } + + ~GUIDToFuncNameMapper() { + if (CurrentReader.getFormat() != SPF_Compact_Binary) + return; + + CurrentGUIDToFuncNameMap.clear(); + + // Reset GUIDToFuncNameMap for each function as they're no + // longer valid at this point. + SetGUIDToFuncNameMapForAll(nullptr); + } + +private: + void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) { + std::queue<FunctionSamples *> FSToUpdate; + for (auto &IFS : CurrentReader.getProfiles()) { + FSToUpdate.push(&IFS.second); + } + + while (!FSToUpdate.empty()) { + FunctionSamples *FS = FSToUpdate.front(); + FSToUpdate.pop(); + FS->GUIDToFuncNameMap = Map; + for (const auto &ICS : FS->getCallsiteSamples()) { + const FunctionSamplesMap &FSMap = ICS.second; + for (auto &IFS : FSMap) { + FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second); + FSToUpdate.push(&FS); + } + } + } + } + + SampleProfileReader &CurrentReader; + Module &CurrentModule; + DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap; }; /// Sample profile pass. @@ -199,8 +279,9 @@ public: std::function<AssumptionCache &(Function &)> GetAssumptionCache, std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo) : GetAC(std::move(GetAssumptionCache)), - GetTTI(std::move(GetTargetTransformInfo)), Filename(Name), - RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {} + GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this), + Filename(Name), RemappingFilename(RemapName), + IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM, @@ -209,6 +290,8 @@ public: void dump() { Reader->dump(); } protected: + friend class SampleCoverageTracker; + bool runOnFunction(Function &F, ModuleAnalysisManager *AM); unsigned getFunctionLoc(Function &F); bool emitAnnotations(Function &F); @@ -237,6 +320,8 @@ protected: bool propagateThroughEdges(Function &F, bool UpdateBlockCount); void computeDominanceAndLoopInfo(Function &F); void clearFunctionData(); + bool callsiteIsHot(const FunctionSamples *CallsiteFS, + ProfileSummaryInfo *PSI); /// Map basic blocks to their computed weights. /// @@ -310,6 +395,10 @@ protected: /// Profile Summary Info computed from sample profile. ProfileSummaryInfo *PSI = nullptr; + /// Profile symbol list tells whether a function name appears in the binary + /// used to generate the current profile. + std::unique_ptr<ProfileSymbolList> PSL; + /// Total number of samples collected in this profile. /// /// This is the sum of all the samples collected in all the functions executed @@ -326,6 +415,21 @@ protected: uint64_t entryCount; }; DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo; + + // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for + // all the function symbols defined or declared in current module. + DenseMap<uint64_t, StringRef> GUIDToFuncNameMap; + + // All the Names used in FunctionSamples including outline function + // names, inline instance names and call target names. + StringSet<> NamesInProfile; + + // For symbols in the profile symbol list, whether to regard their profiles + // as accurate. It is mainly decided by the existence of a profile symbol + // list and the -profile-accurate-for-symsinlist flag, but it can be + // overridden by -profile-sample-accurate or the profile-sample-accurate + // attribute. 
+ bool ProfAccForSymsInList; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -381,14 +485,23 @@ private: /// To decide whether an inlined callsite is hot, we compare the callsite /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is /// regarded as hot if the count is above the cutoff value. -static bool callsiteIsHot(const FunctionSamples *CallsiteFS, - ProfileSummaryInfo *PSI) { +/// +/// When ProfileAccurateForSymsInList is enabled and profile symbol list +/// is present, functions in the profile symbol list but without profile will +/// be regarded as cold and much less inlining will happen in CGSCC inlining +/// pass, so we tend to lower the hot criteria here to allow more early +/// inlining to happen for warm callsites and it is helpful for performance. +bool SampleProfileLoader::callsiteIsHot(const FunctionSamples *CallsiteFS, + ProfileSummaryInfo *PSI) { if (!CallsiteFS) return false; // The callsite was not inlined in the original binary. assert(PSI && "PSI is expected to be non null"); uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); - return PSI->isHotCount(CallsiteTotalSamples); + if (ProfAccForSymsInList) + return !PSI->isColdCount(CallsiteTotalSamples); + else + return PSI->isHotCount(CallsiteTotalSamples); } /// Mark as used the sample record for the given function samples at @@ -425,7 +538,7 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Count += countUsedRecords(CalleeSamples, PSI); } @@ -444,7 +557,7 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Count += countBodyRecords(CalleeSamples, PSI); } @@ -465,7 +578,7 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Total += countBodySamples(CalleeSamples, PSI); } @@ -788,6 +901,14 @@ bool SampleProfileLoader::inlineHotFunctions( Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { DenseSet<Instruction *> PromotedInsns; + // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure + // Profile symbol list is ignored when profile-sample-accurate is on. + assert((!ProfAccForSymsInList || + (!ProfileSampleAccurate && + !F.hasFnAttribute("profile-sample-accurate"))) && + "ProfAccForSymsInList should be false when profile-sample-accurate " + "is enabled"); + DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites; bool Changed = false; while (true) { @@ -1219,17 +1340,12 @@ void SampleProfileLoader::buildEdges(Function &F) { } /// Returns the sorted CallTargetMap \p M by count in descending order. 
@@ -1219,17 +1340,12 @@ void SampleProfileLoader::buildEdges(Function &F) {
 }
 
 /// Returns the sorted CallTargetMap \p M by count in descending order.
-static SmallVector<InstrProfValueData, 2> SortCallTargets(
-    const SampleRecord::CallTargetMap &M) {
+static SmallVector<InstrProfValueData, 2> GetSortedValueDataFromCallTargets(
+    const SampleRecord::CallTargetMap &M) {
   SmallVector<InstrProfValueData, 2> R;
-  for (auto I = M.begin(); I != M.end(); ++I)
-    R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
-  llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
-    if (L.Count == R.Count)
-      return L.Value > R.Value;
-    else
-      return L.Count > R.Count;
-  });
+  for (const auto &I : SampleRecord::SortCallTargets(M)) {
+    R.emplace_back(
+        InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
+  }
   return R;
 }
 
@@ -1324,7 +1440,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
           if (!T || T.get().empty())
             continue;
           SmallVector<InstrProfValueData, 2> SortedCallTargets =
-              SortCallTargets(T.get());
+              GetSortedValueDataFromCallTargets(T.get());
           uint64_t Sum;
           findIndirectCallFunctionSamples(I, Sum);
           annotateValueSite(*I.getParent()->getParent()->getParent(), I,
@@ -1374,6 +1490,8 @@ void SampleProfileLoader::propagateWeights(Function &F) {
       }
     }
 
+    misexpect::verifyMisExpect(TI, Weights, TI->getContext());
+
     uint64_t TempWeight;
     // Only set weights if there is at least one non-zero weight.
     // In any other case, let the analyzer set weights.
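
GetSortedValueDataFromCallTargets now delegates the ordering to SampleRecord::SortCallTargets and only repackages the result for annotateValueSite; the contract is a descending sort by count with a deterministic tie-break on the target identity. A stand-alone sketch of that ordering over plain std types (the Target struct is a hypothetical stand-in for the profile record):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for one indirect-call target record.
    struct Target {
      uint64_t GUID;  // hashed target name
      uint64_t Count; // samples attributed to this target
    };

    // Sort by count, highest first; break ties on GUID so the output
    // (and thus the emitted value-profile metadata) is deterministic.
    void sortCallTargets(std::vector<Target> &Targets) {
      std::sort(Targets.begin(), Targets.end(),
                [](const Target &L, const Target &R) {
                  return L.Count == R.Count ? L.GUID > R.GUID
                                            : L.Count > R.Count;
                });
    }
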
@@ -1557,30 +1675,29 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
 
 bool SampleProfileLoader::doInitialization(Module &M) {
   auto &Ctx = M.getContext();
-  auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
+
+  std::unique_ptr<SampleProfileReaderItaniumRemapper> RemapReader;
+  auto ReaderOrErr =
+      SampleProfileReader::create(Filename, Ctx, RemappingFilename);
   if (std::error_code EC = ReaderOrErr.getError()) {
     std::string Msg = "Could not open profile: " + EC.message();
     Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
     return false;
   }
   Reader = std::move(ReaderOrErr.get());
-  Reader->collectFuncsToUse(M);
+  Reader->collectFuncsFrom(M);
   ProfileIsValid = (Reader->read() == sampleprof_error::success);
-
-  if (!RemappingFilename.empty()) {
-    // Apply profile remappings to the loaded profile data if requested.
-    // For now, we only support remapping symbols encoded using the Itanium
-    // C++ ABI's name mangling scheme.
-    ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
-        RemappingFilename, Ctx, std::move(Reader));
-    if (std::error_code EC = ReaderOrErr.getError()) {
-      std::string Msg = "Could not open profile remapping file: " + EC.message();
-      Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
-      return false;
-    }
-    Reader = std::move(ReaderOrErr.get());
-    ProfileIsValid = (Reader->read() == sampleprof_error::success);
+  PSL = Reader->getProfileSymbolList();
+
+  // While profile-sample-accurate is on, ignore the symbol list.
+  ProfAccForSymsInList =
+      ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate;
+  if (ProfAccForSymsInList) {
+    NamesInProfile.clear();
+    if (auto NameTable = Reader->getNameTable())
+      NamesInProfile.insert(NameTable->begin(), NameTable->end());
   }
+
   return true;
 }
 
@@ -1594,7 +1711,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
 
 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
                                       ProfileSummaryInfo *_PSI) {
-  FunctionSamples::GUIDToFuncNameMapper Mapper(M);
+  GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
   if (!ProfileIsValid)
     return false;
 
@@ -1651,19 +1768,48 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
 }
 
 bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
-
+  DILocation2SampleMap.clear();
   // By default the entry count is initialized to -1, which will be treated
   // conservatively by getEntryCount as the same as unknown (None). This is
   // to avoid newly added code to be treated as cold. If we have samples
   // this will be overwritten in emitAnnotations.
-  // If ProfileSampleAccurate is true or F has profile-sample-accurate
-  // attribute, initialize the entry count to 0 so callsites or functions
-  // unsampled will be treated as cold.
-  uint64_t initialEntryCount =
-      (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
-          ? 0
-          : -1;
+  uint64_t initialEntryCount = -1;
+
+  ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
+  if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
+    // Initialize all the function entry counts to 0, meaning all the
+    // functions without a profile will be regarded as cold.
+    initialEntryCount = 0;
+    // profile-sample-accurate is a user assertion which has a higher
+    // precedence than the symbol list. When profile-sample-accurate is on,
+    // ignore the symbol list.
+    ProfAccForSymsInList = false;
+  }
+
+  // PSL -- the profile symbol list includes all the symbols in the sampled
+  // binary. If ProfileAccurateForSymsInList is enabled, PSL is used to treat
+  // old functions without samples as cold, without having to worry about new
+  // and hot functions being mistakenly treated as cold.
+  if (ProfAccForSymsInList) {
+    // Initialize the entry count to 0 for functions in the list.
+    if (PSL->contains(F.getName()))
+      initialEntryCount = 0;
+
+    // A function in the symbol list but without a sample will be regarded as
+    // cold. To minimize the potential negative performance impact this could
+    // have, we want to be a little conservative here: if a function shows up
+    // in the profile at all, whether as an outline function, an inline
+    // instance or a call target, treat the function as not being cold. This
+    // handles cases such as when most callsites of a function are inlined in
+    // the sampled binary but not inlined in the current build (because of
+    // source code drift, imprecise debug information, or the callsites being
+    // all cold individually but not cold cumulatively...), so an outline
+    // function showing up as cold in the sampled binary will actually not be
+    // cold in the current build.
+    StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+    if (NamesInProfile.count(CanonName))
+      initialEntryCount = -1;
+  }
+
   F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
   std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
   if (AM) {
@@ -1672,7 +1818,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
         .getManager();
     ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
   } else {
-    OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
+    OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
     ORE = OwnedORE.get();
   }
   Samples = Reader->getSamplesFor(F);
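
Taken together, the runOnFunction changes encode a strict precedence for the initial entry count: an explicit profile-sample-accurate assertion wins outright, then the profile symbol list marks listed-but-unsampled functions cold, unless the function's canonical name appears anywhere in the profile. A compact restatement of that decision as a sketch over boolean inputs (the parameter names are mine, not the pass's):

    #include <cstdint>

    // Decide the initial entry count for a function before annotation.
    // (uint64_t)-1 means "unknown", which getEntryCount treats as not cold.
    uint64_t initialEntryCount(bool SampleAccurate,  // flag or fn attribute
                               bool AccurateForList, // symbol list in effect
                               bool InSymbolList,    // PSL->contains(name)
                               bool NameInProfile) { // seen in the profile
      if (SampleAccurate)
        return 0; // user asserts the profile is complete: unsampled is cold
      if (AccurateForList && InSymbolList && !NameInProfile)
        return 0; // in the sampled binary, yet never sampled or referenced
      return static_cast<uint64_t>(-1); // unknown, not treated as cold
    }
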
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 24c476376c14..690b5e8bf49e 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/IPO/FunctionImport.h"
+#include "llvm/Transforms/IPO/LowerTypeTests.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 using namespace llvm;
@@ -218,10 +219,18 @@ void splitAndWriteThinLTOBitcode(
 
   promoteTypeIds(M, ModuleId);
 
-  // Returns whether a global has attached type metadata. Such globals may
-  // participate in CFI or whole-program devirtualization, so they need to
-  // appear in the merged module instead of the thin LTO module.
+  // Returns whether a global or its associated global has attached type
+  // metadata. The former may participate in CFI or whole-program
+  // devirtualization, so they need to appear in the merged module instead of
+  // the thin LTO module. Similarly, globals that are associated with globals
+  // with type metadata need to appear in the merged module because they will
+  // reference the global's section directly.
   auto HasTypeMetadata = [](const GlobalObject *GO) {
+    if (MDNode *MD = GO->getMetadata(LLVMContext::MD_associated))
+      if (auto *AssocVM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(0)))
+        if (auto *AssocGO = dyn_cast<GlobalObject>(AssocVM->getValue()))
+          if (AssocGO->hasMetadata(LLVMContext::MD_type))
+            return true;
     return GO->hasMetadata(LLVMContext::MD_type);
   };
@@ -315,9 +324,9 @@ void splitAndWriteThinLTOBitcode(
       SmallVector<Metadata *, 4> Elts;
       Elts.push_back(MDString::get(Ctx, F.getName()));
       CfiFunctionLinkage Linkage;
-      if (!F.isDeclarationForLinker())
+      if (lowertypetests::isJumpTableCanonical(&F))
         Linkage = CFL_Definition;
-      else if (F.isWeakForLinker())
+      else if (F.hasExternalWeakLinkage())
         Linkage = CFL_WeakDeclaration;
       else
         Linkage = CFL_Declaration;
@@ -457,7 +466,7 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
     // splitAndWriteThinLTOBitcode). Just always build it once via the
     // buildModuleSummaryIndex when Module(s) are ready.
     ProfileSummaryInfo PSI(M);
-    NewIndex = llvm::make_unique<ModuleSummaryIndex>(
+    NewIndex = std::make_unique<ModuleSummaryIndex>(
         buildModuleSummaryIndex(M, nullptr, &PSI));
     Index = NewIndex.get();
   }
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 6b6dd6194e17..f0cf5581ba8a 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -24,12 +24,14 @@
 // returns 0, or a single vtable's function returns 1, replace each virtual
 // call with a comparison of the vptr against that vtable's address.
 //
-// This pass is intended to be used during the regular and thin LTO pipelines.
+// This pass is intended to be used during the regular and thin LTO pipelines:
+//
 // During regular LTO, the pass determines the best optimization for each
 // virtual call and applies the resolutions directly to virtual calls that are
 // eligible for virtual call optimization (i.e. calls that use either of the
-// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During
-// ThinLTO, the pass operates in two phases:
+// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
+//
+// During hybrid Regular/ThinLTO, the pass operates in two phases:
 // - Export phase: this is run during the thin link over a single merged module
 //   that contains all vtables with !type metadata that participate in the link.
 //   The pass computes a resolution for each virtual call and stores it in the
@@ -38,6 +40,14 @@
 //   modules. The pass applies the resolutions previously computed during the
 //   export phase to each eligible virtual call.
 //
+// During ThinLTO, the pass operates in two phases:
+// - Export phase: this is run during the thin link over the index which
+//   contains a summary of all vtables with !type metadata that participate in
+//   the link. It computes a resolution for each virtual call and stores it in
+//   the type identifier summary. Only single implementation devirtualization
+//   is supported.
+// - Import phase: (same as in the hybrid case above).
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -117,6 +127,11 @@ static cl::opt<unsigned>
                   cl::desc("Maximum number of call targets per "
                            "call site to enable branch funnels"));
 
+static cl::opt<bool>
+    PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
+                       cl::init(false), cl::ZeroOrMore,
+                       cl::desc("Print index-based devirtualization messages"));
+
 // Find the minimum offset that we may store a value of size Size bits at. If
 // IsAfter is set, look for an offset before the object, otherwise look for an
 // offset after the object.
@@ -265,6 +280,25 @@ template <> struct DenseMapInfo<VTableSlot> {
   }
 };
 
+template <> struct DenseMapInfo<VTableSlotSummary> {
+  static VTableSlotSummary getEmptyKey() {
+    return {DenseMapInfo<StringRef>::getEmptyKey(),
+            DenseMapInfo<uint64_t>::getEmptyKey()};
+  }
+  static VTableSlotSummary getTombstoneKey() {
+    return {DenseMapInfo<StringRef>::getTombstoneKey(),
+            DenseMapInfo<uint64_t>::getTombstoneKey()};
+  }
+  static unsigned getHashValue(const VTableSlotSummary &I) {
+    return DenseMapInfo<StringRef>::getHashValue(I.TypeID) ^
+           DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
+  }
+  static bool isEqual(const VTableSlotSummary &LHS,
+                      const VTableSlotSummary &RHS) {
+    return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
+  }
+};
+
 } // end namespace llvm
 
 namespace {
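
The new DenseMapInfo<VTableSlotSummary> specialization lets the index-based phase key its CallSlots map on (type identifier string, byte offset) pairs; DenseMap requires empty/tombstone sentinels plus hash and equality for any custom key. For readers less familiar with DenseMap's traits, here is a rough std::unordered_map analogue of the same key, where SlotKey and its field types are stand-ins rather than the LLVM structs:

    #include <cstdint>
    #include <functional>
    #include <string>
    #include <unordered_map>

    // Hypothetical stand-in for VTableSlotSummary: one virtual call slot.
    struct SlotKey {
      std::string TypeID;  // type identifier naming the vtable hierarchy
      uint64_t ByteOffset; // offset of the function slot within the vtable
      bool operator==(const SlotKey &O) const {
        return TypeID == O.TypeID && ByteOffset == O.ByteOffset;
      }
    };

    struct SlotKeyHash {
      size_t operator()(const SlotKey &K) const {
        // Same shape as the DenseMapInfo specialization: hash both fields
        // and combine them with XOR.
        return std::hash<std::string>()(K.TypeID) ^
               std::hash<uint64_t>()(K.ByteOffset);
      }
    };

    using SlotMap = std::unordered_map<SlotKey, int, SlotKeyHash>;
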
@@ -342,19 +376,21 @@ struct CallSiteInfo {
   /// pass the vector is non-empty, we will need to add a use of llvm.type.test
   /// to each of the function summaries in the vector.
   std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
+  std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
 
   bool isExported() const {
     return SummaryHasTypeTestAssumeUsers ||
            !SummaryTypeCheckedLoadUsers.empty();
   }
 
-  void markSummaryHasTypeTestAssumeUsers() {
-    SummaryHasTypeTestAssumeUsers = true;
+  void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
+    SummaryTypeCheckedLoadUsers.push_back(FS);
     AllCallSitesDevirted = false;
   }
 
-  void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
-    SummaryTypeCheckedLoadUsers.push_back(FS);
+  void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
+    SummaryTypeTestAssumeUsers.push_back(FS);
+    SummaryHasTypeTestAssumeUsers = true;
     AllCallSitesDevirted = false;
   }
 
@@ -456,7 +492,6 @@ struct DevirtModule {
   void buildTypeIdentifierMap(
       std::vector<VTableBits> &Bits,
       DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
-  Constant *getPointerAtOffset(Constant *I, uint64_t Offset);
   bool tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
                                  const std::set<TypeMemberInfo> &TypeMemberInfos,
                                  uint64_t ByteOffset);
 
   void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
                              bool &IsExported);
-  bool trySingleImplDevirt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+  bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
+                           MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                            VTableSlotInfo &SlotInfo,
                            WholeProgramDevirtResolution *Res);
 
@@ -542,6 +578,38 @@ struct DevirtModule {
       function_ref<DominatorTree &(Function &)> LookupDomTree);
 };
 
+struct DevirtIndex {
+  ModuleSummaryIndex &ExportSummary;
+  // The set in which to record GUIDs exported from their module by
+  // devirtualization, used by the client to ensure they are not internalized.
+  std::set<GlobalValue::GUID> &ExportedGUIDs;
+  // A map in which to record the information necessary to locate the WPD
+  // resolution for local targets in case they are exported by cross module
+  // importing.
+  std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
+
+  MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
+
+  DevirtIndex(
+      ModuleSummaryIndex &ExportSummary,
+      std::set<GlobalValue::GUID> &ExportedGUIDs,
+      std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
+      : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
+        LocalWPDTargetsMap(LocalWPDTargetsMap) {}
+
+  bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
+                                 const TypeIdCompatibleVtableInfo TIdInfo,
+                                 uint64_t ByteOffset);
+
+  bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+                           VTableSlotSummary &SlotSummary,
+                           VTableSlotInfo &SlotInfo,
+                           WholeProgramDevirtResolution *Res,
+                           std::set<ValueInfo> &DevirtTargets);
+
+  void run();
+};
+
 struct WholeProgramDevirt : public ModulePass {
   static char ID;
 
@@ -572,7 +640,7 @@ struct WholeProgramDevirt : public ModulePass {
     // an optimization remark emitter on the fly, when we need it.
     std::unique_ptr<OptimizationRemarkEmitter> ORE;
     auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
-      ORE = make_unique<OptimizationRemarkEmitter>(F);
+      ORE = std::make_unique<OptimizationRemarkEmitter>(F);
       return *ORE;
     };
 
@@ -632,6 +700,41 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
   return PreservedAnalyses::none();
 }
 
+namespace llvm {
+void runWholeProgramDevirtOnIndex(
+    ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+  DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
+}
+
+void updateIndexWPDForExports(
+    ModuleSummaryIndex &Summary,
+    function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+  for (auto &T : LocalWPDTargetsMap) {
+    auto &VI = T.first;
+    // This was enforced earlier during trySingleImplDevirt.
+    assert(VI.getSummaryList().size() == 1 &&
+           "Devirt of local target has more than one copy");
+    auto &S = VI.getSummaryList()[0];
+    if (!isExported(S->modulePath(), VI.getGUID()))
+      continue;
+
+    // It's been exported by a cross module import.
+    for (auto &SlotSummary : T.second) {
+      auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
+      assert(TIdSum);
+      auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
+      assert(WPDRes != TIdSum->WPDRes.end());
+      WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+          WPDRes->second.SingleImplName,
+          Summary.getModuleHash(S->modulePath()));
+    }
+  }
+}
+
+} // end namespace llvm
+
 bool DevirtModule::runForTesting(
     Module &M, function_ref<AAResults &(Function &)> AARGetter,
     function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
@@ -662,7 +765,7 @@ bool DevirtModule::runForTesting(
     ExitOnError ExitOnErr(
         "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
     std::error_code EC;
-    raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+    raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text);
     ExitOnErr(errorCodeToError(EC));
 
     yaml::Output Out(OS);
@@ -706,38 +809,6 @@ void DevirtModule::buildTypeIdentifierMap(
   }
 }
 
-Constant *DevirtModule::getPointerAtOffset(Constant *I, uint64_t Offset) {
-  if (I->getType()->isPointerTy()) {
-    if (Offset == 0)
-      return I;
-    return nullptr;
-  }
-
-  const DataLayout &DL = M.getDataLayout();
-
-  if (auto *C = dyn_cast<ConstantStruct>(I)) {
-    const StructLayout *SL = DL.getStructLayout(C->getType());
-    if (Offset >= SL->getSizeInBytes())
-      return nullptr;
-
-    unsigned Op = SL->getElementContainingOffset(Offset);
-    return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
-                              Offset - SL->getElementOffset(Op));
-  }
-  if (auto *C = dyn_cast<ConstantArray>(I)) {
-    ArrayType *VTableTy = C->getType();
-    uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType());
-
-    unsigned Op = Offset / ElemSize;
-    if (Op >= C->getNumOperands())
-      return nullptr;
-
-    return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
-                              Offset % ElemSize);
-  }
-  return nullptr;
-}
-
 bool DevirtModule::tryFindVirtualCallTargets(
     std::vector<VirtualCallTarget> &TargetsForSlot,
     const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset) {
@@ -746,7 +817,7 @@ bool DevirtModule::tryFindVirtualCallTargets(
       return false;
 
     Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
-                                       TM.Offset + ByteOffset);
+                                       TM.Offset + ByteOffset, M);
     if (!Ptr)
       return false;
 
@@ -766,6 +837,34 @@ bool DevirtModule::tryFindVirtualCallTargets(
   return !TargetsForSlot.empty();
 }
 
+bool DevirtIndex::tryFindVirtualCallTargets(
+    std::vector<ValueInfo> &TargetsForSlot,
+    const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
+  for (const TypeIdOffsetVtableInfo P : TIdInfo) {
+    // The VTable initializer should have only one summary, or all copies
+    // must be linkonce/weak ODR.
+    assert(P.VTableVI.getSummaryList().size() == 1 ||
+           llvm::all_of(
+               P.VTableVI.getSummaryList(),
+               [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
+                 return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) ||
+                        GlobalValue::isWeakODRLinkage(Summary->linkage());
+               }));
+    const auto *VS =
+        cast<GlobalVarSummary>(P.VTableVI.getSummaryList()[0].get());
+    if (!P.VTableVI.getSummaryList()[0]->isLive())
+      continue;
+    for (auto VTP : VS->vTableFuncs()) {
+      if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
+        continue;
+
+      TargetsForSlot.push_back(VTP.FuncVI);
+    }
+  }
+
+  // Give up if we couldn't find any targets.
+  return !TargetsForSlot.empty();
+}
+
 void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
                                          Constant *TheFn, bool &IsExported) {
   auto Apply = [&](CallSiteInfo &CSInfo) {
@@ -788,9 +887,38 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
     Apply(P.second);
 }
 
+static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
+  // We can't add calls if we haven't seen a definition.
+  if (Callee.getSummaryList().empty())
+    return false;
+
+  // Insert calls into the summary index so that the devirtualized targets
+  // are eligible for import.
+  // FIXME: Annotate type tests with hotness. For now, mark these as hot
+  // to better ensure we have the opportunity to inline them.
+  bool IsExported = false;
+  auto &S = Callee.getSummaryList()[0];
+  CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
+  auto AddCalls = [&](CallSiteInfo &CSInfo) {
+    for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
+      FS->addCall({Callee, CI});
+      IsExported |= S->modulePath() != FS->modulePath();
+    }
+    for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
+      FS->addCall({Callee, CI});
+      IsExported |= S->modulePath() != FS->modulePath();
+    }
+  };
+  AddCalls(SlotInfo.CSInfo);
+  for (auto &P : SlotInfo.ConstCSInfo)
+    AddCalls(P.second);
+  return IsExported;
+}
+
 bool DevirtModule::trySingleImplDevirt(
-    MutableArrayRef<VirtualCallTarget> TargetsForSlot,
-    VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) {
+    ModuleSummaryIndex *ExportSummary,
+    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
+    WholeProgramDevirtResolution *Res) {
   // See if the program contains a single implementation of this virtual
   // function.
   Function *TheFn = TargetsForSlot[0].Fn;
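
AddCalls is the bookkeeping heart of the summary-side change: it records a synthetic call edge from every summary that used the type test to the single devirtualized callee, and reports whether any edge crosses modules, in which case the callee must be exported. A small stand-alone sketch of the same bookkeeping over hypothetical, pared-down summaries:

    #include <string>
    #include <vector>

    // Hypothetical stand-ins for function summaries and their call edges.
    struct FuncSummary {
      std::string ModulePath;
      std::vector<const FuncSummary *> Calls; // outgoing call edges
    };

    // Add Callee as a call edge of every user; report whether any user lives
    // in a different module, i.e. whether Callee becomes exported.
    bool addCalls(std::vector<FuncSummary *> &Users, FuncSummary &Callee) {
      bool IsExported = false;
      for (FuncSummary *FS : Users) {
        FS->Calls.push_back(&Callee);
        IsExported |= FS->ModulePath != Callee.ModulePath;
      }
      return IsExported;
    }
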
@@ -830,6 +958,10 @@ bool DevirtModule::trySingleImplDevirt(
     TheFn->setVisibility(GlobalValue::HiddenVisibility);
     TheFn->setName(NewName);
   }
+  if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
+    // Any needed promotion of 'TheFn' has already been done during the LTO
+    // unit split, so we can ignore the return value of AddCalls.
+    AddCalls(SlotInfo, TheFnVI);
 
   Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
   Res->SingleImplName = TheFn->getName();
 
@@ -837,6 +969,63 @@ bool DevirtModule::trySingleImplDevirt(
   return true;
 }
 
+bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+                                      VTableSlotSummary &SlotSummary,
+                                      VTableSlotInfo &SlotInfo,
+                                      WholeProgramDevirtResolution *Res,
+                                      std::set<ValueInfo> &DevirtTargets) {
+  // See if the program contains a single implementation of this virtual
+  // function.
+  auto TheFn = TargetsForSlot[0];
+  for (auto &&Target : TargetsForSlot)
+    if (TheFn != Target)
+      return false;
+
+  // Don't devirtualize if we don't have a target definition.
+  auto Size = TheFn.getSummaryList().size();
+  if (!Size)
+    return false;
+
+  // If the summary list contains multiple summaries where at least one is
+  // a local, give up, as we won't know which (possibly promoted) name to use.
+  for (auto &S : TheFn.getSummaryList())
+    if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
+      return false;
+
+  // Collect functions devirtualized for at least one call site, for stats.
+  if (PrintSummaryDevirt)
+    DevirtTargets.insert(TheFn);
+
+  auto &S = TheFn.getSummaryList()[0];
+  bool IsExported = AddCalls(SlotInfo, TheFn);
+  if (IsExported)
+    ExportedGUIDs.insert(TheFn.getGUID());
+
+  // Record in the summary for use in devirtualization during the ThinLTO
+  // import step.
+  Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
+  if (GlobalValue::isLocalLinkage(S->linkage())) {
+    if (IsExported)
+      // If the target is a local function and we are exporting it by
+      // devirtualizing a call in another module, we need to record the
+      // promoted name.
+      Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+          TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
+    else {
+      LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
+      Res->SingleImplName = TheFn.name();
+    }
+  } else
+    Res->SingleImplName = TheFn.name();
+
+  // The name will be empty if this thin link is driven off of a serialized
+  // combined index (e.g. llvm-lto). However, WPD is not supported/invoked for
+  // the legacy LTO API anyway.
+  assert(!Res->SingleImplName.empty());
+
+  return true;
+}
+
 void DevirtModule::tryICallBranchFunnel(
     MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
     WholeProgramDevirtResolution *Res, VTableSlot Slot) {
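
When a local (internal-linkage) target is exported by a cross-module devirtualization, the resolution must carry the name the function will have after ThinLTO promotion, which is why getGlobalNameForLocal appends a module-unique suffix. The exact suffix format below is an assumption, modeled on ThinLTO's name promotion, so treat this only as an illustration of the convention:

    #include <string>

    // Hypothetical model of ThinLTO local-symbol promotion: a local symbol
    // promoted out of its module gets a module-unique suffix so that equally
    // named locals from different translation units cannot collide.
    std::string globalNameForLocal(const std::string &Name,
                                   const std::string &ModuleHash) {
      return Name + ".llvm." + ModuleHash;
    }
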
@@ -1302,10 +1491,13 @@ void DevirtModule::rebuildGlobal(VTableBits &B) {
   if (B.Before.Bytes.empty() && B.After.Bytes.empty())
     return;
 
-  // Align each byte array to pointer width.
-  unsigned PointerSize = M.getDataLayout().getPointerSize();
-  B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), PointerSize));
-  B.After.Bytes.resize(alignTo(B.After.Bytes.size(), PointerSize));
+  // Align the before byte array to the global's minimum alignment so that we
+  // don't break any alignment requirements on the global.
+  MaybeAlign Alignment(B.GV->getAlignment());
+  if (!Alignment)
+    Alignment =
+        Align(M.getDataLayout().getABITypeAlignment(B.GV->getValueType()));
+  B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
 
   // Before was stored in reverse order; flip it now.
   for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
@@ -1322,6 +1514,7 @@ void DevirtModule::rebuildGlobal(VTableBits &B) {
                                GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
   NewGV->setSection(B.GV->getSection());
   NewGV->setComdat(B.GV->getComdat());
+  NewGV->setAlignment(MaybeAlign(B.GV->getAlignment()));
 
   // Copy the original vtable's metadata to the anonymous global, adjusting
   // offsets as required.
@@ -1483,8 +1676,11 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
 }
 
 void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
+  auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
+  if (!TypeId)
+    return;
   const TypeIdSummary *TidSummary =
-      ImportSummary->getTypeIdSummary(cast<MDString>(Slot.TypeID)->getString());
+      ImportSummary->getTypeIdSummary(TypeId->getString());
   if (!TidSummary)
     return;
   auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
@@ -1493,6 +1689,7 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
   const WholeProgramDevirtResolution &Res = ResI->second;
 
   if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
+    assert(!Res.SingleImplName.empty());
     // The type of the function in the declaration is irrelevant because every
     // call site will cast it to the correct type.
     Constant *SingleImpl =
@@ -1627,8 +1824,7 @@ bool DevirtModule::run() {
       // FIXME: Only add live functions.
       for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
         for (Metadata *MD : MetadataByGUID[VF.GUID]) {
-          CallSlots[{MD, VF.Offset}]
-              .CSInfo.markSummaryHasTypeTestAssumeUsers();
+          CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
         }
       }
       for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
@@ -1641,7 +1837,7 @@ bool DevirtModule::run() {
         for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
           CallSlots[{MD, VC.VFunc.Offset}]
              .ConstCSInfo[VC.Args]
-              .markSummaryHasTypeTestAssumeUsers();
+              .addSummaryTypeTestAssumeUser(FS);
         }
       }
       for (const FunctionSummary::ConstVCall &VC :
@@ -1673,7 +1869,7 @@ bool DevirtModule::run() {
                  cast<MDString>(S.first.TypeID)->getString())
               .WPDRes[S.first.ByteOffset];
 
-      if (!trySingleImplDevirt(TargetsForSlot, S.second, Res)) {
+      if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
         DidVirtualConstProp |=
             tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
 
@@ -1710,7 +1906,7 @@ bool DevirtModule::run() {
       using namespace ore;
       OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
                         << "devirtualized "
-                        << NV("FunctionName", F->getName()));
+                        << NV("FunctionName", DT.first));
     }
   }
 
@@ -1722,5 +1918,86 @@ bool DevirtModule::run() {
   for (VTableBits &B : Bits)
     rebuildGlobal(B);
 
+  // We have lowered or deleted the type checked load intrinsics, so we no
+  // longer have enough information to reason about the liveness of virtual
+  // function pointers in GlobalDCE.
+  for (GlobalVariable &GV : M.globals())
+    GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
+
   return true;
 }
+
+void DevirtIndex::run() {
+  if (ExportSummary.typeIdCompatibleVtableMap().empty())
+    return;
+
+  DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
+  for (auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
+    NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
+  }
+
+  // Collect information from summary about which calls to try to devirtualize.
+  for (auto &P : ExportSummary) {
+    for (auto &S : P.second.SummaryList) {
+      auto *FS = dyn_cast<FunctionSummary>(S.get());
+      if (!FS)
+        continue;
+      // FIXME: Only add live functions.
+      for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
+        for (StringRef Name : NameByGUID[VF.GUID]) {
+          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
+        }
+      }
+      for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
+        for (StringRef Name : NameByGUID[VF.GUID]) {
+          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
+        }
+      }
+      for (const FunctionSummary::ConstVCall &VC :
+           FS->type_test_assume_const_vcalls()) {
+        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+          CallSlots[{Name, VC.VFunc.Offset}]
+              .ConstCSInfo[VC.Args]
+              .addSummaryTypeTestAssumeUser(FS);
+        }
+      }
+      for (const FunctionSummary::ConstVCall &VC :
+           FS->type_checked_load_const_vcalls()) {
+        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+          CallSlots[{Name, VC.VFunc.Offset}]
+              .ConstCSInfo[VC.Args]
+              .addSummaryTypeCheckedLoadUser(FS);
+        }
+      }
+    }
+  }
+
+  std::set<ValueInfo> DevirtTargets;
+  // For each (type, offset) pair:
+  for (auto &S : CallSlots) {
+    // Search each of the members of the type identifier for the virtual
+    // function implementation at offset S.first.ByteOffset, and add to
+    // TargetsForSlot.
+    std::vector<ValueInfo> TargetsForSlot;
+    auto TidSummary =
+        ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
+    assert(TidSummary);
+    if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
+                                  S.first.ByteOffset)) {
+      WholeProgramDevirtResolution *Res =
+          &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
+               .WPDRes[S.first.ByteOffset];
+
+      if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
+                               DevirtTargets))
+        continue;
+    }
+  }
+
+  // Optionally have the thin link print a message for each devirtualized
+  // function.
+  if (PrintSummaryDevirt)
+    for (const auto &DT : DevirtTargets)
+      errs() << "Devirtualized call to " << DT << "\n";
+
+  return;
+}
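
The index-based export phase above boils down to: group every virtual call by its (type identifier, byte offset) slot, collect the possible targets for each slot from the vtable summaries, and resolve a slot only when all targets agree on one implementation. A stand-alone sketch of that core decision over plain std types (Slot and TargetID are hypothetical stand-ins for VTableSlotSummary and ValueInfo):

    #include <cstdint>
    #include <map>
    #include <string>
    #include <tuple>
    #include <vector>

    using TargetID = uint64_t; // hypothetical stand-in for a ValueInfo/GUID

    struct Slot {
      std::string TypeID;
      uint64_t ByteOffset;
      bool operator<(const Slot &O) const {
        return std::tie(TypeID, ByteOffset) <
               std::tie(O.TypeID, O.ByteOffset);
      }
    };

    // Resolve each slot to a single implementation iff every possible target
    // for that slot is the same function; otherwise leave it unresolved.
    std::map<Slot, TargetID>
    resolveSingleImpl(const std::map<Slot, std::vector<TargetID>> &CallSlots) {
      std::map<Slot, TargetID> Res;
      for (const auto &[S, Targets] : CallSlots) {
        if (Targets.empty())
          continue;
        bool AllSame = true;
        for (TargetID T : Targets)
          AllSame &= (T == Targets.front());
        if (AllSame)
          Res[S] = Targets.front();
      }
      return Res;
    }

This mirrors why only single-implementation devirtualization is supported in the pure ThinLTO phase: the index has names and offsets but not vtable contents, so richer rewrites like virtual constant propagation need the merged-module (hybrid) path.
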