path: root/lib/Transforms/IPO
Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 2
-rw-r--r--  lib/Transforms/IPO/Attributor.cpp | 4959
-rw-r--r--  lib/Transforms/IPO/BlockExtractor.cpp | 5
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp | 4
-rw-r--r--  lib/Transforms/IPO/CrossDSOCFI.cpp | 10
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 38
-rw-r--r--  lib/Transforms/IPO/FunctionImport.cpp | 43
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp | 156
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 176
-rw-r--r--  lib/Transforms/IPO/HotColdSplitting.cpp | 61
-rw-r--r--  lib/Transforms/IPO/IPO.cpp | 13
-rw-r--r--  lib/Transforms/IPO/InferFunctionAttrs.cpp | 20
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 21
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp | 6
-rw-r--r--  lib/Transforms/IPO/LowerTypeTests.cpp | 305
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 4
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 20
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 1
-rw-r--r--  lib/Transforms/IPO/SCCP.cpp | 18
-rw-r--r--  lib/Transforms/IPO/SampleProfile.cpp | 238
-rw-r--r--  lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 21
-rw-r--r--  lib/Transforms/IPO/WholeProgramDevirt.cpp | 389
22 files changed, 5331 insertions, 1179 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 95a9f31cced3..dd9f74a881ee 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -304,7 +304,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// of the previous load.
LoadInst *newLoad =
IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
- newLoad->setAlignment(OrigLoad->getAlignment());
+ newLoad->setAlignment(MaybeAlign(OrigLoad->getAlignment()));
// Transfer the AA info too.
AAMDNodes AAInfo;
OrigLoad->getAAMetadata(AAInfo);
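For context, the one-line ArgumentPromotion change above tracks the LoadInst alignment API moving from a raw unsigned value to llvm::MaybeAlign, where a value of zero is treated as "no alignment recorded". A minimal sketch of the wrapping pattern, using a hypothetical helper and assuming the usual LLVM headers (illustration only, not part of the patch):

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Hypothetical helper: copy the (possibly unspecified) alignment from one
// load to another. getAlignment() returns 0 when no alignment is recorded,
// and MaybeAlign(0) maps that back to an empty, i.e. unset, alignment.
static void copyLoadAlignment(const LoadInst *From, LoadInst *To) {
  To->setAlignment(MaybeAlign(From->getAlignment()));
}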
diff --git a/lib/Transforms/IPO/Attributor.cpp b/lib/Transforms/IPO/Attributor.cpp
index 2a52c6b9b4ad..95f47345d8fd 100644
--- a/lib/Transforms/IPO/Attributor.cpp
+++ b/lib/Transforms/IPO/Attributor.cpp
@@ -16,11 +16,15 @@
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -30,6 +34,9 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+
#include <cassert>
using namespace llvm;
@@ -46,19 +53,50 @@ STATISTIC(NumAttributesValidFixpoint,
"Number of abstract attributes in a valid fixpoint state");
STATISTIC(NumAttributesManifested,
"Number of abstract attributes manifested in IR");
-STATISTIC(NumFnNoUnwind, "Number of functions marked nounwind");
-
-STATISTIC(NumFnUniqueReturned, "Number of function with unique return");
-STATISTIC(NumFnKnownReturns, "Number of function with known return values");
-STATISTIC(NumFnArgumentReturned,
- "Number of function arguments marked returned");
-STATISTIC(NumFnNoSync, "Number of functions marked nosync");
-STATISTIC(NumFnNoFree, "Number of functions marked nofree");
-STATISTIC(NumFnReturnedNonNull,
- "Number of function return values marked nonnull");
-STATISTIC(NumFnArgumentNonNull, "Number of function arguments marked nonnull");
-STATISTIC(NumCSArgumentNonNull, "Number of call site arguments marked nonnull");
-STATISTIC(NumFnWillReturn, "Number of functions marked willreturn");
+
+// Some helper macros to deal with statistics tracking.
+//
+// Usage:
+// For simple IR attribute tracking overload trackStatistics in the abstract
+// attribute and choose the right STATS_DECLTRACK_********* macro,
+// e.g.,:
+// void trackStatistics() const override {
+// STATS_DECLTRACK_ARG_ATTR(returned)
+// }
+// If there is a single "increment" side one can use the macro
+// STATS_DECLTRACK with a custom message. If there are multiple increment
+// sides, STATS_DECL and STATS_TRACK can also be used separately.
+//
+#define BUILD_STAT_MSG_IR_ATTR(TYPE, NAME) \
+ ("Number of " #TYPE " marked '" #NAME "'")
+#define BUILD_STAT_NAME(NAME, TYPE) NumIR##TYPE##_##NAME
+#define STATS_DECL_(NAME, MSG) STATISTIC(NAME, MSG);
+#define STATS_DECL(NAME, TYPE, MSG) \
+ STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG);
+#define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE));
+#define STATS_DECLTRACK(NAME, TYPE, MSG) \
+ { \
+ STATS_DECL(NAME, TYPE, MSG) \
+ STATS_TRACK(NAME, TYPE) \
+ }
+#define STATS_DECLTRACK_ARG_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME))
+#define STATS_DECLTRACK_CSARG_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, CSArguments, \
+ BUILD_STAT_MSG_IR_ATTR(call site arguments, NAME))
+#define STATS_DECLTRACK_FN_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, Function, BUILD_STAT_MSG_IR_ATTR(functions, NAME))
+#define STATS_DECLTRACK_CS_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, CS, BUILD_STAT_MSG_IR_ATTR(call site, NAME))
+#define STATS_DECLTRACK_FNRET_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, FunctionReturn, \
+ BUILD_STAT_MSG_IR_ATTR(function returns, NAME))
+#define STATS_DECLTRACK_CSRET_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, CSReturn, \
+ BUILD_STAT_MSG_IR_ATTR(call site returns, NAME))
+#define STATS_DECLTRACK_FLOATING_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, Floating, \
+ ("Number of floating values known to be '" #NAME "'"))
// TODO: Determine a good default value.
//
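To make the statistics macros above concrete, here is a hand expansion of one wrapper, derived mechanically from the definitions (illustration only, not part of the patch):

// STATS_DECLTRACK_FN_ATTR(nounwind)
// first becomes
//   STATS_DECLTRACK(nounwind, Function,
//                   BUILD_STAT_MSG_IR_ATTR(functions, nounwind))
// and, after expanding STATS_DECL and STATS_TRACK, roughly:
{
  STATISTIC(NumIRFunction_nounwind, "Number of functions marked 'nounwind'");
  ++(NumIRFunction_nounwind);
}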
@@ -72,18 +110,32 @@ static cl::opt<unsigned>
MaxFixpointIterations("attributor-max-iterations", cl::Hidden,
cl::desc("Maximal number of fixpoint iterations."),
cl::init(32));
+static cl::opt<bool> VerifyMaxFixpointIterations(
+ "attributor-max-iterations-verify", cl::Hidden,
+ cl::desc("Verify that max-iterations is a tight bound for a fixpoint"),
+ cl::init(false));
static cl::opt<bool> DisableAttributor(
"attributor-disable", cl::Hidden,
cl::desc("Disable the attributor inter-procedural deduction pass."),
cl::init(true));
-static cl::opt<bool> VerifyAttributor(
- "attributor-verify", cl::Hidden,
- cl::desc("Verify the Attributor deduction and "
- "manifestation of attributes -- may issue false-positive errors"),
+static cl::opt<bool> ManifestInternal(
+ "attributor-manifest-internal", cl::Hidden,
+ cl::desc("Manifest Attributor internal string attributes."),
cl::init(false));
+static cl::opt<unsigned> DepRecInterval(
+ "attributor-dependence-recompute-interval", cl::Hidden,
+ cl::desc("Number of iterations until dependences are recomputed."),
+ cl::init(4));
+
+static cl::opt<bool> EnableHeapToStack("enable-heap-to-stack-conversion",
+ cl::init(true), cl::Hidden);
+
+static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128),
+ cl::Hidden);
+
/// Logic operators for the change status enum class.
///
///{
@@ -95,78 +147,30 @@ ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) {
}
///}
-/// Helper to adjust the statistics.
-static void bookkeeping(AbstractAttribute::ManifestPosition MP,
- const Attribute &Attr) {
- if (!AreStatisticsEnabled())
- return;
-
- if (!Attr.isEnumAttribute())
- return;
- switch (Attr.getKindAsEnum()) {
- case Attribute::NoUnwind:
- NumFnNoUnwind++;
- return;
- case Attribute::Returned:
- NumFnArgumentReturned++;
- return;
- case Attribute::NoSync:
- NumFnNoSync++;
- break;
- case Attribute::NoFree:
- NumFnNoFree++;
- break;
- case Attribute::NonNull:
- switch (MP) {
- case AbstractAttribute::MP_RETURNED:
- NumFnReturnedNonNull++;
- break;
- case AbstractAttribute::MP_ARGUMENT:
- NumFnArgumentNonNull++;
- break;
- case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
- NumCSArgumentNonNull++;
- break;
- default:
- break;
- }
- break;
- case Attribute::WillReturn:
- NumFnWillReturn++;
- break;
- default:
- return;
- }
-}
-
-template <typename StateTy>
-using followValueCB_t = std::function<bool(Value *, StateTy &State)>;
-template <typename StateTy>
-using visitValueCB_t = std::function<void(Value *, StateTy &State)>;
-
-/// Recursively visit all values that might become \p InitV at some point. This
+/// Recursively visit all values that might become \p IRP at some point. This
/// will be done by looking through cast instructions, selects, phis, and calls
-/// with the "returned" attribute. The callback \p FollowValueCB is asked before
-/// a potential origin value is looked at. If no \p FollowValueCB is passed, a
-/// default one is used that will make sure we visit every value only once. Once
-/// we cannot look through the value any further, the callback \p VisitValueCB
-/// is invoked and passed the current value and the \p State. To limit how much
-/// effort is invested, we will never visit more than \p MaxValues values.
-template <typename StateTy>
+/// with the "returned" attribute. Once we cannot look through the value any
+/// further, the callback \p VisitValueCB is invoked and passed the current
+/// value, the \p State, and a flag to indicate if we stripped anything. To
+/// limit how much effort is invested, we will never visit more values than
+/// specified by \p MaxValues.
+template <typename AAType, typename StateTy>
static bool genericValueTraversal(
- Value *InitV, StateTy &State, visitValueCB_t<StateTy> &VisitValueCB,
- followValueCB_t<StateTy> *FollowValueCB = nullptr, int MaxValues = 8) {
-
+ Attributor &A, IRPosition IRP, const AAType &QueryingAA, StateTy &State,
+ const function_ref<bool(Value &, StateTy &, bool)> &VisitValueCB,
+ int MaxValues = 8) {
+
+ const AAIsDead *LivenessAA = nullptr;
+ if (IRP.getAnchorScope())
+ LivenessAA = &A.getAAFor<AAIsDead>(
+ QueryingAA, IRPosition::function(*IRP.getAnchorScope()),
+ /* TrackDependence */ false);
+ bool AnyDead = false;
+
+ // TODO: Use Positions here to allow context sensitivity in VisitValueCB
SmallPtrSet<Value *, 16> Visited;
- followValueCB_t<bool> DefaultFollowValueCB = [&](Value *Val, bool &) {
- return Visited.insert(Val).second;
- };
-
- if (!FollowValueCB)
- FollowValueCB = &DefaultFollowValueCB;
-
SmallVector<Value *, 16> Worklist;
- Worklist.push_back(InitV);
+ Worklist.push_back(&IRP.getAssociatedValue());
int Iteration = 0;
do {
@@ -174,7 +178,7 @@ static bool genericValueTraversal(
// Check if we should process the current value. To prevent endless
// recursion keep a record of the values we followed!
- if (!(*FollowValueCB)(V, State))
+ if (!Visited.insert(V).second)
continue;
// Make sure we limit the compile time for complex expressions.
@@ -183,23 +187,23 @@ static bool genericValueTraversal(
// Explicitly look through calls with a "returned" attribute if we do
// not have a pointer as stripPointerCasts only works on them.
+ Value *NewV = nullptr;
if (V->getType()->isPointerTy()) {
- V = V->stripPointerCasts();
+ NewV = V->stripPointerCasts();
} else {
CallSite CS(V);
if (CS && CS.getCalledFunction()) {
- Value *NewV = nullptr;
for (Argument &Arg : CS.getCalledFunction()->args())
if (Arg.hasReturnedAttr()) {
NewV = CS.getArgOperand(Arg.getArgNo());
break;
}
- if (NewV) {
- Worklist.push_back(NewV);
- continue;
- }
}
}
+ if (NewV && NewV != V) {
+ Worklist.push_back(NewV);
+ continue;
+ }
// Look through select instructions, visit both potential values.
if (auto *SI = dyn_cast<SelectInst>(V)) {
@@ -208,35 +212,34 @@ static bool genericValueTraversal(
continue;
}
- // Look through phi nodes, visit all operands.
+ // Look through phi nodes, visit all live operands.
if (auto *PHI = dyn_cast<PHINode>(V)) {
- Worklist.append(PHI->op_begin(), PHI->op_end());
+ assert(LivenessAA &&
+ "Expected liveness in the presence of instructions!");
+ for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
+ const BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
+ if (LivenessAA->isAssumedDead(IncomingBB->getTerminator())) {
+ AnyDead = true;
+ continue;
+ }
+ Worklist.push_back(PHI->getIncomingValue(u));
+ }
continue;
}
// Once a leaf is reached we inform the user through the callback.
- VisitValueCB(V, State);
+ if (!VisitValueCB(*V, State, Iteration > 1))
+ return false;
} while (!Worklist.empty());
+ // If we actually used liveness information, we have to record a dependence.
+ if (AnyDead)
+ A.recordDependence(*LivenessAA, QueryingAA);
+
// All values have been visited.
return true;
}
-/// Helper to identify the correct offset into an attribute list.
-static unsigned getAttrIndex(AbstractAttribute::ManifestPosition MP,
- unsigned ArgNo = 0) {
- switch (MP) {
- case AbstractAttribute::MP_ARGUMENT:
- case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
- return ArgNo + AttributeList::FirstArgIndex;
- case AbstractAttribute::MP_FUNCTION:
- return AttributeList::FunctionIndex;
- case AbstractAttribute::MP_RETURNED:
- return AttributeList::ReturnIndex;
- }
- llvm_unreachable("Unknown manifest position!");
-}
-
/// Return true if \p New is equal or worse than \p Old.
static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
if (!Old.isIntAttribute())
@@ -247,12 +250,9 @@ static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
/// Return true if the information provided by \p Attr was added to the
/// attribute list \p Attrs. This is only the case if it was not already present
-/// in \p Attrs at the position describe by \p MP and \p ArgNo.
+/// in \p Attrs at the position described by \p PK and \p AttrIdx.
static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
- AttributeList &Attrs,
- AbstractAttribute::ManifestPosition MP,
- unsigned ArgNo = 0) {
- unsigned AttrIdx = getAttrIndex(MP, ArgNo);
+ AttributeList &Attrs, int AttrIdx) {
if (Attr.isEnumAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
@@ -270,9 +270,47 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
return true;
}
+ if (Attr.isIntAttribute()) {
+ Attribute::AttrKind Kind = Attr.getKindAsEnum();
+ if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ return false;
+ Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind);
+ Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ return true;
+ }
llvm_unreachable("Expected enum or string attribute!");
}
+static const Value *getPointerOperand(const Instruction *I) {
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (!LI->isVolatile())
+ return LI->getPointerOperand();
+
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ if (!SI->isVolatile())
+ return SI->getPointerOperand();
+
+ if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(I))
+ if (!CXI->isVolatile())
+ return CXI->getPointerOperand();
+
+ if (auto *RMWI = dyn_cast<AtomicRMWInst>(I))
+ if (!RMWI->isVolatile())
+ return RMWI->getPointerOperand();
+
+ return nullptr;
+}
+static const Value *getBasePointerOfAccessPointerOperand(const Instruction *I,
+ int64_t &BytesOffset,
+ const DataLayout &DL) {
+ const Value *Ptr = getPointerOperand(I);
+ if (!Ptr)
+ return nullptr;
+
+ return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL,
+ /*AllowNonInbounds*/ false);
+}
ChangeStatus AbstractAttribute::update(Attributor &A) {
ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
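The two pointer helpers introduced in the hunk above, getPointerOperand and getBasePointerOfAccessPointerOperand, decompose a non-volatile memory access into a base pointer plus a constant byte offset. A minimal usage sketch, assuming an Instruction *I is in scope (hypothetical caller, not part of the patch):

// Hypothetical caller: for a supported non-volatile load/store/atomic
// access, strip constant GEP offsets from the pointer operand and collect
// them in BytesOffset; the helper returns nullptr for other instructions.
int64_t BytesOffset = 0;
const DataLayout &DL = I->getModule()->getDataLayout();
if (const Value *Base =
        getBasePointerOfAccessPointerOperand(I, BytesOffset, DL)) {
  // `Base` is the pointer with constant GEP offsets stripped; the access
  // touches memory starting at `BytesOffset` bytes from `Base`.
}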
@@ -289,143 +327,527 @@ ChangeStatus AbstractAttribute::update(Attributor &A) {
return HasChanged;
}
-ChangeStatus AbstractAttribute::manifest(Attributor &A) {
- assert(getState().isValidState() &&
- "Attempted to manifest an invalid state!");
- assert(getAssociatedValue() &&
- "Attempted to manifest an attribute without associated value!");
-
- ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
- SmallVector<Attribute, 4> DeducedAttrs;
- getDeducedAttributes(DeducedAttrs);
-
- Function &ScopeFn = getAnchorScope();
- LLVMContext &Ctx = ScopeFn.getContext();
- ManifestPosition MP = getManifestPosition();
-
- AttributeList Attrs;
- SmallVector<unsigned, 4> ArgNos;
+ChangeStatus
+IRAttributeManifest::manifestAttrs(Attributor &A, IRPosition &IRP,
+ const ArrayRef<Attribute> &DeducedAttrs) {
+ Function *ScopeFn = IRP.getAssociatedFunction();
+ IRPosition::Kind PK = IRP.getPositionKind();
// In the following some generic code that will manifest attributes in
// DeducedAttrs if they improve the current IR. Due to the different
// annotation positions we use the underlying AttributeList interface.
- // Note that MP_CALL_SITE_ARGUMENT can annotate multiple locations.
- switch (MP) {
- case MP_ARGUMENT:
- ArgNos.push_back(cast<Argument>(getAssociatedValue())->getArgNo());
- Attrs = ScopeFn.getAttributes();
+ AttributeList Attrs;
+ switch (PK) {
+ case IRPosition::IRP_INVALID:
+ case IRPosition::IRP_FLOAT:
+ return ChangeStatus::UNCHANGED;
+ case IRPosition::IRP_ARGUMENT:
+ case IRPosition::IRP_FUNCTION:
+ case IRPosition::IRP_RETURNED:
+ Attrs = ScopeFn->getAttributes();
break;
- case MP_FUNCTION:
- case MP_RETURNED:
- ArgNos.push_back(0);
- Attrs = ScopeFn.getAttributes();
+ case IRPosition::IRP_CALL_SITE:
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ Attrs = ImmutableCallSite(&IRP.getAnchorValue()).getAttributes();
break;
- case MP_CALL_SITE_ARGUMENT: {
- CallSite CS(&getAnchoredValue());
- for (unsigned u = 0, e = CS.getNumArgOperands(); u != e; u++)
- if (CS.getArgOperand(u) == getAssociatedValue())
- ArgNos.push_back(u);
- Attrs = CS.getAttributes();
- }
}
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ LLVMContext &Ctx = IRP.getAnchorValue().getContext();
for (const Attribute &Attr : DeducedAttrs) {
- for (unsigned ArgNo : ArgNos) {
- if (!addIfNotExistent(Ctx, Attr, Attrs, MP, ArgNo))
- continue;
+ if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx()))
+ continue;
- HasChanged = ChangeStatus::CHANGED;
- bookkeeping(MP, Attr);
- }
+ HasChanged = ChangeStatus::CHANGED;
}
if (HasChanged == ChangeStatus::UNCHANGED)
return HasChanged;
- switch (MP) {
- case MP_ARGUMENT:
- case MP_FUNCTION:
- case MP_RETURNED:
- ScopeFn.setAttributes(Attrs);
+ switch (PK) {
+ case IRPosition::IRP_ARGUMENT:
+ case IRPosition::IRP_FUNCTION:
+ case IRPosition::IRP_RETURNED:
+ ScopeFn->setAttributes(Attrs);
+ break;
+ case IRPosition::IRP_CALL_SITE:
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ CallSite(&IRP.getAnchorValue()).setAttributes(Attrs);
+ break;
+ case IRPosition::IRP_INVALID:
+ case IRPosition::IRP_FLOAT:
break;
- case MP_CALL_SITE_ARGUMENT:
- CallSite(&getAnchoredValue()).setAttributes(Attrs);
}
return HasChanged;
}
-Function &AbstractAttribute::getAnchorScope() {
- Value &V = getAnchoredValue();
- if (isa<Function>(V))
- return cast<Function>(V);
- if (isa<Argument>(V))
- return *cast<Argument>(V).getParent();
- if (isa<Instruction>(V))
- return *cast<Instruction>(V).getFunction();
- llvm_unreachable("No scope for anchored value found!");
+const IRPosition IRPosition::EmptyKey(255);
+const IRPosition IRPosition::TombstoneKey(256);
+
+SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
+ IRPositions.emplace_back(IRP);
+
+ ImmutableCallSite ICS(&IRP.getAnchorValue());
+ switch (IRP.getPositionKind()) {
+ case IRPosition::IRP_INVALID:
+ case IRPosition::IRP_FLOAT:
+ case IRPosition::IRP_FUNCTION:
+ return;
+ case IRPosition::IRP_ARGUMENT:
+ case IRPosition::IRP_RETURNED:
+ IRPositions.emplace_back(
+ IRPosition::function(*IRP.getAssociatedFunction()));
+ return;
+ case IRPosition::IRP_CALL_SITE:
+ assert(ICS && "Expected call site!");
+ // TODO: We need to look at the operand bundles similar to the redirection
+ // in CallBase.
+ if (!ICS.hasOperandBundles())
+ if (const Function *Callee = ICS.getCalledFunction())
+ IRPositions.emplace_back(IRPosition::function(*Callee));
+ return;
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ assert(ICS && "Expected call site!");
+ // TODO: We need to look at the operand bundles similar to the redirection
+ // in CallBase.
+ if (!ICS.hasOperandBundles()) {
+ if (const Function *Callee = ICS.getCalledFunction()) {
+ IRPositions.emplace_back(IRPosition::returned(*Callee));
+ IRPositions.emplace_back(IRPosition::function(*Callee));
+ }
+ }
+ IRPositions.emplace_back(
+ IRPosition::callsite_function(cast<CallBase>(*ICS.getInstruction())));
+ return;
+ case IRPosition::IRP_CALL_SITE_ARGUMENT: {
+ int ArgNo = IRP.getArgNo();
+ assert(ICS && ArgNo >= 0 && "Expected call site!");
+ // TODO: We need to look at the operand bundles similar to the redirection
+ // in CallBase.
+ if (!ICS.hasOperandBundles()) {
+ const Function *Callee = ICS.getCalledFunction();
+ if (Callee && Callee->arg_size() > unsigned(ArgNo))
+ IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo)));
+ if (Callee)
+ IRPositions.emplace_back(IRPosition::function(*Callee));
+ }
+ IRPositions.emplace_back(IRPosition::value(IRP.getAssociatedValue()));
+ return;
+ }
+ }
+}
+
+bool IRPosition::hasAttr(ArrayRef<Attribute::AttrKind> AKs,
+ bool IgnoreSubsumingPositions) const {
+ for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) {
+ for (Attribute::AttrKind AK : AKs)
+ if (EquivIRP.getAttr(AK).getKindAsEnum() == AK)
+ return true;
+ // The first position returned by the SubsumingPositionIterator is
+ // always the position itself. If we ignore subsuming positions we
+ // are done after the first iteration.
+ if (IgnoreSubsumingPositions)
+ break;
+ }
+ return false;
}
-const Function &AbstractAttribute::getAnchorScope() const {
- return const_cast<AbstractAttribute *>(this)->getAnchorScope();
+void IRPosition::getAttrs(ArrayRef<Attribute::AttrKind> AKs,
+ SmallVectorImpl<Attribute> &Attrs) const {
+ for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this))
+ for (Attribute::AttrKind AK : AKs) {
+ const Attribute &Attr = EquivIRP.getAttr(AK);
+ if (Attr.getKindAsEnum() == AK)
+ Attrs.push_back(Attr);
+ }
}
-/// -----------------------NoUnwind Function Attribute--------------------------
+void IRPosition::verify() {
+ switch (KindOrArgNo) {
+ default:
+ assert(KindOrArgNo >= 0 && "Expected argument or call site argument!");
+ assert((isa<CallBase>(AnchorVal) || isa<Argument>(AnchorVal)) &&
+ "Expected call base or argument for positive attribute index!");
+ if (isa<Argument>(AnchorVal)) {
+ assert(cast<Argument>(AnchorVal)->getArgNo() == unsigned(getArgNo()) &&
+ "Argument number mismatch!");
+ assert(cast<Argument>(AnchorVal) == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ } else {
+ assert(cast<CallBase>(*AnchorVal).arg_size() > unsigned(getArgNo()) &&
+ "Call site argument number mismatch!");
+ assert(cast<CallBase>(*AnchorVal).getArgOperand(getArgNo()) ==
+ &getAssociatedValue() &&
+ "Associated value mismatch!");
+ }
+ break;
+ case IRP_INVALID:
+ assert(!AnchorVal && "Expected no value for an invalid position!");
+ break;
+ case IRP_FLOAT:
+ assert((!isa<CallBase>(&getAssociatedValue()) &&
+ !isa<Argument>(&getAssociatedValue())) &&
+ "Expected specialized kind for call base and argument values!");
+ break;
+ case IRP_RETURNED:
+ assert(isa<Function>(AnchorVal) &&
+ "Expected function for a 'returned' position!");
+ assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
+ break;
+ case IRP_CALL_SITE_RETURNED:
+ assert((isa<CallBase>(AnchorVal)) &&
+ "Expected call base for 'call site returned' position!");
+ assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
+ break;
+ case IRP_CALL_SITE:
+ assert((isa<CallBase>(AnchorVal)) &&
+ "Expected call base for 'call site function' position!");
+ assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
+ break;
+ case IRP_FUNCTION:
+ assert(isa<Function>(AnchorVal) &&
+ "Expected function for a 'function' position!");
+ assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
+ break;
+ }
+}
+
+namespace {
+/// Helper functions to clamp a state \p S of type \p StateType with the
+/// information in \p R and indicate/return if \p S did change (as-in update is
+/// required to be run again).
+///
+///{
+template <typename StateType>
+ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R);
+
+template <>
+ChangeStatus clampStateAndIndicateChange<IntegerState>(IntegerState &S,
+ const IntegerState &R) {
+ auto Assumed = S.getAssumed();
+ S ^= R;
+ return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+}
-struct AANoUnwindFunction : AANoUnwind, BooleanState {
+template <>
+ChangeStatus clampStateAndIndicateChange<BooleanState>(BooleanState &S,
+ const BooleanState &R) {
+ return clampStateAndIndicateChange<IntegerState>(S, R);
+}
+///}
- AANoUnwindFunction(Function &F, InformationCache &InfoCache)
- : AANoUnwind(F, InfoCache) {}
+/// Clamp the information known for all returned values of a function
+/// (identified by \p QueryingAA) into \p S.
+template <typename AAType, typename StateType = typename AAType::StateType>
+static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA,
+ StateType &S) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Clamp return value states for "
+ << static_cast<const AbstractAttribute &>(QueryingAA)
+ << " into " << S << "\n");
+
+ assert((QueryingAA.getIRPosition().getPositionKind() ==
+ IRPosition::IRP_RETURNED ||
+ QueryingAA.getIRPosition().getPositionKind() ==
+ IRPosition::IRP_CALL_SITE_RETURNED) &&
+ "Can only clamp returned value states for a function returned or call "
+ "site returned position!");
+
+ // Use an optional state as there might not be any return values and we want
+ // to join (IntegerState::operator&) the state of all there are.
+ Optional<StateType> T;
+
+ // Callback for each possibly returned value.
+ auto CheckReturnValue = [&](Value &RV) -> bool {
+ const IRPosition &RVPos = IRPosition::value(RV);
+ const AAType &AA = A.getAAFor<AAType>(QueryingAA, RVPos);
+ LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
+ << " @ " << RVPos << "\n");
+ const StateType &AAS = static_cast<const StateType &>(AA.getState());
+ if (T.hasValue())
+ *T &= AAS;
+ else
+ T = AAS;
+ LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T
+ << "\n");
+ return T->isValidState();
+ };
- /// See AbstractAttribute::getState()
- /// {
- AbstractState &getState() override { return *this; }
- const AbstractState &getState() const override { return *this; }
- /// }
+ if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA))
+ S.indicatePessimisticFixpoint();
+ else if (T.hasValue())
+ S ^= *T;
+}
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+/// Helper class to compose two generic deduction
+template <typename AAType, typename Base, typename StateType,
+ template <typename...> class F, template <typename...> class G>
+struct AAComposeTwoGenericDeduction
+ : public F<AAType, G<AAType, Base, StateType>, StateType> {
+ AAComposeTwoGenericDeduction(const IRPosition &IRP)
+ : F<AAType, G<AAType, Base, StateType>, StateType>(IRP) {}
- const std::string getAsStr() const override {
- return getAssumed() ? "nounwind" : "may-unwind";
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus ChangedF = F<AAType, G<AAType, Base, StateType>, StateType>::updateImpl(A);
+ ChangeStatus ChangedG = G<AAType, Base, StateType>::updateImpl(A);
+ return ChangedF | ChangedG;
}
+};
+
+/// Helper class for generic deduction: return value -> returned position.
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+struct AAReturnedFromReturnedValues : public Base {
+ AAReturnedFromReturnedValues(const IRPosition &IRP) : Base(IRP) {}
/// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
+ ChangeStatus updateImpl(Attributor &A) override {
+ StateType S;
+ clampReturnedValueStates<AAType, StateType>(A, *this, S);
+ // TODO: If we know we visited all returned values, thus none are assumed
+ // dead, we can take the known information from the state T.
+ return clampStateAndIndicateChange<StateType>(this->getState(), S);
+ }
+};
- /// See AANoUnwind::isAssumedNoUnwind().
- bool isAssumedNoUnwind() const override { return getAssumed(); }
+/// Clamp the information known at all call sites for a given argument
+/// (identified by \p QueryingAA) into \p S.
+template <typename AAType, typename StateType = typename AAType::StateType>
+static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
+ StateType &S) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for "
+ << static_cast<const AbstractAttribute &>(QueryingAA)
+ << " into " << S << "\n");
+
+ assert(QueryingAA.getIRPosition().getPositionKind() ==
+ IRPosition::IRP_ARGUMENT &&
+ "Can only clamp call site argument states for an argument position!");
+
+ // Use an optional state as there might not be any return values and we want
+ // to join (IntegerState::operator&) the state of all there are.
+ Optional<StateType> T;
+
+ // The argument number which is also the call site argument number.
+ unsigned ArgNo = QueryingAA.getIRPosition().getArgNo();
+
+ auto CallSiteCheck = [&](AbstractCallSite ACS) {
+ const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
+ // Check if a corresponding argument was found or if it is not associated
+ // (which can happen for callback calls).
+ if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
+ return false;
- /// See AANoUnwind::isKnownNoUnwind().
- bool isKnownNoUnwind() const override { return getKnown(); }
+ const AAType &AA = A.getAAFor<AAType>(QueryingAA, ACSArgPos);
+ LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
+ << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
+ const StateType &AAS = static_cast<const StateType &>(AA.getState());
+ if (T.hasValue())
+ *T &= AAS;
+ else
+ T = AAS;
+ LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T
+ << "\n");
+ return T->isValidState();
+ };
+
+ if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true))
+ S.indicatePessimisticFixpoint();
+ else if (T.hasValue())
+ S ^= *T;
+}
+
+/// Helper class for generic deduction: call site argument -> argument position.
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+struct AAArgumentFromCallSiteArguments : public Base {
+ AAArgumentFromCallSiteArguments(const IRPosition &IRP) : Base(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ StateType S;
+ clampCallSiteArgumentStates<AAType, StateType>(A, *this, S);
+ // TODO: If we know we visited all incoming values, thus none are assumed
+ // dead, we can take the known information from the state T.
+ return clampStateAndIndicateChange<StateType>(this->getState(), S);
+ }
};
-ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
+/// Helper class for generic replication: function returned -> cs returned.
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+struct AACallSiteReturnedFromReturned : public Base {
+ AACallSiteReturnedFromReturned(const IRPosition &IRP) : Base(IRP) {}
- // The map from instruction opcodes to those instructions in the function.
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
- auto Opcodes = {
- (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
- (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet,
- (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume};
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ assert(this->getIRPosition().getPositionKind() ==
+ IRPosition::IRP_CALL_SITE_RETURNED &&
+ "Can only wrap function returned positions for call site returned "
+ "positions!");
+ auto &S = this->getState();
+
+ const Function *AssociatedFunction =
+ this->getIRPosition().getAssociatedFunction();
+ if (!AssociatedFunction)
+ return S.indicatePessimisticFixpoint();
+
+ IRPosition FnPos = IRPosition::returned(*AssociatedFunction);
+ const AAType &AA = A.getAAFor<AAType>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ S, static_cast<const typename AAType::StateType &>(AA.getState()));
+ }
+};
- for (unsigned Opcode : Opcodes) {
- for (Instruction *I : OpcodeInstMap[Opcode]) {
- if (!I->mayThrow())
- continue;
+/// Helper class for generic deduction using must-be-executed-context
+/// Base class is required to have a `followUse` method.
- auto *NoUnwindAA = A.getAAFor<AANoUnwind>(*this, *I);
+/// bool followUse(Attributor &A, const Use *U, const Instruction *I)
+/// U - Underlying use.
+/// I - The user of the \p U.
+/// `followUse` returns true if the value should be tracked transitively.
- if (!NoUnwindAA || !NoUnwindAA->isAssumedNoUnwind()) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+struct AAFromMustBeExecutedContext : public Base {
+ AAFromMustBeExecutedContext(const IRPosition &IRP) : Base(IRP) {}
+
+ void initialize(Attributor &A) override {
+ Base::initialize(A);
+ IRPosition &IRP = this->getIRPosition();
+ Instruction *CtxI = IRP.getCtxI();
+
+ if (!CtxI)
+ return;
+
+ for (const Use &U : IRP.getAssociatedValue().uses())
+ Uses.insert(&U);
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto BeforeState = this->getState();
+ auto &S = this->getState();
+ Instruction *CtxI = this->getIRPosition().getCtxI();
+ if (!CtxI)
+ return ChangeStatus::UNCHANGED;
+
+ MustBeExecutedContextExplorer &Explorer =
+ A.getInfoCache().getMustBeExecutedContextExplorer();
+
+ SetVector<const Use *> NextUses;
+
+ for (const Use *U : Uses) {
+ if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) {
+ auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI);
+ bool Found = EIt.count(UserI);
+ while (!Found && ++EIt != EEnd)
+ Found = EIt.getCurrentInst() == UserI;
+ if (Found && Base::followUse(A, U, UserI))
+ for (const Use &Us : UserI->uses())
+ NextUses.insert(&Us);
}
}
+ for (const Use *U : NextUses)
+ Uses.insert(U);
+
+ return BeforeState == S ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
}
- return ChangeStatus::UNCHANGED;
-}
+
+private:
+ /// Container for (transitive) uses of the associated value.
+ SetVector<const Use *> Uses;
+};
+
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+using AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext =
+ AAComposeTwoGenericDeduction<AAType, Base, StateType,
+ AAFromMustBeExecutedContext,
+ AAArgumentFromCallSiteArguments>;
+
+template <typename AAType, typename Base,
+ typename StateType = typename AAType::StateType>
+using AACallSiteReturnedFromReturnedAndMustBeExecutedContext =
+ AAComposeTwoGenericDeduction<AAType, Base, StateType,
+ AAFromMustBeExecutedContext,
+ AACallSiteReturnedFromReturned>;
+
+/// -----------------------NoUnwind Function Attribute--------------------------
+
+struct AANoUnwindImpl : AANoUnwind {
+ AANoUnwindImpl(const IRPosition &IRP) : AANoUnwind(IRP) {}
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nounwind" : "may-unwind";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto Opcodes = {
+ (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet,
+ (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume};
+
+ auto CheckForNoUnwind = [&](Instruction &I) {
+ if (!I.mayThrow())
+ return true;
+
+ if (ImmutableCallSite ICS = ImmutableCallSite(&I)) {
+ const auto &NoUnwindAA =
+ A.getAAFor<AANoUnwind>(*this, IRPosition::callsite_function(ICS));
+ return NoUnwindAA.isAssumedNoUnwind();
+ }
+ return false;
+ };
+
+ if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+};
+
+struct AANoUnwindFunction final : public AANoUnwindImpl {
+ AANoUnwindFunction(const IRPosition &IRP) : AANoUnwindImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nounwind) }
+};
+
+/// NoUnwind attribute deduction for a call sites.
+struct AANoUnwindCallSite final : AANoUnwindImpl {
+ AANoUnwindCallSite(const IRPosition &IRP) : AANoUnwindImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoUnwindImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoUnwind::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); }
+};
/// --------------------- Function Return Values -------------------------------
@@ -434,68 +856,48 @@ ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) {
///
/// If there is a unique returned value R, the manifest method will:
/// - mark R with the "returned" attribute, if R is an argument.
-class AAReturnedValuesImpl final : public AAReturnedValues, AbstractState {
+class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState {
/// Mapping of values potentially returned by the associated function to the
/// return instructions that might return them.
- DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> ReturnedValues;
+ MapVector<Value *, SmallSetVector<ReturnInst *, 4>> ReturnedValues;
+
+ /// Mapping to remember the number of returned values for a call site such
+ /// that we can avoid updates if nothing changed.
+ DenseMap<const CallBase *, unsigned> NumReturnedValuesPerKnownAA;
+
+ /// Set of unresolved calls returned by the associated function.
+ SmallSetVector<CallBase *, 4> UnresolvedCalls;
/// State flags
///
///{
- bool IsFixed;
- bool IsValidState;
- bool HasOverdefinedReturnedCalls;
+ bool IsFixed = false;
+ bool IsValidState = true;
///}
- /// Collect values that could become \p V in the set \p Values, each mapped to
- /// \p ReturnInsts.
- void collectValuesRecursively(
- Attributor &A, Value *V, SmallPtrSetImpl<ReturnInst *> &ReturnInsts,
- DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> &Values) {
-
- visitValueCB_t<bool> VisitValueCB = [&](Value *Val, bool &) {
- assert(!isa<Instruction>(Val) ||
- &getAnchorScope() == cast<Instruction>(Val)->getFunction());
- Values[Val].insert(ReturnInsts.begin(), ReturnInsts.end());
- };
-
- bool UnusedBool;
- bool Success = genericValueTraversal(V, UnusedBool, VisitValueCB);
-
- // If we did abort the above traversal we haven't see all the values.
- // Consequently, we cannot know if the information we would derive is
- // accurate so we give up early.
- if (!Success)
- indicatePessimisticFixpoint();
- }
-
public:
- /// See AbstractAttribute::AbstractAttribute(...).
- AAReturnedValuesImpl(Function &F, InformationCache &InfoCache)
- : AAReturnedValues(F, InfoCache) {
- // We do not have an associated argument yet.
- AssociatedVal = nullptr;
- }
+ AAReturnedValuesImpl(const IRPosition &IRP) : AAReturnedValues(IRP) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// Reset the state.
- AssociatedVal = nullptr;
IsFixed = false;
IsValidState = true;
- HasOverdefinedReturnedCalls = false;
ReturnedValues.clear();
- Function &F = cast<Function>(getAnchoredValue());
+ Function *F = getAssociatedFunction();
+ if (!F) {
+ indicatePessimisticFixpoint();
+ return;
+ }
// The map from instruction opcodes to those instructions in the function.
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+ auto &OpcodeInstMap = A.getInfoCache().getOpcodeInstMapForFunction(*F);
// Look through all arguments, if one is marked as returned we are done.
- for (Argument &Arg : F.args()) {
+ for (Argument &Arg : F->args()) {
if (Arg.hasReturnedAttr()) {
-
auto &ReturnInstSet = ReturnedValues[&Arg];
for (Instruction *RI : OpcodeInstMap[Instruction::Ret])
ReturnInstSet.insert(cast<ReturnInst>(RI));
@@ -505,13 +907,8 @@ public:
}
}
- // If no argument was marked as returned we look at all return instructions
- // and collect potentially returned values.
- for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) {
- SmallPtrSet<ReturnInst *, 1> RISet({cast<ReturnInst>(RI)});
- collectValuesRecursively(A, cast<ReturnInst>(RI)->getReturnValue(), RISet,
- ReturnedValues);
- }
+ if (!F->hasExactDefinition())
+ indicatePessimisticFixpoint();
}
/// See AbstractAttribute::manifest(...).
@@ -523,25 +920,35 @@ public:
/// See AbstractAttribute::getState(...).
const AbstractState &getState() const override { return *this; }
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
-
/// See AbstractAttribute::updateImpl(Attributor &A).
ChangeStatus updateImpl(Attributor &A) override;
+ llvm::iterator_range<iterator> returned_values() override {
+ return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
+ }
+
+ llvm::iterator_range<const_iterator> returned_values() const override {
+ return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
+ }
+
+ const SmallSetVector<CallBase *, 4> &getUnresolvedCalls() const override {
+ return UnresolvedCalls;
+ }
+
/// Return the number of potential return values, -1 if unknown.
- size_t getNumReturnValues() const {
+ size_t getNumReturnValues() const override {
return isValidState() ? ReturnedValues.size() : -1;
}
/// Return an assumed unique return value if a single candidate is found. If
/// there cannot be one, return a nullptr. If it is not clear yet, return the
/// Optional::NoneType.
- Optional<Value *> getAssumedUniqueReturnValue() const;
+ Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;
- /// See AbstractState::checkForallReturnedValues(...).
- bool
- checkForallReturnedValues(std::function<bool(Value &)> &Pred) const override;
+ /// See AbstractState::checkForAllReturnedValues(...).
+ bool checkForAllReturnedValuesAndReturnInsts(
+ const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
+ &Pred) const override;
/// Pretty print the attribute similar to the IR representation.
const std::string getAsStr() const override;
@@ -553,13 +960,15 @@ public:
bool isValidState() const override { return IsValidState; }
/// See AbstractState::indicateOptimisticFixpoint(...).
- void indicateOptimisticFixpoint() override {
+ ChangeStatus indicateOptimisticFixpoint() override {
IsFixed = true;
- IsValidState &= true;
+ return ChangeStatus::UNCHANGED;
}
- void indicatePessimisticFixpoint() override {
+
+ ChangeStatus indicatePessimisticFixpoint() override {
IsFixed = true;
IsValidState = false;
+ return ChangeStatus::CHANGED;
}
};
@@ -568,21 +977,52 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
// Bookkeeping.
assert(isValidState());
- NumFnKnownReturns++;
+ STATS_DECLTRACK(KnownReturnValues, FunctionReturn,
+ "Number of function with known return values");
// Check if we have an assumed unique return value that we could manifest.
- Optional<Value *> UniqueRV = getAssumedUniqueReturnValue();
+ Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A);
if (!UniqueRV.hasValue() || !UniqueRV.getValue())
return Changed;
// Bookkeeping.
- NumFnUniqueReturned++;
+ STATS_DECLTRACK(UniqueReturnValue, FunctionReturn,
+ "Number of function with unique return");
+
+ // Callback to replace the uses of CB with the constant C.
+ auto ReplaceCallSiteUsersWith = [](CallBase &CB, Constant &C) {
+ if (CB.getNumUses() == 0 || CB.isMustTailCall())
+ return ChangeStatus::UNCHANGED;
+ CB.replaceAllUsesWith(&C);
+ return ChangeStatus::CHANGED;
+ };
// If the assumed unique return value is an argument, annotate it.
if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
- AssociatedVal = UniqueRVArg;
- Changed = AbstractAttribute::manifest(A) | Changed;
+ getIRPosition() = IRPosition::argument(*UniqueRVArg);
+ Changed = IRAttribute::manifest(A);
+ } else if (auto *RVC = dyn_cast<Constant>(UniqueRV.getValue())) {
+ // We can replace the returned value with the unique returned constant.
+ Value &AnchorValue = getAnchorValue();
+ if (Function *F = dyn_cast<Function>(&AnchorValue)) {
+ for (const Use &U : F->uses())
+ if (CallBase *CB = dyn_cast<CallBase>(U.getUser()))
+ if (CB->isCallee(&U)) {
+ Constant *RVCCast =
+ ConstantExpr::getTruncOrBitCast(RVC, CB->getType());
+ Changed = ReplaceCallSiteUsersWith(*CB, *RVCCast) | Changed;
+ }
+ } else {
+ assert(isa<CallBase>(AnchorValue) &&
+ "Expected a function or call base anchor!");
+ Constant *RVCCast =
+ ConstantExpr::getTruncOrBitCast(RVC, AnchorValue.getType());
+ Changed = ReplaceCallSiteUsersWith(cast<CallBase>(AnchorValue), *RVCCast);
+ }
+ if (Changed == ChangeStatus::CHANGED)
+ STATS_DECLTRACK(UniqueConstantReturnValue, FunctionReturn,
+ "Number of function returns replaced by constant return");
}
return Changed;
@@ -590,18 +1030,20 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
const std::string AAReturnedValuesImpl::getAsStr() const {
return (isAtFixpoint() ? "returns(#" : "may-return(#") +
- (isValidState() ? std::to_string(getNumReturnValues()) : "?") + ")";
+ (isValidState() ? std::to_string(getNumReturnValues()) : "?") +
+ ")[#UC: " + std::to_string(UnresolvedCalls.size()) + "]";
}
-Optional<Value *> AAReturnedValuesImpl::getAssumedUniqueReturnValue() const {
- // If checkForallReturnedValues provides a unique value, ignoring potential
+Optional<Value *>
+AAReturnedValuesImpl::getAssumedUniqueReturnValue(Attributor &A) const {
+ // If checkForAllReturnedValues provides a unique value, ignoring potential
// undef values that can also be present, it is assumed to be the actual
// return value and forwarded to the caller of this method. If there are
// multiple, a nullptr is returned indicating there cannot be a unique
// returned value.
Optional<Value *> UniqueRV;
- std::function<bool(Value &)> Pred = [&](Value &RV) -> bool {
+ auto Pred = [&](Value &RV) -> bool {
// If we found a second returned value and neither the current nor the saved
// one is an undef, there is no unique returned value. Undefs are special
// since we can pretend they have any value.
@@ -618,14 +1060,15 @@ Optional<Value *> AAReturnedValuesImpl::getAssumedUniqueReturnValue() const {
return true;
};
- if (!checkForallReturnedValues(Pred))
+ if (!A.checkForAllReturnedValues(Pred, *this))
UniqueRV = nullptr;
return UniqueRV;
}
-bool AAReturnedValuesImpl::checkForallReturnedValues(
- std::function<bool(Value &)> &Pred) const {
+bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts(
+ const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
+ &Pred) const {
if (!isValidState())
return false;
@@ -634,11 +1077,11 @@ bool AAReturnedValuesImpl::checkForallReturnedValues(
for (auto &It : ReturnedValues) {
Value *RV = It.first;
- ImmutableCallSite ICS(RV);
- if (ICS && !HasOverdefinedReturnedCalls)
+ CallBase *CB = dyn_cast<CallBase>(RV);
+ if (CB && !UnresolvedCalls.count(CB))
continue;
- if (!Pred(*RV))
+ if (!Pred(*RV, It.second))
return false;
}
@@ -646,125 +1089,196 @@ bool AAReturnedValuesImpl::checkForallReturnedValues(
}
ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
+ size_t NumUnresolvedCalls = UnresolvedCalls.size();
+ bool Changed = false;
+
+ // State used in the value traversals starting in returned values.
+ struct RVState {
+ // The map in which we collect return values -> return instrs.
+ decltype(ReturnedValues) &RetValsMap;
+ // The flag to indicate a change.
+ bool &Changed;
+ // The return instrs we come from.
+ SmallSetVector<ReturnInst *, 4> RetInsts;
+ };
- // Check if we know of any values returned by the associated function,
- // if not, we are done.
- if (getNumReturnValues() == 0) {
- indicateOptimisticFixpoint();
- return ChangeStatus::UNCHANGED;
- }
+ // Callback for a leaf value returned by the associated function.
+ auto VisitValueCB = [](Value &Val, RVState &RVS, bool) -> bool {
+ auto Size = RVS.RetValsMap[&Val].size();
+ RVS.RetValsMap[&Val].insert(RVS.RetInsts.begin(), RVS.RetInsts.end());
+ bool Inserted = RVS.RetValsMap[&Val].size() != Size;
+ RVS.Changed |= Inserted;
+ LLVM_DEBUG({
+ if (Inserted)
+ dbgs() << "[AAReturnedValues] 1 Add new returned value " << Val
+ << " => " << RVS.RetInsts.size() << "\n";
+ });
+ return true;
+ };
- // Check if any of the returned values is a call site we can refine.
- decltype(ReturnedValues) AddRVs;
- bool HasCallSite = false;
+ // Helper method to invoke the generic value traversal.
+ auto VisitReturnedValue = [&](Value &RV, RVState &RVS) {
+ IRPosition RetValPos = IRPosition::value(RV);
+ return genericValueTraversal<AAReturnedValues, RVState>(A, RetValPos, *this,
+ RVS, VisitValueCB);
+ };
- // Look at all returned call sites.
- for (auto &It : ReturnedValues) {
- SmallPtrSet<ReturnInst *, 2> &ReturnInsts = It.second;
- Value *RV = It.first;
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Potentially returned value " << *RV
- << "\n");
+ // Callback for all "return instructions" live in the associated function.
+ auto CheckReturnInst = [this, &VisitReturnedValue, &Changed](Instruction &I) {
+ ReturnInst &Ret = cast<ReturnInst>(I);
+ RVState RVS({ReturnedValues, Changed, {}});
+ RVS.RetInsts.insert(&Ret);
+ return VisitReturnedValue(*Ret.getReturnValue(), RVS);
+ };
- // Only call sites can change during an update, ignore the rest.
- CallSite RetCS(RV);
- if (!RetCS)
+ // Start by discovering returned values from all live return instructions in
+ // the associated function.
+ if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}))
+ return indicatePessimisticFixpoint();
+
+ // Once returned values "directly" present in the code are handled we try to
+ // resolve returned calls.
+ decltype(ReturnedValues) NewRVsMap;
+ for (auto &It : ReturnedValues) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *It.first
+ << " by #" << It.second.size() << " RIs\n");
+ CallBase *CB = dyn_cast<CallBase>(It.first);
+ if (!CB || UnresolvedCalls.count(CB))
continue;
- // For now, any call site we see will prevent us from directly fixing the
- // state. However, if the information on the callees is fixed, the call
- // sites will be removed and we will fix the information for this state.
- HasCallSite = true;
-
- // Try to find a assumed unique return value for the called function.
- auto *RetCSAA = A.getAAFor<AAReturnedValuesImpl>(*this, *RV);
- if (!RetCSAA) {
- HasOverdefinedReturnedCalls = true;
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site (" << *RV
- << ") with " << (RetCSAA ? "invalid" : "no")
- << " associated state\n");
+ if (!CB->getCalledFunction()) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB
+ << "\n");
+ UnresolvedCalls.insert(CB);
continue;
}
- // Try to find a assumed unique return value for the called function.
- Optional<Value *> AssumedUniqueRV = RetCSAA->getAssumedUniqueReturnValue();
+ // TODO: use the function scope once we have call site AAReturnedValues.
+ const auto &RetValAA = A.getAAFor<AAReturnedValues>(
+ *this, IRPosition::function(*CB->getCalledFunction()));
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: "
+ << static_cast<const AbstractAttribute &>(RetValAA)
+ << "\n");
- // If no assumed unique return value was found due to the lack of
- // candidates, we may need to resolve more calls (through more update
- // iterations) or the called function will not return. Either way, we simply
- // stick with the call sites as return values. Because there were not
- // multiple possibilities, we do not treat it as overdefined.
- if (!AssumedUniqueRV.hasValue())
+ // Skip dead ends, thus if we do not know anything about the returned
+ // call we mark it as unresolved and it will stay that way.
+ if (!RetValAA.getState().isValidState()) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB
+ << "\n");
+ UnresolvedCalls.insert(CB);
continue;
+ }
- // If multiple, non-refinable values were found, there cannot be a unique
- // return value for the called function. The returned call is overdefined!
- if (!AssumedUniqueRV.getValue()) {
- HasOverdefinedReturnedCalls = true;
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site has multiple "
- "potentially returned values\n");
+ // Do not try to learn partial information. If the callee has unresolved
+ // return values we will treat the call as unresolved/opaque.
+ auto &RetValAAUnresolvedCalls = RetValAA.getUnresolvedCalls();
+ if (!RetValAAUnresolvedCalls.empty()) {
+ UnresolvedCalls.insert(CB);
continue;
}
- LLVM_DEBUG({
- bool UniqueRVIsKnown = RetCSAA->isAtFixpoint();
- dbgs() << "[AAReturnedValues] Returned call site "
- << (UniqueRVIsKnown ? "known" : "assumed")
- << " unique return value: " << *AssumedUniqueRV << "\n";
- });
+ // Now check if we can track transitively returned values. If possible, i.e.,
+ // if all return values can be represented in the current scope, do so.
+ bool Unresolved = false;
+ for (auto &RetValAAIt : RetValAA.returned_values()) {
+ Value *RetVal = RetValAAIt.first;
+ if (isa<Argument>(RetVal) || isa<CallBase>(RetVal) ||
+ isa<Constant>(RetVal))
+ continue;
+ // Anything that did not fit in the above categories cannot be resolved,
+ // mark the call as unresolved.
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] transitively returned value "
+ "cannot be translated: "
+ << *RetVal << "\n");
+ UnresolvedCalls.insert(CB);
+ Unresolved = true;
+ break;
+ }
- // The assumed unique return value.
- Value *AssumedRetVal = AssumedUniqueRV.getValue();
-
- // If the assumed unique return value is an argument, lookup the matching
- // call site operand and recursively collect new returned values.
- // If it is not an argument, it is just put into the set of returned values
- // as we would have already looked through casts, phis, and similar values.
- if (Argument *AssumedRetArg = dyn_cast<Argument>(AssumedRetVal))
- collectValuesRecursively(A,
- RetCS.getArgOperand(AssumedRetArg->getArgNo()),
- ReturnInsts, AddRVs);
- else
- AddRVs[AssumedRetVal].insert(ReturnInsts.begin(), ReturnInsts.end());
- }
+ if (Unresolved)
+ continue;
- // Keep track of any change to trigger updates on dependent attributes.
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ // Now track transitively returned values.
+ unsigned &NumRetAA = NumReturnedValuesPerKnownAA[CB];
+ if (NumRetAA == RetValAA.getNumReturnValues()) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Skip call as it has not "
+ "changed since it was seen last\n");
+ continue;
+ }
+ NumRetAA = RetValAA.getNumReturnValues();
+
+ for (auto &RetValAAIt : RetValAA.returned_values()) {
+ Value *RetVal = RetValAAIt.first;
+ if (Argument *Arg = dyn_cast<Argument>(RetVal)) {
+ // Arguments are mapped to call site operands and we begin the traversal
+ // again.
+ bool Unused = false;
+ RVState RVS({NewRVsMap, Unused, RetValAAIt.second});
+ VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS);
+ continue;
+ } else if (isa<CallBase>(RetVal)) {
+ // Call sites are resolved by the callee attribute over time, no need to
+ // do anything for us.
+ continue;
+ } else if (isa<Constant>(RetVal)) {
+ // Constants are valid everywhere, we can simply take them.
+ NewRVsMap[RetVal].insert(It.second.begin(), It.second.end());
+ continue;
+ }
+ }
+ }
- for (auto &It : AddRVs) {
+ // To avoid modifications to the ReturnedValues map while we iterate over it
+ // we kept record of potential new entries in a copy map, NewRVsMap.
+ for (auto &It : NewRVsMap) {
assert(!It.second.empty() && "Entry does not add anything.");
auto &ReturnInsts = ReturnedValues[It.first];
for (ReturnInst *RI : It.second)
- if (ReturnInsts.insert(RI).second) {
+ if (ReturnInsts.insert(RI)) {
LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value "
<< *It.first << " => " << *RI << "\n");
- Changed = ChangeStatus::CHANGED;
+ Changed = true;
}
}
- // If there is no call site in the returned values we are done.
- if (!HasCallSite) {
- indicateOptimisticFixpoint();
- return ChangeStatus::CHANGED;
- }
-
- return Changed;
+ Changed |= (NumUnresolvedCalls != UnresolvedCalls.size());
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
-/// ------------------------ NoSync Function Attribute -------------------------
+struct AAReturnedValuesFunction final : public AAReturnedValuesImpl {
+ AAReturnedValuesFunction(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {}
-struct AANoSyncFunction : AANoSync, BooleanState {
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(returned) }
+};
- AANoSyncFunction(Function &F, InformationCache &InfoCache)
- : AANoSync(F, InfoCache) {}
+/// Returned values information for a call site.
+struct AAReturnedValuesCallSite final : AAReturnedValuesImpl {
+ AAReturnedValuesCallSite(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {}
- /// See AbstractAttribute::getState()
- /// {
- AbstractState &getState() override { return *this; }
- const AbstractState &getState() const override { return *this; }
- /// }
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites instead of
+ // redirecting requests to the callee.
+ llvm_unreachable("Abstract attributes for returned values are not "
+ "supported for call sites yet!");
+ }
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// ------------------------ NoSync Function Attribute -------------------------
+
+struct AANoSyncImpl : AANoSync {
+ AANoSyncImpl(const IRPosition &IRP) : AANoSync(IRP) {}
const std::string getAsStr() const override {
return getAssumed() ? "nosync" : "may-sync";
@@ -773,12 +1287,6 @@ struct AANoSyncFunction : AANoSync, BooleanState {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
- /// See AANoSync::isAssumedNoSync()
- bool isAssumedNoSync() const override { return getAssumed(); }
-
- /// See AANoSync::isKnownNoSync()
- bool isKnownNoSync() const override { return getKnown(); }
-
/// Helper function used to determine whether an instruction is non-relaxed
/// atomic. In other words, if an atomic instruction does not have unordered
/// or monotonic ordering
@@ -792,7 +1300,7 @@ struct AANoSyncFunction : AANoSync, BooleanState {
static bool isNoSyncIntrinsic(Instruction *I);
};
-bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) {
+bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
if (!I->isAtomic())
return false;
@@ -841,7 +1349,7 @@ bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) {
/// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics.
/// FIXME: We should ipmrove the handling of intrinsics.
-bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) {
+bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
/// Element wise atomic memory intrinsics are can only be unordered,
@@ -863,7 +1371,7 @@ bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) {
return false;
}
-bool AANoSyncFunction::isVolatile(Instruction *I) {
+bool AANoSyncImpl::isVolatile(Instruction *I) {
assert(!ImmutableCallSite(I) && !isa<CallBase>(I) &&
"Calls should not be checked here");
@@ -881,482 +1389,3074 @@ bool AANoSyncFunction::isVolatile(Instruction *I) {
}
}
-ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
+ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
- /// We are looking for volatile instructions or Non-Relaxed atomics.
- /// FIXME: We should ipmrove the handling of intrinsics.
- for (Instruction *I : InfoCache.getReadOrWriteInstsForFunction(F)) {
- ImmutableCallSite ICS(I);
- auto *NoSyncAA = A.getAAFor<AANoSyncFunction>(*this, *I);
+ auto CheckRWInstForNoSync = [&](Instruction &I) {
+ /// We are looking for volatile instructions or Non-Relaxed atomics.
+ /// FIXME: We should improve the handling of intrinsics.
- if (isa<IntrinsicInst>(I) && isNoSyncIntrinsic(I))
- continue;
+ if (isa<IntrinsicInst>(&I) && isNoSyncIntrinsic(&I))
+ return true;
+
+ if (ImmutableCallSite ICS = ImmutableCallSite(&I)) {
+ if (ICS.hasFnAttr(Attribute::NoSync))
+ return true;
+
+ const auto &NoSyncAA =
+ A.getAAFor<AANoSync>(*this, IRPosition::callsite_function(ICS));
+ if (NoSyncAA.isAssumedNoSync())
+ return true;
+ return false;
+ }
+
+ if (!isVolatile(&I) && !isNonRelaxedAtomic(&I))
+ return true;
+
+ return false;
+ };
- if (ICS && (!NoSyncAA || !NoSyncAA->isAssumedNoSync()) &&
- !ICS.hasFnAttr(Attribute::NoSync)) {
+ auto CheckForNoSync = [&](Instruction &I) {
+ // At this point we handled all read/write effects and they are all
+ // nosync, so they can be skipped.
+ if (I.mayReadOrWriteMemory())
+ return true;
+
+ // non-convergent and readnone imply nosync.
+ return !ImmutableCallSite(&I).isConvergent();
+ };
+
+ if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) ||
+ !A.checkForAllCallLikeInstructions(CheckForNoSync, *this))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+}
+
+struct AANoSyncFunction final : public AANoSyncImpl {
+ AANoSyncFunction(const IRPosition &IRP) : AANoSyncImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nosync) }
+};
+
+/// NoSync attribute deduction for a call site.
+struct AANoSyncCallSite final : AANoSyncImpl {
+ AANoSyncCallSite(const IRPosition &IRP) : AANoSyncImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoSyncImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AANoSync::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); }
+};
+
+/// ------------------------ No-Free Attributes ----------------------------
+
+struct AANoFreeImpl : public AANoFree {
+ AANoFreeImpl(const IRPosition &IRP) : AANoFree(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto CheckForNoFree = [&](Instruction &I) {
+ ImmutableCallSite ICS(&I);
+ if (ICS.hasFnAttr(Attribute::NoFree))
+ return true;
+
+ const auto &NoFreeAA =
+ A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(ICS));
+ return NoFreeAA.isAssumedNoFree();
+ };
+
+ if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nofree" : "may-free";
+ }
+};
+
+struct AANoFreeFunction final : public AANoFreeImpl {
+ AANoFreeFunction(const IRPosition &IRP) : AANoFreeImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nofree) }
+};
+
+/// NoFree attribute deduction for a call site.
+struct AANoFreeCallSite final : AANoFreeImpl {
+ AANoFreeCallSite(const IRPosition &IRP) : AANoFreeImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoFreeImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AANoFree::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); }
+};
+
+/// ------------------------ NonNull Argument Attribute ------------------------
+static int64_t getKnownNonNullAndDerefBytesForUse(
+ Attributor &A, AbstractAttribute &QueryingAA, Value &AssociatedValue,
+ const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) {
+ TrackUse = false;
+
+ const Value *UseV = U->get();
+ if (!UseV->getType()->isPointerTy())
+ return 0;
+
+ Type *PtrTy = UseV->getType();
+ const Function *F = I->getFunction();
+ bool NullPointerIsDefined =
+ F ? llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true;
+ const DataLayout &DL = A.getInfoCache().getDL();
+ if (ImmutableCallSite ICS = ImmutableCallSite(I)) {
+ if (ICS.isBundleOperand(U))
+ return 0;
+
+ if (ICS.isCallee(U)) {
+ IsNonNull |= !NullPointerIsDefined;
+ return 0;
}
- if (ICS)
- continue;
+ unsigned ArgNo = ICS.getArgumentNo(U);
+ IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo);
+ auto &DerefAA = A.getAAFor<AADereferenceable>(QueryingAA, IRP);
+ IsNonNull |= DerefAA.isKnownNonNull();
+ return DerefAA.getKnownDereferenceableBytes();
+ }
- if (!isVolatile(I) && !isNonRelaxedAtomic(I))
- continue;
+ int64_t Offset;
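+ // For illustration (hypothetical IR): a direct access like
+ //   %x = load i32, i32* %p
+ // yields 4 known dereferenceable bytes for %p and, unless null is a defined
+ // pointer in this function, nonnull as well.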
+ if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) {
+ if (Base == &AssociatedValue && getPointerOperand(I) == UseV) {
+ int64_t DerefBytes =
+ Offset + (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType());
+
+ IsNonNull |= !NullPointerIsDefined;
+ return DerefBytes;
+ }
+ }
+ if (const Value *Base =
+ GetPointerBaseWithConstantOffset(UseV, Offset, DL,
+ /*AllowNonInbounds*/ false)) {
+ auto &DerefAA =
+ A.getAAFor<AADereferenceable>(QueryingAA, IRPosition::value(*Base));
+ IsNonNull |= (!NullPointerIsDefined && DerefAA.isKnownNonNull());
+ IsNonNull |= (!NullPointerIsDefined && (Offset != 0));
+ int64_t DerefBytes = DerefAA.getKnownDereferenceableBytes();
+ return std::max(int64_t(0), DerefBytes - Offset);
+ }
+
+ return 0;
+}
+
+struct AANonNullImpl : AANonNull {
+ AANonNullImpl(const IRPosition &IRP)
+ : AANonNull(IRP),
+ NullIsDefined(NullPointerIsDefined(
+ getAnchorScope(),
+ getAssociatedValue().getType()->getPointerAddressSpace())) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (!NullIsDefined &&
+ hasAttr({Attribute::NonNull, Attribute::Dereferenceable}))
+ indicateOptimisticFixpoint();
+ else
+ AANonNull::initialize(A);
+ }
+
+ /// See AAFromMustBeExecutedContext
+ bool followUse(Attributor &A, const Use *U, const Instruction *I) {
+ bool IsNonNull = false;
+ bool TrackUse = false;
+ getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I,
+ IsNonNull, TrackUse);
+ takeKnownMaximum(IsNonNull);
+ return TrackUse;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nonnull" : "may-null";
+ }
+
+ /// Flag to determine if the underlying value can be null and still allow
+ /// valid accesses.
+ const bool NullIsDefined;
+};
+
+/// NonNull attribute for a floating value.
+struct AANonNullFloating
+ : AAFromMustBeExecutedContext<AANonNull, AANonNullImpl> {
+ using Base = AAFromMustBeExecutedContext<AANonNull, AANonNullImpl>;
+ AANonNullFloating(const IRPosition &IRP) : Base(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Base::initialize(A);
+
+ if (isAtFixpoint())
+ return;
+
+ const IRPosition &IRP = getIRPosition();
+ const Value &V = IRP.getAssociatedValue();
+ const DataLayout &DL = A.getDataLayout();
+
+ // TODO: This context sensitive query should be removed once we can do
+ // context sensitive queries in the genericValueTraversal below.
+ if (isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, IRP.getCtxI(),
+ /* TODO: DT */ nullptr))
+ indicateOptimisticFixpoint();
+ }
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = Base::updateImpl(A);
+ if (isKnownNonNull())
+ return Change;
+
+ if (!NullIsDefined) {
+ const auto &DerefAA = A.getAAFor<AADereferenceable>(*this, getIRPosition());
+ if (DerefAA.getAssumedDereferenceableBytes())
+ return Change;
+ }
+
+ const DataLayout &DL = A.getDataLayout();
+
+ auto VisitValueCB = [&](Value &V, AAAlign::StateType &T,
+ bool Stripped) -> bool {
+ const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V));
+ if (!Stripped && this == &AA) {
+ if (!isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr,
+ /* CtxI */ getCtxI(),
+ /* TODO: DT */ nullptr))
+ T.indicatePessimisticFixpoint();
+ } else {
+ // Use abstract attribute information.
+ const AANonNull::StateType &NS =
+ static_cast<const AANonNull::StateType &>(AA.getState());
+ T ^= NS;
+ }
+ return T.isValidState();
+ };
+
+ StateType T;
+ if (!genericValueTraversal<AANonNull, StateType>(A, getIRPosition(), *this,
+ T, VisitValueCB))
+ return indicatePessimisticFixpoint();
+
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
+};
+
+/// NonNull attribute for function return value.
+struct AANonNullReturned final
+ : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl> {
+ AANonNullReturned(const IRPosition &IRP)
+ : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl>(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
+};
+
+/// NonNull attribute for function argument.
+struct AANonNullArgument final
+ : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull,
+ AANonNullImpl> {
+ AANonNullArgument(const IRPosition &IRP)
+ : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull,
+ AANonNullImpl>(
+ IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) }
+};
+
+struct AANonNullCallSiteArgument final : AANonNullFloating {
+ AANonNullCallSiteArgument(const IRPosition &IRP) : AANonNullFloating(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnull) }
+};
+
+/// NonNull attribute for a call site return position.
+struct AANonNullCallSiteReturned final
+ : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull,
+ AANonNullImpl> {
+ AANonNullCallSiteReturned(const IRPosition &IRP)
+ : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull,
+ AANonNullImpl>(
+ IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) }
+};
+
+/// ------------------------ No-Recurse Attributes ----------------------------
+
+struct AANoRecurseImpl : public AANoRecurse {
+ AANoRecurseImpl(const IRPosition &IRP) : AANoRecurse(IRP) {}
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ return getAssumed() ? "norecurse" : "may-recurse";
+ }
+};
+
+struct AANoRecurseFunction final : AANoRecurseImpl {
+ AANoRecurseFunction(const IRPosition &IRP) : AANoRecurseImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoRecurseImpl::initialize(A);
+ if (const Function *F = getAnchorScope())
+ if (A.getInfoCache().getSccSize(*F) == 1)
+ return;
indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
}
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
- auto Opcodes = {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
- (unsigned)Instruction::Call};
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
- for (unsigned Opcode : Opcodes) {
- for (Instruction *I : OpcodeInstMap[Opcode]) {
- // At this point we handled all read/write effects and they are all
- // nosync, so they can be skipped.
- if (I->mayReadOrWriteMemory())
- continue;
+ auto CheckForNoRecurse = [&](Instruction &I) {
+ ImmutableCallSite ICS(&I);
+ if (ICS.hasFnAttr(Attribute::NoRecurse))
+ return true;
- ImmutableCallSite ICS(I);
+ const auto &NoRecurseAA =
+ A.getAAFor<AANoRecurse>(*this, IRPosition::callsite_function(ICS));
+ if (!NoRecurseAA.isAssumedNoRecurse())
+ return false;
- // non-convergent and readnone imply nosync.
- if (!ICS.isConvergent())
- continue;
+ // Recursion to the same function
+ if (ICS.getCalledFunction() == getAnchorScope())
+ return false;
+
+ return true;
+ };
+
+ if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(norecurse) }
+};
+
+/// NoRecurse attribute deduction for a call site.
+struct AANoRecurseCallSite final : AANoRecurseImpl {
+ AANoRecurseCallSite(const IRPosition &IRP) : AANoRecurseImpl(IRP) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoRecurseImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoRecurse::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); }
+};
+
+/// ------------------------ Will-Return Attributes ----------------------------
+
+// Helper function that checks whether a function has any cycle.
+// TODO: Replace with more efficient code
+static bool containsCycle(Function &F) {
+ SmallPtrSet<BasicBlock *, 32> Visited;
+
+ // Traverse BBs by DFS and check whether a successor is already visited.
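+ // Note: Visited contains all blocks seen so far, so a join block reached on
+ // two acyclic paths (e.g. the tail of an if/else diamond) is reported as a
+ // cycle as well; this over-approximation is safe for the conservative use
+ // below.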
+ for (BasicBlock *BB : depth_first(&F)) {
+ Visited.insert(BB);
+ for (auto *SuccBB : successors(BB)) {
+ if (Visited.count(SuccBB))
+ return true;
}
}
+ return false;
+}
- return ChangeStatus::UNCHANGED;
+// Helper function that checks whether the function has a loop which might
+// become an endless loop.
+// FIXME: Any cycle is regarded as an endless loop for now.
+// We have to allow some patterns.
+static bool containsPossiblyEndlessLoop(Function *F) {
+ return !F || !F->hasExactDefinition() || containsCycle(*F);
}
-/// ------------------------ No-Free Attributes ----------------------------
+struct AAWillReturnImpl : public AAWillReturn {
+ AAWillReturnImpl(const IRPosition &IRP) : AAWillReturn(IRP) {}
-struct AANoFreeFunction : AbstractAttribute, BooleanState {
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAWillReturn::initialize(A);
- /// See AbstractAttribute::AbstractAttribute(...).
- AANoFreeFunction(Function &F, InformationCache &InfoCache)
- : AbstractAttribute(F, InfoCache) {}
+ Function *F = getAssociatedFunction();
+ if (containsPossiblyEndlessLoop(F))
+ indicatePessimisticFixpoint();
+ }
- /// See AbstractAttribute::getState()
- ///{
- AbstractState &getState() override { return *this; }
- const AbstractState &getState() const override { return *this; }
- ///}
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto CheckForWillReturn = [&](Instruction &I) {
+ IRPosition IPos = IRPosition::callsite_function(ImmutableCallSite(&I));
+ const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, IPos);
+ if (WillReturnAA.isKnownWillReturn())
+ return true;
+ if (!WillReturnAA.isAssumedWillReturn())
+ return false;
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(*this, IPos);
+ return NoRecurseAA.isAssumedNoRecurse();
+ };
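+ // Note: a merely assumed (not known) willreturn callee must also be
+ // norecurse; otherwise a recursive call chain could justify its own
+ // willreturn assumption circularly even though it may never terminate.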
+
+ if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ return getAssumed() ? "willreturn" : "may-noreturn";
+ }
+};
+
+struct AAWillReturnFunction final : AAWillReturnImpl {
+ AAWillReturnFunction(const IRPosition &IRP) : AAWillReturnImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(willreturn) }
+};
+
+/// WillReturn attribute deduction for a call site.
+struct AAWillReturnCallSite final : AAWillReturnImpl {
+ AAWillReturnCallSite(const IRPosition &IRP) : AAWillReturnImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAWillReturnImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AAWillReturn::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); }
+};
+
+/// ------------------------ NoAlias Argument Attribute ------------------------
+
+struct AANoAliasImpl : AANoAlias {
+ AANoAliasImpl(const IRPosition &IRP) : AANoAlias(IRP) {}
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "noalias" : "may-alias";
+ }
+};
+
+/// NoAlias attribute for a floating value.
+struct AANoAliasFloating final : AANoAliasImpl {
+ AANoAliasFloating(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoAliasImpl::initialize(A);
+ Value &Val = getAssociatedValue();
+ if (isa<AllocaInst>(Val))
+ indicateOptimisticFixpoint();
+ if (isa<ConstantPointerNull>(Val) &&
+ Val.getType()->getPointerAddressSpace() == 0)
+ indicateOptimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Implement this.
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(noalias)
+ }
+};
+
+/// NoAlias attribute for an argument.
+struct AANoAliasArgument final
+ : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> {
+ AANoAliasArgument(const IRPosition &IRP)
+ : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) }
+};
+
+struct AANoAliasCallSiteArgument final : AANoAliasImpl {
+ AANoAliasCallSiteArgument(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // See callsite argument attribute and callee argument attribute.
+ ImmutableCallSite ICS(&getAnchorValue());
+ if (ICS.paramHasAttr(getArgNo(), Attribute::NoAlias))
+ indicateOptimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // We can deduce "noalias" if the following conditions hold.
+ // (i) Associated value is assumed to be noalias in the definition.
+ // (ii) Associated value is assumed to be no-capture in all the uses
+ // possibly executed before this callsite.
+ // (iii) There is no other pointer argument which could alias with the
+ // value.
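+ //
+ // For example (hypothetical IR): in `call void @g(i8* %p, i8* %q)`, %p can
+ // only become noalias if %p is noalias at its definition, %p is not captured
+ // before the call, and alias analysis shows %p and %q do not alias.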
+
+ const Value &V = getAssociatedValue();
+ const IRPosition IRP = IRPosition::value(V);
+
+ // (i) Check whether noalias holds in the definition.
+
+ auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP);
+
+ if (!NoAliasAA.isAssumedNoAlias())
+ return indicatePessimisticFixpoint();
+
+ LLVM_DEBUG(dbgs() << "[Attributor][AANoAliasCSArg] " << V
+ << " is assumed NoAlias in the definition\n");
+
+ // (ii) Check whether the value is captured in the scope using AANoCapture.
+ // FIXME: This is conservative; it would be better to look at the CFG and
+ // check only uses possibly executed before this callsite.
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+ auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
+ if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ LLVM_DEBUG(
+ dbgs() << "[Attributor][AANoAliasCSArg] " << V
+ << " cannot be noalias as it is potentially captured\n");
+ return indicatePessimisticFixpoint();
+ }
+
+ // (iii) Check that there is no other pointer argument which could alias
+ // with the value.
+ ImmutableCallSite ICS(&getAnchorValue());
+ for (unsigned i = 0; i < ICS.getNumArgOperands(); i++) {
+ if (getArgNo() == (int)i)
+ continue;
+ const Value *ArgOp = ICS.getArgOperand(i);
+ if (!ArgOp->getType()->isPointerTy())
+ continue;
+
+ if (const Function *F = getAnchorScope()) {
+ if (AAResults *AAR = A.getInfoCache().getAAResultsForFunction(*F)) {
+ bool IsAliasing = !AAR->isNoAlias(&getAssociatedValue(), ArgOp);
+ LLVM_DEBUG(dbgs()
+ << "[Attributor][NoAliasCSArg] Check alias between "
+ "callsite arguments: "
+ << getAssociatedValue() << " " << *ArgOp << " => "
+ << (IsAliasing ? "" : "no-") << "alias\n");
+
+ if (!IsAliasing)
+ continue;
+ }
+ }
+ return indicatePessimisticFixpoint();
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noalias) }
+};
+
+/// NoAlias attribute for function return value.
+struct AANoAliasReturned final : AANoAliasImpl {
+ AANoAliasReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ virtual ChangeStatus updateImpl(Attributor &A) override {
+
+ auto CheckReturnValue = [&](Value &RV) -> bool {
+ if (Constant *C = dyn_cast<Constant>(&RV))
+ if (C->isNullValue() || isa<UndefValue>(C))
+ return true;
+
+ /// For now, we can only deduce noalias if we have call sites.
+ /// FIXME: add more support.
+ ImmutableCallSite ICS(&RV);
+ if (!ICS)
+ return false;
+
+ const IRPosition &RVPos = IRPosition::value(RV);
+ const auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, RVPos);
+ if (!NoAliasAA.isAssumedNoAlias())
+ return false;
+
+ const auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, RVPos);
+ return NoCaptureAA.isAssumedNoCaptureMaybeReturned();
+ };
+
+ if (!A.checkForAllReturnedValues(CheckReturnValue, *this))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noalias) }
+};
+
+/// NoAlias attribute deduction for a call site return value.
+struct AANoAliasCallSiteReturned final : AANoAliasImpl {
+ AANoAliasCallSiteReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoAliasImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::returned(*F);
+ auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AANoAlias::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); }
+};
+
+/// -------------------AAIsDead Function Attribute-----------------------
+
+struct AAIsDeadImpl : public AAIsDead {
+ AAIsDeadImpl(const IRPosition &IRP) : AAIsDead(IRP) {}
+
+ void initialize(Attributor &A) override {
+ const Function *F = getAssociatedFunction();
+ if (F && !F->isDeclaration())
+ exploreFromEntry(A, F);
+ }
+
+ void exploreFromEntry(Attributor &A, const Function *F) {
+ ToBeExploredPaths.insert(&(F->getEntryBlock().front()));
+
+ for (size_t i = 0; i < ToBeExploredPaths.size(); ++i)
+ if (const Instruction *NextNoReturnI =
+ findNextNoReturn(A, ToBeExploredPaths[i]))
+ NoReturnCalls.insert(NextNoReturnI);
+
+ // Mark the block live after we looked for no-return instructions.
+ assumeLive(A, F->getEntryBlock());
+ }
+
+ /// Find the next assumed noreturn instruction in the block of \p I starting
+ /// from, thus including, \p I.
+ ///
+ /// The caller is responsible for monitoring the ToBeExploredPaths set as new
+ /// instructions discovered in other basic blocks will be placed in there.
+ ///
+ /// \returns The next assumed noreturn instruction in the block of \p I
+ /// starting from, thus including, \p I.
+ const Instruction *findNextNoReturn(Attributor &A, const Instruction *I);
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
- return getAssumed() ? "nofree" : "may-free";
+ return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" +
+ std::to_string(getAssociatedFunction()->size()) + "][#NRI " +
+ std::to_string(NoReturnCalls.size()) + "]";
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ assert(getState().isValidState() &&
+ "Attempted to manifest an invalid state!");
+
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ Function &F = *getAssociatedFunction();
+
+ if (AssumedLiveBlocks.empty()) {
+ A.deleteAfterManifest(F);
+ return ChangeStatus::CHANGED;
+ }
+
+ // Flag to determine if we can change an invoke to a call assuming the
+ // callee is nounwind. This is not possible if the personality of the
+ // function allows catching asynchronous exceptions.
+ bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F);
+
+ for (const Instruction *NRC : NoReturnCalls) {
+ Instruction *I = const_cast<Instruction *>(NRC);
+ BasicBlock *BB = I->getParent();
+ Instruction *SplitPos = I->getNextNode();
+ // TODO: mark stuff before unreachable instructions as dead.
+
+ if (auto *II = dyn_cast<InvokeInst>(I)) {
+ // If we keep the invoke, the split position is at the beginning of the
+ // normal destination block (it invokes a noreturn function after all).
+ BasicBlock *NormalDestBB = II->getNormalDest();
+ SplitPos = &NormalDestBB->front();
+
+ /// Invoke is replaced with a call and unreachable is placed after it if
+ /// the callee is nounwind and noreturn. Otherwise, we keep the invoke
+ /// and only place an unreachable in the normal successor.
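+ ///
+ /// For illustration (hypothetical IR): `invoke void @f() to label %ok
+ /// unwind label %lp` with @f noreturn and nounwind becomes `call void @f()`
+ /// followed by an unreachable; if @f may unwind, the invoke is kept and only
+ /// the edge to %ok is cut off.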
+ if (Invoke2CallAllowed) {
+ if (II->getCalledFunction()) {
+ const IRPosition &IPos = IRPosition::callsite_function(*II);
+ const auto &AANoUnw = A.getAAFor<AANoUnwind>(*this, IPos);
+ if (AANoUnw.isAssumedNoUnwind()) {
+ LLVM_DEBUG(dbgs()
+ << "[AAIsDead] Replace invoke with call inst\n");
+ // We do not need an invoke (II) but instead want a call followed
+ // by an unreachable. However, we do not remove II as other
+ // abstract attributes might have it cached as part of their
+ // results. Given that we modify the CFG anyway, we simply keep II
+ // around but in a new dead block. To avoid II being live through
+ // a different edge we have to ensure the block we place it in is
+ // only reached from the current block of II and then not reached
+ // at all when we insert the unreachable.
+ SplitBlockPredecessors(NormalDestBB, {BB}, ".i2c");
+ CallInst *CI = createCallMatchingInvoke(II);
+ CI->insertBefore(II);
+ CI->takeName(II);
+ II->replaceAllUsesWith(CI);
+ SplitPos = CI->getNextNode();
+ }
+ }
+ }
+
+ if (SplitPos == &NormalDestBB->front()) {
+ // If this is an invoke of a noreturn function the edge to the normal
+ // destination block is dead but not necessarily the block itself.
+ // TODO: We need to move to an edge based system during deduction and
+ // also manifest.
+ assert(!NormalDestBB->isLandingPad() &&
+ "Expected the normal destination not to be a landingpad!");
+ if (NormalDestBB->getUniquePredecessor() == BB) {
+ assumeLive(A, *NormalDestBB);
+ } else {
+ BasicBlock *SplitBB =
+ SplitBlockPredecessors(NormalDestBB, {BB}, ".dead");
+ // The split block is live even if it contains only an unreachable
+ // instruction at the end.
+ assumeLive(A, *SplitBB);
+ SplitPos = SplitBB->getTerminator();
+ HasChanged = ChangeStatus::CHANGED;
+ }
+ }
+ }
+
+ if (isa_and_nonnull<UnreachableInst>(SplitPos))
+ continue;
+
+ BB = SplitPos->getParent();
+ SplitBlock(BB, SplitPos);
+ changeToUnreachable(BB->getTerminator(), /* UseLLVMTrap */ false);
+ HasChanged = ChangeStatus::CHANGED;
+ }
+
+ for (BasicBlock &BB : F)
+ if (!AssumedLiveBlocks.count(&BB))
+ A.deleteAfterManifest(BB);
+
+ return HasChanged;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
- /// See AbstractAttribute::getAttrKind().
- Attribute::AttrKind getAttrKind() const override { return ID; }
+ /// See AAIsDead::isAssumedDead(BasicBlock *).
+ bool isAssumedDead(const BasicBlock *BB) const override {
+ assert(BB->getParent() == getAssociatedFunction() &&
+ "BB must be in the same anchor scope function.");
+
+ if (!getAssumed())
+ return false;
+ return !AssumedLiveBlocks.count(BB);
+ }
+
+ /// See AAIsDead::isKnownDead(BasicBlock *).
+ bool isKnownDead(const BasicBlock *BB) const override {
+ return getKnown() && isAssumedDead(BB);
+ }
+
+ /// See AAIsDead::isAssumedDead(Instruction *I).
+ bool isAssumedDead(const Instruction *I) const override {
+ assert(I->getParent()->getParent() == getAssociatedFunction() &&
+ "Instruction must be in the same anchor scope function.");
+
+ if (!getAssumed())
+ return false;
+
+ // If it is not in AssumedLiveBlocks then it is for sure dead.
+ // Otherwise, it can still be after a noreturn call in a live block.
+ if (!AssumedLiveBlocks.count(I->getParent()))
+ return true;
+
+ // If it is not after a noreturn call, then it is live.
+ return isAfterNoReturn(I);
+ }
+
+ /// See AAIsDead::isKnownDead(Instruction *I).
+ bool isKnownDead(const Instruction *I) const override {
+ return getKnown() && isAssumedDead(I);
+ }
+
+ /// Check if the instruction is after a noreturn call, in other words, assumed dead.
+ bool isAfterNoReturn(const Instruction *I) const;
- /// Return true if "nofree" is assumed.
- bool isAssumedNoFree() const { return getAssumed(); }
+ /// Determine if \p F might catch asynchronous exceptions.
+ static bool mayCatchAsynchronousExceptions(const Function &F) {
+ return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F);
+ }
+
+ /// Assume \p BB is (partially) live now and indicate to the Attributor \p A
+ /// that internal functions called from \p BB should now be looked at.
+ void assumeLive(Attributor &A, const BasicBlock &BB) {
+ if (!AssumedLiveBlocks.insert(&BB).second)
+ return;
+
+ // We assume that all of BB is (probably) live now and if there are calls to
+ // internal functions we will assume that those are now live as well. This
+ // is a performance optimization for blocks with calls to a lot of internal
+ // functions. It can however cause dead functions to be treated as live.
+ for (const Instruction &I : BB)
+ if (ImmutableCallSite ICS = ImmutableCallSite(&I))
+ if (const Function *F = ICS.getCalledFunction())
+ if (F->hasLocalLinkage())
+ A.markLiveInternalFunction(*F);
+ }
- /// Return true if "nofree" is known.
- bool isKnownNoFree() const { return getKnown(); }
+ /// Collection of to-be-explored paths.
+ SmallSetVector<const Instruction *, 8> ToBeExploredPaths;
- /// The identifier used by the Attributor for this class of attributes.
- static constexpr Attribute::AttrKind ID = Attribute::NoFree;
+ /// Collection of all assumed live BasicBlocks.
+ DenseSet<const BasicBlock *> AssumedLiveBlocks;
+
+ /// Collection of calls with noreturn attribute, assumed or known.
+ SmallSetVector<const Instruction *, 4> NoReturnCalls;
};
-ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
+struct AAIsDeadFunction final : public AAIsDeadImpl {
+ AAIsDeadFunction(const IRPosition &IRP) : AAIsDeadImpl(IRP) {}
- // The map from instruction opcodes to those instructions in the function.
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECL(PartiallyDeadBlocks, Function,
+ "Number of basic blocks classified as partially dead");
+ BUILD_STAT_NAME(PartiallyDeadBlocks, Function) += NoReturnCalls.size();
+ }
+};
- for (unsigned Opcode :
- {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
- (unsigned)Instruction::Call}) {
- for (Instruction *I : OpcodeInstMap[Opcode]) {
+bool AAIsDeadImpl::isAfterNoReturn(const Instruction *I) const {
+ const Instruction *PrevI = I->getPrevNode();
+ while (PrevI) {
+ if (NoReturnCalls.count(PrevI))
+ return true;
+ PrevI = PrevI->getPrevNode();
+ }
+ return false;
+}
- auto ICS = ImmutableCallSite(I);
- auto *NoFreeAA = A.getAAFor<AANoFreeFunction>(*this, *I);
+const Instruction *AAIsDeadImpl::findNextNoReturn(Attributor &A,
+ const Instruction *I) {
+ const BasicBlock *BB = I->getParent();
+ const Function &F = *BB->getParent();
- if ((!NoFreeAA || !NoFreeAA->isAssumedNoFree()) &&
- !ICS.hasFnAttr(Attribute::NoFree)) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+ // Flag to determine if we can change an invoke to a call assuming the callee
+ // is nounwind. This is not possible if the personality of the function allows
+ // catching asynchronous exceptions.
+ bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F);
+
+ // TODO: We should have a function that determines if an "edge" is dead.
+ // Edges could be from an instruction to the next or from a terminator
+ // to the successor. For now, we need to special case the unwind block
+ // of InvokeInst below.
+
+ while (I) {
+ ImmutableCallSite ICS(I);
+
+ if (ICS) {
+ const IRPosition &IPos = IRPosition::callsite_function(ICS);
+ // Regardless of the no-return property of an invoke instruction, we only
+ // learn that the regular successor is not reachable through this
+ // instruction but the unwind block might still be.
+ if (auto *Invoke = dyn_cast<InvokeInst>(I)) {
+ // Use nounwind to justify that the unwind block is dead as well.
+ const auto &AANoUnw = A.getAAFor<AANoUnwind>(*this, IPos);
+ if (!Invoke2CallAllowed || !AANoUnw.isAssumedNoUnwind()) {
+ assumeLive(A, *Invoke->getUnwindDest());
+ ToBeExploredPaths.insert(&Invoke->getUnwindDest()->front());
+ }
}
+
+ const auto &NoReturnAA = A.getAAFor<AANoReturn>(*this, IPos);
+ if (NoReturnAA.isAssumedNoReturn())
+ return I;
}
+
+ I = I->getNextNode();
}
- return ChangeStatus::UNCHANGED;
+
+ // get new paths (reachable blocks).
+ for (const BasicBlock *SuccBB : successors(BB)) {
+ assumeLive(A, *SuccBB);
+ ToBeExploredPaths.insert(&SuccBB->front());
+ }
+
+ // No noreturn instruction found.
+ return nullptr;
}
-/// ------------------------ NonNull Argument Attribute ------------------------
-struct AANonNullImpl : AANonNull, BooleanState {
+ChangeStatus AAIsDeadImpl::updateImpl(Attributor &A) {
+ ChangeStatus Status = ChangeStatus::UNCHANGED;
+
+ // Temporary collection to iterate over existing noreturn instructions. This
+ // will allow easier modification of the NoReturnCalls collection.
+ SmallVector<const Instruction *, 8> NoReturnChanged;
+
+ for (const Instruction *I : NoReturnCalls)
+ NoReturnChanged.push_back(I);
+
+ for (const Instruction *I : NoReturnChanged) {
+ size_t Size = ToBeExploredPaths.size();
+
+ const Instruction *NextNoReturnI = findNextNoReturn(A, I);
+ if (NextNoReturnI != I) {
+ Status = ChangeStatus::CHANGED;
+ NoReturnCalls.remove(I);
+ if (NextNoReturnI)
+ NoReturnCalls.insert(NextNoReturnI);
+ }
- AANonNullImpl(Value &V, InformationCache &InfoCache)
- : AANonNull(V, InfoCache) {}
+ // Explore new paths.
+ while (Size != ToBeExploredPaths.size()) {
+ Status = ChangeStatus::CHANGED;
+ if (const Instruction *NextNoReturnI =
+ findNextNoReturn(A, ToBeExploredPaths[Size++]))
+ NoReturnCalls.insert(NextNoReturnI);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "[AAIsDead] AssumedLiveBlocks: "
+ << AssumedLiveBlocks.size() << " Total number of blocks: "
+ << getAssociatedFunction()->size() << "\n");
- AANonNullImpl(Value *AssociatedVal, Value &AnchoredValue,
- InformationCache &InfoCache)
- : AANonNull(AssociatedVal, AnchoredValue, InfoCache) {}
+ // If we know everything is live there is no need to query for liveness.
+ if (NoReturnCalls.empty() &&
+ getAssociatedFunction()->size() == AssumedLiveBlocks.size()) {
+ // Indicating a pessimistic fixpoint will cause the state to be "invalid"
+ // which will cause the Attributor to not return the AAIsDead on request,
+ // which will prevent us from querying isAssumedDead().
+ indicatePessimisticFixpoint();
+ assert(!isValidState() && "Expected an invalid state!");
+ Status = ChangeStatus::CHANGED;
+ }
+
+ return Status;
+}
+
+/// Liveness information for a call site.
+struct AAIsDeadCallSite final : AAIsDeadImpl {
+ AAIsDeadCallSite(const IRPosition &IRP) : AAIsDeadImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites instead of
+ // redirecting requests to the callee.
+ llvm_unreachable("Abstract attributes for liveness are not "
+ "supported for call sites yet!");
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// -------------------- Dereferenceable Argument Attribute --------------------
+
+template <>
+ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
+ const DerefState &R) {
+ ChangeStatus CS0 = clampStateAndIndicateChange<IntegerState>(
+ S.DerefBytesState, R.DerefBytesState);
+ ChangeStatus CS1 =
+ clampStateAndIndicateChange<IntegerState>(S.GlobalState, R.GlobalState);
+ return CS0 | CS1;
+}
+
+struct AADereferenceableImpl : AADereferenceable {
+ AADereferenceableImpl(const IRPosition &IRP) : AADereferenceable(IRP) {}
+ using StateType = DerefState;
+
+ void initialize(Attributor &A) override {
+ SmallVector<Attribute, 4> Attrs;
+ getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull},
+ Attrs);
+ for (const Attribute &Attr : Attrs)
+ takeKnownDerefBytesMaximum(Attr.getValueAsInt());
+
+ NonNullAA = &A.getAAFor<AANonNull>(*this, getIRPosition());
+
+ const IRPosition &IRP = this->getIRPosition();
+ bool IsFnInterface = IRP.isFnInterfaceKind();
+ const Function *FnScope = IRP.getAnchorScope();
+ if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition()))
+ indicatePessimisticFixpoint();
+ }
/// See AbstractAttribute::getState()
/// {
- AbstractState &getState() override { return *this; }
- const AbstractState &getState() const override { return *this; }
+ StateType &getState() override { return *this; }
+ const StateType &getState() const override { return *this; }
/// }
+ /// See AAFromMustBeExecutedContext
+ bool followUse(Attributor &A, const Use *U, const Instruction *I) {
+ bool IsNonNull = false;
+ bool TrackUse = false;
+ int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse(
+ A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse);
+ takeKnownDerefBytesMaximum(DerefBytes);
+ return TrackUse;
+ }
+
+ void getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ // TODO: Add *_globally support
+ if (isAssumedNonNull())
+ Attrs.emplace_back(Attribute::getWithDereferenceableBytes(
+ Ctx, getAssumedDereferenceableBytes()));
+ else
+ Attrs.emplace_back(Attribute::getWithDereferenceableOrNullBytes(
+ Ctx, getAssumedDereferenceableBytes()));
+ }
+
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
- return getAssumed() ? "nonnull" : "may-null";
+ if (!getAssumedDereferenceableBytes())
+ return "unknown-dereferenceable";
+ return std::string("dereferenceable") +
+ (isAssumedNonNull() ? "" : "_or_null") +
+ (isAssumedGlobal() ? "_globally" : "") + "<" +
+ std::to_string(getKnownDereferenceableBytes()) + "-" +
+ std::to_string(getAssumedDereferenceableBytes()) + ">";
}
+};
+
+/// Dereferenceable attribute for a floating value.
+struct AADereferenceableFloating
+ : AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl> {
+ using Base =
+ AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl>;
+ AADereferenceableFloating(const IRPosition &IRP) : Base(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = Base::updateImpl(A);
+
+ const DataLayout &DL = A.getDataLayout();
+
+ auto VisitValueCB = [&](Value &V, DerefState &T, bool Stripped) -> bool {
+ unsigned IdxWidth =
+ DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
+ APInt Offset(IdxWidth, 0);
+ const Value *Base =
+ V.stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
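+ // E.g. (hypothetical IR) for `%v = getelementptr inbounds i32, i32* %b, i64 2`
+ // this reaches %b with a byte Offset of 8, so %v is assumed dereferenceable
+ // for at most deref(%b) - 8 bytes (clamped at zero below).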
+
+ const auto &AA =
+ A.getAAFor<AADereferenceable>(*this, IRPosition::value(*Base));
+ int64_t DerefBytes = 0;
+ if (!Stripped && this == &AA) {
+ // Use IR information if we did not strip anything.
+ // TODO: track globally.
+ bool CanBeNull;
+ DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull);
+ T.GlobalState.indicatePessimisticFixpoint();
+ } else {
+ const DerefState &DS = static_cast<const DerefState &>(AA.getState());
+ DerefBytes = DS.DerefBytesState.getAssumed();
+ T.GlobalState &= DS.GlobalState;
+ }
+
+ // For now we do not try to "increase" dereferenceability due to negative
+ // indices as we first have to come up with code to deal with loops and
+ // with overflows of the dereferenceable bytes.
+ int64_t OffsetSExt = Offset.getSExtValue();
+ if (OffsetSExt < 0)
+ OffsetSExt = 0;
+
+ T.takeAssumedDerefBytesMinimum(
+ std::max(int64_t(0), DerefBytes - OffsetSExt));
+
+ if (this == &AA) {
+ if (!Stripped) {
+ // If nothing was stripped IR information is all we got.
+ T.takeKnownDerefBytesMaximum(
+ std::max(int64_t(0), DerefBytes - OffsetSExt));
+ T.indicatePessimisticFixpoint();
+ } else if (OffsetSExt > 0) {
+ // If something was stripped but there is circular reasoning we look
+ // for the offset. If it is positive we basically decrease the
+ // dereferenceable bytes in a circular loop now, which will simply
+ // drive them down to the known value in a very slow way which we
+ // can accelerate.
+ T.indicatePessimisticFixpoint();
+ }
+ }
+
+ return T.isValidState();
+ };
- /// See AANonNull::isAssumedNonNull().
- bool isAssumedNonNull() const override { return getAssumed(); }
+ DerefState T;
+ if (!genericValueTraversal<AADereferenceable, DerefState>(
+ A, getIRPosition(), *this, T, VisitValueCB))
+ return indicatePessimisticFixpoint();
- /// See AANonNull::isKnownNonNull().
- bool isKnownNonNull() const override { return getKnown(); }
+ return Change | clampStateAndIndicateChange(getState(), T);
+ }
- /// Generate a predicate that checks if a given value is assumed nonnull.
- /// The generated function returns true if a value satisfies any of
- /// following conditions.
- /// (i) A value is known nonZero(=nonnull).
- /// (ii) A value is associated with AANonNull and its isAssumedNonNull() is
- /// true.
- std::function<bool(Value &)> generatePredicate(Attributor &);
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(dereferenceable)
+ }
};
-std::function<bool(Value &)> AANonNullImpl::generatePredicate(Attributor &A) {
- // FIXME: The `AAReturnedValues` should provide the predicate with the
- // `ReturnInst` vector as well such that we can use the control flow sensitive
- // version of `isKnownNonZero`. This should fix `test11` in
- // `test/Transforms/FunctionAttrs/nonnull.ll`
+/// Dereferenceable attribute for a return value.
+struct AADereferenceableReturned final
+ : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl,
+ DerefState> {
+ AADereferenceableReturned(const IRPosition &IRP)
+ : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl,
+ DerefState>(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(dereferenceable)
+ }
+};
- std::function<bool(Value &)> Pred = [&](Value &RV) -> bool {
- if (isKnownNonZero(&RV, getAnchorScope().getParent()->getDataLayout()))
- return true;
+/// Dereferenceable attribute for an argument
+struct AADereferenceableArgument final
+ : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<
+ AADereferenceable, AADereferenceableImpl, DerefState> {
+ using Base = AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<
+ AADereferenceable, AADereferenceableImpl, DerefState>;
+ AADereferenceableArgument(const IRPosition &IRP) : Base(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(dereferenceable)
+ }
+};
- auto *NonNullAA = A.getAAFor<AANonNull>(*this, RV);
+/// Dereferenceable attribute for a call site argument.
+struct AADereferenceableCallSiteArgument final : AADereferenceableFloating {
+ AADereferenceableCallSiteArgument(const IRPosition &IRP)
+ : AADereferenceableFloating(IRP) {}
- ImmutableCallSite ICS(&RV);
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(dereferenceable)
+ }
+};
- if ((!NonNullAA || !NonNullAA->isAssumedNonNull()) &&
- (!ICS || !ICS.hasRetAttr(Attribute::NonNull)))
- return false;
+/// Dereferenceable attribute deduction for a call site return value.
+struct AADereferenceableCallSiteReturned final
+ : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<
+ AADereferenceable, AADereferenceableImpl> {
+ using Base = AACallSiteReturnedFromReturnedAndMustBeExecutedContext<
+ AADereferenceable, AADereferenceableImpl>;
+ AADereferenceableCallSiteReturned(const IRPosition &IRP) : Base(IRP) {}
- return true;
- };
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Base::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
- return Pred;
-}
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+
+ ChangeStatus Change = Base::updateImpl(A);
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::returned(*F);
+ auto &FnAA = A.getAAFor<AADereferenceable>(*this, FnPos);
+ return Change |
+ clampStateAndIndicateChange(
+ getState(), static_cast<const DerefState &>(FnAA.getState()));
+ }
-/// NonNull attribute for function return value.
-struct AANonNullReturned : AANonNullImpl {
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CS_ATTR(dereferenceable);
+ }
+};
- AANonNullReturned(Function &F, InformationCache &InfoCache)
- : AANonNullImpl(F, InfoCache) {}
+// ------------------------ Align Argument Attribute ------------------------
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_RETURNED; }
+struct AAAlignImpl : AAAlign {
+ AAAlignImpl(const IRPosition &IRP) : AAAlign(IRP) {}
- /// See AbstractAttriubute::initialize(...).
+ // Max alignment value allowed in IR
+ static const unsigned MAX_ALIGN = 1U << 29;
+
+ /// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- Function &F = getAnchorScope();
+ takeAssumedMinimum(MAX_ALIGN);
- // Already nonnull.
- if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::NonNull))
- indicateOptimisticFixpoint();
+ SmallVector<Attribute, 4> Attrs;
+ getAttrs({Attribute::Alignment}, Attrs);
+ for (const Attribute &Attr : Attrs)
+ takeKnownMaximum(Attr.getValueAsInt());
+
+ if (getIRPosition().isFnInterfaceKind() &&
+ (!getAssociatedFunction() ||
+ !getAssociatedFunction()->hasExactDefinition()))
+ indicatePessimisticFixpoint();
}
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ // Check for users that allow alignment annotations.
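+ // E.g. (hypothetical IR) if the anchor value %p is assumed 16-byte aligned,
+ // a user `store i32 0, i32* %p, align 4` is rewritten below to carry
+ // `align 16`.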
+ Value &AnchorVal = getIRPosition().getAnchorValue();
+ for (const Use &U : AnchorVal.uses()) {
+ if (auto *SI = dyn_cast<StoreInst>(U.getUser())) {
+ if (SI->getPointerOperand() == &AnchorVal)
+ if (SI->getAlignment() < getAssumedAlign()) {
+ STATS_DECLTRACK(AAAlign, Store,
+ "Number of times alignemnt added to a store");
+ SI->setAlignment(Align(getAssumedAlign()));
+ Changed = ChangeStatus::CHANGED;
+ }
+ } else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) {
+ if (LI->getPointerOperand() == &AnchorVal)
+ if (LI->getAlignment() < getAssumedAlign()) {
+ LI->setAlignment(Align(getAssumedAlign()));
+ STATS_DECLTRACK(AAAlign, Load,
+ "Number of times alignemnt added to a load");
+ Changed = ChangeStatus::CHANGED;
+ }
+ }
+ }
+
+ return AAAlign::manifest(A) | Changed;
+ }
+
+ // TODO: Provide a helper to determine the implied ABI alignment and check
+ // that value in the existing manifest method and in a new one for
+ // AAAlignImpl, to avoid making the alignment explicit if it did not improve.
+
+ /// See AbstractAttribute::getDeducedAttributes
+ virtual void
+ getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ if (getAssumedAlign() > 1)
+ Attrs.emplace_back(
+ Attribute::getWithAlignment(Ctx, Align(getAssumedAlign())));
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) +
+ "-" + std::to_string(getAssumedAlign()) + ">")
+ : "unknown-align";
+ }
+};
+
+/// Align attribute for a floating value.
+struct AAAlignFloating : AAAlignImpl {
+ AAAlignFloating(const IRPosition &IRP) : AAAlignImpl(IRP) {}
+
/// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
+ ChangeStatus updateImpl(Attributor &A) override {
+ const DataLayout &DL = A.getDataLayout();
+
+ auto VisitValueCB = [&](Value &V, AAAlign::StateType &T,
+ bool Stripped) -> bool {
+ const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V));
+ if (!Stripped && this == &AA) {
+ // Use only IR information if we did not strip anything.
+ const MaybeAlign PA = V.getPointerAlignment(DL);
+ T.takeKnownMaximum(PA ? PA->value() : 0);
+ T.indicatePessimisticFixpoint();
+ } else {
+ // Use abstract attribute information.
+ const AAAlign::StateType &DS =
+ static_cast<const AAAlign::StateType &>(AA.getState());
+ T ^= DS;
+ }
+ return T.isValidState();
+ };
+
+ StateType T;
+ if (!genericValueTraversal<AAAlign, StateType>(A, getIRPosition(), *this, T,
+ VisitValueCB))
+ return indicatePessimisticFixpoint();
+
+    // TODO: If we know we visited all incoming values, and thus none are
+    // assumed dead, we can take the known information from the state T.
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(align) }
};
-ChangeStatus AANonNullReturned::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
+/// Align attribute for function return value.
+struct AAAlignReturned final
+ : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl> {
+ AAAlignReturned(const IRPosition &IRP)
+ : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>(IRP) {}
- auto *AARetVal = A.getAAFor<AAReturnedValues>(*this, F);
- if (!AARetVal) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) }
+};
+
+/// Align attribute for function argument.
+struct AAAlignArgument final
+ : AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl> {
+ AAAlignArgument(const IRPosition &IRP)
+ : AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl>(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(aligned) }
+};
+
+struct AAAlignCallSiteArgument final : AAAlignFloating {
+ AAAlignCallSiteArgument(const IRPosition &IRP) : AAAlignFloating(IRP) {}
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ return AAAlignImpl::manifest(A);
}
- std::function<bool(Value &)> Pred = this->generatePredicate(A);
- if (!AARetVal->checkForallReturnedValues(Pred)) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(aligned) }
+};
+
+/// Align attribute deduction for a call site return value.
+struct AAAlignCallSiteReturned final : AAAlignImpl {
+ AAAlignCallSiteReturned(const IRPosition &IRP) : AAAlignImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAAlignImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
}
- return ChangeStatus::UNCHANGED;
-}
-/// NonNull attribute for function argument.
-struct AANonNullArgument : AANonNullImpl {
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::returned(*F);
+ auto &FnAA = A.getAAFor<AAAlign>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AAAlign::StateType &>(FnAA.getState()));
+ }
- AANonNullArgument(Argument &A, InformationCache &InfoCache)
- : AANonNullImpl(A, InfoCache) {}
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
+};
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
+/// ------------------ Function No-Return Attribute ----------------------------
+struct AANoReturnImpl : public AANoReturn {
+ AANoReturnImpl(const IRPosition &IRP) : AANoReturn(IRP) {}
- /// See AbstractAttriubute::initialize(...).
+ /// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- Argument *Arg = cast<Argument>(getAssociatedValue());
- if (Arg->hasNonNullAttr())
- indicateOptimisticFixpoint();
+ AANoReturn::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || F->hasFnAttribute(Attribute::WillReturn))
+ indicatePessimisticFixpoint();
}
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "noreturn" : "may-return";
+ }
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ virtual ChangeStatus updateImpl(Attributor &A) override {
+ const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, getIRPosition());
+ if (WillReturnAA.isKnownWillReturn())
+ return indicatePessimisticFixpoint();
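+    // The predicate below always fails, so the check succeeds only if there is
+    // no (live) return instruction to visit.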
+ auto CheckForNoReturn = [](Instruction &) { return false; };
+ if (!A.checkForAllInstructions(CheckForNoReturn, *this,
+ {(unsigned)Instruction::Ret}))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+};
+
+struct AANoReturnFunction final : AANoReturnImpl {
+ AANoReturnFunction(const IRPosition &IRP) : AANoReturnImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) }
+};
+
+/// NoReturn attribute deduction for a call site.
+struct AANoReturnCallSite final : AANoReturnImpl {
+ AANoReturnCallSite(const IRPosition &IRP) : AANoReturnImpl(IRP) {}
+
/// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoReturn::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); }
};
-/// NonNull attribute for a call site argument.
-struct AANonNullCallSiteArgument : AANonNullImpl {
+/// ----------------------- Variable Capturing ---------------------------------
- /// See AANonNullImpl::AANonNullImpl(...).
- AANonNullCallSiteArgument(CallSite CS, unsigned ArgNo,
- InformationCache &InfoCache)
- : AANonNullImpl(CS.getArgOperand(ArgNo), *CS.getInstruction(), InfoCache),
- ArgNo(ArgNo) {}
+/// A class to hold the state for no-capture attributes.
+struct AANoCaptureImpl : public AANoCapture {
+ AANoCaptureImpl(const IRPosition &IRP) : AANoCapture(IRP) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- CallSite CS(&getAnchoredValue());
- if (isKnownNonZero(getAssociatedValue(),
- getAnchorScope().getParent()->getDataLayout()) ||
- CS.paramHasAttr(ArgNo, getAttrKind()))
+ AANoCapture::initialize(A);
+
+ // You cannot "capture" null in the default address space.
+ if (isa<ConstantPointerNull>(getAssociatedValue()) &&
+ getAssociatedValue().getType()->getPointerAddressSpace() == 0) {
indicateOptimisticFixpoint();
+ return;
+ }
+
+ const IRPosition &IRP = getIRPosition();
+ const Function *F =
+ getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
+
+ // Check what state the associated function can actually capture.
+ if (F)
+ determineFunctionCaptureCapabilities(IRP, *F, *this);
+ else
+ indicatePessimisticFixpoint();
}
- /// See AbstractAttribute::updateImpl(Attributor &A).
+ /// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override {
- return MP_CALL_SITE_ARGUMENT;
- };
+  /// See AbstractAttribute::getDeducedAttributes(...).
+ virtual void
+ getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ if (!isAssumedNoCaptureMaybeReturned())
+ return;
+
+ if (getArgNo() >= 0) {
+ if (isAssumedNoCapture())
+ Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture));
+ else if (ManifestInternal)
+ Attrs.emplace_back(Attribute::get(Ctx, "no-capture-maybe-returned"));
+ }
+ }
+
+  /// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p State
+ /// depending on the ability of the function associated with \p IRP to capture
+ /// state in memory and through "returning/throwing", respectively.
+ static void determineFunctionCaptureCapabilities(const IRPosition &IRP,
+ const Function &F,
+ IntegerState &State) {
+ // TODO: Once we have memory behavior attributes we should use them here.
+
+ // If we know we cannot communicate or write to memory, we do not care about
+ // ptr2int anymore.
+ if (F.onlyReadsMemory() && F.doesNotThrow() &&
+ F.getReturnType()->isVoidTy()) {
+ State.addKnownBits(NO_CAPTURE);
+ return;
+ }
+
+    // A function cannot capture state in memory if it only reads memory; it
+    // can, however, return/throw state, and that state might be influenced by
+    // the pointer value, e.g., loading from a returned pointer might reveal a
+    // bit.
+ if (F.onlyReadsMemory())
+ State.addKnownBits(NOT_CAPTURED_IN_MEM);
+
+    // A function cannot communicate state back if it does not throw
+    // exceptions and does not return values.
+ if (F.doesNotThrow() && F.getReturnType()->isVoidTy())
+ State.addKnownBits(NOT_CAPTURED_IN_RET);
+
+ // Check existing "returned" attributes.
+ int ArgNo = IRP.getArgNo();
+ if (F.doesNotThrow() && ArgNo >= 0) {
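+      // If some argument carries the 'returned' attribute, the return value is
+      // that argument: if it is ours we may escape through the return,
+      // otherwise the return cannot leak our pointer.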
+      for (unsigned u = 0, e = F.arg_size(); u < e; ++u)
+ if (F.hasParamAttribute(u, Attribute::Returned)) {
+ if (u == unsigned(ArgNo))
+ State.removeAssumedBits(NOT_CAPTURED_IN_RET);
+ else if (F.onlyReadsMemory())
+ State.addKnownBits(NO_CAPTURE);
+ else
+ State.addKnownBits(NOT_CAPTURED_IN_RET);
+ break;
+ }
+ }
+ }
- // Return argument index of associated value.
- int getArgNo() const { return ArgNo; }
+ /// See AbstractState::getAsStr().
+ const std::string getAsStr() const override {
+ if (isKnownNoCapture())
+ return "known not-captured";
+ if (isAssumedNoCapture())
+ return "assumed not-captured";
+ if (isKnownNoCaptureMaybeReturned())
+ return "known not-captured-maybe-returned";
+ if (isAssumedNoCaptureMaybeReturned())
+ return "assumed not-captured-maybe-returned";
+ return "assumed-captured";
+ }
+};
+
+/// Attributor-aware capture tracker.
+struct AACaptureUseTracker final : public CaptureTracker {
+
+ /// Create a capture tracker that can lookup in-flight abstract attributes
+ /// through the Attributor \p A.
+ ///
+ /// If a use leads to a potential capture, \p CapturedInMemory is set and the
+ /// search is stopped. If a use leads to a return instruction,
+ /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed.
+ /// If a use leads to a ptr2int which may capture the value,
+ /// \p CapturedInInteger is set. If a use is found that is currently assumed
+ /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies
+ /// set. All values in \p PotentialCopies are later tracked as well. For every
+ /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0,
+ /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger
+ /// conservatively set to true.
+ AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA,
+ const AAIsDead &IsDeadAA, IntegerState &State,
+ SmallVectorImpl<const Value *> &PotentialCopies,
+ unsigned &RemainingUsesToExplore)
+ : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State),
+ PotentialCopies(PotentialCopies),
+ RemainingUsesToExplore(RemainingUsesToExplore) {}
+
+  /// Determine if \p V may be captured. *Also updates the state!*
+ bool valueMayBeCaptured(const Value *V) {
+ if (V->getType()->isPointerTy()) {
+ PointerMayBeCaptured(V, this);
+ } else {
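+      // The value was followed into a non-pointer (e.g., via ptr2int); capture
+      // tracking cannot reason about it, so give up.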
+ State.indicatePessimisticFixpoint();
+ }
+ return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
+ }
+
+ /// See CaptureTracker::tooManyUses().
+ void tooManyUses() override {
+ State.removeAssumedBits(AANoCapture::NO_CAPTURE);
+ }
+
+ bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override {
+ if (CaptureTracker::isDereferenceableOrNull(O, DL))
+ return true;
+ const auto &DerefAA =
+ A.getAAFor<AADereferenceable>(NoCaptureAA, IRPosition::value(*O));
+ return DerefAA.getAssumedDereferenceableBytes();
+ }
+
+ /// See CaptureTracker::captured(...).
+ bool captured(const Use *U) override {
+ Instruction *UInst = cast<Instruction>(U->getUser());
+ LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst
+ << "\n");
+
+ // Because we may reuse the tracker multiple times we keep track of the
+ // number of explored uses ourselves as well.
+ if (RemainingUsesToExplore-- == 0) {
+ LLVM_DEBUG(dbgs() << " - too many uses to explore!\n");
+ return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ /* Return */ true);
+ }
+
+ // Deal with ptr2int by following uses.
+ if (isa<PtrToIntInst>(UInst)) {
+ LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n");
+ return valueMayBeCaptured(UInst);
+ }
+
+ // Explicitly catch return instructions.
+ if (isa<ReturnInst>(UInst))
+ return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ /* Return */ true);
+
+ // For now we only use special logic for call sites. However, the tracker
+ // itself knows about a lot of other non-capturing cases already.
+ CallSite CS(UInst);
+ if (!CS || !CS.isArgOperand(U))
+ return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ /* Return */ true);
+
+ unsigned ArgNo = CS.getArgumentNo(U);
+ const IRPosition &CSArgPos = IRPosition::callsite_argument(CS, ArgNo);
+    // If we have an abstract no-capture attribute for the argument we can use
+ // it to justify a non-capture attribute here. This allows recursion!
+ auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos);
+ if (ArgNoCaptureAA.isAssumedNoCapture())
+ return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ /* Return */ false);
+ if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ addPotentialCopy(CS);
+ return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ /* Return */ false);
+ }
+
+    // Lastly, we could not find a reason no-capture can be assumed, so we don't.
+ return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ /* Return */ true);
+ }
+
+ /// Register \p CS as potential copy of the value we are checking.
+ void addPotentialCopy(CallSite CS) {
+ PotentialCopies.push_back(CS.getInstruction());
+ }
+
+ /// See CaptureTracker::shouldExplore(...).
+ bool shouldExplore(const Use *U) override {
+ // Check liveness.
+ return !IsDeadAA.isAssumedDead(cast<Instruction>(U->getUser()));
+ }
+
+ /// Update the state according to \p CapturedInMem, \p CapturedInInt, and
+ /// \p CapturedInRet, then return the appropriate value for use in the
+ /// CaptureTracker::captured() interface.
+ bool isCapturedIn(bool CapturedInMem, bool CapturedInInt,
+ bool CapturedInRet) {
+ LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int "
+ << CapturedInInt << "|Ret " << CapturedInRet << "]\n");
+ if (CapturedInMem)
+ State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_MEM);
+ if (CapturedInInt)
+ State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT);
+ if (CapturedInRet)
+ State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET);
+ return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
+ }
private:
- unsigned ArgNo;
+ /// The attributor providing in-flight abstract attributes.
+ Attributor &A;
+
+ /// The abstract attribute currently updated.
+ AANoCapture &NoCaptureAA;
+
+ /// The abstract liveness state.
+ const AAIsDead &IsDeadAA;
+
+ /// The state currently updated.
+ IntegerState &State;
+
+ /// Set of potential copies of the tracked value.
+ SmallVectorImpl<const Value *> &PotentialCopies;
+
+ /// Global counter to limit the number of explored uses.
+ unsigned &RemainingUsesToExplore;
+};
+
+ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
+ const IRPosition &IRP = getIRPosition();
+ const Value *V =
+ getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue();
+ if (!V)
+ return indicatePessimisticFixpoint();
+
+ const Function *F =
+ getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
+ assert(F && "Expected a function!");
+ const IRPosition &FnPos = IRPosition::function(*F);
+ const auto &IsDeadAA = A.getAAFor<AAIsDead>(*this, FnPos);
+
+ AANoCapture::StateType T;
+
+ // Readonly means we cannot capture through memory.
+ const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+ if (FnMemAA.isAssumedReadOnly()) {
+ T.addKnownBits(NOT_CAPTURED_IN_MEM);
+ if (FnMemAA.isKnownReadOnly())
+ addKnownBits(NOT_CAPTURED_IN_MEM);
+ }
+
+ // Make sure all returned values are different than the underlying value.
+ // TODO: we could do this in a more sophisticated way inside
+ // AAReturnedValues, e.g., track all values that escape through returns
+ // directly somehow.
+ auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) {
+ bool SeenConstant = false;
+ for (auto &It : RVAA.returned_values()) {
+ if (isa<Constant>(It.first)) {
+ if (SeenConstant)
+ return false;
+ SeenConstant = true;
+ } else if (!isa<Argument>(It.first) ||
+ It.first == getAssociatedArgument())
+ return false;
+ }
+ return true;
+ };
+
+ const auto &NoUnwindAA = A.getAAFor<AANoUnwind>(*this, FnPos);
+ if (NoUnwindAA.isAssumedNoUnwind()) {
+ bool IsVoidTy = F->getReturnType()->isVoidTy();
+ const AAReturnedValues *RVAA =
+ IsVoidTy ? nullptr : &A.getAAFor<AAReturnedValues>(*this, FnPos);
+ if (IsVoidTy || CheckReturnedArgs(*RVAA)) {
+ T.addKnownBits(NOT_CAPTURED_IN_RET);
+ if (T.isKnown(NOT_CAPTURED_IN_MEM))
+ return ChangeStatus::UNCHANGED;
+ if (NoUnwindAA.isKnownNoUnwind() &&
+ (IsVoidTy || RVAA->getState().isAtFixpoint())) {
+ addKnownBits(NOT_CAPTURED_IN_RET);
+ if (isKnown(NOT_CAPTURED_IN_MEM))
+ return indicateOptimisticFixpoint();
+ }
+ }
+ }
+
+ // Use the CaptureTracker interface and logic with the specialized tracker,
+ // defined in AACaptureUseTracker, that can look at in-flight abstract
+ // attributes and directly updates the assumed state.
+ SmallVector<const Value *, 4> PotentialCopies;
+ unsigned RemainingUsesToExplore = DefaultMaxUsesToExplore;
+ AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies,
+ RemainingUsesToExplore);
+
+ // Check all potential copies of the associated value until we can assume
+ // none will be captured or we have to assume at least one might be.
+ unsigned Idx = 0;
+ PotentialCopies.push_back(V);
+ while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size())
+ Tracker.valueMayBeCaptured(PotentialCopies[Idx++]);
+
+ AAAlign::StateType &S = getState();
+ auto Assumed = S.getAssumed();
+ S.intersectAssumedBits(T.getAssumed());
+ return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+}
+
+/// NoCapture attribute for function arguments.
+struct AANoCaptureArgument final : AANoCaptureImpl {
+ AANoCaptureArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nocapture) }
+};
+
+/// NoCapture attribute for call site arguments.
+struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
+ AANoCaptureCallSiteArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg)
+ return indicatePessimisticFixpoint();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoCapture::StateType &>(ArgAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nocapture) }
+};
+
+/// NoCapture attribute for floating values.
+struct AANoCaptureFloating final : AANoCaptureImpl {
+ AANoCaptureFloating(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(nocapture)
+ }
+};
+
+/// NoCapture attribute for function return value.
+struct AANoCaptureReturned final : AANoCaptureImpl {
+ AANoCaptureReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) {
+ llvm_unreachable("NoCapture is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ llvm_unreachable("NoCapture is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("NoCapture is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// NoCapture attribute deduction for a call site return value.
+struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
+ AANoCaptureCallSiteReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(nocapture)
+ }
};
-ChangeStatus AANonNullArgument::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
- Argument &Arg = cast<Argument>(getAnchoredValue());
- unsigned ArgNo = Arg.getArgNo();
+/// ------------------ Value Simplify Attribute ----------------------------
+struct AAValueSimplifyImpl : AAValueSimplify {
+ AAValueSimplifyImpl(const IRPosition &IRP) : AAValueSimplify(IRP) {}
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? (getKnown() ? "simplified" : "maybe-simple")
+ : "not-simple";
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+
+ /// See AAValueSimplify::getAssumedSimplifiedValue()
+ Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const override {
+ if (!getAssumed())
+ return const_cast<Value *>(&getAssociatedValue());
+ return SimplifiedAssociatedValue;
+ }
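+
+  /// See AbstractAttribute::initialize(...). The simplified value starts out
+  /// unknown (Optional::None).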
+ void initialize(Attributor &A) override {}
+
+  /// Helper function for querying AAValueSimplify and updating candidate.
+ /// \param QueryingValue Value trying to unify with SimplifiedValue
+ /// \param AccumulatedSimplifiedValue Current simplification result.
+ static bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA,
+ Value &QueryingValue,
+ Optional<Value *> &AccumulatedSimplifiedValue) {
+    // FIXME: Add typecast support.
+
+ auto &ValueSimpifyAA = A.getAAFor<AAValueSimplify>(
+ QueryingAA, IRPosition::value(QueryingValue));
- // Callback function
- std::function<bool(CallSite)> CallSiteCheck = [&](CallSite CS) {
- assert(CS && "Sanity check: Call site was not initialized properly!");
+ Optional<Value *> QueryingValueSimplified =
+ ValueSimpifyAA.getAssumedSimplifiedValue(A);
- auto *NonNullAA = A.getAAFor<AANonNull>(*this, *CS.getInstruction(), ArgNo);
+ if (!QueryingValueSimplified.hasValue())
+ return true;
- // Check that NonNullAA is AANonNullCallSiteArgument.
- if (NonNullAA) {
- ImmutableCallSite ICS(&NonNullAA->getAnchoredValue());
- if (ICS && CS.getInstruction() == ICS.getInstruction())
- return NonNullAA->isAssumedNonNull();
+ if (!QueryingValueSimplified.getValue())
return false;
+
+ Value &QueryingValueSimplifiedUnwrapped =
+ *QueryingValueSimplified.getValue();
+
+ if (isa<UndefValue>(QueryingValueSimplifiedUnwrapped))
+ return true;
+
+ if (AccumulatedSimplifiedValue.hasValue())
+ return AccumulatedSimplifiedValue == QueryingValueSimplified;
+
+ LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << QueryingValue
+ << " is assumed to be "
+ << QueryingValueSimplifiedUnwrapped << "\n");
+
+ AccumulatedSimplifiedValue = QueryingValueSimplified;
+ return true;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ if (!SimplifiedAssociatedValue.hasValue() ||
+ !SimplifiedAssociatedValue.getValue())
+ return Changed;
+
+ if (auto *C = dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())) {
+ // We can replace the AssociatedValue with the constant.
+ Value &V = getAssociatedValue();
+ if (!V.user_empty() && &V != C && V.getType() == C->getType()) {
+ LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << V << " -> " << *C
+ << "\n");
+ V.replaceAllUsesWith(C);
+ Changed = ChangeStatus::CHANGED;
+ }
+ }
+
+ return Changed | AAValueSimplify::manifest(A);
+ }
+
+protected:
+  // An assumed simplified value. Initially, it is set to Optional::None, which
+  // means that the value is not clear under the current assumption. If in the
+  // pessimistic state, getAssumedSimplifiedValue doesn't return this value but
+  // returns the original associated value.
+ Optional<Value *> SimplifiedAssociatedValue;
+};
+
+struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
+ AAValueSimplifyArgument(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+ auto PredForCallSite = [&](AbstractCallSite ACS) {
+ // Check if we have an associated argument or not (which can happen for
+ // callback calls).
+ if (Value *ArgOp = ACS.getCallArgOperand(getArgNo()))
+ return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue);
+ return false;
+ };
+
+ if (!A.checkForAllCallSites(PredForCallSite, *this, true))
+ return indicatePessimisticFixpoint();
+
+    // If a candidate was found in this update, return CHANGED.
+    return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+               ? ChangeStatus::UNCHANGED
+               : ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyReturned : AAValueSimplifyImpl {
+ AAValueSimplifyReturned(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+ auto PredForReturned = [&](Value &V) {
+ return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue);
+ };
+
+ if (!A.checkForAllReturnedValues(PredForReturned, *this))
+ return indicatePessimisticFixpoint();
+
+    // If a candidate was found in this update, return CHANGED.
+    return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+               ? ChangeStatus::UNCHANGED
+               : ChangeStatus::CHANGED;
+ }
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyFloating : AAValueSimplifyImpl {
+ AAValueSimplifyFloating(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Value &V = getAnchorValue();
+
+    // TODO: handle other cases
+ if (isa<Constant>(V) || isa<UndefValue>(V))
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+ auto VisitValueCB = [&](Value &V, BooleanState, bool Stripped) -> bool {
+ auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V));
+ if (!Stripped && this == &AA) {
+ // TODO: Look the instruction and check recursively.
+ LLVM_DEBUG(
+ dbgs() << "[Attributor][ValueSimplify] Can't be stripped more : "
+ << V << "\n");
+ indicatePessimisticFixpoint();
+ return false;
+ }
+ return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue);
+ };
+
+ if (!genericValueTraversal<AAValueSimplify, BooleanState>(
+ A, getIRPosition(), *this, static_cast<BooleanState &>(*this),
+ VisitValueCB))
+ return indicatePessimisticFixpoint();
+
+    // If a candidate was found in this update, return CHANGED.
+
+    return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+               ? ChangeStatus::UNCHANGED
+               : ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyFunction : AAValueSimplifyImpl {
+ AAValueSimplifyFunction(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ SimplifiedAssociatedValue = &getAnchorValue();
+ indicateOptimisticFixpoint();
+ }
+ /// See AbstractAttribute::initialize(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable(
+ "AAValueSimplify(Function|CallSite)::updateImpl will not be called");
+ }
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FN_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyCallSite : AAValueSimplifyFunction {
+ AAValueSimplifyCallSite(const IRPosition &IRP)
+ : AAValueSimplifyFunction(IRP) {}
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CS_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyCallSiteReturned : AAValueSimplifyReturned {
+ AAValueSimplifyCallSiteReturned(const IRPosition &IRP)
+ : AAValueSimplifyReturned(IRP) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(value_simplify)
+ }
+};
+struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
+ AAValueSimplifyCallSiteArgument(const IRPosition &IRP)
+ : AAValueSimplifyFloating(IRP) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(value_simplify)
+ }
+};
+
+/// ----------------------- Heap-To-Stack Conversion ---------------------------
+struct AAHeapToStackImpl : public AAHeapToStack {
+ AAHeapToStackImpl(const IRPosition &IRP) : AAHeapToStack(IRP) {}
+
+ const std::string getAsStr() const override {
+ return "[H2S] Mallocs: " + std::to_string(MallocCalls.size());
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ assert(getState().isValidState() &&
+ "Attempted to manifest an invalid state!");
+
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ Function *F = getAssociatedFunction();
+ const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+ for (Instruction *MallocCall : MallocCalls) {
+ // This malloc cannot be replaced.
+ if (BadMallocCalls.count(MallocCall))
+ continue;
+
+ for (Instruction *FreeCall : FreesForMalloc[MallocCall]) {
+ LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n");
+ A.deleteAfterManifest(*FreeCall);
+ HasChanged = ChangeStatus::CHANGED;
+ }
+
+ LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
+ << "\n");
+
+ Constant *Size;
+ if (isCallocLikeFn(MallocCall, TLI)) {
+ auto *Num = cast<ConstantInt>(MallocCall->getOperand(0));
+ auto *SizeT = dyn_cast<ConstantInt>(MallocCall->getOperand(1));
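+        // Overflow was already ruled out in updateImpl before this calloc call
+        // was recorded, so the product below is exact.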
+ APInt TotalSize = SizeT->getValue() * Num->getValue();
+ Size =
+ ConstantInt::get(MallocCall->getOperand(0)->getType(), TotalSize);
+ } else {
+ Size = cast<ConstantInt>(MallocCall->getOperand(0));
+ }
+
+ unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace();
+ Instruction *AI = new AllocaInst(Type::getInt8Ty(F->getContext()), AS,
+ Size, "", MallocCall->getNextNode());
+
+ if (AI->getType() != MallocCall->getType())
+ AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc",
+ AI->getNextNode());
+
+ MallocCall->replaceAllUsesWith(AI);
+
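+      // If the allocation was an invoke, branch straight to the normal
+      // destination; the alloca that replaces it cannot throw.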
+ if (auto *II = dyn_cast<InvokeInst>(MallocCall)) {
+ auto *NBB = II->getNormalDest();
+ BranchInst::Create(NBB, MallocCall->getParent());
+ A.deleteAfterManifest(*MallocCall);
+ } else {
+ A.deleteAfterManifest(*MallocCall);
+ }
+
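+      // calloc zero-initializes the memory, so emit a memset(0) over the new
+      // alloca to preserve that behavior.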
+ if (isCallocLikeFn(MallocCall, TLI)) {
+ auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc",
+ AI->getNextNode());
+ Value *Ops[] = {
+ BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size,
+ ConstantInt::get(Type::getInt1Ty(F->getContext()), false)};
+
+ Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()};
+ Module *M = F->getParent();
+ Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
+ CallInst::Create(Fn, Ops, "", BI->getNextNode());
+ }
+ HasChanged = ChangeStatus::CHANGED;
}
- if (CS.paramHasAttr(ArgNo, Attribute::NonNull))
+ return HasChanged;
+ }
+
+ /// Collection of all malloc calls in a function.
+ SmallSetVector<Instruction *, 4> MallocCalls;
+
+ /// Collection of malloc calls that cannot be converted.
+ DenseSet<const Instruction *> BadMallocCalls;
+
+ /// A map for each malloc call to the set of associated free calls.
+ DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> FreesForMalloc;
+
+ ChangeStatus updateImpl(Attributor &A) override;
+};
+
+ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
+ const Function *F = getAssociatedFunction();
+ const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+ auto UsesCheck = [&](Instruction &I) {
+ SmallPtrSet<const Use *, 8> Visited;
+ SmallVector<const Use *, 8> Worklist;
+
+ for (Use &U : I.uses())
+ Worklist.push_back(&U);
+
+ while (!Worklist.empty()) {
+ const Use *U = Worklist.pop_back_val();
+ if (!Visited.insert(U).second)
+ continue;
+
+ auto *UserI = U->getUser();
+
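+      // Reading through the pointer only extracts a value; it does not let the
+      // allocation itself escape.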
+ if (isa<LoadInst>(UserI))
+ continue;
+ if (auto *SI = dyn_cast<StoreInst>(UserI)) {
+ if (SI->getValueOperand() == U->get()) {
+          LLVM_DEBUG(dbgs() << "[H2S] escaping store to memory: " << *UserI
+                            << "\n");
+ return false;
+ }
+ // A store into the malloc'ed memory is fine.
+ continue;
+ }
+
+      // NOTE: Right now, if a function that takes the malloc'ed pointer as an
+      // argument frees memory, we assume that the malloc'ed pointer is freed.
+
+ // TODO: Add nofree callsite argument attribute to indicate that pointer
+ // argument is not freed.
+ if (auto *CB = dyn_cast<CallBase>(UserI)) {
+ if (!CB->isArgOperand(U))
+ continue;
+
+ if (CB->isLifetimeStartOrEnd())
+ continue;
+
+        // Record the free call so it can be removed when the malloc is
+        // converted.
+ if (isFreeCall(UserI, TLI)) {
+ FreesForMalloc[&I].insert(
+ cast<Instruction>(const_cast<User *>(UserI)));
+ continue;
+ }
+
+        // If a function does not free memory, we are fine.
+ const auto &NoFreeAA =
+ A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(*CB));
+
+ unsigned ArgNo = U - CB->arg_begin();
+ const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
+ *this, IRPosition::callsite_argument(*CB, ArgNo));
+
+ if (!NoCaptureAA.isAssumedNoCapture() || !NoFreeAA.isAssumedNoFree()) {
+ LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
+ return false;
+ }
+ continue;
+ }
+
+ if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI)) {
+ for (Use &U : UserI->uses())
+ Worklist.push_back(&U);
+ continue;
+ }
+
+ // Unknown user.
+ LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n");
+ return false;
+ }
+ return true;
+ };
+
+ auto MallocCallocCheck = [&](Instruction &I) {
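+    // Calls already known to be non-convertible need no further inspection.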
+ if (BadMallocCalls.count(&I))
return true;
- Value *V = CS.getArgOperand(ArgNo);
- if (isKnownNonZero(V, getAnchorScope().getParent()->getDataLayout()))
+ bool IsMalloc = isMallocLikeFn(&I, TLI);
+ bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI);
+ if (!IsMalloc && !IsCalloc) {
+ BadMallocCalls.insert(&I);
return true;
+ }
- return false;
- };
- if (!A.checkForAllCallSites(F, CallSiteCheck, true)) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
- }
- return ChangeStatus::UNCHANGED;
-}
+ if (IsMalloc) {
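+    // Only constant-sized allocations below the heap-to-stack threshold with
+    // exclusively safe uses are converted.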
+ if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0)))
+ if (Size->getValue().sle(MaxHeapToStackSize))
+ if (UsesCheck(I)) {
+ MallocCalls.insert(&I);
+ return true;
+ }
+ } else if (IsCalloc) {
+ bool Overflow = false;
+ if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0)))
+ if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
+ if ((Size->getValue().umul_ov(Num->getValue(), Overflow))
+ .sle(MaxHeapToStackSize))
+ if (!Overflow && UsesCheck(I)) {
+ MallocCalls.insert(&I);
+ return true;
+ }
+ }
-ChangeStatus AANonNullCallSiteArgument::updateImpl(Attributor &A) {
- // NOTE: Never look at the argument of the callee in this method.
- // If we do this, "nonnull" is always deduced because of the assumption.
+ BadMallocCalls.insert(&I);
+ return true;
+ };
- Value &V = *getAssociatedValue();
+ size_t NumBadMallocs = BadMallocCalls.size();
- auto *NonNullAA = A.getAAFor<AANonNull>(*this, V);
+ A.checkForAllCallLikeInstructions(MallocCallocCheck, *this);
- if (!NonNullAA || !NonNullAA->isAssumedNonNull()) {
- indicatePessimisticFixpoint();
+ if (NumBadMallocs != BadMallocCalls.size())
return ChangeStatus::CHANGED;
- }
return ChangeStatus::UNCHANGED;
}
-/// ------------------------ Will-Return Attributes ----------------------------
+struct AAHeapToStackFunction final : public AAHeapToStackImpl {
+ AAHeapToStackFunction(const IRPosition &IRP) : AAHeapToStackImpl(IRP) {}
-struct AAWillReturnImpl : public AAWillReturn, BooleanState {
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECL(MallocCalls, Function,
+ "Number of MallocCalls converted to allocas");
+ BUILD_STAT_NAME(MallocCalls, Function) += MallocCalls.size();
+ }
+};
+
+/// -------------------- Memory Behavior Attributes ----------------------------
+/// Includes read-none, read-only, and write-only.
+/// ----------------------------------------------------------------------------
+struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
+ AAMemoryBehaviorImpl(const IRPosition &IRP) : AAMemoryBehavior(IRP) {}
- /// See AbstractAttribute::AbstractAttribute(...).
- AAWillReturnImpl(Function &F, InformationCache &InfoCache)
- : AAWillReturn(F, InfoCache) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
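+    // Start from the most optimistic assumption; attributes already present in
+    // the IR then add known bits below.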
+ intersectAssumedBits(BEST_STATE);
+ getKnownStateFromValue(getIRPosition(), getState());
+ IRAttribute::initialize(A);
+ }
- /// See AAWillReturn::isKnownWillReturn().
- bool isKnownWillReturn() const override { return getKnown(); }
+ /// Return the memory behavior information encoded in the IR for \p IRP.
+ static void getKnownStateFromValue(const IRPosition &IRP,
+ IntegerState &State) {
+ SmallVector<Attribute, 2> Attrs;
+ IRP.getAttrs(AttrKinds, Attrs);
+ for (const Attribute &Attr : Attrs) {
+ switch (Attr.getKindAsEnum()) {
+ case Attribute::ReadNone:
+ State.addKnownBits(NO_ACCESSES);
+ break;
+ case Attribute::ReadOnly:
+ State.addKnownBits(NO_WRITES);
+ break;
+ case Attribute::WriteOnly:
+ State.addKnownBits(NO_READS);
+ break;
+ default:
+        llvm_unreachable("Unexpected attribute!");
+ }
+ }
- /// See AAWillReturn::isAssumedWillReturn().
- bool isAssumedWillReturn() const override { return getAssumed(); }
+ if (auto *I = dyn_cast<Instruction>(&IRP.getAnchorValue())) {
+ if (!I->mayReadFromMemory())
+ State.addKnownBits(NO_READS);
+ if (!I->mayWriteToMemory())
+ State.addKnownBits(NO_WRITES);
+ }
+ }
- /// See AbstractAttribute::getState(...).
- AbstractState &getState() override { return *this; }
+ /// See AbstractAttribute::getDeducedAttributes(...).
+ void getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ assert(Attrs.size() == 0);
+ if (isAssumedReadNone())
+ Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone));
+ else if (isAssumedReadOnly())
+ Attrs.push_back(Attribute::get(Ctx, Attribute::ReadOnly));
+ else if (isAssumedWriteOnly())
+ Attrs.push_back(Attribute::get(Ctx, Attribute::WriteOnly));
+ assert(Attrs.size() <= 1);
+ }
- /// See AbstractAttribute::getState(...).
- const AbstractState &getState() const override { return *this; }
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ IRPosition &IRP = getIRPosition();
+
+ // Check if we would improve the existing attributes first.
+ SmallVector<Attribute, 4> DeducedAttrs;
+ getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs);
+ if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) {
+ return IRP.hasAttr(Attr.getKindAsEnum(),
+ /* IgnoreSubsumingPositions */ true);
+ }))
+ return ChangeStatus::UNCHANGED;
+
+ // Clear existing attributes.
+ IRP.removeAttrs(AttrKinds);
+
+ // Use the generic manifest method.
+ return IRAttribute::manifest(A);
+ }
- /// See AbstractAttribute::getAsStr()
+ /// See AbstractState::getAsStr().
const std::string getAsStr() const override {
- return getAssumed() ? "willreturn" : "may-noreturn";
+ if (isAssumedReadNone())
+ return "readnone";
+ if (isAssumedReadOnly())
+ return "readonly";
+ if (isAssumedWriteOnly())
+ return "writeonly";
+ return "may-read/write";
}
+
+ /// The set of IR attributes AAMemoryBehavior deals with.
+ static const Attribute::AttrKind AttrKinds[3];
};
-struct AAWillReturnFunction final : AAWillReturnImpl {
+const Attribute::AttrKind AAMemoryBehaviorImpl::AttrKinds[] = {
+ Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly};
- /// See AbstractAttribute::AbstractAttribute(...).
- AAWillReturnFunction(Function &F, InformationCache &InfoCache)
- : AAWillReturnImpl(F, InfoCache) {}
+/// Memory behavior attribute for a floating value.
+struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl {
+ AAMemoryBehaviorFloating(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}
- /// See AbstractAttribute::getManifestPosition().
- ManifestPosition getManifestPosition() const override {
- return MP_FUNCTION;
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAMemoryBehaviorImpl::initialize(A);
+ // Initialize the use vector with all direct uses of the associated value.
+ for (const Use &U : getAssociatedValue().uses())
+ Uses.insert(&U);
}
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_FLOATING_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_FLOATING_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_FLOATING_ATTR(writeonly)
+ }
+
+private:
+ /// Return true if users of \p UserI might access the underlying
+ /// variable/location described by \p U and should therefore be analyzed.
+ bool followUsersOfUseIn(Attributor &A, const Use *U,
+ const Instruction *UserI);
+
+ /// Update the state according to the effect of use \p U in \p UserI.
+ void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI);
+
+protected:
+ /// Container for (transitive) uses of the associated argument.
+ SetVector<const Use *> Uses;
+};
+
+/// Memory behavior attribute for function argument.
+struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
+ AAMemoryBehaviorArgument(const IRPosition &IRP)
+ : AAMemoryBehaviorFloating(IRP) {}
+
/// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override;
+ void initialize(Attributor &A) override {
+ AAMemoryBehaviorFloating::initialize(A);
+
+    // Give up on arguments of functions without an exact definition.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg || !Arg->getParent()->hasExactDefinition())
+ indicatePessimisticFixpoint();
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ // TODO: From readattrs.ll: "inalloca parameters are always
+ // considered written"
+ if (hasAttr({Attribute::InAlloca})) {
+ removeKnownBits(NO_WRITES);
+ removeAssumedBits(NO_WRITES);
+ }
+ return AAMemoryBehaviorFloating::manifest(A);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_ARG_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_ARG_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_ARG_ATTR(writeonly)
+ }
+};
+
+struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
+ AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP)
+ : AAMemoryBehaviorArgument(IRP) {}
/// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoCapture::StateType &>(ArgAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_CSARG_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_CSARG_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_CSARG_ATTR(writeonly)
+ }
};
-// Helper function that checks whether a function has any cycle.
-// TODO: Replace with more efficent code
-bool containsCycle(Function &F) {
- SmallPtrSet<BasicBlock *, 32> Visited;
+/// Memory behavior attribute for a call site return position.
+struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating {
+ AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP)
+ : AAMemoryBehaviorFloating(IRP) {}
- // Traverse BB by dfs and check whether successor is already visited.
- for (BasicBlock *BB : depth_first(&F)) {
- Visited.insert(BB);
- for (auto *SuccBB : successors(BB)) {
- if (Visited.count(SuccBB))
- return true;
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // We do not annotate returned values.
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// An AA to represent the memory behavior function attributes.
+struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl {
+ AAMemoryBehaviorFunction(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ virtual ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ Function &F = cast<Function>(getAnchorValue());
+ if (isAssumedReadNone()) {
+ F.removeFnAttr(Attribute::ArgMemOnly);
+ F.removeFnAttr(Attribute::InaccessibleMemOnly);
+ F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
}
+ return AAMemoryBehaviorImpl::manifest(A);
}
- return false;
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_FN_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_FN_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_FN_ATTR(writeonly)
+ }
+};
+
+/// AAMemoryBehavior attribute for call sites.
+struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
+ AAMemoryBehaviorCallSite(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAMemoryBehaviorImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || !F->hasExactDefinition())
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AAAlign::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_CS_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_CS_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_CS_ATTR(writeonly)
+ }
+};
+} // namespace
+
+ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) {
+
+ // The current assumed state used to determine a change.
+ auto AssumedState = getAssumed();
+
+ auto CheckRWInst = [&](Instruction &I) {
+    // If the instruction has its own memory behavior state, use it to restrict
+ // the local state. No further analysis is required as the other memory
+ // state is as optimistic as it gets.
+ if (ImmutableCallSite ICS = ImmutableCallSite(&I)) {
+ const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
+ *this, IRPosition::callsite_function(ICS));
+ intersectAssumedBits(MemBehaviorAA.getAssumed());
+ return !isAtFixpoint();
+ }
+
+ // Remove access kind modifiers if necessary.
+ if (I.mayReadFromMemory())
+ removeAssumedBits(NO_READS);
+ if (I.mayWriteToMemory())
+ removeAssumedBits(NO_WRITES);
+ return !isAtFixpoint();
+ };
+
+ if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
+ return indicatePessimisticFixpoint();
+
+ return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
+ : ChangeStatus::UNCHANGED;
}
-// Helper function that checks the function have a loop which might become an
-// endless loop
-// FIXME: Any cycle is regarded as endless loop for now.
-// We have to allow some patterns.
-bool containsPossiblyEndlessLoop(Function &F) { return containsCycle(F); }
+ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
-void AAWillReturnFunction::initialize(Attributor &A) {
- Function &F = getAnchorScope();
+ const IRPosition &IRP = getIRPosition();
+ const IRPosition &FnPos = IRPosition::function_scope(IRP);
+ AAMemoryBehavior::StateType &S = getState();
- if (containsPossiblyEndlessLoop(F))
- indicatePessimisticFixpoint();
+ // First, check the function scope. We take the known information and we avoid
+ // work if the assumed information implies the current assumed information for
+ // this attribute.
+ const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+ S.addKnownBits(FnMemAA.getKnown());
+ if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed())
+ return ChangeStatus::UNCHANGED;
+
+  // Make sure the value is not captured (except through "return"); if it is,
+  // any information derived would be irrelevant anyway as we cannot check the
+  // potential aliases introduced by the capture. However, there is no need to
+  // fall back to anything less optimistic than the function state.
+ const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
+ if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ S.intersectAssumedBits(FnMemAA.getAssumed());
+ return ChangeStatus::CHANGED;
+ }
+
+ // The current assumed state used to determine a change.
+ auto AssumedState = S.getAssumed();
+
+ // Liveness information to exclude dead users.
+ // TODO: Take the FnPos once we have call site specific liveness information.
+ const auto &LivenessAA = A.getAAFor<AAIsDead>(
+ *this, IRPosition::function(*IRP.getAssociatedFunction()));
+
+ // Visit and expand uses until all are analyzed or a fixpoint is reached.
+ for (unsigned i = 0; i < Uses.size() && !isAtFixpoint(); i++) {
+ const Use *U = Uses[i];
+ Instruction *UserI = cast<Instruction>(U->getUser());
+ LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << **U << " in " << *UserI
+ << " [Dead: " << (LivenessAA.isAssumedDead(UserI))
+ << "]\n");
+ if (LivenessAA.isAssumedDead(UserI))
+ continue;
+
+ // Check if the users of UserI should also be visited.
+ if (followUsersOfUseIn(A, U, UserI))
+ for (const Use &UserIUse : UserI->uses())
+ Uses.insert(&UserIUse);
+
+ // If UserI might touch memory we analyze the use in detail.
+ if (UserI->mayReadOrWriteMemory())
+ analyzeUseIn(A, U, UserI);
+ }
+
+ return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
+ : ChangeStatus::UNCHANGED;
}
-ChangeStatus AAWillReturnFunction::updateImpl(Attributor &A) {
- Function &F = getAnchorScope();
+bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U,
+ const Instruction *UserI) {
+ // The loaded value is unrelated to the pointer argument, no need to
+ // follow the users of the load.
+ if (isa<LoadInst>(UserI))
+ return false;
- // The map from instruction opcodes to those instructions in the function.
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+  // By default we follow all uses assuming UserI might leak information on U;
+  // we have special handling for call site operands though.
+ ImmutableCallSite ICS(UserI);
+ if (!ICS || !ICS.isArgOperand(U))
+ return true;
- for (unsigned Opcode :
- {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
- (unsigned)Instruction::Call}) {
- for (Instruction *I : OpcodeInstMap[Opcode]) {
- auto ICS = ImmutableCallSite(I);
+ // If the use is a call argument known not to be captured, the users of
+ // the call do not need to be visited because they have to be unrelated to
+ // the input. Note that this check is not trivial even though we disallow
+  // general capturing of the underlying argument. The reason is that the
+  // call might capture the argument "through return", which we allow and for
+  // which we need to check call users.
+ unsigned ArgNo = ICS.getArgumentNo(U);
+ const auto &ArgNoCaptureAA =
+ A.getAAFor<AANoCapture>(*this, IRPosition::callsite_argument(ICS, ArgNo));
+ return !ArgNoCaptureAA.isAssumedNoCapture();
+}
- if (ICS.hasFnAttr(Attribute::WillReturn))
- continue;
+void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U,
+ const Instruction *UserI) {
+ assert(UserI->mayReadOrWriteMemory());
- auto *WillReturnAA = A.getAAFor<AAWillReturn>(*this, *I);
- if (!WillReturnAA || !WillReturnAA->isAssumedWillReturn()) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
- }
+ switch (UserI->getOpcode()) {
+ default:
+ // TODO: Handle all atomics and other side-effect operations we know of.
+ break;
+ case Instruction::Load:
+ // Loads cause the NO_READS property to disappear.
+ removeAssumedBits(NO_READS);
+ return;
- auto *NoRecurseAA = A.getAAFor<AANoRecurse>(*this, *I);
+ case Instruction::Store:
+ // Stores cause the NO_WRITES property to disappear if the use is the
+ // pointer operand. Note that we do assume that capturing was taken care of
+ // somewhere else.
+ if (cast<StoreInst>(UserI)->getPointerOperand() == U->get())
+ removeAssumedBits(NO_WRITES);
+ return;
- // FIXME: (i) Prohibit any recursion for now.
- // (ii) AANoRecurse isn't implemented yet so currently any call is
- // regarded as having recursion.
- // Code below should be
- // if ((!NoRecurseAA || !NoRecurseAA->isAssumedNoRecurse()) &&
- if (!NoRecurseAA && !ICS.hasFnAttr(Attribute::NoRecurse)) {
- indicatePessimisticFixpoint();
- return ChangeStatus::CHANGED;
- }
+ case Instruction::Call:
+ case Instruction::CallBr:
+ case Instruction::Invoke: {
+ // For call sites we look at the argument memory behavior attribute (this
+ // could be recursive!) in order to restrict our own state.
+ ImmutableCallSite ICS(UserI);
+
+ // Give up on operand bundles.
+ if (ICS.isBundleOperand(U)) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+    // Calling a function does read the function pointer, and may write it if
+    // the function is self-modifying.
+ if (ICS.isCallee(U)) {
+ removeAssumedBits(NO_READS);
+ break;
}
+
+ // Adjust the possible access behavior based on the information on the
+ // argument.
+ unsigned ArgNo = ICS.getArgumentNo(U);
+ const IRPosition &ArgPos = IRPosition::callsite_argument(ICS, ArgNo);
+ const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
+ // "assumed" has at most the same bits as the MemBehaviorAA assumed
+ // and at least "known".
+ intersectAssumedBits(MemBehaviorAA.getAssumed());
+ return;
}
+ };
- return ChangeStatus::UNCHANGED;
+ // Generally, look at the "may-properties" and adjust the assumed state if we
+ // did not trigger special handling before.
+ if (UserI->mayReadFromMemory())
+ removeAssumedBits(NO_READS);
+ if (UserI->mayWriteToMemory())
+ removeAssumedBits(NO_WRITES);
}
/// ----------------------------------------------------------------------------
/// Attributor
/// ----------------------------------------------------------------------------
-bool Attributor::checkForAllCallSites(Function &F,
- std::function<bool(CallSite)> &Pred,
- bool RequireAllCallSites) {
+bool Attributor::isAssumedDead(const AbstractAttribute &AA,
+ const AAIsDead *LivenessAA) {
+ const Instruction *CtxI = AA.getIRPosition().getCtxI();
+ if (!CtxI)
+ return false;
+
+ if (!LivenessAA)
+ LivenessAA =
+ &getAAFor<AAIsDead>(AA, IRPosition::function(*CtxI->getFunction()),
+ /* TrackDependence */ false);
+
+ // Don't check liveness for AAIsDead.
+ if (&AA == LivenessAA)
+ return false;
+
+ if (!LivenessAA->isAssumedDead(CtxI))
+ return false;
+
+ // We actually used liveness information so we have to record a dependence.
+ recordDependence(*LivenessAA, AA);
+
+ return true;
+}
+
+bool Attributor::checkForAllCallSites(
+ const function_ref<bool(AbstractCallSite)> &Pred,
+ const AbstractAttribute &QueryingAA, bool RequireAllCallSites) {
// We can try to determine information from
// the call sites. However, this is only possible if all call sites are known,
// hence the function needs to have internal linkage.
- if (RequireAllCallSites && !F.hasInternalLinkage()) {
+ const IRPosition &IRP = QueryingAA.getIRPosition();
+ const Function *AssociatedFunction = IRP.getAssociatedFunction();
+ if (!AssociatedFunction) {
+ LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP
+ << "\n");
+ return false;
+ }
+
+ return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites,
+ &QueryingAA);
+}
+
+bool Attributor::checkForAllCallSites(
+ const function_ref<bool(AbstractCallSite)> &Pred, const Function &Fn,
+ bool RequireAllCallSites, const AbstractAttribute *QueryingAA) {
+ if (RequireAllCallSites && !Fn.hasLocalLinkage()) {
LLVM_DEBUG(
dbgs()
- << "Attributor: Function " << F.getName()
+ << "[Attributor] Function " << Fn.getName()
<< " has no internal linkage, hence not all call sites are known\n");
return false;
}
- for (const Use &U : F.uses()) {
+ for (const Use &U : Fn.uses()) {
+ AbstractCallSite ACS(&U);
+ if (!ACS) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Function "
+ << Fn.getName()
+ << " has non call site use " << *U.get() << " in "
+ << *U.getUser() << "\n");
+ return false;
+ }
+
+ Instruction *I = ACS.getInstruction();
+ Function *Caller = I->getFunction();
+
+ const auto *LivenessAA =
+ lookupAAFor<AAIsDead>(IRPosition::function(*Caller), QueryingAA,
+ /* TrackDependence */ false);
+
+ // Skip dead calls.
+ if (LivenessAA && LivenessAA->isAssumedDead(I)) {
+ // We actually used liveness information so we have to record a
+ // dependence.
+ if (QueryingAA)
+ recordDependence(*LivenessAA, *QueryingAA);
+ continue;
+ }
- CallSite CS(U.getUser());
- if (!CS || !CS.isCallee(&U) || !CS.getCaller()->hasExactDefinition()) {
+ const Use *EffectiveUse =
+ ACS.isCallbackCall() ? &ACS.getCalleeUseForCallback() : &U;
+ if (!ACS.isCallee(EffectiveUse)) {
if (!RequireAllCallSites)
continue;
-
- LLVM_DEBUG(dbgs() << "Attributor: User " << *U.getUser()
- << " is an invalid use of " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "[Attributor] User " << EffectiveUse->getUser()
+ << " is an invalid use of "
+ << Fn.getName() << "\n");
return false;
}
- if (Pred(CS))
+ if (Pred(ACS))
continue;
- LLVM_DEBUG(dbgs() << "Attributor: Call site callback failed for "
- << *CS.getInstruction() << "\n");
+ LLVM_DEBUG(dbgs() << "[Attributor] Call site callback failed for "
+ << *ACS.getInstruction() << "\n");
return false;
}
return true;
}
-ChangeStatus Attributor::run() {
- // Initialize all abstract attributes.
- for (AbstractAttribute *AA : AllAbstractAttributes)
- AA->initialize(*this);
+bool Attributor::checkForAllReturnedValuesAndReturnInsts(
+ const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
+ &Pred,
+ const AbstractAttribute &QueryingAA) {
+
+ const IRPosition &IRP = QueryingAA.getIRPosition();
+ // Since we need to provide return instructions we have to have an exact
+ // definition.
+ const Function *AssociatedFunction = IRP.getAssociatedFunction();
+ if (!AssociatedFunction)
+ return false;
+ // If this is a call site query we use the call site specific return values
+ // and liveness information.
+ // TODO: use the function scope once we have call site AAReturnedValues.
+ const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+ const auto &AARetVal = getAAFor<AAReturnedValues>(QueryingAA, QueryIRP);
+ if (!AARetVal.getState().isValidState())
+ return false;
+
+ return AARetVal.checkForAllReturnedValuesAndReturnInsts(Pred);
+}
+
+bool Attributor::checkForAllReturnedValues(
+ const function_ref<bool(Value &)> &Pred,
+ const AbstractAttribute &QueryingAA) {
+
+ const IRPosition &IRP = QueryingAA.getIRPosition();
+ const Function *AssociatedFunction = IRP.getAssociatedFunction();
+ if (!AssociatedFunction)
+ return false;
+
+ // TODO: use the function scope once we have call site AAReturnedValues.
+ const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+ const auto &AARetVal = getAAFor<AAReturnedValues>(QueryingAA, QueryIRP);
+ if (!AARetVal.getState().isValidState())
+ return false;
+
+ return AARetVal.checkForAllReturnedValuesAndReturnInsts(
+ [&](Value &RV, const SmallSetVector<ReturnInst *, 4> &) {
+ return Pred(RV);
+ });
+}
+
+static bool
+checkForAllInstructionsImpl(InformationCache::OpcodeInstMapTy &OpcodeInstMap,
+ const function_ref<bool(Instruction &)> &Pred,
+ const AAIsDead *LivenessAA, bool &AnyDead,
+ const ArrayRef<unsigned> &Opcodes) {
+ for (unsigned Opcode : Opcodes) {
+ for (Instruction *I : OpcodeInstMap[Opcode]) {
+ // Skip dead instructions.
+ if (LivenessAA && LivenessAA->isAssumedDead(I)) {
+ AnyDead = true;
+ continue;
+ }
+
+ if (!Pred(*I))
+ return false;
+ }
+ }
+ return true;
+}
+
+bool Attributor::checkForAllInstructions(
+ const llvm::function_ref<bool(Instruction &)> &Pred,
+ const AbstractAttribute &QueryingAA, const ArrayRef<unsigned> &Opcodes) {
+
+ const IRPosition &IRP = QueryingAA.getIRPosition();
+ // Since we need to provide instructions we have to have an exact definition.
+ const Function *AssociatedFunction = IRP.getAssociatedFunction();
+ if (!AssociatedFunction)
+ return false;
+
+ // TODO: use the function scope once we have call site AAReturnedValues.
+ const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+ const auto &LivenessAA =
+ getAAFor<AAIsDead>(QueryingAA, QueryIRP, /* TrackDependence */ false);
+ bool AnyDead = false;
+
+ auto &OpcodeInstMap =
+ InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
+ if (!checkForAllInstructionsImpl(OpcodeInstMap, Pred, &LivenessAA, AnyDead,
+ Opcodes))
+ return false;
+
+ // If we actually used liveness information, we have to record a dependence.
+ if (AnyDead)
+ recordDependence(LivenessAA, QueryingAA);
+
+ return true;
+}
+
+bool Attributor::checkForAllReadWriteInstructions(
+ const llvm::function_ref<bool(Instruction &)> &Pred,
+ AbstractAttribute &QueryingAA) {
+
+ const Function *AssociatedFunction =
+ QueryingAA.getIRPosition().getAssociatedFunction();
+ if (!AssociatedFunction)
+ return false;
+
+ // TODO: use the function scope once we have call site AAReturnedValues.
+ const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+ const auto &LivenessAA =
+ getAAFor<AAIsDead>(QueryingAA, QueryIRP, /* TrackDependence */ false);
+ bool AnyDead = false;
+
+ for (Instruction *I :
+ InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) {
+ // Skip dead instructions.
+ if (LivenessAA.isAssumedDead(I)) {
+ AnyDead = true;
+ continue;
+ }
+
+ if (!Pred(*I))
+ return false;
+ }
+
+ // If we actually used liveness information, we have to record a dependence.
+ if (AnyDead)
+ recordDependence(LivenessAA, QueryingAA);
+
+ return true;
+}
+
+ChangeStatus Attributor::run(Module &M) {
LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized "
<< AllAbstractAttributes.size()
<< " abstract attributes.\n");
@@ -1370,10 +4470,25 @@ ChangeStatus Attributor::run() {
SetVector<AbstractAttribute *> Worklist;
Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end());
+ bool RecomputeDependences = false;
+
do {
+ // Remember the size to determine new attributes.
+ size_t NumAAs = AllAbstractAttributes.size();
LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter
<< ", Worklist size: " << Worklist.size() << "\n");
+ // If dependences (=QueryMap) are recomputed we have to look at all abstract
+ // attributes again, regardless of what changed in the last iteration.
+ if (RecomputeDependences) {
+ LLVM_DEBUG(
+ dbgs() << "[Attributor] Run all AAs to recompute dependences\n");
+ QueryMap.clear();
+ ChangedAAs.clear();
+ Worklist.insert(AllAbstractAttributes.begin(),
+ AllAbstractAttributes.end());
+ }
+
// Add all abstract attributes that are potentially dependent on one that
// changed to the work list.
for (AbstractAttribute *ChangedAA : ChangedAAs) {
@@ -1381,27 +4496,42 @@ ChangeStatus Attributor::run() {
Worklist.insert(QuerriedAAs.begin(), QuerriedAAs.end());
}
+ LLVM_DEBUG(dbgs() << "[Attributor] #Iteration: " << IterationCounter
+ << ", Worklist+Dependent size: " << Worklist.size()
+ << "\n");
+
// Reset the changed set.
ChangedAAs.clear();
// Update all abstract attributes in the work list and record the ones that
// changed.
for (AbstractAttribute *AA : Worklist)
- if (AA->update(*this) == ChangeStatus::CHANGED)
- ChangedAAs.push_back(AA);
+ if (!isAssumedDead(*AA, nullptr))
+ if (AA->update(*this) == ChangeStatus::CHANGED)
+ ChangedAAs.push_back(AA);
+
+ // Check if we recompute the dependences in the next iteration.
+ RecomputeDependences = (DepRecomputeInterval > 0 &&
+ IterationCounter % DepRecomputeInterval == 0);
+
+ // Add attributes to the changed set if they have been created in the last
+ // iteration.
+ ChangedAAs.append(AllAbstractAttributes.begin() + NumAAs,
+ AllAbstractAttributes.end());
// Reset the work list and repopulate with the changed abstract attributes.
// Note that dependent ones are added above.
Worklist.clear();
Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
- } while (!Worklist.empty() && ++IterationCounter < MaxFixpointIterations);
+ } while (!Worklist.empty() && (IterationCounter++ < MaxFixpointIterations ||
+ VerifyMaxFixpointIterations));
LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
<< IterationCounter << "/" << MaxFixpointIterations
<< " iterations\n");
- bool FinishedAtFixpoint = Worklist.empty();
+ size_t NumFinalAAs = AllAbstractAttributes.size();
// Reset abstract arguments not settled in a sound fixpoint by now. This
// happens when we stopped the fixpoint iteration early. Note that only the
@@ -1448,8 +4578,14 @@ ChangeStatus Attributor::run() {
if (!State.isValidState())
continue;
+ // Skip dead code.
+ if (isAssumedDead(*AA, nullptr))
+ continue;
// Manifest the state and record if we changed the IR.
ChangeStatus LocalChange = AA->manifest(*this);
+ if (LocalChange == ChangeStatus::CHANGED && AreStatisticsEnabled())
+ AA->trackStatistics();
+
ManifestChange = ManifestChange | LocalChange;
NumAtFixpoint++;
@@ -1462,69 +4598,92 @@ ChangeStatus Attributor::run() {
<< " arguments while " << NumAtFixpoint
<< " were in a valid fixpoint state\n");
- // If verification is requested, we finished this run at a fixpoint, and the
- // IR was changed, we re-run the whole fixpoint analysis, starting at
- // re-initialization of the arguments. This re-run should not result in an IR
- // change. Though, the (virtual) state of attributes at the end of the re-run
- // might be more optimistic than the known state or the IR state if the better
- // state cannot be manifested.
- if (VerifyAttributor && FinishedAtFixpoint &&
- ManifestChange == ChangeStatus::CHANGED) {
- VerifyAttributor = false;
- ChangeStatus VerifyStatus = run();
- if (VerifyStatus != ChangeStatus::UNCHANGED)
- llvm_unreachable(
- "Attributor verification failed, re-run did result in an IR change "
- "even after a fixpoint was reached in the original run. (False "
- "positives possible!)");
- VerifyAttributor = true;
- }
-
NumAttributesManifested += NumManifested;
NumAttributesValidFixpoint += NumAtFixpoint;
- return ManifestChange;
-}
-
-void Attributor::identifyDefaultAbstractAttributes(
- Function &F, InformationCache &InfoCache,
- DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist) {
+ (void)NumFinalAAs;
+ assert(
+ NumFinalAAs == AllAbstractAttributes.size() &&
+ "Expected the final number of abstract attributes to remain unchanged!");
+
+ // Delete stuff at the end to avoid invalid references and to get a nice order.
+ {
+ LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least "
+ << ToBeDeletedFunctions.size() << " functions and "
+ << ToBeDeletedBlocks.size() << " blocks and "
+ << ToBeDeletedInsts.size() << " instructions\n");
+ for (Instruction *I : ToBeDeletedInsts) {
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
- // Every function can be nounwind.
- registerAA(*new AANoUnwindFunction(F, InfoCache));
+ if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) {
+ SmallVector<BasicBlock *, 8> ToBeDeletedBBs;
+ ToBeDeletedBBs.reserve(NumDeadBlocks);
+ ToBeDeletedBBs.append(ToBeDeletedBlocks.begin(), ToBeDeletedBlocks.end());
+ DeleteDeadBlocks(ToBeDeletedBBs);
+ STATS_DECLTRACK(AAIsDead, BasicBlock,
+ "Number of dead basic blocks deleted.");
+ }
- // Every function might be marked "nosync"
- registerAA(*new AANoSyncFunction(F, InfoCache));
+ STATS_DECL(AAIsDead, Function, "Number of dead functions deleted.");
+ for (Function *Fn : ToBeDeletedFunctions) {
+ Fn->replaceAllUsesWith(UndefValue::get(Fn->getType()));
+ Fn->eraseFromParent();
+ STATS_TRACK(AAIsDead, Function);
+ }
- // Every function might be "no-free".
- registerAA(*new AANoFreeFunction(F, InfoCache));
+ // Identify dead internal functions and delete them. This happens outside
+ // the other fixpoint analysis as we might treat potentially dead functions
+ // as live to lower the number of iterations. If they happen to be dead, the
+ // below fixpoint loop will identify and eliminate them.
+ SmallVector<Function *, 8> InternalFns;
+ for (Function &F : M)
+ if (F.hasLocalLinkage())
+ InternalFns.push_back(&F);
+
+ bool FoundDeadFn = true;
+ while (FoundDeadFn) {
+ FoundDeadFn = false;
+ for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) {
+ Function *F = InternalFns[u];
+ if (!F)
+ continue;
- // Return attributes are only appropriate if the return type is non void.
- Type *ReturnType = F.getReturnType();
- if (!ReturnType->isVoidTy()) {
- // Argument attribute "returned" --- Create only one per function even
- // though it is an argument attribute.
- if (!Whitelist || Whitelist->count(AAReturnedValues::ID))
- registerAA(*new AAReturnedValuesImpl(F, InfoCache));
+ const auto *LivenessAA =
+ lookupAAFor<AAIsDead>(IRPosition::function(*F));
+ if (LivenessAA &&
+ !checkForAllCallSites([](AbstractCallSite ACS) { return false; },
+ *LivenessAA, true))
+ continue;
- // Every function with pointer return type might be marked nonnull.
- if (ReturnType->isPointerTy() &&
- (!Whitelist || Whitelist->count(AANonNullReturned::ID)))
- registerAA(*new AANonNullReturned(F, InfoCache));
+ STATS_TRACK(AAIsDead, Function);
+ F->replaceAllUsesWith(UndefValue::get(F->getType()));
+ F->eraseFromParent();
+ InternalFns[u] = nullptr;
+ FoundDeadFn = true;
+ }
+ }
}
- // Every argument with pointer type might be marked nonnull.
- for (Argument &Arg : F.args()) {
- if (Arg.getType()->isPointerTy())
- registerAA(*new AANonNullArgument(Arg, InfoCache));
+ if (VerifyMaxFixpointIterations &&
+ IterationCounter != MaxFixpointIterations) {
+ errs() << "\n[Attributor] Fixpoint iteration done after: "
+ << IterationCounter << "/" << MaxFixpointIterations
+ << " iterations\n";
+ llvm_unreachable("The fixpoint was not reached with exactly the number of "
+ "specified iterations!");
}
- // Every function might be "will-return".
- registerAA(*new AAWillReturnFunction(F, InfoCache));
+ return ManifestChange;
+}
+
+void Attributor::initializeInformationCache(Function &F) {
- // Walk all instructions to find more attribute opportunities and also
- // interesting instructions that might be queried by abstract attributes
- // during their initialization or update.
+ // Walk all instructions to find interesting instructions that might be
+ // queried by abstract attributes during their initialization or update.
+ // This has to happen before we create attributes.
auto &ReadOrWriteInsts = InfoCache.FuncRWInstsMap[&F];
auto &InstOpcodeMap = InfoCache.FuncInstOpcodeMap[&F];
@@ -1540,8 +4699,12 @@ void Attributor::identifyDefaultAbstractAttributes(
default:
assert((!ImmutableCallSite(&I)) && (!isa<CallBase>(&I)) &&
"New call site/base instruction type needs to be known int the "
- "attributor.");
+ "Attributor.");
break;
+ case Instruction::Load:
+ // The alignment of a pointer is interesting for loads.
+ case Instruction::Store:
+ // The alignment of a pointer is interesting for stores.
case Instruction::Call:
case Instruction::CallBr:
case Instruction::Invoke:
@@ -1555,18 +4718,154 @@ void Attributor::identifyDefaultAbstractAttributes(
InstOpcodeMap[I.getOpcode()].push_back(&I);
if (I.mayReadOrWriteMemory())
ReadOrWriteInsts.push_back(&I);
+ }
+}
+
+void Attributor::identifyDefaultAbstractAttributes(Function &F) {
+ if (!VisitedFunctions.insert(&F).second)
+ return;
+
+ IRPosition FPos = IRPosition::function(F);
+
+ // Check for dead BasicBlocks in every function.
+ // We need dead instruction detection because we do not want to deal with
+ // broken IR in which SSA rules do not apply.
+ getOrCreateAAFor<AAIsDead>(FPos);
+
+ // Every function might be "will-return".
+ getOrCreateAAFor<AAWillReturn>(FPos);
+ // Every function can be nounwind.
+ getOrCreateAAFor<AANoUnwind>(FPos);
+
+ // Every function might be marked "nosync"
+ getOrCreateAAFor<AANoSync>(FPos);
+
+ // Every function might be "no-free".
+ getOrCreateAAFor<AANoFree>(FPos);
+
+ // Every function might be "no-return".
+ getOrCreateAAFor<AANoReturn>(FPos);
+
+ // Every function might be "no-recurse".
+ getOrCreateAAFor<AANoRecurse>(FPos);
+
+ // Every function might be "readnone/readonly/writeonly/...".
+ getOrCreateAAFor<AAMemoryBehavior>(FPos);
+
+ // Every function might be applicable for Heap-To-Stack conversion.
+ if (EnableHeapToStack)
+ getOrCreateAAFor<AAHeapToStack>(FPos);
+
+ // Return attributes are only appropriate if the return type is non void.
+ Type *ReturnType = F.getReturnType();
+ if (!ReturnType->isVoidTy()) {
+ // Argument attribute "returned" --- Create only one per function even
+ // though it is an argument attribute.
+ getOrCreateAAFor<AAReturnedValues>(FPos);
+
+ IRPosition RetPos = IRPosition::returned(F);
+
+ // Every function might be simplified.
+ getOrCreateAAFor<AAValueSimplify>(RetPos);
+
+ if (ReturnType->isPointerTy()) {
+
+ // Every function with pointer return type might be marked align.
+ getOrCreateAAFor<AAAlign>(RetPos);
+
+ // Every function with pointer return type might be marked nonnull.
+ getOrCreateAAFor<AANonNull>(RetPos);
+
+ // Every function with pointer return type might be marked noalias.
+ getOrCreateAAFor<AANoAlias>(RetPos);
+
+ // Every function with pointer return type might be marked
+ // dereferenceable.
+ getOrCreateAAFor<AADereferenceable>(RetPos);
+ }
+ }
+
+ for (Argument &Arg : F.args()) {
+ IRPosition ArgPos = IRPosition::argument(Arg);
+
+ // Every argument might be simplified.
+ getOrCreateAAFor<AAValueSimplify>(ArgPos);
+
+ if (Arg.getType()->isPointerTy()) {
+ // Every argument with pointer type might be marked nonnull.
+ getOrCreateAAFor<AANonNull>(ArgPos);
+
+ // Every argument with pointer type might be marked noalias.
+ getOrCreateAAFor<AANoAlias>(ArgPos);
+
+ // Every argument with pointer type might be marked dereferenceable.
+ getOrCreateAAFor<AADereferenceable>(ArgPos);
+
+ // Every argument with pointer type might be marked align.
+ getOrCreateAAFor<AAAlign>(ArgPos);
+
+ // Every argument with pointer type might be marked nocapture.
+ getOrCreateAAFor<AANoCapture>(ArgPos);
+
+ // Every argument with pointer type might be marked
+ // "readnone/readonly/writeonly/..."
+ getOrCreateAAFor<AAMemoryBehavior>(ArgPos);
+ }
+ }
+
+ auto CallSitePred = [&](Instruction &I) -> bool {
CallSite CS(&I);
- if (CS && CS.getCalledFunction()) {
+ if (CS.getCalledFunction()) {
for (int i = 0, e = CS.getCalledFunction()->arg_size(); i < e; i++) {
+
+ IRPosition CSArgPos = IRPosition::callsite_argument(CS, i);
+
+ // Call site argument might be simplified.
+ getOrCreateAAFor<AAValueSimplify>(CSArgPos);
+
if (!CS.getArgument(i)->getType()->isPointerTy())
continue;
// Call site argument attribute "non-null".
- registerAA(*new AANonNullCallSiteArgument(CS, i, InfoCache), i);
+ getOrCreateAAFor<AANonNull>(CSArgPos);
+
+ // Call site argument attribute "no-alias".
+ getOrCreateAAFor<AANoAlias>(CSArgPos);
+
+ // Call site argument attribute "dereferenceable".
+ getOrCreateAAFor<AADereferenceable>(CSArgPos);
+
+ // Call site argument attribute "align".
+ getOrCreateAAFor<AAAlign>(CSArgPos);
}
}
- }
+ return true;
+ };
+
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+ bool Success, AnyDead = false;
+ Success = checkForAllInstructionsImpl(
+ OpcodeInstMap, CallSitePred, nullptr, AnyDead,
+ {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call});
+ (void)Success;
+ assert(Success && !AnyDead && "Expected the check call to be successful!");
+
+ auto LoadStorePred = [&](Instruction &I) -> bool {
+ if (isa<LoadInst>(I))
+ getOrCreateAAFor<AAAlign>(
+ IRPosition::value(*cast<LoadInst>(I).getPointerOperand()));
+ else
+ getOrCreateAAFor<AAAlign>(
+ IRPosition::value(*cast<StoreInst>(I).getPointerOperand()));
+ return true;
+ };
+ Success = checkForAllInstructionsImpl(
+ OpcodeInstMap, LoadStorePred, nullptr, AnyDead,
+ {(unsigned)Instruction::Load, (unsigned)Instruction::Store});
+ (void)Success;
+ assert(Success && !AnyDead && "Expected the check call to be successful!");
}
/// Helpers to ease debugging through output streams and print calls.
@@ -1576,21 +4875,39 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, ChangeStatus S) {
return OS << (S == ChangeStatus::CHANGED ? "changed" : "unchanged");
}
-raw_ostream &llvm::operator<<(raw_ostream &OS,
- AbstractAttribute::ManifestPosition AP) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, IRPosition::Kind AP) {
switch (AP) {
- case AbstractAttribute::MP_ARGUMENT:
+ case IRPosition::IRP_INVALID:
+ return OS << "inv";
+ case IRPosition::IRP_FLOAT:
+ return OS << "flt";
+ case IRPosition::IRP_RETURNED:
+ return OS << "fn_ret";
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ return OS << "cs_ret";
+ case IRPosition::IRP_FUNCTION:
+ return OS << "fn";
+ case IRPosition::IRP_CALL_SITE:
+ return OS << "cs";
+ case IRPosition::IRP_ARGUMENT:
return OS << "arg";
- case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
return OS << "cs_arg";
- case AbstractAttribute::MP_FUNCTION:
- return OS << "fn";
- case AbstractAttribute::MP_RETURNED:
- return OS << "fn_ret";
}
llvm_unreachable("Unknown attribute position!");
}
+raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) {
+ const Value &AV = Pos.getAssociatedValue();
+ return OS << "{" << Pos.getPositionKind() << ":" << AV.getName() << " ["
+ << Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}";
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerState &S) {
+ return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")"
+ << static_cast<const AbstractState &>(S);
+}
+
raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractState &S) {
return OS << (!S.isValidState() ? "top" : (S.isAtFixpoint() ? "fix" : ""));
}
@@ -1601,8 +4918,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) {
}
void AbstractAttribute::print(raw_ostream &OS) const {
- OS << "[" << getManifestPosition() << "][" << getAsStr() << "]["
- << AnchoredVal.getName() << "]";
+ OS << "[P: " << getIRPosition() << "][" << getAsStr() << "][S: " << getState()
+ << "]";
}
///}
@@ -1610,7 +4927,7 @@ void AbstractAttribute::print(raw_ostream &OS) const {
/// Pass (Manager) Boilerplate
/// ----------------------------------------------------------------------------
-static bool runAttributorOnModule(Module &M) {
+static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) {
if (DisableAttributor)
return false;
@@ -1619,39 +4936,39 @@ static bool runAttributorOnModule(Module &M) {
// Create an Attributor and initially empty information cache that is filled
// while we identify default attribute opportunities.
- Attributor A;
- InformationCache InfoCache;
+ InformationCache InfoCache(M, AG);
+ Attributor A(InfoCache, DepRecInterval);
+
+ for (Function &F : M)
+ A.initializeInformationCache(F);
for (Function &F : M) {
- // TODO: Not all attributes require an exact definition. Find a way to
- // enable deduction for some but not all attributes in case the
- // definition might be changed at runtime, see also
- // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
- // TODO: We could always determine abstract attributes and if sufficient
- // information was found we could duplicate the functions that do not
- // have an exact definition.
- if (!F.hasExactDefinition()) {
+ if (F.hasExactDefinition())
+ NumFnWithExactDefinition++;
+ else
NumFnWithoutExactDefinition++;
- continue;
- }
- // For now we ignore naked and optnone functions.
- if (F.hasFnAttribute(Attribute::Naked) ||
- F.hasFnAttribute(Attribute::OptimizeNone))
- continue;
-
- NumFnWithExactDefinition++;
+ // We look at internal functions only on-demand but if any use is not a
+ // direct call, we have to do it eagerly.
+ if (F.hasLocalLinkage()) {
+ if (llvm::all_of(F.uses(), [](const Use &U) {
+ return ImmutableCallSite(U.getUser()) &&
+ ImmutableCallSite(U.getUser()).isCallee(&U);
+ }))
+ continue;
+ }
// Populate the Attributor with abstract attribute opportunities in the
// function and the information cache with IR information.
- A.identifyDefaultAbstractAttributes(F, InfoCache);
+ A.identifyDefaultAbstractAttributes(F);
}
- return A.run() == ChangeStatus::CHANGED;
+ return A.run(M) == ChangeStatus::CHANGED;
}
PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
- if (runAttributorOnModule(M)) {
+ AnalysisGetter AG(AM);
+ if (runAttributorOnModule(M, AG)) {
// FIXME: Think about passes we will preserve and add them here.
return PreservedAnalyses::none();
}
@@ -1670,12 +4987,14 @@ struct AttributorLegacyPass : public ModulePass {
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
- return runAttributorOnModule(M);
+
+ AnalysisGetter AG;
+ return runAttributorOnModule(M, AG);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
// FIXME: Think about passes we will preserve and add them here.
- AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
@@ -1684,7 +5003,147 @@ struct AttributorLegacyPass : public ModulePass {
Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); }
char AttributorLegacyPass::ID = 0;
+
+const char AAReturnedValues::ID = 0;
+const char AANoUnwind::ID = 0;
+const char AANoSync::ID = 0;
+const char AANoFree::ID = 0;
+const char AANonNull::ID = 0;
+const char AANoRecurse::ID = 0;
+const char AAWillReturn::ID = 0;
+const char AANoAlias::ID = 0;
+const char AANoReturn::ID = 0;
+const char AAIsDead::ID = 0;
+const char AADereferenceable::ID = 0;
+const char AAAlign::ID = 0;
+const char AANoCapture::ID = 0;
+const char AAValueSimplify::ID = 0;
+const char AAHeapToStack::ID = 0;
+const char AAMemoryBehavior::ID = 0;
+
+// Macro magic to create the static generator function for attributes that
+// follow the naming scheme.
+
+#define SWITCH_PK_INV(CLASS, PK, POS_NAME) \
+ case IRPosition::PK: \
+ llvm_unreachable("Cannot create " #CLASS " for a " POS_NAME " position!");
+
+#define SWITCH_PK_CREATE(CLASS, IRP, PK, SUFFIX) \
+ case IRPosition::PK: \
+ AA = new CLASS##SUFFIX(IRP); \
+ break;
+
+#define CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \
+ SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \
+ SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_FUNCTION, "function") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \
+ SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \
+ SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
+ } \
+ return *AA; \
+ }
+
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
+
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
+
+CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
+
+CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
+
+CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
+
+#undef CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef SWITCH_PK_CREATE
+#undef SWITCH_PK_INV
+
INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor",
"Deduce and propagate attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AttributorLegacyPass, "attributor",
"Deduce and propagate attributes", false, false)
diff --git a/lib/Transforms/IPO/BlockExtractor.cpp b/lib/Transforms/IPO/BlockExtractor.cpp
index 6c365f3f3cbe..de80c88c1591 100644
--- a/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/lib/Transforms/IPO/BlockExtractor.cpp
@@ -119,6 +119,8 @@ void BlockExtractor::loadFile() {
/*KeepEmpty=*/false);
if (LineSplit.empty())
continue;
+ if (LineSplit.size() != 2)
+ report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'");
SmallVector<StringRef, 4> BBNames;
LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
@@ -204,7 +206,8 @@ bool BlockExtractor::runOnModule(Module &M) {
++NumExtracted;
Changed = true;
}
- Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion();
+ CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent());
+ Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC);
if (F)
LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
<< "' in: " << F->getName() << '\n');
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index ad877ae1786c..3cf839e397f8 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -48,7 +48,7 @@ static void FindUsedValues(GlobalVariable *LLVMUsed,
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
- Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases();
+ Value *Operand = Inits->getOperand(i)->stripPointerCasts();
GlobalValue *GV = cast<GlobalValue>(Operand);
UsedValues.insert(GV);
}
@@ -120,7 +120,7 @@ static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) {
// Bump the alignment if necessary.
if (Old->getAlignment() || New->getAlignment())
- New->setAlignment(std::max(getAlignment(Old), getAlignment(New)));
+ New->setAlignment(Align(std::max(getAlignment(Old), getAlignment(New))));
copyDebugLocMetadata(Old, New);
Old->replaceAllUsesWith(NewConstant);
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp
index e30b33aa4872..e20159ba0db5 100644
--- a/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -84,13 +84,9 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
for (GlobalObject &GO : M.global_objects()) {
Types.clear();
GO.getMetadata(LLVMContext::MD_type, Types);
- for (MDNode *Type : Types) {
- // Sanity check. GO must not be a function declaration.
- assert(!isa<Function>(&GO) || !cast<Function>(&GO)->isDeclaration());
-
+ for (MDNode *Type : Types)
if (ConstantInt *TypeId = extractNumericTypeId(Type))
TypeIds.insert(TypeId->getZExtValue());
- }
}
NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
@@ -108,11 +104,11 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
FunctionCallee C = M.getOrInsertFunction(
"__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
- Function *F = dyn_cast<Function>(C.getCallee());
+ Function *F = cast<Function>(C.getCallee());
// Take over the existing function. The frontend emits a weak stub so that the
// linker knows about the symbol; this pass replaces the function body.
F->deleteBody();
- F->setAlignment(4096);
+ F->setAlignment(Align(4096));
Triple T(M.getTargetTriple());
if (T.isARM() || T.isThumb())
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 5ccd8bc4b0fb..b174c63a577b 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -78,11 +78,8 @@ STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
-// FIXME: This is disabled by default to avoid exposing security vulnerabilities
-// in C/C++ code compiled by clang:
-// http://lists.llvm.org/pipermail/cfe-dev/2017-January/052066.html
static cl::opt<bool> EnableNonnullArgPropagation(
- "enable-nonnull-arg-prop", cl::Hidden,
+ "enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
cl::desc("Try to propagate nonnull argument attributes from callsites to "
"caller functions."));
@@ -664,6 +661,25 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
return Changed;
}
+static bool addReadAttr(Argument *A, Attribute::AttrKind R) {
+ assert((R == Attribute::ReadOnly || R == Attribute::ReadNone)
+ && "Must be a Read attribute.");
+ assert(A && "Argument must not be null.");
+
+ // If the argument already has the attribute, nothing needs to be done.
+ if (A->hasAttribute(R))
+ return false;
+
+ // Otherwise, remove potentially conflicting attribute, add the new one,
+ // and update statistics.
+ A->removeAttr(Attribute::WriteOnly);
+ A->removeAttr(Attribute::ReadOnly);
+ A->removeAttr(Attribute::ReadNone);
+ A->addAttr(R);
+ R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
+ return true;
+}
+
/// Deduce nocapture attributes for the SCC.
static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
@@ -732,11 +748,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
SmallPtrSet<Argument *, 8> Self;
Self.insert(&*A);
Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
- if (R != Attribute::None) {
- A->addAttr(R);
- Changed = true;
- R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
- }
+ if (R != Attribute::None)
+ Changed = addReadAttr(A, R);
}
}
}
@@ -833,12 +846,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (ReadAttr != Attribute::None) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- // Clear out existing readonly/readnone attributes
- A->removeAttr(Attribute::ReadOnly);
- A->removeAttr(Attribute::ReadNone);
- A->addAttr(ReadAttr);
- ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
- Changed = true;
+ Changed = addReadAttr(A, ReadAttr);
}
}
}
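
The new addReadAttr helper above factors out one idiom: readnone, readonly and writeonly are mutually exclusive, so the conflicting ones are removed before the deduced one is added, and statistics are bumped only on a real change. Here is a self-contained bitmask sketch of that idiom; the enum and function are illustrative, not LLVM's Attribute API.

#include <cassert>
#include <cstdio>

enum MemAttr : unsigned {
  None = 0,
  ReadNone = 1 << 0,
  ReadOnly = 1 << 1,
  WriteOnly = 1 << 2,
};

static bool addReadAttr(unsigned &ArgAttrs, MemAttr R) {
  assert((R == ReadOnly || R == ReadNone) && "Must be a read attribute");
  if (ArgAttrs & R)
    return false; // Already present, nothing to do.
  // The three memory attributes are mutually exclusive, so clear all of them
  // before setting the requested one.
  ArgAttrs &= ~(ReadNone | ReadOnly | WriteOnly);
  ArgAttrs |= R;
  return true;
}

int main() {
  unsigned Attrs = WriteOnly;
  bool Changed = addReadAttr(Attrs, ReadOnly);
  std::printf("changed: %d, readonly set: %d, writeonly cleared: %d\n",
              Changed, (Attrs & ReadOnly) != 0, (Attrs & WriteOnly) == 0);
  return 0;
}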
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 62c7fbd07223..3f5cc078d75f 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -450,7 +450,7 @@ static void computeImportForFunction(
} else if (PrintImportFailures) {
assert(!FailureInfo &&
"Expected no FailureInfo for newly rejected candidate");
- FailureInfo = llvm::make_unique<FunctionImporter::ImportFailureInfo>(
+ FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
VI, Edge.second.getHotness(), Reason, 1);
}
LLVM_DEBUG(
@@ -764,7 +764,7 @@ void llvm::computeDeadSymbols(
}
// Make value live and add it to the worklist if it was not live before.
- auto visit = [&](ValueInfo VI) {
+ auto visit = [&](ValueInfo VI, bool IsAliasee) {
// FIXME: If we knew which edges were created for indirect call profiles,
// we could skip them here. Any that are live should be reached via
// other edges, e.g. reference edges. Otherwise, using a profile collected
@@ -800,12 +800,15 @@ void llvm::computeDeadSymbols(
Interposable = true;
}
- if (!KeepAliveLinkage)
- return;
+ if (!IsAliasee) {
+ if (!KeepAliveLinkage)
+ return;
- if (Interposable)
- report_fatal_error(
- "Interposable and available_externally/linkonce_odr/weak_odr symbol");
+ if (Interposable)
+ report_fatal_error(
+ "Interposable and available_externally/linkonce_odr/weak_odr "
+ "symbol");
+ }
}
for (auto &S : VI.getSummaryList())
@@ -821,16 +824,16 @@ void llvm::computeDeadSymbols(
// If this is an alias, visit the aliasee VI to ensure that all copies
// are marked live and it is added to the worklist for further
// processing of its references.
- visit(AS->getAliaseeVI());
+ visit(AS->getAliaseeVI(), true);
continue;
}
Summary->setLive(true);
for (auto Ref : Summary->refs())
- visit(Ref);
+ visit(Ref, false);
if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
for (auto Call : FS->calls())
- visit(Call.first);
+ visit(Call.first, false);
}
}
Index.setWithGlobalValueDeadStripping();
@@ -892,7 +895,7 @@ std::error_code llvm::EmitImportsFiles(
StringRef ModulePath, StringRef OutputFilename,
const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
std::error_code EC;
- raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
if (EC)
return EC;
for (auto &ILI : ModuleToSummariesForIndex)
@@ -948,23 +951,15 @@ void llvm::thinLTOResolvePrevailingInModule(
auto NewLinkage = GS->second->linkage();
if (NewLinkage == GV.getLinkage())
return;
-
- // Switch the linkage to weakany if asked for, e.g. we do this for
- // linker redefined symbols (via --wrap or --defsym).
- // We record that the visibility should be changed here in `addThinLTO`
- // as we need access to the resolution vectors for each input file in
- // order to find which symbols have been redefined.
- // We may consider reorganizing this code and moving the linkage recording
- // somewhere else, e.g. in thinLTOResolvePrevailingInIndex.
- if (NewLinkage == GlobalValue::WeakAnyLinkage) {
- GV.setLinkage(NewLinkage);
- return;
- }
-
if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
+ // Don't internalize anything here, because the code below
+ // lacks necessary correctness checks. Leave this job to
+ // the LLVM 'internalize' pass.
+ GlobalValue::isLocalLinkage(NewLinkage) ||
// In case it was dead and already converted to declaration.
GV.isDeclaration())
return;
+
// Check for a non-prevailing def that has interposable linkage
// (e.g. non-odr weak or linkonce). In that case we can't simply
// convert to available_externally, since it would lose the
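
The computeDeadSymbols changes above only refine how aliasees are visited; the underlying algorithm is still a mark-live worklist over the summary graph. Below is a self-contained sketch of that propagation, with the linkage and aliasee policy details omitted and all names illustrative.

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Symbol -> referenced symbols (calls and references merged for brevity).
  std::map<std::string, std::vector<std::string>> Refs = {
      {"main", {"foo", "bar"}}, {"foo", {"baz"}}, {"bar", {}},
      {"baz", {}},              {"unused", {"baz"}}};

  std::set<std::string> Live;
  std::vector<std::string> Worklist = {"main"}; // Preserved roots.
  while (!Worklist.empty()) {
    std::string S = Worklist.back();
    Worklist.pop_back();
    if (!Live.insert(S).second)
      continue; // Already marked live.
    for (const std::string &R : Refs[S])
      Worklist.push_back(R);
  }

  for (const auto &KV : Refs)
    std::printf("%s: %s\n", KV.first.c_str(),
                Live.count(KV.first) ? "live" : "dead");
  return 0;
}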
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 86b7f3e49ee6..f010f7b703a6 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -17,9 +17,11 @@
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
@@ -29,10 +31,15 @@ using namespace llvm;
#define DEBUG_TYPE "globaldce"
+static cl::opt<bool>
+ ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+ cl::desc("Enable virtual function elimination"));
+
STATISTIC(NumAliases , "Number of global aliases removed");
STATISTIC(NumFunctions, "Number of functions removed");
STATISTIC(NumIFuncs, "Number of indirect functions removed");
STATISTIC(NumVariables, "Number of global variables removed");
+STATISTIC(NumVFuncs, "Number of virtual functions removed");
namespace {
class GlobalDCELegacyPass : public ModulePass {
@@ -118,6 +125,15 @@ void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) {
ComputeDependencies(User, Deps);
Deps.erase(&GV); // Remove self-reference.
for (GlobalValue *GVU : Deps) {
+ // If this is a dep from a vtable to a virtual function, and we have
+ // complete information about all virtual call sites which could call
+ // through this vtable, then skip it, because the call site information will
+ // be more precise.
+ if (VFESafeVTables.count(GVU) && isa<Function>(&GV)) {
+ LLVM_DEBUG(dbgs() << "Ignoring dep " << GVU->getName() << " -> "
+ << GV.getName() << "\n");
+ continue;
+ }
GVDependencies[GVU].insert(&GV);
}
}
@@ -132,12 +148,133 @@ void GlobalDCEPass::MarkLive(GlobalValue &GV,
if (Updates)
Updates->push_back(&GV);
if (Comdat *C = GV.getComdat()) {
- for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
MarkLive(*CM.second, Updates); // Recursion depth is only two because only
// globals in the same comdat are visited.
+ }
+ }
+}
+
+void GlobalDCEPass::ScanVTables(Module &M) {
+ SmallVector<MDNode *, 2> Types;
+ LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n");
+
+ auto *LTOPostLinkMD =
+ cast_or_null<ConstantAsMetadata>(M.getModuleFlag("LTOPostLink"));
+ bool LTOPostLink =
+ LTOPostLinkMD &&
+ (cast<ConstantInt>(LTOPostLinkMD->getValue())->getZExtValue() != 0);
+
+ for (GlobalVariable &GV : M.globals()) {
+ Types.clear();
+ GV.getMetadata(LLVMContext::MD_type, Types);
+ if (GV.isDeclaration() || Types.empty())
+ continue;
+
+ // Use the typeid metadata on the vtable to build a mapping from typeids to
+ // the list of (GV, offset) pairs which are the possible vtables for that
+ // typeid.
+ for (MDNode *Type : Types) {
+ Metadata *TypeID = Type->getOperand(1).get();
+
+ uint64_t Offset =
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
+
+ TypeIdMap[TypeID].insert(std::make_pair(&GV, Offset));
+ }
+
+ // If the type corresponding to the vtable is private to this translation
+ // unit, we know that we can see all virtual functions which might use it,
+ // so VFE is safe.
+ if (auto GO = dyn_cast<GlobalObject>(&GV)) {
+ GlobalObject::VCallVisibility TypeVis = GO->getVCallVisibility();
+ if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit ||
+ (LTOPostLink &&
+ TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) {
+ LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n");
+ VFESafeVTables.insert(&GV);
+ }
+ }
+ }
+}
+
+void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId,
+ uint64_t CallOffset) {
+ for (auto &VTableInfo : TypeIdMap[TypeId]) {
+ GlobalVariable *VTable = VTableInfo.first;
+ uint64_t VTableOffset = VTableInfo.second;
+
+ Constant *Ptr =
+ getPointerAtOffset(VTable->getInitializer(), VTableOffset + CallOffset,
+ *Caller->getParent());
+ if (!Ptr) {
+ LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n");
+ VFESafeVTables.erase(VTable);
+ return;
+ }
+
+ auto Callee = dyn_cast<Function>(Ptr->stripPointerCasts());
+ if (!Callee) {
+ LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n");
+ VFESafeVTables.erase(VTable);
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> "
+ << Callee->getName() << "\n");
+ GVDependencies[Caller].insert(Callee);
}
}
+void GlobalDCEPass::ScanTypeCheckedLoadIntrinsics(Module &M) {
+ LLVM_DEBUG(dbgs() << "Scanning type.checked.load intrinsics\n");
+ Function *TypeCheckedLoadFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
+
+ if (!TypeCheckedLoadFunc)
+ return;
+
+ for (auto U : TypeCheckedLoadFunc->users()) {
+ auto CI = dyn_cast<CallInst>(U);
+ if (!CI)
+ continue;
+
+ auto *Offset = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ Value *TypeIdValue = CI->getArgOperand(2);
+ auto *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
+
+ if (Offset) {
+ ScanVTableLoad(CI->getFunction(), TypeId, Offset->getZExtValue());
+ } else {
+ // type.checked.load with a non-constant offset, so assume every entry in
+ // every matching vtable is used.
+ for (auto &VTableInfo : TypeIdMap[TypeId]) {
+ VFESafeVTables.erase(VTableInfo.first);
+ }
+ }
+ }
+}
+
+void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) {
+ if (!ClEnableVFE)
+ return;
+
+ ScanVTables(M);
+
+ if (VFESafeVTables.empty())
+ return;
+
+ ScanTypeCheckedLoadIntrinsics(M);
+
+ LLVM_DEBUG(
+ dbgs() << "VFE safe vtables:\n";
+ for (auto *VTable : VFESafeVTables)
+ dbgs() << " " << VTable->getName() << "\n";
+ );
+}
+
PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
bool Changed = false;
@@ -163,6 +300,10 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
if (Comdat *C = GA.getComdat())
ComdatMembers.insert(std::make_pair(C, &GA));
+ // Add dependencies between virtual call sites and the virtual functions they
+ // might call, if we have that information.
+ AddVirtualFunctionDependencies(M);
+
// Loop over the module, adding globals which are obviously necessary.
for (GlobalObject &GO : M.global_objects()) {
Changed |= RemoveUnusedGlobalValue(GO);
@@ -257,8 +398,17 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
};
NumFunctions += DeadFunctions.size();
- for (Function *F : DeadFunctions)
+ for (Function *F : DeadFunctions) {
+ if (!F->use_empty()) {
+ // Virtual functions might still be referenced by one or more vtables,
+ // but if we've proven them to be unused then it's safe to replace the
+ // virtual function pointers with null, allowing us to remove the
+ // function itself.
+ ++NumVFuncs;
+ F->replaceNonMetadataUsesWith(ConstantPointerNull::get(F->getType()));
+ }
EraseUnusedGlobalValue(F);
+ }
NumVariables += DeadGlobalVars.size();
for (GlobalVariable *GV : DeadGlobalVars)
@@ -277,6 +427,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
ConstantDependenciesCache.clear();
GVDependencies.clear();
ComdatMembers.clear();
+ TypeIdMap.clear();
+ VFESafeVTables.clear();
if (Changed)
return PreservedAnalyses::none();
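
The virtual function elimination added to GlobalDCE above hinges on two steps: type metadata on vtables yields a typeid -> (vtable, offset) map, and each llvm.type.checked.load at a constant offset then pins down exactly which vtable slots (and therefore which virtual functions) can be reached. The sketch below mimics that resolution step with plain data structures; the vtable contents, typeid strings, and the slot-as-offset simplification are all illustrative assumptions.

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

struct VTable {
  std::string Name;
  std::vector<std::string> Slots; // Function names at consecutive slots.
};

int main() {
  VTable VTA{"_ZTV1A", {"A::f", "A::g"}};
  VTable VTB{"_ZTV1B", {"B::f", "B::g"}};

  // typeid metadata -> possible (vtable, base offset) pairs (TypeIdMap above).
  std::map<std::string, std::vector<std::pair<VTable *, unsigned>>> TypeIdMap;
  TypeIdMap["_ZTS1A"] = {{&VTA, 0}, {&VTB, 0}}; // B derives from A.

  // A call site with a constant offset, standing in for
  // llvm.type.checked.load(%vtable, <offset>, "_ZTS1A").
  unsigned CallOffset = 1; // Slot index used in place of a byte offset.

  // Every vtable compatible with the typeid keeps the function at that slot
  // alive (ScanVTableLoad above records caller -> callee dependencies instead).
  std::set<std::string> LiveVirtualFns;
  for (auto &VTI : TypeIdMap["_ZTS1A"]) {
    unsigned Slot = VTI.second + CallOffset;
    if (Slot < VTI.first->Slots.size())
      LiveVirtualFns.insert(VTI.first->Slots[Slot]);
  }

  for (const std::string &Fn : LiveVirtualFns)
    std::printf("kept alive: %s\n", Fn.c_str());
  // A::f and B::f are never reached through this typeid at that offset, so a
  // VFE-style cleanup could drop them if nothing else uses them.
  return 0;
}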
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index c4fb3ce77f6e..819715b9f8da 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -155,7 +155,8 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) {
/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
-static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
+static bool IsSafeComputationToRemove(
+ Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
do {
if (isa<Constant>(V))
return true;
@@ -164,7 +165,7 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
isa<GlobalValue>(V))
return false;
- if (isAllocationFn(V, TLI))
+ if (isAllocationFn(V, GetTLI))
return true;
Instruction *I = cast<Instruction>(V);
@@ -184,8 +185,9 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
/// This GV is a pointer root. Loop over all users of the global and clean up
/// any that obviously don't assign the global a value that isn't dynamically
/// allocated.
-static bool CleanupPointerRootUsers(GlobalVariable *GV,
- const TargetLibraryInfo *TLI) {
+static bool
+CleanupPointerRootUsers(GlobalVariable *GV,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// A brief explanation of leak checkers. The goal is to find bugs where
// pointers are forgotten, causing an accumulating growth in memory
// usage over time. The common strategy for leak checkers is to whitelist the
@@ -241,18 +243,18 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
Dead.clear();
- CleanupPointerRootUsers(GV, TLI);
+ CleanupPointerRootUsers(GV, GetTLI);
return true;
}
}
}
for (int i = 0, e = Dead.size(); i != e; ++i) {
- if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
+ if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
Dead[i].second->eraseFromParent();
Instruction *I = Dead[i].first;
do {
- if (isAllocationFn(I, TLI))
+ if (isAllocationFn(I, GetTLI))
break;
Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
if (!J)
@@ -270,9 +272,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
/// We just marked GV constant. Loop over all users of the global, cleaning up
/// the obvious ones. This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft. This returns true if it made a change.
-static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
- const DataLayout &DL,
- TargetLibraryInfo *TLI) {
+static bool CleanupConstantGlobalUsers(
+ Value *V, Constant *Init, const DataLayout &DL,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
bool Changed = false;
// Note that we need to use a weak value handle for the worklist items. When
// we delete a constant array, we may also be holding pointer to one of its
@@ -302,12 +304,12 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = nullptr;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
- Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI);
} else if ((CE->getOpcode() == Instruction::BitCast &&
CE->getType()->isPointerTy()) ||
CE->getOpcode() == Instruction::AddrSpaceCast) {
// Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI);
+ Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI);
}
if (CE->use_empty()) {
@@ -321,7 +323,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = nullptr;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
- ConstantFoldInstruction(GEP, DL, TLI));
+ ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction())));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -331,7 +333,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
SubInit = Constant::getNullValue(GEP->getResultElementType());
}
- Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI);
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI);
if (GEP->use_empty()) {
GEP->eraseFromParent();
@@ -348,7 +350,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// us, and if they are all dead, nuke them without remorse.
if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
- CleanupConstantGlobalUsers(V, Init, DL, TLI);
+ CleanupConstantGlobalUsers(V, Init, DL, GetTLI);
return true;
}
}
@@ -495,8 +497,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
uint64_t FieldOffset = Layout.getElementOffset(i);
- unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
- if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i)))
+ Align NewAlign(MinAlign(StartAlignment, FieldOffset));
+ if (NewAlign > Align(DL.getABITypeAlignment(STy->getElementType(i))))
NGV->setAlignment(NewAlign);
// Copy over the debug info for the variable.
@@ -511,7 +513,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
NewGlobals.reserve(NumElements);
auto ElTy = STy->getElementType();
uint64_t EltSize = DL.getTypeAllocSize(ElTy);
- unsigned EltAlign = DL.getABITypeAlignment(ElTy);
+ Align EltAlign(DL.getABITypeAlignment(ElTy));
uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Constant *In = Init->getAggregateElement(i);
@@ -530,7 +532,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
- unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
+ Align NewAlign(MinAlign(StartAlignment, EltSize * i));
if (NewAlign > EltAlign)
NGV->setAlignment(NewAlign);
transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits,
@@ -745,9 +747,9 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
/// value, so we can optimize away the load.
-static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
- const DataLayout &DL,
- TargetLibraryInfo *TLI) {
+static bool OptimizeAwayTrappingUsesOfLoads(
+ GlobalVariable *GV, Constant *LV, const DataLayout &DL,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
bool Changed = false;
// Keep track of whether we are able to remove all the uses of the global
@@ -793,10 +795,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
// nor is the global.
if (AllNonStoreUsesGone) {
if (isLeakCheckerRoot(GV)) {
- Changed |= CleanupPointerRootUsers(GV, TLI);
+ Changed |= CleanupPointerRootUsers(GV, GetTLI);
} else {
Changed = true;
- CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
+ CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI);
}
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
@@ -889,8 +891,8 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
while (!GV->use_empty()) {
if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
// The global is initialized when the store to it occurs.
- new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
- SI->getOrdering(), SI->getSyncScopeID(), SI);
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false,
+ None, SI->getOrdering(), SI->getSyncScopeID(), SI);
SI->eraseFromParent();
continue;
}
@@ -907,7 +909,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// Replace the cmp X, 0 with a use of the bool value.
// Sink the load to where the compare was, if atomic rules allow us to.
Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
- InitBool->getName() + ".val", false, 0,
+ InitBool->getName() + ".val", false, None,
LI->getOrdering(), LI->getSyncScopeID(),
LI->isUnordered() ? (Instruction *)ICI : LI);
InitBoolUsed = true;
@@ -1562,10 +1564,10 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
// Try to optimize globals based on the knowledge that only one value (besides
// its initializer) is ever stored to the global.
-static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
- AtomicOrdering Ordering,
- const DataLayout &DL,
- TargetLibraryInfo *TLI) {
+static bool
+optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+ AtomicOrdering Ordering, const DataLayout &DL,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1583,9 +1585,10 @@ static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
- if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI))
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
return true;
- } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
+ } else if (CallInst *CI = extractMallocCall(StoredOnceVal, GetTLI)) {
+ auto *TLI = &GetTLI(*CI->getFunction());
Type *MallocType = getMallocAllocatedType(CI, TLI);
if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
Ordering, DL, TLI))
@@ -1643,10 +1646,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
// instead of a select to synthesize the desired value.
bool IsOneZero = false;
bool EmitOneOrZero = true;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)){
+ auto *CI = dyn_cast<ConstantInt>(OtherVal);
+ if (CI && CI->getValue().getActiveBits() <= 64) {
IsOneZero = InitVal->isNullValue() && CI->isOne();
- if (ConstantInt *CIInit = dyn_cast<ConstantInt>(GV->getInitializer())){
+ auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
+ if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
uint64_t ValInit = CIInit->getZExtValue();
uint64_t ValOther = CI->getZExtValue();
uint64_t ValMinus = ValOther - ValInit;
@@ -1711,7 +1716,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
assert(LI->getOperand(0) == GV && "Not a copy!");
// Insert a new load, to preserve the saved value.
StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
- LI->getName() + ".b", false, 0,
+ LI->getName() + ".b", false, None,
LI->getOrdering(), LI->getSyncScopeID(), LI);
} else {
assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
@@ -1721,15 +1726,15 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
}
}
StoreInst *NSI =
- new StoreInst(StoreVal, NewGV, false, 0, SI->getOrdering(),
+ new StoreInst(StoreVal, NewGV, false, None, SI->getOrdering(),
SI->getSyncScopeID(), SI);
NSI->setDebugLoc(SI->getDebugLoc());
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
- LoadInst *NLI =
- new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b",
- false, 0, LI->getOrdering(), LI->getSyncScopeID(), LI);
+ LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
+ LI->getName() + ".b", false, None,
+ LI->getOrdering(), LI->getSyncScopeID(), LI);
Instruction *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
@@ -1914,9 +1919,10 @@ static void makeAllConstantUsesInstructions(Constant *C) {
/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
-static bool processInternalGlobal(
- GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI,
- function_ref<DominatorTree &(Function &)> LookupDomTree) {
+static bool
+processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
+ function_ref<DominatorTree &(Function &)> LookupDomTree) {
auto &DL = GV->getParent()->getDataLayout();
// If this is a first class global and has only one accessing function and
// this function is non-recursive, we replace the global with a local alloca
@@ -1963,11 +1969,12 @@ static bool processInternalGlobal(
bool Changed;
if (isLeakCheckerRoot(GV)) {
// Delete any constant stores to the global.
- Changed = CleanupPointerRootUsers(GV, TLI);
+ Changed = CleanupPointerRootUsers(GV, GetTLI);
} else {
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
- Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
+ Changed =
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
}
// If the global is dead now, delete it.
@@ -1989,7 +1996,7 @@ static bool processInternalGlobal(
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -2019,7 +2026,7 @@ static bool processInternalGlobal(
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
@@ -2033,7 +2040,8 @@ static bool processInternalGlobal(
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI))
+ if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL,
+ GetTLI))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
@@ -2054,7 +2062,8 @@ static bool processInternalGlobal(
/// Analyze the specified global variable and optimize it if possible. If we
/// make a change, return true.
static bool
-processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI,
+processGlobal(GlobalValue &GV,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
if (GV.getName().startswith("llvm."))
return false;
@@ -2086,7 +2095,7 @@ processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI,
if (GVar->isConstant() || !GVar->hasInitializer())
return Changed;
- return processInternalGlobal(GVar, GS, TLI, LookupDomTree) || Changed;
+ return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed;
}
/// Walk all of the direct calls of the specified function, changing them to
@@ -2234,7 +2243,8 @@ hasOnlyColdCalls(Function &F,
}
static bool
-OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
+OptimizeFunctions(Module &M,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
@@ -2275,17 +2285,13 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
// So, remove unreachable blocks from the function, because a) there's
// no point in analyzing them and b) GlobalOpt should otherwise grow
// some more complicated logic to break these cycles.
- // Removing unreachable blocks might invalidate the dominator so we
- // recalculate it.
if (!F->isDeclaration()) {
- if (removeUnreachableBlocks(*F)) {
- auto &DT = LookupDomTree(*F);
- DT.recalculate(*F);
- Changed = true;
- }
+ auto &DT = LookupDomTree(*F);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ Changed |= removeUnreachableBlocks(*F, &DTU);
}
- Changed |= processGlobal(*F, TLI, LookupDomTree);
+ Changed |= processGlobal(*F, GetTLI, LookupDomTree);
if (!F->hasLocalLinkage())
continue;
@@ -2342,7 +2348,8 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
}
static bool
-OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,
+OptimizeGlobalVars(Module &M,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
bool Changed = false;
@@ -2357,7 +2364,10 @@ OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,
if (GV->hasInitializer())
if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
auto &DL = M.getDataLayout();
- Constant *New = ConstantFoldConstant(C, DL, TLI);
+ // TLI is not used in the case of a Constant, so use default nullptr
+ // for that optional parameter, since we don't have a Function to
+ // provide GetTLI anyway.
+ Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
if (New && New != C)
GV->setInitializer(New);
}
@@ -2367,7 +2377,7 @@ OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,
continue;
}
- Changed |= processGlobal(*GV, TLI, LookupDomTree);
+ Changed |= processGlobal(*GV, GetTLI, LookupDomTree);
}
return Changed;
}
@@ -2581,8 +2591,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
}
static int compareNames(Constant *const *A, Constant *const *B) {
- Value *AStripped = (*A)->stripPointerCastsNoFollowAliases();
- Value *BStripped = (*B)->stripPointerCastsNoFollowAliases();
+ Value *AStripped = (*A)->stripPointerCasts();
+ Value *BStripped = (*B)->stripPointerCasts();
return AStripped->getName().compare(BStripped->getName());
}
@@ -2809,7 +2819,14 @@ OptimizeGlobalAliases(Module &M,
return Changed;
}
-static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
+static Function *
+FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
+ // Hack to get a default TLI before we have an actual Function.
+ auto FuncIter = M.begin();
+ if (FuncIter == M.end())
+ return nullptr;
+ auto *TLI = &GetTLI(*FuncIter);
+
LibFunc F = LibFunc_cxa_atexit;
if (!TLI->has(F))
return nullptr;
@@ -2818,6 +2835,9 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
if (!Fn)
return nullptr;
+ // Now get the actual TLI for Fn.
+ TLI = &GetTLI(*Fn);
+
// Make sure that the function has the correct prototype.
if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
return nullptr;
@@ -2889,7 +2909,8 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
}
static bool optimizeGlobalsInModule(
- Module &M, const DataLayout &DL, TargetLibraryInfo *TLI,
+ Module &M, const DataLayout &DL,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
@@ -2914,24 +2935,24 @@ static bool optimizeGlobalsInModule(
NotDiscardableComdats.insert(C);
// Delete functions that are trivially dead, ccc -> fastcc
- LocalChange |= OptimizeFunctions(M, TLI, GetTTI, GetBFI, LookupDomTree,
+ LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
NotDiscardableComdats);
// Optimize global_ctors list.
LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
- return EvaluateStaticConstructor(F, DL, TLI);
+ return EvaluateStaticConstructor(F, DL, &GetTLI(*F));
});
// Optimize non-address-taken globals.
- LocalChange |= OptimizeGlobalVars(M, TLI, LookupDomTree,
- NotDiscardableComdats);
+ LocalChange |=
+ OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats);
// Resolve aliases, when possible.
LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
// Try to remove trivial global destructors if they are not removed
// already.
- Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
+ Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
if (CXAAtExitFn)
LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
@@ -2946,12 +2967,14 @@ static bool optimizeGlobalsInModule(
PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &DL = M.getDataLayout();
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
auto &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
return FAM.getResult<DominatorTreeAnalysis>(F);
};
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
return FAM.getResult<TargetIRAnalysis>(F);
};
@@ -2960,7 +2983,7 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
return FAM.getResult<BlockFrequencyAnalysis>(F);
};
- if (!optimizeGlobalsInModule(M, DL, &TLI, GetTTI, GetBFI, LookupDomTree))
+ if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
@@ -2979,10 +3002,12 @@ struct GlobalOptLegacyPass : public ModulePass {
return false;
auto &DL = M.getDataLayout();
- auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto LookupDomTree = [this](Function &F) -> DominatorTree & {
return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
};
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
};
@@ -2991,7 +3016,8 @@ struct GlobalOptLegacyPass : public ModulePass {
return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return optimizeGlobalsInModule(M, DL, TLI, GetTTI, GetBFI, LookupDomTree);
+ return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI,
+ LookupDomTree);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
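The GlobalOpt hunks above convert the pass from a single module-level TargetLibraryInfo pointer to a per-function callback. A minimal sketch of how such a callback is typically consumed is shown below; usesPerFunctionTLI is an illustrative helper, not part of this patch.

#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Illustrative only: each function is queried through its own TLI, so
// per-function settings such as no-builtin attributes are respected.
static bool usesPerFunctionTLI(
    Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  bool Changed = false;
  for (Function &F : M) {
    if (F.isDeclaration())
      continue;
    TargetLibraryInfo &TLI = GetTLI(F);
    LibFunc TheLibFunc;
    Changed |= TLI.getLibFunc(F, TheLibFunc);
  }
  return Changed;
}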
diff --git a/lib/Transforms/IPO/HotColdSplitting.cpp b/lib/Transforms/IPO/HotColdSplitting.cpp
index ab1a9a79cad6..cfdcc8db7f50 100644
--- a/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -85,12 +85,6 @@ static cl::opt<int>
"multiple of TCC_Basic)"));
namespace {
-
-/// A sequence of basic blocks.
-///
-/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
-using BlockSequence = SmallVector<BasicBlock *, 0>;
-
// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
// this function unless you modify the MBB version as well.
//
@@ -169,31 +163,6 @@ static bool markFunctionCold(Function &F, bool UpdateEntryCount = false) {
return Changed;
}
-class HotColdSplitting {
-public:
- HotColdSplitting(ProfileSummaryInfo *ProfSI,
- function_ref<BlockFrequencyInfo *(Function &)> GBFI,
- function_ref<TargetTransformInfo &(Function &)> GTTI,
- std::function<OptimizationRemarkEmitter &(Function &)> *GORE,
- function_ref<AssumptionCache *(Function &)> LAC)
- : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {}
- bool run(Module &M);
-
-private:
- bool isFunctionCold(const Function &F) const;
- bool shouldOutlineFrom(const Function &F) const;
- bool outlineColdRegions(Function &F, bool HasProfileSummary);
- Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
- BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
- OptimizationRemarkEmitter &ORE,
- AssumptionCache *AC, unsigned Count);
- ProfileSummaryInfo *PSI;
- function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
- function_ref<TargetTransformInfo &(Function &)> GetTTI;
- std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
- function_ref<AssumptionCache *(Function &)> LookupAC;
-};
-
class HotColdSplittingLegacyPass : public ModulePass {
public:
static char ID;
@@ -321,13 +290,10 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
return Penalty;
}
-Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
- DominatorTree &DT,
- BlockFrequencyInfo *BFI,
- TargetTransformInfo &TTI,
- OptimizationRemarkEmitter &ORE,
- AssumptionCache *AC,
- unsigned Count) {
+Function *HotColdSplitting::extractColdRegion(
+ const BlockSequence &Region, const CodeExtractorAnalysisCache &CEAC,
+ DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+ OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned Count) {
assert(!Region.empty());
// TODO: Pass BFI and BPI to update profile information.
@@ -349,7 +315,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
return nullptr;
Function *OrigF = Region[0]->getParent();
- if (Function *OutF = CE.extractCodeRegion()) {
+ if (Function *OutF = CE.extractCodeRegion(CEAC)) {
User *U = *OutF->user_begin();
CallInst *CI = cast<CallInst>(U);
CallSite CS(CI);
@@ -607,9 +573,9 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
});
if (!DT)
- DT = make_unique<DominatorTree>(F);
+ DT = std::make_unique<DominatorTree>(F);
if (!PDT)
- PDT = make_unique<PostDominatorTree>(F);
+ PDT = std::make_unique<PostDominatorTree>(F);
auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
for (OutliningRegion &Region : Regions) {
@@ -637,9 +603,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
}
}
+ if (OutliningWorklist.empty())
+ return Changed;
+
// Outline single-entry cold regions, splitting up larger regions as needed.
unsigned OutlinedFunctionID = 1;
- while (!OutliningWorklist.empty()) {
+ // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
+ CodeExtractorAnalysisCache CEAC(F);
+ do {
OutliningRegion Region = OutliningWorklist.pop_back_val();
assert(!Region.empty() && "Empty outlining region in worklist");
do {
@@ -650,14 +621,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
BB->dump();
});
- Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC,
- OutlinedFunctionID);
+ Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI, TTI,
+ ORE, AC, OutlinedFunctionID);
if (Outlined) {
++OutlinedFunctionID;
Changed = true;
}
} while (!Region.empty());
- }
+ } while (!OutliningWorklist.empty());
return Changed;
}
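The loop above now builds one CodeExtractorAnalysisCache per function and hands it to every extractCodeRegion() call, which is what avoids the quadratic recomputation mentioned in the comment. A rough sketch of that usage pattern; outlineAllRegions and its arguments are illustrative, not part of this patch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"

using namespace llvm;

// Illustrative only: compute the cache once, then reuse it for every region
// extracted from the same function.
static void outlineAllRegions(Function &F, DominatorTree &DT,
                              ArrayRef<SmallVector<BasicBlock *, 0>> Regions) {
  CodeExtractorAnalysisCache CEAC(F); // computed once per function
  for (ArrayRef<BasicBlock *> Region : Regions) {
    CodeExtractor CE(Region, &DT);
    if (Function *Outlined = CE.extractCodeRegion(CEAC))
      (void)Outlined; // Region has been replaced by a call to Outlined.
  }
}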
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 34db75dd8b03..bddf75211599 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -114,6 +114,10 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createIPSCCPPass());
}
+void LLVMAddMergeFunctionsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createMergeFunctionsPass());
+}
+
void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
auto PreserveMain = [=](const GlobalValue &GV) {
return AllButMain && GV.getName() == "main";
@@ -121,6 +125,15 @@ void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
unwrap(PM)->add(createInternalizePass(PreserveMain));
}
+void LLVMAddInternalizePassWithMustPreservePredicate(
+ LLVMPassManagerRef PM,
+ void *Context,
+ LLVMBool (*Pred)(LLVMValueRef, void *)) {
+ unwrap(PM)->add(createInternalizePass([=](const GlobalValue &GV) {
+ return Pred(wrap(&GV), Context) == 0 ? false : true;
+ }));
+}
+
void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createStripDeadPrototypesPass());
}
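The new C API entry point takes an opaque context pointer and a callback; globals for which the callback returns a non-zero LLVMBool are preserved, everything else may be internalized. A small usage sketch follows; keepMain is an illustrative callback, not part of this patch.

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/IPO.h"
#include <string.h>

// Illustrative only: preserve just the symbol named "main".
static LLVMBool keepMain(LLVMValueRef GV, void *Ctx) {
  (void)Ctx;
  size_t Len = 0;
  return strcmp(LLVMGetValueName2(GV, &Len), "main") == 0;
}

static void addInternalize(LLVMPassManagerRef PM) {
  LLVMAddInternalizePassWithMustPreservePredicate(PM, /*Context=*/nullptr,
                                                  keepMain);
}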
diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 7f5511e008e1..d1a68b28bd33 100644
--- a/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -18,24 +18,28 @@ using namespace llvm;
#define DEBUG_TYPE "inferattrs"
-static bool inferAllPrototypeAttributes(Module &M,
- const TargetLibraryInfo &TLI) {
+static bool inferAllPrototypeAttributes(
+ Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
bool Changed = false;
for (Function &F : M.functions())
// We only infer things using the prototype and the name; we don't need
// definitions.
if (F.isDeclaration() && !F.hasOptNone())
- Changed |= inferLibFuncAttributes(F, TLI);
+ Changed |= inferLibFuncAttributes(F, GetTLI(F));
return Changed;
}
PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
ModuleAnalysisManager &AM) {
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
- if (!inferAllPrototypeAttributes(M, TLI))
+ if (!inferAllPrototypeAttributes(M, GetTLI))
// If we didn't infer anything, preserve all analyses.
return PreservedAnalyses::all();
@@ -60,8 +64,10 @@ struct InferFunctionAttrsLegacyPass : public ModulePass {
if (skipModule(M))
return false;
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- return inferAllPrototypeAttributes(M, TLI);
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ return inferAllPrototypeAttributes(M, GetTLI);
}
};
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 945f8affae6e..4b72261131c1 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -239,7 +239,7 @@ static void mergeInlinedArrayAllocas(
}
if (Align1 > Align2)
- AvailableAlloca->setAlignment(AI->getAlignment());
+ AvailableAlloca->setAlignment(MaybeAlign(AI->getAlignment()));
}
AI->eraseFromParent();
@@ -527,7 +527,8 @@ static void setInlineRemark(CallSite &CS, StringRef message) {
static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
- ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI,
+ ProfileSummaryInfo *PSI,
+ std::function<TargetLibraryInfo &(Function &)> GetTLI,
bool InsertLifetime,
function_ref<InlineCost(CallSite CS)> GetInlineCost,
function_ref<AAResults &(Function &)> AARGetter,
@@ -626,7 +627,8 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
Instruction *Instr = CS.getInstruction();
- bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI);
+ bool IsTriviallyDead =
+ isInstructionTriviallyDead(Instr, &GetTLI(*Caller));
int InlineHistoryID;
if (!IsTriviallyDead) {
@@ -757,13 +759,16 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
ACT = &getAnalysis<AssumptionCacheTracker>();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {
+ return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
- return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
- [this](CallSite CS) { return getInlineCost(CS); },
- LegacyAARGetter(*this), ImportedFunctionsStats);
+ return inlineCallsImpl(
+ SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime,
+ [this](CallSite CS) { return getInlineCost(CS); }, LegacyAARGetter(*this),
+ ImportedFunctionsStats);
}
/// Remove now-dead linkonce functions at the end of
@@ -879,7 +884,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (!ImportedFunctionsStats &&
InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
ImportedFunctionsStats =
- llvm::make_unique<ImportedFunctionsInliningStatistics>();
+ std::make_unique<ImportedFunctionsInliningStatistics>();
ImportedFunctionsStats->setModuleInfo(M);
}
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 91c7b5f5f135..add2ae053735 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -141,10 +141,12 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
if (NumLoops == 0) return Changed;
--NumLoops;
AssumptionCache *AC = nullptr;
+ Function &Func = *L->getHeader()->getParent();
if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
- AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent());
+ AC = ACT->lookupAssumptionCache(Func);
+ CodeExtractorAnalysisCache CEAC(Func);
CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC);
- if (Extractor.extractCodeRegion() != nullptr) {
+ if (Extractor.extractCodeRegion(CEAC) != nullptr) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
// we shouldn't try to run any more loop passes on it.
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index f7371284f47e..2dec366d70e2 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -230,6 +230,16 @@ void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
Bytes[AllocByteOffset + B] |= AllocMask;
}
+bool lowertypetests::isJumpTableCanonical(Function *F) {
+ if (F->isDeclarationForLinker())
+ return false;
+ auto *CI = mdconst::extract_or_null<ConstantInt>(
+ F->getParent()->getModuleFlag("CFI Canonical Jump Tables"));
+ if (!CI || CI->getZExtValue() != 0)
+ return true;
+ return F->hasFnAttribute("cfi-canonical-jump-table");
+}
+
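For context, the "CFI Canonical Jump Tables" module flag that isJumpTableCanonical() consults is normally emitted by the frontend. A minimal sketch of how it could be set, with a per-function opt-back-in through the attribute checked above; this is illustrative and not part of this patch.

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

// Illustrative only: disable canonical jump tables module-wide, then opt one
// function back in through the "cfi-canonical-jump-table" attribute.
static void disableCanonicalJumpTables(llvm::Module &M, llvm::Function &F) {
  M.addModuleFlag(llvm::Module::Override, "CFI Canonical Jump Tables",
                  /*Val=*/0);
  F.addFnAttr("cfi-canonical-jump-table");
}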
namespace {
struct ByteArrayInfo {
@@ -251,9 +261,12 @@ class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
GlobalObject *GO;
size_t NTypes;
- // For functions: true if this is a definition (either in the merged module or
- // in one of the thinlto modules).
- bool IsDefinition;
+ // For functions: true if the jump table is canonical. This essentially means
+ // whether the canonical address (i.e. the symbol table entry) of the function
+ // is provided by the local jump table. This is normally the same as whether
+ // the function is defined locally, but if canonical jump tables are disabled
+ // by the user then the jump table never provides a canonical definition.
+ bool IsJumpTableCanonical;
// For functions: true if this function is either defined or used in a thinlto
// module and its jumptable entry needs to be exported to thinlto backends.
@@ -263,13 +276,13 @@ class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
public:
static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
- bool IsDefinition, bool IsExported,
+ bool IsJumpTableCanonical, bool IsExported,
ArrayRef<MDNode *> Types) {
auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember)));
GTM->GO = GO;
GTM->NTypes = Types.size();
- GTM->IsDefinition = IsDefinition;
+ GTM->IsJumpTableCanonical = IsJumpTableCanonical;
GTM->IsExported = IsExported;
std::uninitialized_copy(Types.begin(), Types.end(),
GTM->getTrailingObjects<MDNode *>());
@@ -280,8 +293,8 @@ public:
return GO;
}
- bool isDefinition() const {
- return IsDefinition;
+ bool isJumpTableCanonical() const {
+ return IsJumpTableCanonical;
}
bool isExported() const {
@@ -320,6 +333,49 @@ private:
size_t NTargets;
};
+struct ScopedSaveAliaseesAndUsed {
+ Module &M;
+ SmallPtrSet<GlobalValue *, 16> Used, CompilerUsed;
+ std::vector<std::pair<GlobalIndirectSymbol *, Function *>> FunctionAliases;
+
+ ScopedSaveAliaseesAndUsed(Module &M) : M(M) {
+ // The users of this class want to replace all function references except
+ // for aliases and llvm.used/llvm.compiler.used with references to a jump
+ // table. We avoid replacing aliases in order to avoid introducing a double
+ // indirection (or an alias pointing to a declaration in ThinLTO mode), and
+ // we avoid replacing llvm.used/llvm.compiler.used because these global
+ // variables describe properties of the global, not the jump table (besides,
+ // offseted references to the jump table in llvm.used are invalid).
+ // offset references to the jump table in llvm.used are invalid).
+ // indirect) users", so what we do is save the list of globals referenced by
+ // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW
+ // replace the aliasees and then set them back to their original values at
+ // the end.
+ if (GlobalVariable *GV = collectUsedGlobalVariables(M, Used, false))
+ GV->eraseFromParent();
+ if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true))
+ GV->eraseFromParent();
+
+ for (auto &GIS : concat<GlobalIndirectSymbol>(M.aliases(), M.ifuncs())) {
+ // FIXME: This should look past all aliases not just interposable ones,
+ // see discussion on D65118.
+ if (auto *F =
+ dyn_cast<Function>(GIS.getIndirectSymbol()->stripPointerCasts()))
+ FunctionAliases.push_back({&GIS, F});
+ }
+ }
+
+ ~ScopedSaveAliaseesAndUsed() {
+ appendToUsed(M, std::vector<GlobalValue *>(Used.begin(), Used.end()));
+ appendToCompilerUsed(M, std::vector<GlobalValue *>(CompilerUsed.begin(),
+ CompilerUsed.end()));
+
+ for (auto P : FunctionAliases)
+ P.first->setIndirectSymbol(
+ ConstantExpr::getBitCast(P.second, P.first->getType()));
+ }
+};
+
class LowerTypeTestsModule {
Module &M;
@@ -387,7 +443,8 @@ class LowerTypeTestsModule {
uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
TypeIdLowering importTypeId(StringRef TypeId);
void importTypeTest(CallInst *CI);
- void importFunction(Function *F, bool isDefinition);
+ void importFunction(Function *F, bool isJumpTableCanonical,
+ std::vector<GlobalAlias *> &AliasesToErase);
BitSetInfo
buildBitSet(Metadata *TypeId,
@@ -421,7 +478,8 @@ class LowerTypeTestsModule {
ArrayRef<GlobalTypeMember *> Globals,
ArrayRef<ICallBranchFunnel *> ICallBranchFunnels);
- void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT, bool IsDefinition);
+ void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT,
+ bool IsJumpTableCanonical);
void moveInitializerToModuleConstructor(GlobalVariable *GV);
void findGlobalVariableUsersOf(Constant *C,
SmallSetVector<GlobalVariable *, 8> &Out);
@@ -433,7 +491,7 @@ class LowerTypeTestsModule {
/// the block. 'This's use list is expected to have at least one element.
/// Unlike replaceAllUsesWith this function skips blockaddr and direct call
/// uses.
- void replaceCfiUses(Function *Old, Value *New, bool IsDefinition);
+ void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical);
/// replaceDirectCalls - Go through the uses list for this definition and
/// replace each use, which is a direct function call.
@@ -759,43 +817,50 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
// Build a new global with the combined contents of the referenced globals.
// This global is a struct whose even-indexed elements contain the original
// contents of the referenced globals and whose odd-indexed elements contain
- // any padding required to align the next element to the next power of 2.
+ // any padding required to align the next element to the next power of 2 plus
+ // any additional padding required to meet its alignment requirements.
std::vector<Constant *> GlobalInits;
const DataLayout &DL = M.getDataLayout();
+ DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
+ Align MaxAlign;
+ uint64_t CurOffset = 0;
+ uint64_t DesiredPadding = 0;
for (GlobalTypeMember *G : Globals) {
- GlobalVariable *GV = cast<GlobalVariable>(G->getGlobal());
+ auto *GV = cast<GlobalVariable>(G->getGlobal());
+ MaybeAlign Alignment(GV->getAlignment());
+ if (!Alignment)
+ Alignment = Align(DL.getABITypeAlignment(GV->getValueType()));
+ MaxAlign = std::max(MaxAlign, *Alignment);
+ uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, *Alignment);
+ GlobalLayout[G] = GVOffset;
+ if (GVOffset != 0) {
+ uint64_t Padding = GVOffset - CurOffset;
+ GlobalInits.push_back(
+ ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+ }
+
GlobalInits.push_back(GV->getInitializer());
uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
+ CurOffset = GVOffset + InitSize;
- // Compute the amount of padding required.
- uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+ // Compute the amount of padding that we'd like for the next element.
+ DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize;
// Experiments of different caps with Chromium on both x64 and ARM64
// have shown that the 32-byte cap generates the smallest binary on
// both platforms while different caps yield similar performance.
// (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html)
- if (Padding > 32)
- Padding = alignTo(InitSize, 32) - InitSize;
-
- GlobalInits.push_back(
- ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+ if (DesiredPadding > 32)
+ DesiredPadding = alignTo(InitSize, 32) - InitSize;
}
- if (!GlobalInits.empty())
- GlobalInits.pop_back();
+
Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
auto *CombinedGlobal =
new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
GlobalValue::PrivateLinkage, NewInit);
+ CombinedGlobal->setAlignment(MaxAlign);
StructType *NewTy = cast<StructType>(NewInit->getType());
- const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy);
-
- // Compute the offsets of the original globals within the new global.
- DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
- for (unsigned I = 0; I != Globals.size(); ++I)
- // Multiply by 2 to account for padding elements.
- GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
-
lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout);
// Build aliases pointing to offsets into the combined global for each
@@ -975,14 +1040,16 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
}
// ThinLTO backend: the function F has a jump table entry; update this module
-// accordingly. isDefinition describes the type of the jump table entry.
-void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
+// accordingly. isJumpTableCanonical describes the type of the jump table entry.
+void LowerTypeTestsModule::importFunction(
+ Function *F, bool isJumpTableCanonical,
+ std::vector<GlobalAlias *> &AliasesToErase) {
assert(F->getType()->getAddressSpace() == 0);
GlobalValue::VisibilityTypes Visibility = F->getVisibility();
std::string Name = F->getName();
- if (F->isDeclarationForLinker() && isDefinition) {
+ if (F->isDeclarationForLinker() && isJumpTableCanonical) {
// Non-dso_local functions may be overridden at run time,
// don't short-circuit them
if (F->isDSOLocal()) {
@@ -997,12 +1064,13 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
}
Function *FDecl;
- if (F->isDeclarationForLinker() && !isDefinition) {
- // Declaration of an external function.
+ if (!isJumpTableCanonical) {
+ // Either a declaration of an external function or a reference to a locally
+ // defined jump table.
FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
F->getAddressSpace(), Name + ".cfi_jt", &M);
FDecl->setVisibility(GlobalValue::HiddenVisibility);
- } else if (isDefinition) {
+ } else {
F->setName(Name + ".cfi");
F->setLinkage(GlobalValue::ExternalLinkage);
FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
@@ -1011,8 +1079,8 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
Visibility = GlobalValue::HiddenVisibility;
// Delete aliases pointing to this function, they'll be re-created in the
- // merged output
- SmallVector<GlobalAlias*, 4> ToErase;
+ // merged output. Don't do it yet though because ScopedSaveAliaseesAndUsed
+ // will want to reset the aliasees first.
for (auto &U : F->uses()) {
if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) {
Function *AliasDecl = Function::Create(
@@ -1020,24 +1088,15 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
F->getAddressSpace(), "", &M);
AliasDecl->takeName(A);
A->replaceAllUsesWith(AliasDecl);
- ToErase.push_back(A);
+ AliasesToErase.push_back(A);
}
}
- for (auto *A : ToErase)
- A->eraseFromParent();
- } else {
- // Function definition without type metadata, where some other translation
- // unit contained a declaration with type metadata. This normally happens
- // during mixed CFI + non-CFI compilation. We do nothing with the function
- // so that it is treated the same way as a function defined outside of the
- // LTO unit.
- return;
}
- if (F->isWeakForLinker())
- replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isDefinition);
+ if (F->hasExternalWeakLinkage())
+ replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical);
else
- replaceCfiUses(F, FDecl, isDefinition);
+ replaceCfiUses(F, FDecl, isJumpTableCanonical);
// Set visibility late because it's used in replaceCfiUses() to determine
// whether uses need to be replaced.
@@ -1225,7 +1284,7 @@ void LowerTypeTestsModule::findGlobalVariableUsersOf(
// Replace all uses of F with (F ? JT : 0).
void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
- Function *F, Constant *JT, bool IsDefinition) {
+ Function *F, Constant *JT, bool IsJumpTableCanonical) {
// The target expression can not appear in a constant initializer on most
// (all?) targets. Switch to a runtime initializer.
SmallSetVector<GlobalVariable *, 8> GlobalVarUsers;
@@ -1239,7 +1298,7 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
Function::Create(cast<FunctionType>(F->getValueType()),
GlobalValue::ExternalWeakLinkage,
F->getAddressSpace(), "", &M);
- replaceCfiUses(F, PlaceholderFn, IsDefinition);
+ replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical);
Constant *Target = ConstantExpr::getSelect(
ConstantExpr::getICmp(CmpInst::ICMP_NE, F,
@@ -1276,8 +1335,9 @@ selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions,
unsigned ArmCount = 0, ThumbCount = 0;
for (const auto GTM : Functions) {
- if (!GTM->isDefinition()) {
+ if (!GTM->isJumpTableCanonical()) {
// PLT stubs are always ARM.
+ // FIXME: This is the wrong heuristic for non-canonical jump tables.
++ArmCount;
continue;
}
@@ -1303,7 +1363,7 @@ void LowerTypeTestsModule::createJumpTable(
cast<Function>(Functions[I]->getGlobal()));
// Align the whole table by entry size.
- F->setAlignment(getJumpTableEntrySize());
+ F->setAlignment(Align(getJumpTableEntrySize()));
// Skip prologue.
// Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3.
// Luckily, this function does not get any prologue even without the
@@ -1438,47 +1498,53 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout);
- // Build aliases pointing to offsets into the jump table, and replace
- // references to the original functions with references to the aliases.
- for (unsigned I = 0; I != Functions.size(); ++I) {
- Function *F = cast<Function>(Functions[I]->getGlobal());
- bool IsDefinition = Functions[I]->isDefinition();
-
- Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
- ConstantExpr::getInBoundsGetElementPtr(
- JumpTableType, JumpTable,
- ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
- ConstantInt::get(IntPtrTy, I)}),
- F->getType());
- if (Functions[I]->isExported()) {
- if (IsDefinition) {
- ExportSummary->cfiFunctionDefs().insert(F->getName());
+ {
+ ScopedSaveAliaseesAndUsed S(M);
+
+ // Build aliases pointing to offsets into the jump table, and replace
+ // references to the original functions with references to the aliases.
+ for (unsigned I = 0; I != Functions.size(); ++I) {
+ Function *F = cast<Function>(Functions[I]->getGlobal());
+ bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical();
+
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
+ ConstantExpr::getInBoundsGetElementPtr(
+ JumpTableType, JumpTable,
+ ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, I)}),
+ F->getType());
+ if (Functions[I]->isExported()) {
+ if (IsJumpTableCanonical) {
+ ExportSummary->cfiFunctionDefs().insert(F->getName());
+ } else {
+ GlobalAlias *JtAlias = GlobalAlias::create(
+ F->getValueType(), 0, GlobalValue::ExternalLinkage,
+ F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
+ JtAlias->setVisibility(GlobalValue::HiddenVisibility);
+ ExportSummary->cfiFunctionDecls().insert(F->getName());
+ }
+ }
+ if (!IsJumpTableCanonical) {
+ if (F->hasExternalWeakLinkage())
+ replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr,
+ IsJumpTableCanonical);
+ else
+ replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical);
} else {
- GlobalAlias *JtAlias = GlobalAlias::create(
- F->getValueType(), 0, GlobalValue::ExternalLinkage,
- F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
- JtAlias->setVisibility(GlobalValue::HiddenVisibility);
- ExportSummary->cfiFunctionDecls().insert(F->getName());
+ assert(F->getType()->getAddressSpace() == 0);
+
+ GlobalAlias *FAlias =
+ GlobalAlias::create(F->getValueType(), 0, F->getLinkage(), "",
+ CombinedGlobalElemPtr, &M);
+ FAlias->setVisibility(F->getVisibility());
+ FAlias->takeName(F);
+ if (FAlias->hasName())
+ F->setName(FAlias->getName() + ".cfi");
+ replaceCfiUses(F, FAlias, IsJumpTableCanonical);
+ if (!F->hasLocalLinkage())
+ F->setVisibility(GlobalVariable::HiddenVisibility);
}
}
- if (!IsDefinition) {
- if (F->isWeakForLinker())
- replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr, IsDefinition);
- else
- replaceCfiUses(F, CombinedGlobalElemPtr, IsDefinition);
- } else {
- assert(F->getType()->getAddressSpace() == 0);
-
- GlobalAlias *FAlias = GlobalAlias::create(
- F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M);
- FAlias->setVisibility(F->getVisibility());
- FAlias->takeName(F);
- if (FAlias->hasName())
- F->setName(FAlias->getName() + ".cfi");
- replaceCfiUses(F, FAlias, IsDefinition);
- if (!F->hasLocalLinkage())
- F->setVisibility(GlobalVariable::HiddenVisibility);
- }
}
createJumpTable(JumpTableFn, Functions);
@@ -1623,7 +1689,7 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
": ");
std::error_code EC;
- raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text);
ExitOnErr(errorCodeToError(EC));
yaml::Output Out(OS);
@@ -1643,7 +1709,8 @@ static bool isDirectCall(Use& U) {
return false;
}
-void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefinition) {
+void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
+ bool IsJumpTableCanonical) {
SmallSetVector<Constant *, 4> Constants;
auto UI = Old->use_begin(), E = Old->use_end();
for (; UI != E;) {
@@ -1655,7 +1722,7 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefi
continue;
// Skip direct calls to externally defined or non-dso_local functions
- if (isDirectCall(U) && (Old->isDSOLocal() || !IsDefinition))
+ if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical))
continue;
// Must handle Constants specially, we cannot call replaceUsesOfWith on a
@@ -1678,16 +1745,7 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefi
}
void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
- auto UI = Old->use_begin(), E = Old->use_end();
- for (; UI != E;) {
- Use &U = *UI;
- ++UI;
-
- if (!isDirectCall(U))
- continue;
-
- U.set(New);
- }
+ Old->replaceUsesWithIf(New, [](Use &U) { return isDirectCall(U); });
}
bool LowerTypeTestsModule::lower() {
@@ -1734,10 +1792,16 @@ bool LowerTypeTestsModule::lower() {
Decls.push_back(&F);
}
- for (auto F : Defs)
- importFunction(F, /*isDefinition*/ true);
- for (auto F : Decls)
- importFunction(F, /*isDefinition*/ false);
+ std::vector<GlobalAlias *> AliasesToErase;
+ {
+ ScopedSaveAliaseesAndUsed S(M);
+ for (auto F : Defs)
+ importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase);
+ for (auto F : Decls)
+ importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase);
+ }
+ for (GlobalAlias *GA : AliasesToErase)
+ GA->eraseFromParent();
return true;
}
@@ -1823,6 +1887,17 @@ bool LowerTypeTestsModule::lower() {
CfiFunctionLinkage Linkage = P.second.Linkage;
MDNode *FuncMD = P.second.FuncMD;
Function *F = M.getFunction(FunctionName);
+ if (F && F->hasLocalLinkage()) {
+ // Locally defined function that happens to have the same name as a
+ // function defined in a ThinLTO module. Rename it to move it out of
+ // the way of the external reference that we're about to create.
+ // Note that setName will find a unique name for the function, so even
+ // if there is an existing function with the suffix there won't be a
+ // name collision.
+ F->setName(F->getName() + ".1");
+ F = nullptr;
+ }
+
if (!F)
F = Function::Create(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
@@ -1871,24 +1946,26 @@ bool LowerTypeTestsModule::lower() {
Types.clear();
GO.getMetadata(LLVMContext::MD_type, Types);
- bool IsDefinition = !GO.isDeclarationForLinker();
+ bool IsJumpTableCanonical = false;
bool IsExported = false;
if (Function *F = dyn_cast<Function>(&GO)) {
+ IsJumpTableCanonical = isJumpTableCanonical(F);
if (ExportedFunctions.count(F->getName())) {
- IsDefinition |= ExportedFunctions[F->getName()].Linkage == CFL_Definition;
+ IsJumpTableCanonical |=
+ ExportedFunctions[F->getName()].Linkage == CFL_Definition;
IsExported = true;
// TODO: The logic here checks only that the function is address taken,
// not that the address takers are live. This can be updated to check
// their liveness and emit fewer jumptable entries once monolithic LTO
// builds also emit summaries.
} else if (!F->hasAddressTaken()) {
- if (!CrossDsoCfi || !IsDefinition || F->hasLocalLinkage())
+ if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage())
continue;
}
}
- auto *GTM =
- GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types);
+ auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical,
+ IsExported, Types);
GlobalTypeMembers[&GO] = GTM;
for (MDNode *Type : Types) {
verifyTypeMDNode(&GO, Type);
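The rewritten layout loop in buildBitSetsFromGlobalVariables above records only a desired amount of power-of-two padding and lets the next global's alignment round the running offset up. A short worked sketch of that arithmetic with made-up sizes and alignments:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

using namespace llvm;

// Illustrative only: two globals of 20 and 8 bytes with alignments 4 and 8.
static void paddingExample() {
  uint64_t CurOffset = 0, DesiredPadding = 0;

  // First global: placed at alignTo(0 + 0, 4) == 0, occupies bytes [0, 20).
  uint64_t Off0 = alignTo(CurOffset + DesiredPadding, 4);
  CurOffset = Off0 + 20;
  // Desired padding to the next power of two: NextPowerOf2(19) - 20 == 12,
  // which is below the 32-byte cap, so it is kept as-is.
  DesiredPadding = NextPowerOf2(20 - 1) - 20;

  // Second global: alignTo(20 + 12, 8) == 32, so 12 bytes of zero padding are
  // materialized and the global starts at offset 32.
  uint64_t Off1 = alignTo(CurOffset + DesiredPadding, 8);
  assert(Off0 == 0 && Off1 == 32);
  (void)Off0;
  (void)Off1;
}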
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 3a08069dcd4a..8b9abaddc84c 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -769,7 +769,7 @@ void MergeFunctions::writeAlias(Function *F, Function *G) {
PtrType->getElementType(), PtrType->getAddressSpace(),
G->getLinkage(), "", BitcastF, G->getParent());
- F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ F->setAlignment(MaybeAlign(std::max(F->getAlignment(), G->getAlignment())));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
GA->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
@@ -816,7 +816,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
removeUsers(F);
F->replaceAllUsesWith(NewF);
- unsigned MaxAlignment = std::max(G->getAlignment(), NewF->getAlignment());
+ MaybeAlign MaxAlignment(std::max(G->getAlignment(), NewF->getAlignment()));
writeThunkOrAlias(F, G);
writeThunkOrAlias(F, NewF);
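Several hunks in this diff migrate raw unsigned alignments to the Align/MaybeAlign types (setAlignment(MaybeAlign(...)), Align(...), and None for "unspecified"). A compact sketch of the conversion idioms; alignmentIdioms and its values are illustrative, not part of this patch.

#include "llvm/Support/Alignment.h"

using namespace llvm;

// Illustrative only: mapping legacy unsigned alignments onto the new types.
static void alignmentIdioms(unsigned Legacy) {
  MaybeAlign MA(Legacy); // Legacy == 0 becomes an empty (unknown) alignment.
  Align Known(16);       // Align must hold a non-zero power of two.
  if (MA && *MA > Known) // Comparisons are defined between Align values.
    Known = *MA;
  (void)Known;
}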
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 733782e8764d..e193074884af 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -409,7 +409,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
- llvm::make_unique<FunctionOutliningMultiRegionInfo>();
+ std::make_unique<FunctionOutliningMultiRegionInfo>();
auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
BasicBlock *Dom = BlockList.front();
@@ -589,7 +589,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
};
std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
- llvm::make_unique<FunctionOutliningInfo>();
+ std::make_unique<FunctionOutliningInfo>();
BasicBlock *CurrEntry = EntryBlock;
bool CandidateFound = false;
@@ -966,7 +966,7 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
function_ref<AssumptionCache *(Function &)> LookupAC)
: OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
- ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
+ ClonedOI = std::make_unique<FunctionOutliningInfo>();
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
@@ -991,7 +991,7 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
OptimizationRemarkEmitter &ORE,
function_ref<AssumptionCache *(Function &)> LookupAC)
: OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
- ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
+ ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
@@ -1122,6 +1122,9 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
BranchProbabilityInfo BPI(*ClonedFunc, LI);
ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
+ // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
+ CodeExtractorAnalysisCache CEAC(*ClonedFunc);
+
SetVector<Value *> Inputs, Outputs, Sinks;
for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
ClonedOMRI->ORI) {
@@ -1148,7 +1151,7 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
if (Outputs.size() > 0 && !ForceLiveExit)
continue;
- Function *OutlinedFunc = CE.extractCodeRegion();
+ Function *OutlinedFunc = CE.extractCodeRegion(CEAC);
if (OutlinedFunc) {
CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
@@ -1210,11 +1213,12 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
}
// Extract the body of the if.
+ CodeExtractorAnalysisCache CEAC(*ClonedFunc);
Function *OutlinedFunc =
CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
/* AllowVarargs */ true)
- .extractCodeRegion();
+ .extractCodeRegion(CEAC);
if (OutlinedFunc) {
BasicBlock *OutliningCallBB =
@@ -1264,7 +1268,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
if (PSI->isFunctionEntryCold(F))
return {false, nullptr};
- if (empty(F->users()))
+ if (F->users().empty())
return {false, nullptr};
OptimizationRemarkEmitter ORE(F);
@@ -1370,7 +1374,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
return false;
}
- assert(empty(Cloner.OrigFunc->users()) &&
+ assert(Cloner.OrigFunc->users().empty() &&
"F's users should all be replaced!");
std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 3ea77f08fd3c..5314a8219b1e 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -654,6 +654,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createGlobalsAAWrapperPass());
MPM.add(createFloat2IntPass());
+ MPM.add(createLowerConstantIntrinsicsPass());
addExtensionsToPM(EP_VectorizerStart, MPM);
diff --git a/lib/Transforms/IPO/SCCP.cpp b/lib/Transforms/IPO/SCCP.cpp
index 7be3608bd2ec..307690729b14 100644
--- a/lib/Transforms/IPO/SCCP.cpp
+++ b/lib/Transforms/IPO/SCCP.cpp
@@ -9,16 +9,18 @@ using namespace llvm;
PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) {
const DataLayout &DL = M.getDataLayout();
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn {
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
return {
- make_unique<PredicateInfo>(F, DT, FAM.getResult<AssumptionAnalysis>(F)),
+ std::make_unique<PredicateInfo>(F, DT, FAM.getResult<AssumptionAnalysis>(F)),
&DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F)};
};
- if (!runIPSCCP(M, DL, &TLI, getAnalysis))
+ if (!runIPSCCP(M, DL, GetTLI, getAnalysis))
return PreservedAnalyses::all();
PreservedAnalyses PA;
@@ -47,14 +49,14 @@ public:
if (skipModule(M))
return false;
const DataLayout &DL = M.getDataLayout();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
+ auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
auto getAnalysis = [this](Function &F) -> AnalysisResultsForFn {
DominatorTree &DT =
this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
return {
- make_unique<PredicateInfo>(
+ std::make_unique<PredicateInfo>(
F, DT,
this->getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F)),
@@ -62,7 +64,7 @@ public:
nullptr}; // manager, so set them to nullptr.
};
- return runIPSCCP(M, DL, TLI, getAnalysis);
+ return runIPSCCP(M, DL, GetTLI, getAnalysis);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 877d20e72ffc..6184681db8a2 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -72,6 +72,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -79,6 +80,7 @@
#include <limits>
#include <map>
#include <memory>
+#include <queue>
#include <string>
#include <system_error>
#include <utility>
@@ -128,6 +130,12 @@ static cl::opt<bool> ProfileSampleAccurate(
"callsite and function as having 0 samples. Otherwise, treat "
"un-sampled callsites and functions conservatively as unknown. "));
+static cl::opt<bool> ProfileAccurateForSymsInList(
+ "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
+ cl::init(true),
+ cl::desc("For symbols in profile symbol list, regard their profiles to "
+ "be accurate. It may be overridden by profile-sample-accurate. "));
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -137,9 +145,11 @@ using EdgeWeightMap = DenseMap<Edge, uint64_t>;
using BlockEdgeMap =
DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>;
+class SampleProfileLoader;
+
class SampleCoverageTracker {
public:
- SampleCoverageTracker() = default;
+ SampleCoverageTracker(SampleProfileLoader &SPL) : SPLoader(SPL){};
bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
uint32_t Discriminator, uint64_t Samples);
@@ -185,6 +195,76 @@ private:
/// keyed by FunctionSamples pointers, but these stats are cleared after
/// every function, so we just need to keep a single counter.
uint64_t TotalUsedSamples = 0;
+
+ SampleProfileLoader &SPLoader;
+};
+
+class GUIDToFuncNameMapper {
+public:
+ GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
+ DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
+ : CurrentReader(Reader), CurrentModule(M),
+ CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
+ if (CurrentReader.getFormat() != SPF_Compact_Binary)
+ return;
+
+ for (const auto &F : CurrentModule) {
+ StringRef OrigName = F.getName();
+ CurrentGUIDToFuncNameMap.insert(
+ {Function::getGUID(OrigName), OrigName});
+
+      // Local-to-global variable promotion used by optimizations like ThinLTO
+      // will rename the variable and add a suffix like ".llvm.xxx" to the
+      // original local name. In the sample profile, the suffixes of function
+      // names are all stripped. Since it is possible that the mapper is
+      // built in the post-thin-link phase after variable promotion has been
+      // done, we also need to add the function name without the suffix into
+      // the GUIDToFuncNameMap.
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ if (CanonName != OrigName)
+ CurrentGUIDToFuncNameMap.insert(
+ {Function::getGUID(CanonName), CanonName});
+ }
+
+ // Update GUIDToFuncNameMap for each function including inlinees.
+ SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
+ }
+
+ ~GUIDToFuncNameMapper() {
+ if (CurrentReader.getFormat() != SPF_Compact_Binary)
+ return;
+
+ CurrentGUIDToFuncNameMap.clear();
+
+    // Reset the GUIDToFuncNameMap of each function, as the entries are no
+    // longer valid at this point.
+ SetGUIDToFuncNameMapForAll(nullptr);
+ }
+
+private:
+ void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
+ std::queue<FunctionSamples *> FSToUpdate;
+ for (auto &IFS : CurrentReader.getProfiles()) {
+ FSToUpdate.push(&IFS.second);
+ }
+
+ while (!FSToUpdate.empty()) {
+ FunctionSamples *FS = FSToUpdate.front();
+ FSToUpdate.pop();
+ FS->GUIDToFuncNameMap = Map;
+ for (const auto &ICS : FS->getCallsiteSamples()) {
+ const FunctionSamplesMap &FSMap = ICS.second;
+ for (auto &IFS : FSMap) {
+ FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
+ FSToUpdate.push(&FS);
+ }
+ }
+ }
+ }
+
+ SampleProfileReader &CurrentReader;
+ Module &CurrentModule;
+ DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
};
/// Sample profile pass.
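To make the mapper's job concrete, here is a minimal standalone sketch (hypothetical helper names; std::hash stands in for the real MD5-based GUID computed by Function::getGUID) of registering both the original and the canonical, suffix-stripped name under a GUID-like key:

#include <cstdint>
#include <functional>
#include <map>
#include <string>

static std::string canonicalName(const std::string &Name) {
  // ThinLTO local-to-global promotion appends ".llvm.<suffix>"; profiles
  // store the unsuffixed name, so drop everything from ".llvm." onwards.
  size_t Pos = Name.find(".llvm.");
  return Pos == std::string::npos ? Name : Name.substr(0, Pos);
}

static uint64_t toyGUID(const std::string &Name) {
  // Stand-in for Function::getGUID; only the "one key per spelling" idea matters.
  return std::hash<std::string>{}(Name);
}

int main() {
  std::map<uint64_t, std::string> GUIDToFuncName;
  for (const std::string &Orig : {std::string("foo"), std::string("bar.llvm.1234")}) {
    GUIDToFuncName.emplace(toyGUID(Orig), Orig);
    std::string Canon = canonicalName(Orig);
    if (Canon != Orig)
      GUIDToFuncName.emplace(toyGUID(Canon), Canon);  // lookup works pre- and post-promotion
  }
  return 0;
}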
@@ -199,8 +279,9 @@ public:
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
: GetAC(std::move(GetAssumptionCache)),
- GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
- RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
+ GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this),
+ Filename(Name), RemappingFilename(RemapName),
+ IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -209,6 +290,8 @@ public:
void dump() { Reader->dump(); }
protected:
+ friend class SampleCoverageTracker;
+
bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
@@ -237,6 +320,8 @@ protected:
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
void clearFunctionData();
+ bool callsiteIsHot(const FunctionSamples *CallsiteFS,
+ ProfileSummaryInfo *PSI);
/// Map basic blocks to their computed weights.
///
@@ -310,6 +395,10 @@ protected:
/// Profile Summary Info computed from sample profile.
ProfileSummaryInfo *PSI = nullptr;
+  /// The profile symbol list tells whether a function name appears in the
+  /// binary used to generate the current profile.
+ std::unique_ptr<ProfileSymbolList> PSL;
+
/// Total number of samples collected in this profile.
///
/// This is the sum of all the samples collected in all the functions executed
@@ -326,6 +415,21 @@ protected:
uint64_t entryCount;
};
DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
+
+ // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
+ // all the function symbols defined or declared in current module.
+ DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
+
+  // All the names used in FunctionSamples, including outline function
+  // names, inline instance names and call target names.
+ StringSet<> NamesInProfile;
+
+  // For symbols in the profile symbol list, whether to regard their profiles
+  // as accurate. It is mainly decided by the existence of the profile symbol
+  // list and the -profile-accurate-for-symsinlist flag, but it can be
+  // overridden by -profile-sample-accurate or the profile-sample-accurate
+  // attribute.
+ bool ProfAccForSymsInList;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -381,14 +485,23 @@ private:
/// To decide whether an inlined callsite is hot, we compare the callsite
/// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
/// regarded as hot if the count is above the cutoff value.
-static bool callsiteIsHot(const FunctionSamples *CallsiteFS,
- ProfileSummaryInfo *PSI) {
+///
+/// When ProfileAccurateForSymsInList is enabled and a profile symbol list
+/// is present, functions in the profile symbol list but without a profile
+/// will be regarded as cold, and much less inlining will happen in the CGSCC
+/// inlining pass, so we tend to lower the hot criterion here to allow more
+/// early inlining for warm callsites, which is helpful for performance.
+bool SampleProfileLoader::callsiteIsHot(const FunctionSamples *CallsiteFS,
+ ProfileSummaryInfo *PSI) {
if (!CallsiteFS)
return false; // The callsite was not inlined in the original binary.
assert(PSI && "PSI is expected to be non null");
uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
- return PSI->isHotCount(CallsiteTotalSamples);
+ if (ProfAccForSymsInList)
+ return !PSI->isColdCount(CallsiteTotalSamples);
+ else
+ return PSI->isHotCount(CallsiteTotalSamples);
}
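The relaxed criterion can be illustrated with a small sketch; the hard-coded cutoffs below are made-up stand-ins for the thresholds ProfileSummaryInfo derives from the profile summary, not real values:

#include <cstdint>

static const uint64_t HotCutoff = 1000;  // assumed threshold, for illustration
static const uint64_t ColdCutoff = 10;   // assumed threshold, for illustration

// With a profile symbol list in effect, "not cold" is enough to treat a
// callsite as hot; otherwise the stricter "hot" cutoff applies.
static bool callsiteIsHotSketch(uint64_t CallsiteTotalSamples,
                                bool ProfAccForSymsInList) {
  if (ProfAccForSymsInList)
    return CallsiteTotalSamples > ColdCutoff;
  return CallsiteTotalSamples >= HotCutoff;
}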
/// Mark as used the sample record for the given function samples at
@@ -425,7 +538,7 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Count += countUsedRecords(CalleeSamples, PSI);
}
@@ -444,7 +557,7 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Count += countBodyRecords(CalleeSamples, PSI);
}
@@ -465,7 +578,7 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Total += countBodySamples(CalleeSamples, PSI);
}
@@ -788,6 +901,14 @@ bool SampleProfileLoader::inlineHotFunctions(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
+  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
+  // the profile symbol list is ignored when profile-sample-accurate is on.
+ assert((!ProfAccForSymsInList ||
+ (!ProfileSampleAccurate &&
+ !F.hasFnAttribute("profile-sample-accurate"))) &&
+ "ProfAccForSymsInList should be false when profile-sample-accurate "
+ "is enabled");
+
DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
bool Changed = false;
while (true) {
@@ -1219,17 +1340,12 @@ void SampleProfileLoader::buildEdges(Function &F) {
}
/// Returns the sorted CallTargetMap \p M by count in descending order.
-static SmallVector<InstrProfValueData, 2> SortCallTargets(
- const SampleRecord::CallTargetMap &M) {
+static SmallVector<InstrProfValueData, 2> GetSortedValueDataFromCallTargets(
+    const SampleRecord::CallTargetMap &M) {
SmallVector<InstrProfValueData, 2> R;
- for (auto I = M.begin(); I != M.end(); ++I)
- R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
- llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
- if (L.Count == R.Count)
- return L.Value > R.Value;
- else
- return L.Count > R.Count;
- });
+ for (const auto &I : SampleRecord::SortCallTargets(M)) {
+ R.emplace_back(InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
+ }
return R;
}
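For reference, the ordering this helper relies on looks roughly like the following in isolation (plain strings and counts instead of the SampleRecord types; the tie-break on the name is an assumption made only to keep the sketch deterministic):

#include <algorithm>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

using Target = std::pair<std::string, uint64_t>;  // (callee name, sample count)

static void sortCallTargets(std::vector<Target> &Targets) {
  std::sort(Targets.begin(), Targets.end(),
            [](const Target &L, const Target &R) {
              if (L.second == R.second)
                return L.first > R.first;   // deterministic tie-break
              return L.second > R.second;   // higher count first
            });
}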
@@ -1324,7 +1440,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (!T || T.get().empty())
continue;
SmallVector<InstrProfValueData, 2> SortedCallTargets =
- SortCallTargets(T.get());
+ GetSortedValueDataFromCallTargets(T.get());
uint64_t Sum;
findIndirectCallFunctionSamples(I, Sum);
annotateValueSite(*I.getParent()->getParent()->getParent(), I,
@@ -1374,6 +1490,8 @@ void SampleProfileLoader::propagateWeights(Function &F) {
}
}
+ misexpect::verifyMisExpect(TI, Weights, TI->getContext());
+
uint64_t TempWeight;
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
@@ -1557,30 +1675,29 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
bool SampleProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();
- auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
+
+ std::unique_ptr<SampleProfileReaderItaniumRemapper> RemapReader;
+ auto ReaderOrErr =
+ SampleProfileReader::create(Filename, Ctx, RemappingFilename);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();
Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
return false;
}
Reader = std::move(ReaderOrErr.get());
- Reader->collectFuncsToUse(M);
+ Reader->collectFuncsFrom(M);
ProfileIsValid = (Reader->read() == sampleprof_error::success);
-
- if (!RemappingFilename.empty()) {
- // Apply profile remappings to the loaded profile data if requested.
- // For now, we only support remapping symbols encoded using the Itanium
- // C++ ABI's name mangling scheme.
- ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
- RemappingFilename, Ctx, std::move(Reader));
- if (std::error_code EC = ReaderOrErr.getError()) {
- std::string Msg = "Could not open profile remapping file: " + EC.message();
- Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
- return false;
- }
- Reader = std::move(ReaderOrErr.get());
- ProfileIsValid = (Reader->read() == sampleprof_error::success);
+ PSL = Reader->getProfileSymbolList();
+
+  // When profile-sample-accurate is on, ignore the symbol list.
+ ProfAccForSymsInList =
+ ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate;
+ if (ProfAccForSymsInList) {
+ NamesInProfile.clear();
+ if (auto NameTable = Reader->getNameTable())
+ NamesInProfile.insert(NameTable->begin(), NameTable->end());
}
+
return true;
}
@@ -1594,7 +1711,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI) {
- FunctionSamples::GUIDToFuncNameMapper Mapper(M);
+ GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
if (!ProfileIsValid)
return false;
@@ -1651,19 +1768,48 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
-
+
DILocation2SampleMap.clear();
// By default the entry count is initialized to -1, which will be treated
// conservatively by getEntryCount as the same as unknown (None). This is
// to avoid newly added code to be treated as cold. If we have samples
// this will be overwritten in emitAnnotations.
- // If ProfileSampleAccurate is true or F has profile-sample-accurate
- // attribute, initialize the entry count to 0 so callsites or functions
- // unsampled will be treated as cold.
- uint64_t initialEntryCount =
- (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
- ? 0
- : -1;
+ uint64_t initialEntryCount = -1;
+
+ ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
+ if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
+    // Initialize all the function entry counts to 0. This means all the
+    // functions without a profile will be regarded as cold.
+ initialEntryCount = 0;
+    // profile-sample-accurate is a user assertion which has higher precedence
+    // than the symbol list. When profile-sample-accurate is on, ignore the
+    // symbol list.
+ ProfAccForSymsInList = false;
+ }
+
+  // PSL -- the profile symbol list includes all the symbols in the sampled
+  // binary. If ProfileAccurateForSymsInList is enabled, PSL is used to treat
+  // old functions without samples as cold, without having to worry about new
+  // and hot functions being mistakenly treated as cold.
+ if (ProfAccForSymsInList) {
+ // Initialize the entry count to 0 for functions in the list.
+ if (PSL->contains(F.getName()))
+ initialEntryCount = 0;
+
+    // A function in the symbol list but without samples will be regarded as
+    // cold. To minimize the potential negative performance impact this could
+    // have, we want to be a little conservative here: if a function shows up
+    // in the profile at all, whether as an outline function, an inline
+    // instance or a call target, treat the function as not cold. This handles
+    // cases such as most callsites of a function being inlined in the sampled
+    // binary but not in the current build (because of source code drift,
+    // imprecise debug information, or because the callsites are all cold
+    // individually but not cold accumulatively), so the outline function that
+    // shows up as cold in the sampled binary will actually not be cold after
+    // the current build.
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ if (NamesInProfile.count(CanonName))
+ initialEntryCount = -1;
+ }
+
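Condensing the precedence rules above into one hypothetical helper (not part of the pass; the -profile-accurate-for-symsinlist flag is folded into the HasSymbolList parameter for brevity): profile-sample-accurate wins over the symbol list, and a name that appears anywhere in the profile is never forced cold.

#include <cstdint>
#include <set>
#include <string>

static uint64_t initialEntryCountFor(const std::string &CanonName,
                                     bool SampleAccurate,
                                     bool HasSymbolList,
                                     const std::set<std::string> &SymbolList,
                                     const std::set<std::string> &NamesInProfile) {
  const uint64_t Unknown = static_cast<uint64_t>(-1);
  if (SampleAccurate)
    return 0;                     // unsampled code is treated as cold
  if (HasSymbolList && SymbolList.count(CanonName) &&
      !NamesInProfile.count(CanonName))
    return 0;                     // in the sampled binary, but never sampled
  return Unknown;                 // otherwise leave the entry count unknown
}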
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
@@ -1672,7 +1818,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
.getManager();
ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
} else {
- OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
+ OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
ORE = OwnedORE.get();
}
Samples = Reader->getSamplesFor(F);
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 24c476376c14..690b5e8bf49e 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
+#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -218,10 +219,18 @@ void splitAndWriteThinLTOBitcode(
promoteTypeIds(M, ModuleId);
- // Returns whether a global has attached type metadata. Such globals may
- // participate in CFI or whole-program devirtualization, so they need to
- // appear in the merged module instead of the thin LTO module.
+ // Returns whether a global or its associated global has attached type
+  // devirtualization, so it needs to appear in the merged module instead of
+ // devirtualization, so they need to appear in the merged module instead of
+ // the thin LTO module. Similarly, globals that are associated with globals
+ // with type metadata need to appear in the merged module because they will
+ // reference the global's section directly.
auto HasTypeMetadata = [](const GlobalObject *GO) {
+ if (MDNode *MD = GO->getMetadata(LLVMContext::MD_associated))
+ if (auto *AssocVM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(0)))
+ if (auto *AssocGO = dyn_cast<GlobalObject>(AssocVM->getValue()))
+ if (AssocGO->hasMetadata(LLVMContext::MD_type))
+ return true;
return GO->hasMetadata(LLVMContext::MD_type);
};
@@ -315,9 +324,9 @@ void splitAndWriteThinLTOBitcode(
SmallVector<Metadata *, 4> Elts;
Elts.push_back(MDString::get(Ctx, F.getName()));
CfiFunctionLinkage Linkage;
- if (!F.isDeclarationForLinker())
+ if (lowertypetests::isJumpTableCanonical(&F))
Linkage = CFL_Definition;
- else if (F.isWeakForLinker())
+ else if (F.hasExternalWeakLinkage())
Linkage = CFL_WeakDeclaration;
else
Linkage = CFL_Declaration;
@@ -457,7 +466,7 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
// splitAndWriteThinLTOBitcode). Just always build it once via the
// buildModuleSummaryIndex when Module(s) are ready.
ProfileSummaryInfo PSI(M);
- NewIndex = llvm::make_unique<ModuleSummaryIndex>(
+ NewIndex = std::make_unique<ModuleSummaryIndex>(
buildModuleSummaryIndex(M, nullptr, &PSI));
Index = NewIndex.get();
}
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 6b6dd6194e17..f0cf5581ba8a 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -24,12 +24,14 @@
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
-// This pass is intended to be used during the regular and thin LTO pipelines.
+// This pass is intended to be used during the regular and thin LTO pipelines:
+//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
// eligible for virtual call optimization (i.e. calls that use either of the
-// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During
-// ThinLTO, the pass operates in two phases:
+// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
+//
+// During hybrid Regular/ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over a single merged module
// that contains all vtables with !type metadata that participate in the link.
// The pass computes a resolution for each virtual call and stores it in the
@@ -38,6 +40,14 @@
// modules. The pass applies the resolutions previously computed during the
// import phase to each eligible virtual call.
//
+// During ThinLTO, the pass operates in two phases:
+// - Export phase: this is run during the thin link over the index which
+// contains a summary of all vtables with !type metadata that participate in
+// the link. It computes a resolution for each virtual call and stores it in
+// the type identifier summary. Only single implementation devirtualization
+// is supported.
+// - Import phase: (same as with hybrid case above).
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -117,6 +127,11 @@ static cl::opt<unsigned>
cl::desc("Maximum number of call targets per "
"call site to enable branch funnels"));
+static cl::opt<bool>
+ PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
+ cl::init(false), cl::ZeroOrMore,
+ cl::desc("Print index-based devirtualization messages"));
+
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset before the object, otherwise look for an
// offset after the object.
@@ -265,6 +280,25 @@ template <> struct DenseMapInfo<VTableSlot> {
}
};
+template <> struct DenseMapInfo<VTableSlotSummary> {
+ static VTableSlotSummary getEmptyKey() {
+ return {DenseMapInfo<StringRef>::getEmptyKey(),
+ DenseMapInfo<uint64_t>::getEmptyKey()};
+ }
+ static VTableSlotSummary getTombstoneKey() {
+ return {DenseMapInfo<StringRef>::getTombstoneKey(),
+ DenseMapInfo<uint64_t>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const VTableSlotSummary &I) {
+ return DenseMapInfo<StringRef>::getHashValue(I.TypeID) ^
+ DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
+ }
+ static bool isEqual(const VTableSlotSummary &LHS,
+ const VTableSlotSummary &RHS) {
+ return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
+ }
+};
+
} // end namespace llvm
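The same keying scheme can be sketched with standard containers (an illustration only, not the DenseMapInfo specialization itself): hash the type identifier string and the byte offset separately and combine them, mirroring the XOR used above.

#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
#include <unordered_map>

struct SlotKey {
  std::string TypeID;
  uint64_t ByteOffset;
  bool operator==(const SlotKey &O) const {
    return TypeID == O.TypeID && ByteOffset == O.ByteOffset;
  }
};

struct SlotKeyHash {
  std::size_t operator()(const SlotKey &K) const {
    // Combine the two component hashes, as the DenseMapInfo above does.
    return std::hash<std::string>{}(K.TypeID) ^
           std::hash<uint64_t>{}(K.ByteOffset);
  }
};

using SlotMap = std::unordered_map<SlotKey, int, SlotKeyHash>;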
namespace {
@@ -342,19 +376,21 @@ struct CallSiteInfo {
/// pass the vector is non-empty, we will need to add a use of llvm.type.test
/// to each of the function summaries in the vector.
std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
+ std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
bool isExported() const {
return SummaryHasTypeTestAssumeUsers ||
!SummaryTypeCheckedLoadUsers.empty();
}
- void markSummaryHasTypeTestAssumeUsers() {
- SummaryHasTypeTestAssumeUsers = true;
+ void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
+ SummaryTypeCheckedLoadUsers.push_back(FS);
AllCallSitesDevirted = false;
}
- void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
- SummaryTypeCheckedLoadUsers.push_back(FS);
+ void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
+ SummaryTypeTestAssumeUsers.push_back(FS);
+ SummaryHasTypeTestAssumeUsers = true;
AllCallSitesDevirted = false;
}
@@ -456,7 +492,6 @@ struct DevirtModule {
void buildTypeIdentifierMap(
std::vector<VTableBits> &Bits,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
- Constant *getPointerAtOffset(Constant *I, uint64_t Offset);
bool
tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos,
@@ -464,7 +499,8 @@ struct DevirtModule {
void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
bool &IsExported);
- bool trySingleImplDevirt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+ bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot,
VTableSlotInfo &SlotInfo,
WholeProgramDevirtResolution *Res);
@@ -542,6 +578,38 @@ struct DevirtModule {
function_ref<DominatorTree &(Function &)> LookupDomTree);
};
+struct DevirtIndex {
+ ModuleSummaryIndex &ExportSummary;
+ // The set in which to record GUIDs exported from their module by
+  // devirtualization, used by the client to ensure they are not internalized.
+ std::set<GlobalValue::GUID> &ExportedGUIDs;
+ // A map in which to record the information necessary to locate the WPD
+ // resolution for local targets in case they are exported by cross module
+ // importing.
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
+
+ MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
+
+ DevirtIndex(
+ ModuleSummaryIndex &ExportSummary,
+ std::set<GlobalValue::GUID> &ExportedGUIDs,
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
+ : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
+ LocalWPDTargetsMap(LocalWPDTargetsMap) {}
+
+ bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
+ const TypeIdCompatibleVtableInfo TIdInfo,
+ uint64_t ByteOffset);
+
+ bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+ VTableSlotSummary &SlotSummary,
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res,
+ std::set<ValueInfo> &DevirtTargets);
+
+ void run();
+};
+
struct WholeProgramDevirt : public ModulePass {
static char ID;
@@ -572,7 +640,7 @@ struct WholeProgramDevirt : public ModulePass {
// an optimization remark emitter on the fly, when we need it.
std::unique_ptr<OptimizationRemarkEmitter> ORE;
auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
- ORE = make_unique<OptimizationRemarkEmitter>(F);
+ ORE = std::make_unique<OptimizationRemarkEmitter>(F);
return *ORE;
};
@@ -632,6 +700,41 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return PreservedAnalyses::none();
}
+namespace llvm {
+void runWholeProgramDevirtOnIndex(
+ ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+ DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
+}
+
+void updateIndexWPDForExports(
+ ModuleSummaryIndex &Summary,
+ function_ref<bool(StringRef, GlobalValue::GUID)> isExported,
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+ for (auto &T : LocalWPDTargetsMap) {
+ auto &VI = T.first;
+ // This was enforced earlier during trySingleImplDevirt.
+ assert(VI.getSummaryList().size() == 1 &&
+ "Devirt of local target has more than one copy");
+ auto &S = VI.getSummaryList()[0];
+ if (!isExported(S->modulePath(), VI.getGUID()))
+ continue;
+
+ // It's been exported by a cross module import.
+ for (auto &SlotSummary : T.second) {
+ auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
+ assert(TIdSum);
+ auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
+ assert(WPDRes != TIdSum->WPDRes.end());
+ WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+ WPDRes->second.SingleImplName,
+ Summary.getModuleHash(S->modulePath()));
+ }
+ }
+}
+
+} // end namespace llvm
+
bool DevirtModule::runForTesting(
Module &M, function_ref<AAResults &(Function &)> AARGetter,
function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
@@ -662,7 +765,7 @@ bool DevirtModule::runForTesting(
ExitOnError ExitOnErr(
"-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
std::error_code EC;
- raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text);
ExitOnErr(errorCodeToError(EC));
yaml::Output Out(OS);
@@ -706,38 +809,6 @@ void DevirtModule::buildTypeIdentifierMap(
}
}
-Constant *DevirtModule::getPointerAtOffset(Constant *I, uint64_t Offset) {
- if (I->getType()->isPointerTy()) {
- if (Offset == 0)
- return I;
- return nullptr;
- }
-
- const DataLayout &DL = M.getDataLayout();
-
- if (auto *C = dyn_cast<ConstantStruct>(I)) {
- const StructLayout *SL = DL.getStructLayout(C->getType());
- if (Offset >= SL->getSizeInBytes())
- return nullptr;
-
- unsigned Op = SL->getElementContainingOffset(Offset);
- return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
- Offset - SL->getElementOffset(Op));
- }
- if (auto *C = dyn_cast<ConstantArray>(I)) {
- ArrayType *VTableTy = C->getType();
- uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType());
-
- unsigned Op = Offset / ElemSize;
- if (Op >= C->getNumOperands())
- return nullptr;
-
- return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
- Offset % ElemSize);
- }
- return nullptr;
-}
-
bool DevirtModule::tryFindVirtualCallTargets(
std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset) {
@@ -746,7 +817,7 @@ bool DevirtModule::tryFindVirtualCallTargets(
return false;
Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
- TM.Offset + ByteOffset);
+ TM.Offset + ByteOffset, M);
if (!Ptr)
return false;
@@ -766,6 +837,34 @@ bool DevirtModule::tryFindVirtualCallTargets(
return !TargetsForSlot.empty();
}
+bool DevirtIndex::tryFindVirtualCallTargets(
+ std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo,
+ uint64_t ByteOffset) {
+ for (const TypeIdOffsetVtableInfo P : TIdInfo) {
+ // VTable initializer should have only one summary, or all copies must be
+ // linkonce/weak ODR.
+ assert(P.VTableVI.getSummaryList().size() == 1 ||
+ llvm::all_of(
+ P.VTableVI.getSummaryList(),
+ [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
+ return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) ||
+ GlobalValue::isWeakODRLinkage(Summary->linkage());
+ }));
+ const auto *VS = cast<GlobalVarSummary>(P.VTableVI.getSummaryList()[0].get());
+ if (!P.VTableVI.getSummaryList()[0]->isLive())
+ continue;
+ for (auto VTP : VS->vTableFuncs()) {
+ if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
+ continue;
+
+ TargetsForSlot.push_back(VTP.FuncVI);
+ }
+ }
+
+ // Give up if we couldn't find any targets.
+ return !TargetsForSlot.empty();
+}
+
void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
Constant *TheFn, bool &IsExported) {
auto Apply = [&](CallSiteInfo &CSInfo) {
@@ -788,9 +887,38 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
Apply(P.second);
}
+static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
+ // We can't add calls if we haven't seen a definition
+ if (Callee.getSummaryList().empty())
+ return false;
+
+ // Insert calls into the summary index so that the devirtualized targets
+ // are eligible for import.
+ // FIXME: Annotate type tests with hotness. For now, mark these as hot
+ // to better ensure we have the opportunity to inline them.
+ bool IsExported = false;
+ auto &S = Callee.getSummaryList()[0];
+ CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
+ auto AddCalls = [&](CallSiteInfo &CSInfo) {
+ for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
+ FS->addCall({Callee, CI});
+ IsExported |= S->modulePath() != FS->modulePath();
+ }
+ for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
+ FS->addCall({Callee, CI});
+ IsExported |= S->modulePath() != FS->modulePath();
+ }
+ };
+ AddCalls(SlotInfo.CSInfo);
+ for (auto &P : SlotInfo.ConstCSInfo)
+ AddCalls(P.second);
+ return IsExported;
+}
+
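The export decision inside AddCalls boils down to a module-path comparison; a standalone sketch with hypothetical types:

#include <string>
#include <vector>

// A devirtualized callee becomes "exported" as soon as any recorded caller
// lives in a different module than the callee's definition.
static bool calleeIsExported(const std::string &CalleeModule,
                             const std::vector<std::string> &CallerModules) {
  for (const std::string &M : CallerModules)
    if (M != CalleeModule)
      return true;
  return false;
}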
bool DevirtModule::trySingleImplDevirt(
- MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) {
+ ModuleSummaryIndex *ExportSummary,
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res) {
// See if the program contains a single implementation of this virtual
// function.
Function *TheFn = TargetsForSlot[0].Fn;
@@ -830,6 +958,10 @@ bool DevirtModule::trySingleImplDevirt(
TheFn->setVisibility(GlobalValue::HiddenVisibility);
TheFn->setName(NewName);
}
+ if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
+ // Any needed promotion of 'TheFn' has already been done during
+ // LTO unit split, so we can ignore return value of AddCalls.
+ AddCalls(SlotInfo, TheFnVI);
Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
Res->SingleImplName = TheFn->getName();
@@ -837,6 +969,63 @@ bool DevirtModule::trySingleImplDevirt(
return true;
}
+bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+ VTableSlotSummary &SlotSummary,
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res,
+ std::set<ValueInfo> &DevirtTargets) {
+ // See if the program contains a single implementation of this virtual
+ // function.
+ auto TheFn = TargetsForSlot[0];
+ for (auto &&Target : TargetsForSlot)
+ if (TheFn != Target)
+ return false;
+
+  // Don't devirtualize if we don't have a target definition.
+ auto Size = TheFn.getSummaryList().size();
+ if (!Size)
+ return false;
+
+ // If the summary list contains multiple summaries where at least one is
+ // a local, give up, as we won't know which (possibly promoted) name to use.
+ for (auto &S : TheFn.getSummaryList())
+ if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
+ return false;
+
+ // Collect functions devirtualized at least for one call site for stats.
+ if (PrintSummaryDevirt)
+ DevirtTargets.insert(TheFn);
+
+ auto &S = TheFn.getSummaryList()[0];
+ bool IsExported = AddCalls(SlotInfo, TheFn);
+ if (IsExported)
+ ExportedGUIDs.insert(TheFn.getGUID());
+
+ // Record in summary for use in devirtualization during the ThinLTO import
+ // step.
+ Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
+ if (GlobalValue::isLocalLinkage(S->linkage())) {
+ if (IsExported)
+      // If the target is a local function and we are exporting it by
+ // devirtualizing a call in another module, we need to record the
+ // promoted name.
+ Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+ TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
+ else {
+ LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
+ Res->SingleImplName = TheFn.name();
+ }
+ } else
+ Res->SingleImplName = TheFn.name();
+
+  // The name will be empty if this thin link is driven off of a serialized
+  // combined index (e.g. llvm-lto). However, WPD is not supported/invoked for
+  // the legacy LTO API anyway.
+ assert(!Res->SingleImplName.empty());
+
+ return true;
+}
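The "single implementation" check itself is simple; as a sketch over plain values standing in for ValueInfo:

#include <string>
#include <vector>

// Devirtualization is only possible when every possible target resolves to
// the same function.
static bool hasSingleImplementation(const std::vector<std::string> &Targets) {
  if (Targets.empty())
    return false;
  for (const std::string &T : Targets)
    if (T != Targets.front())
      return false;  // more than one distinct target: give up
  return true;
}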
+
void DevirtModule::tryICallBranchFunnel(
MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
WholeProgramDevirtResolution *Res, VTableSlot Slot) {
@@ -1302,10 +1491,13 @@ void DevirtModule::rebuildGlobal(VTableBits &B) {
if (B.Before.Bytes.empty() && B.After.Bytes.empty())
return;
- // Align each byte array to pointer width.
- unsigned PointerSize = M.getDataLayout().getPointerSize();
- B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), PointerSize));
- B.After.Bytes.resize(alignTo(B.After.Bytes.size(), PointerSize));
+ // Align the before byte array to the global's minimum alignment so that we
+ // don't break any alignment requirements on the global.
+ MaybeAlign Alignment(B.GV->getAlignment());
+ if (!Alignment)
+ Alignment =
+ Align(M.getDataLayout().getABITypeAlignment(B.GV->getValueType()));
+ B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
// Before was stored in reverse order; flip it now.
for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
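alignTo here rounds the byte-array size up to the next multiple of the chosen alignment; a quick standalone equivalent of that padding computation (assuming a non-zero alignment value):

#include <cstdint>

// Round Size up to the next multiple of Alignment (Alignment must be non-zero).
static uint64_t alignToSketch(uint64_t Size, uint64_t Alignment) {
  return (Size + Alignment - 1) / Alignment * Alignment;
}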
@@ -1322,6 +1514,7 @@ void DevirtModule::rebuildGlobal(VTableBits &B) {
GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
NewGV->setSection(B.GV->getSection());
NewGV->setComdat(B.GV->getComdat());
+ NewGV->setAlignment(MaybeAlign(B.GV->getAlignment()));
// Copy the original vtable's metadata to the anonymous global, adjusting
// offsets as required.
@@ -1483,8 +1676,11 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
}
void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
+ auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
+ if (!TypeId)
+ return;
const TypeIdSummary *TidSummary =
- ImportSummary->getTypeIdSummary(cast<MDString>(Slot.TypeID)->getString());
+ ImportSummary->getTypeIdSummary(TypeId->getString());
if (!TidSummary)
return;
auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
@@ -1493,6 +1689,7 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
const WholeProgramDevirtResolution &Res = ResI->second;
if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
+ assert(!Res.SingleImplName.empty());
// The type of the function in the declaration is irrelevant because every
// call site will cast it to the correct type.
Constant *SingleImpl =
@@ -1627,8 +1824,7 @@ bool DevirtModule::run() {
// FIXME: Only add live functions.
for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
for (Metadata *MD : MetadataByGUID[VF.GUID]) {
- CallSlots[{MD, VF.Offset}]
- .CSInfo.markSummaryHasTypeTestAssumeUsers();
+ CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
}
}
for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
@@ -1641,7 +1837,7 @@ bool DevirtModule::run() {
for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
CallSlots[{MD, VC.VFunc.Offset}]
.ConstCSInfo[VC.Args]
- .markSummaryHasTypeTestAssumeUsers();
+ .addSummaryTypeTestAssumeUser(FS);
}
}
for (const FunctionSummary::ConstVCall &VC :
@@ -1673,7 +1869,7 @@ bool DevirtModule::run() {
cast<MDString>(S.first.TypeID)->getString())
.WPDRes[S.first.ByteOffset];
- if (!trySingleImplDevirt(TargetsForSlot, S.second, Res)) {
+ if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
DidVirtualConstProp |=
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
@@ -1710,7 +1906,7 @@ bool DevirtModule::run() {
using namespace ore;
OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
<< "devirtualized "
- << NV("FunctionName", F->getName()));
+ << NV("FunctionName", DT.first));
}
}
@@ -1722,5 +1918,86 @@ bool DevirtModule::run() {
for (VTableBits &B : Bits)
rebuildGlobal(B);
+ // We have lowered or deleted the type checked load intrinsics, so we no
+ // longer have enough information to reason about the liveness of virtual
+ // function pointers in GlobalDCE.
+ for (GlobalVariable &GV : M.globals())
+ GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
+
return true;
}
+
+void DevirtIndex::run() {
+ if (ExportSummary.typeIdCompatibleVtableMap().empty())
+ return;
+
+ DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
+ for (auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
+ NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
+ }
+
+ // Collect information from summary about which calls to try to devirtualize.
+ for (auto &P : ExportSummary) {
+ for (auto &S : P.second.SummaryList) {
+ auto *FS = dyn_cast<FunctionSummary>(S.get());
+ if (!FS)
+ continue;
+ // FIXME: Only add live functions.
+ for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
+ for (StringRef Name : NameByGUID[VF.GUID]) {
+ CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
+ }
+ }
+ for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
+ for (StringRef Name : NameByGUID[VF.GUID]) {
+ CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_test_assume_const_vcalls()) {
+ for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+ CallSlots[{Name, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .addSummaryTypeTestAssumeUser(FS);
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_checked_load_const_vcalls()) {
+ for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+ CallSlots[{Name, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .addSummaryTypeCheckedLoadUser(FS);
+ }
+ }
+ }
+ }
+
+ std::set<ValueInfo> DevirtTargets;
+ // For each (type, offset) pair:
+ for (auto &S : CallSlots) {
+ // Search each of the members of the type identifier for the virtual
+ // function implementation at offset S.first.ByteOffset, and add to
+ // TargetsForSlot.
+ std::vector<ValueInfo> TargetsForSlot;
+ auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
+ assert(TidSummary);
+ if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
+ S.first.ByteOffset)) {
+ WholeProgramDevirtResolution *Res =
+ &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
+ .WPDRes[S.first.ByteOffset];
+
+ if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
+ DevirtTargets))
+ continue;
+ }
+ }
+
+  // Optionally have the thin link print a message for each devirtualized
+  // function.
+ if (PrintSummaryDevirt)
+ for (const auto &DT : DevirtTargets)
+ errs() << "Devirtualized call to " << DT << "\n";
+
+ return;
+}