summaryrefslogtreecommitdiff
path: root/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp')
-rw-r--r--llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp363
1 files changed, 363 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
new file mode 100644
index 000000000000..b87aa4065392
--- /dev/null
+++ b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -0,0 +1,363 @@
+//===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a CFL-base, summary-based alias analysis algorithm. It
+// does not depend on types. The algorithm is a mixture of the one described in
+// "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast
+// algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by
+// Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a
+// graph of the uses of a variable, where each node is a memory location, and
+// each edge is an action that happened on that memory location. The "actions"
+// can be one of Dereference, Reference, or Assign. The precision of this
+// analysis is roughly the same as that of an one level context-sensitive
+// Steensgaard's algorithm.
+//
+// Two variables are considered as aliasing iff you can reach one value's node
+// from the other value's node and the language formed by concatenating all of
+// the edge labels (actions) conforms to a context-free grammar.
+//
+// Because this algorithm requires a graph search on each query, we execute the
+// algorithm outlined in "Fast algorithms..." (mentioned above)
+// in order to transform the graph into sets of variables that may alias in
+// ~nlogn time (n = number of variables), which makes queries take constant
+// time.
+//===----------------------------------------------------------------------===//
+
+// N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
+// CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because
+// FunctionPasses are only allowed to inspect the Function that they're being
+// run on. Realistically, this likely isn't a problem until we allow
+// FunctionPasses to run concurrently.
+
+#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
+#include "AliasAnalysisSummary.h"
+#include "CFLGraph.h"
+#include "StratifiedSets.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <memory>
+#include <utility>
+
+using namespace llvm;
+using namespace llvm::cflaa;
+
+#define DEBUG_TYPE "cfl-steens-aa"
+
+CFLSteensAAResult::CFLSteensAAResult(
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
+ : AAResultBase(), GetTLI(std::move(GetTLI)) {}
+CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
+ : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {}
+CFLSteensAAResult::~CFLSteensAAResult() = default;
+
+/// Information we have about a function and would like to keep around.
+class CFLSteensAAResult::FunctionInfo {
+ StratifiedSets<InstantiatedValue> Sets;
+ AliasSummary Summary;
+
+public:
+ FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals,
+ StratifiedSets<InstantiatedValue> S);
+
+ const StratifiedSets<InstantiatedValue> &getStratifiedSets() const {
+ return Sets;
+ }
+
+ const AliasSummary &getAliasSummary() const { return Summary; }
+};
+
+const StratifiedIndex StratifiedLink::SetSentinel =
+ std::numeric_limits<StratifiedIndex>::max();
+
+//===----------------------------------------------------------------------===//
+// Function declarations that require types defined in the namespace above
+//===----------------------------------------------------------------------===//
+
+/// Determines whether it would be pointless to add the given Value to our sets.
+static bool canSkipAddingToSets(Value *Val) {
+ // Constants can share instances, which may falsely unify multiple
+ // sets, e.g. in
+ // store i32* null, i32** %ptr1
+ // store i32* null, i32** %ptr2
+ // clearly ptr1 and ptr2 should not be unified into the same set, so
+ // we should filter out the (potentially shared) instance to
+ // i32* null.
+ if (isa<Constant>(Val)) {
+ // TODO: Because all of these things are constant, we can determine whether
+ // the data is *actually* mutable at graph building time. This will probably
+ // come for free/cheap with offset awareness.
+ bool CanStoreMutableData = isa<GlobalValue>(Val) ||
+ isa<ConstantExpr>(Val) ||
+ isa<ConstantAggregate>(Val);
+ return !CanStoreMutableData;
+ }
+
+ return false;
+}
+
+CFLSteensAAResult::FunctionInfo::FunctionInfo(
+ Function &Fn, const SmallVectorImpl<Value *> &RetVals,
+ StratifiedSets<InstantiatedValue> S)
+ : Sets(std::move(S)) {
+ // Historically, an arbitrary upper-bound of 50 args was selected. We may want
+ // to remove this if it doesn't really matter in practice.
+ if (Fn.arg_size() > MaxSupportedArgsInSummary)
+ return;
+
+ DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap;
+
+ // Our intention here is to record all InterfaceValues that share the same
+ // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we
+ // have its StratifiedIndex scanned here and check if the index is presented
+ // in InterfaceMap: if it is not, we add the correspondence to the map;
+ // otherwise, an aliasing relation is found and we add it to
+ // RetParamRelations.
+
+ auto AddToRetParamRelations = [&](unsigned InterfaceIndex,
+ StratifiedIndex SetIndex) {
+ unsigned Level = 0;
+ while (true) {
+ InterfaceValue CurrValue{InterfaceIndex, Level};
+
+ auto Itr = InterfaceMap.find(SetIndex);
+ if (Itr != InterfaceMap.end()) {
+ if (CurrValue != Itr->second)
+ Summary.RetParamRelations.push_back(
+ ExternalRelation{CurrValue, Itr->second, UnknownOffset});
+ break;
+ }
+
+ auto &Link = Sets.getLink(SetIndex);
+ InterfaceMap.insert(std::make_pair(SetIndex, CurrValue));
+ auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs);
+ if (ExternalAttrs.any())
+ Summary.RetParamAttributes.push_back(
+ ExternalAttribute{CurrValue, ExternalAttrs});
+
+ if (!Link.hasBelow())
+ break;
+
+ ++Level;
+ SetIndex = Link.Below;
+ }
+ };
+
+ // Populate RetParamRelations for return values
+ for (auto *RetVal : RetVals) {
+ assert(RetVal != nullptr);
+ assert(RetVal->getType()->isPointerTy());
+ auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
+ if (RetInfo.hasValue())
+ AddToRetParamRelations(0, RetInfo->Index);
+ }
+
+ // Populate RetParamRelations for parameters
+ unsigned I = 0;
+ for (auto &Param : Fn.args()) {
+ if (Param.getType()->isPointerTy()) {
+ auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
+ if (ParamInfo.hasValue())
+ AddToRetParamRelations(I + 1, ParamInfo->Index);
+ }
+ ++I;
+ }
+}
+
+// Builds the graph + StratifiedSets for a function.
+CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
+ CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn);
+ StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
+
+ // Add all CFLGraph nodes and all Dereference edges to StratifiedSets
+ auto &Graph = GraphBuilder.getCFLGraph();
+ for (const auto &Mapping : Graph.value_mappings()) {
+ auto Val = Mapping.first;
+ if (canSkipAddingToSets(Val))
+ continue;
+ auto &ValueInfo = Mapping.second;
+
+ assert(ValueInfo.getNumLevels() > 0);
+ SetBuilder.add(InstantiatedValue{Val, 0});
+ SetBuilder.noteAttributes(InstantiatedValue{Val, 0},
+ ValueInfo.getNodeInfoAtLevel(0).Attr);
+ for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) {
+ SetBuilder.add(InstantiatedValue{Val, I + 1});
+ SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1},
+ ValueInfo.getNodeInfoAtLevel(I + 1).Attr);
+ SetBuilder.addBelow(InstantiatedValue{Val, I},
+ InstantiatedValue{Val, I + 1});
+ }
+ }
+
+ // Add all assign edges to StratifiedSets
+ for (const auto &Mapping : Graph.value_mappings()) {
+ auto Val = Mapping.first;
+ if (canSkipAddingToSets(Val))
+ continue;
+ auto &ValueInfo = Mapping.second;
+
+ for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
+ auto Src = InstantiatedValue{Val, I};
+ for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
+ SetBuilder.addWith(Src, Edge.Other);
+ }
+ }
+
+ return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build());
+}
+
+void CFLSteensAAResult::scan(Function *Fn) {
+ auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
+ (void)InsertPair;
+ assert(InsertPair.second &&
+ "Trying to scan a function that has already been cached");
+
+ // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
+ // may get evaluated after operator[], potentially triggering a DenseMap
+ // resize and invalidating the reference returned by operator[]
+ auto FunInfo = buildSetsFrom(Fn);
+ Cache[Fn] = std::move(FunInfo);
+
+ Handles.emplace_front(Fn, this);
+}
+
+void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
+
+/// Ensures that the given function is available in the cache, and returns the
+/// entry.
+const Optional<CFLSteensAAResult::FunctionInfo> &
+CFLSteensAAResult::ensureCached(Function *Fn) {
+ auto Iter = Cache.find(Fn);
+ if (Iter == Cache.end()) {
+ scan(Fn);
+ Iter = Cache.find(Fn);
+ assert(Iter != Cache.end());
+ assert(Iter->second.hasValue());
+ }
+ return Iter->second;
+}
+
+const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
+ auto &FunInfo = ensureCached(&Fn);
+ if (FunInfo.hasValue())
+ return &FunInfo->getAliasSummary();
+ else
+ return nullptr;
+}
+
+AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ auto *ValA = const_cast<Value *>(LocA.Ptr);
+ auto *ValB = const_cast<Value *>(LocB.Ptr);
+
+ if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
+ return NoAlias;
+
+ Function *Fn = nullptr;
+ Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA));
+ Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB));
+ if (!MaybeFnA && !MaybeFnB) {
+ // The only times this is known to happen are when globals + InlineAsm are
+ // involved
+ LLVM_DEBUG(
+ dbgs()
+ << "CFLSteensAA: could not extract parent function information.\n");
+ return MayAlias;
+ }
+
+ if (MaybeFnA) {
+ Fn = MaybeFnA;
+ assert((!MaybeFnB || MaybeFnB == MaybeFnA) &&
+ "Interprocedural queries not supported");
+ } else {
+ Fn = MaybeFnB;
+ }
+
+ assert(Fn != nullptr);
+ auto &MaybeInfo = ensureCached(Fn);
+ assert(MaybeInfo.hasValue());
+
+ auto &Sets = MaybeInfo->getStratifiedSets();
+ auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
+ if (!MaybeA.hasValue())
+ return MayAlias;
+
+ auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
+ if (!MaybeB.hasValue())
+ return MayAlias;
+
+ auto SetA = *MaybeA;
+ auto SetB = *MaybeB;
+ auto AttrsA = Sets.getLink(SetA.Index).Attrs;
+ auto AttrsB = Sets.getLink(SetB.Index).Attrs;
+
+ // If both values are local (meaning the corresponding set has attribute
+ // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them:
+ // they may-alias each other if and only if they are in the same set.
+ // If at least one value is non-local (meaning it either is global/argument or
+ // it comes from unknown sources like integer cast), the situation becomes a
+ // bit more interesting. We follow three general rules described below:
+ // - Non-local values may alias each other
+ // - AttrNone values do not alias any non-local values
+ // - AttrEscaped do not alias globals/arguments, but they may alias
+ // AttrUnknown values
+ if (SetA.Index == SetB.Index)
+ return MayAlias;
+ if (AttrsA.none() || AttrsB.none())
+ return NoAlias;
+ if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
+ return MayAlias;
+ if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
+ return MayAlias;
+ return NoAlias;
+}
+
+AnalysisKey CFLSteensAA::Key;
+
+CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
+ auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & {
+ return AM.getResult<TargetLibraryAnalysis>(F);
+ };
+ return CFLSteensAAResult(GetTLI);
+}
+
+char CFLSteensAAWrapperPass::ID = 0;
+INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa",
+ "Unification-Based CFL Alias Analysis", false, true)
+
+ImmutablePass *llvm::createCFLSteensAAWrapperPass() {
+ return new CFLSteensAAWrapperPass();
+}
+
+CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
+ initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+void CFLSteensAAWrapperPass::initializePass() {
+ auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ Result.reset(new CFLSteensAAResult(GetTLI));
+}
+
+void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}