Diffstat (limited to 'llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp')
-rw-r--r-- | llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp | 931
1 file changed, 931 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
new file mode 100644
index 000000000000..fd90bd1521d6
--- /dev/null
+++ b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -0,0 +1,931 @@
+//===- CFLAndersAliasAnalysis.cpp - Inclusion-based Alias Analysis -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a CFL-based, summary-based alias analysis algorithm. It
+// differs from CFLSteensAliasAnalysis in that it is inclusion-based, while
+// CFLSteensAliasAnalysis is unification-based. This pass has worse performance
+// than CFLSteensAliasAnalysis (the worst-case complexity of
+// CFLAndersAliasAnalysis is cubic, while the worst-case complexity of
+// CFLSteensAliasAnalysis is almost linear), but it is able to yield more
+// precise analysis results. The precision of this analysis is roughly the same
+// as that of a one-level context-sensitive Andersen's algorithm.
+//
+// The algorithm used here is based on the recursive state machine matching
+// scheme proposed in "Demand-driven alias analysis for C" by Xin Zheng and
+// Radu Rugina. The general idea is to extend the traditional transitive
+// closure algorithm to perform CFL matching along the way: instead of
+// recording "whether X is reachable from Y", we keep track of "whether X is
+// reachable from Y at state Z", where the "state" field indicates where we are
+// in the CFL matching process. To understand the matching better, it is
+// advisable to have the state machine shown in Figure 3 of the paper available
+// when reading the code: all we do here is selectively expand the transitive
+// closure by discarding edges that are not recognized by the state machine.
+//
+// There are two differences between our current implementation and the one
+// described in the paper:
+// - Our algorithm eagerly computes all alias pairs after the CFLGraph is
+// built, while in the paper the authors perform the computation in a
+// demand-driven fashion. We did not implement the demand-driven algorithm due
+// to the additional coding complexity and higher memory profile, but if we
+// find it necessary we may switch to it eventually.
+// - In the paper the authors use a state machine that does not distinguish
+// value reads from value writes. For example, if Y is reachable from X at
+// state S3, it may be the case that X is written into Y, or it may be the case
+// that there's a third value Z that writes into both X and Y. To make that
+// distinction (which is crucial in building the function summary as well as
+// retrieving mod-ref info), we choose to duplicate some of the states in the
+// paper's proposed state machine. The duplication does not change the language
+// the machine accepts. Given a pair of reachable values, it only provides more
+// detailed information on which value is being written into and which is being
+// read from.
+//
+//===----------------------------------------------------------------------===//
+
+// N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
+// CFLAndersAA is interprocedural. This is *technically* A Bad Thing, because
+// FunctionPasses are only allowed to inspect the Function that they're being
+// run on.
Realistically, this likely isn't a problem until we allow +// FunctionPasses to run concurrently. + +#include "llvm/Analysis/CFLAndersAliasAnalysis.h" +#include "AliasAnalysisSummary.h" +#include "CFLGraph.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <bitset> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <functional> +#include <utility> +#include <vector> + +using namespace llvm; +using namespace llvm::cflaa; + +#define DEBUG_TYPE "cfl-anders-aa" + +CFLAndersAAResult::CFLAndersAAResult( + std::function<const TargetLibraryInfo &(Function &F)> GetTLI) + : GetTLI(std::move(GetTLI)) {} +CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS) + : AAResultBase(std::move(RHS)), GetTLI(std::move(RHS.GetTLI)) {} +CFLAndersAAResult::~CFLAndersAAResult() = default; + +namespace { + +enum class MatchState : uint8_t { + // The following state represents S1 in the paper. + FlowFromReadOnly = 0, + // The following two states together represent S2 in the paper. + // The 'NoReadWrite' suffix indicates that there exists an alias path that + // does not contain assignment and reverse assignment edges. + // The 'ReadOnly' suffix indicates that there exists an alias path that + // contains reverse assignment edges only. + FlowFromMemAliasNoReadWrite, + FlowFromMemAliasReadOnly, + // The following two states together represent S3 in the paper. + // The 'WriteOnly' suffix indicates that there exists an alias path that + // contains assignment edges only. + // The 'ReadWrite' suffix indicates that there exists an alias path that + // contains both assignment and reverse assignment edges. Note that if X and Y + // are reachable at 'ReadWrite' state, it does NOT mean X is both read from + // and written to Y. Instead, it means that a third value Z is written to both + // X and Y. + FlowToWriteOnly, + FlowToReadWrite, + // The following two states together represent S4 in the paper. 
+ FlowToMemAliasWriteOnly, + FlowToMemAliasReadWrite, +}; + +using StateSet = std::bitset<7>; + +const unsigned ReadOnlyStateMask = + (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) | + (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly)); +const unsigned WriteOnlyStateMask = + (1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) | + (1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly)); + +// A pair that consists of a value and an offset +struct OffsetValue { + const Value *Val; + int64_t Offset; +}; + +bool operator==(OffsetValue LHS, OffsetValue RHS) { + return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset; +} +bool operator<(OffsetValue LHS, OffsetValue RHS) { + return std::less<const Value *>()(LHS.Val, RHS.Val) || + (LHS.Val == RHS.Val && LHS.Offset < RHS.Offset); +} + +// A pair that consists of an InstantiatedValue and an offset +struct OffsetInstantiatedValue { + InstantiatedValue IVal; + int64_t Offset; +}; + +bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) { + return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset; +} + +// We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in +// the paper) during the analysis. +class ReachabilitySet { + using ValueStateMap = DenseMap<InstantiatedValue, StateSet>; + using ValueReachMap = DenseMap<InstantiatedValue, ValueStateMap>; + + ValueReachMap ReachMap; + +public: + using const_valuestate_iterator = ValueStateMap::const_iterator; + using const_value_iterator = ValueReachMap::const_iterator; + + // Insert edge 'From->To' at state 'State' + bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) { + assert(From != To); + auto &States = ReachMap[To][From]; + auto Idx = static_cast<size_t>(State); + if (!States.test(Idx)) { + States.set(Idx); + return true; + } + return false; + } + + // Return the set of all ('From', 'State') pair for a given node 'To' + iterator_range<const_valuestate_iterator> + reachableValueAliases(InstantiatedValue V) const { + auto Itr = ReachMap.find(V); + if (Itr == ReachMap.end()) + return make_range<const_valuestate_iterator>(const_valuestate_iterator(), + const_valuestate_iterator()); + return make_range<const_valuestate_iterator>(Itr->second.begin(), + Itr->second.end()); + } + + iterator_range<const_value_iterator> value_mappings() const { + return make_range<const_value_iterator>(ReachMap.begin(), ReachMap.end()); + } +}; + +// We use AliasMemSet to keep track of all memory aliases (the nonterminal "M" +// in the paper) during the analysis. +class AliasMemSet { + using MemSet = DenseSet<InstantiatedValue>; + using MemMapType = DenseMap<InstantiatedValue, MemSet>; + + MemMapType MemMap; + +public: + using const_mem_iterator = MemSet::const_iterator; + + bool insert(InstantiatedValue LHS, InstantiatedValue RHS) { + // Top-level values can never be memory aliases because one cannot take the + // addresses of them + assert(LHS.DerefLevel > 0 && RHS.DerefLevel > 0); + return MemMap[LHS].insert(RHS).second; + } + + const MemSet *getMemoryAliases(InstantiatedValue V) const { + auto Itr = MemMap.find(V); + if (Itr == MemMap.end()) + return nullptr; + return &Itr->second; + } +}; + +// We use AliasAttrMap to keep track of the AliasAttr of each node. 
+class AliasAttrMap { + using MapType = DenseMap<InstantiatedValue, AliasAttrs>; + + MapType AttrMap; + +public: + using const_iterator = MapType::const_iterator; + + bool add(InstantiatedValue V, AliasAttrs Attr) { + auto &OldAttr = AttrMap[V]; + auto NewAttr = OldAttr | Attr; + if (OldAttr == NewAttr) + return false; + OldAttr = NewAttr; + return true; + } + + AliasAttrs getAttrs(InstantiatedValue V) const { + AliasAttrs Attr; + auto Itr = AttrMap.find(V); + if (Itr != AttrMap.end()) + Attr = Itr->second; + return Attr; + } + + iterator_range<const_iterator> mappings() const { + return make_range<const_iterator>(AttrMap.begin(), AttrMap.end()); + } +}; + +struct WorkListItem { + InstantiatedValue From; + InstantiatedValue To; + MatchState State; +}; + +struct ValueSummary { + struct Record { + InterfaceValue IValue; + unsigned DerefLevel; + }; + SmallVector<Record, 4> FromRecords, ToRecords; +}; + +} // end anonymous namespace + +namespace llvm { + +// Specialize DenseMapInfo for OffsetValue. +template <> struct DenseMapInfo<OffsetValue> { + static OffsetValue getEmptyKey() { + return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(), + DenseMapInfo<int64_t>::getEmptyKey()}; + } + + static OffsetValue getTombstoneKey() { + return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(), + DenseMapInfo<int64_t>::getEmptyKey()}; + } + + static unsigned getHashValue(const OffsetValue &OVal) { + return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue( + std::make_pair(OVal.Val, OVal.Offset)); + } + + static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) { + return LHS == RHS; + } +}; + +// Specialize DenseMapInfo for OffsetInstantiatedValue. +template <> struct DenseMapInfo<OffsetInstantiatedValue> { + static OffsetInstantiatedValue getEmptyKey() { + return OffsetInstantiatedValue{ + DenseMapInfo<InstantiatedValue>::getEmptyKey(), + DenseMapInfo<int64_t>::getEmptyKey()}; + } + + static OffsetInstantiatedValue getTombstoneKey() { + return OffsetInstantiatedValue{ + DenseMapInfo<InstantiatedValue>::getTombstoneKey(), + DenseMapInfo<int64_t>::getEmptyKey()}; + } + + static unsigned getHashValue(const OffsetInstantiatedValue &OVal) { + return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue( + std::make_pair(OVal.IVal, OVal.Offset)); + } + + static bool isEqual(const OffsetInstantiatedValue &LHS, + const OffsetInstantiatedValue &RHS) { + return LHS == RHS; + } +}; + +} // end namespace llvm + +class CFLAndersAAResult::FunctionInfo { + /// Map a value to other values that may alias it + /// Since the alias relation is symmetric, to save some space we assume values + /// are properly ordered: if a and b alias each other, and a < b, then b is in + /// AliasMap[a] but not vice versa. + DenseMap<const Value *, std::vector<OffsetValue>> AliasMap; + + /// Map a value to its corresponding AliasAttrs + DenseMap<const Value *, AliasAttrs> AttrMap; + + /// Summary of externally visible effects. 
+ AliasSummary Summary; + + Optional<AliasAttrs> getAttrs(const Value *) const; + +public: + FunctionInfo(const Function &, const SmallVectorImpl<Value *> &, + const ReachabilitySet &, const AliasAttrMap &); + + bool mayAlias(const Value *, LocationSize, const Value *, LocationSize) const; + const AliasSummary &getAliasSummary() const { return Summary; } +}; + +static bool hasReadOnlyState(StateSet Set) { + return (Set & StateSet(ReadOnlyStateMask)).any(); +} + +static bool hasWriteOnlyState(StateSet Set) { + return (Set & StateSet(WriteOnlyStateMask)).any(); +} + +static Optional<InterfaceValue> +getInterfaceValue(InstantiatedValue IValue, + const SmallVectorImpl<Value *> &RetVals) { + auto Val = IValue.Val; + + Optional<unsigned> Index; + if (auto Arg = dyn_cast<Argument>(Val)) + Index = Arg->getArgNo() + 1; + else if (is_contained(RetVals, Val)) + Index = 0; + + if (Index) + return InterfaceValue{*Index, IValue.DerefLevel}; + return None; +} + +static void populateAttrMap(DenseMap<const Value *, AliasAttrs> &AttrMap, + const AliasAttrMap &AMap) { + for (const auto &Mapping : AMap.mappings()) { + auto IVal = Mapping.first; + + // Insert IVal into the map + auto &Attr = AttrMap[IVal.Val]; + // AttrMap only cares about top-level values + if (IVal.DerefLevel == 0) + Attr |= Mapping.second; + } +} + +static void +populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap, + const ReachabilitySet &ReachSet) { + for (const auto &OuterMapping : ReachSet.value_mappings()) { + // AliasMap only cares about top-level values + if (OuterMapping.first.DerefLevel > 0) + continue; + + auto Val = OuterMapping.first.Val; + auto &AliasList = AliasMap[Val]; + for (const auto &InnerMapping : OuterMapping.second) { + // Again, AliasMap only cares about top-level values + if (InnerMapping.first.DerefLevel == 0) + AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset}); + } + + // Sort AliasList for faster lookup + llvm::sort(AliasList); + } +} + +static void populateExternalRelations( + SmallVectorImpl<ExternalRelation> &ExtRelations, const Function &Fn, + const SmallVectorImpl<Value *> &RetVals, const ReachabilitySet &ReachSet) { + // If a function only returns one of its argument X, then X will be both an + // argument and a return value at the same time. This is an edge case that + // needs special handling here. + for (const auto &Arg : Fn.args()) { + if (is_contained(RetVals, &Arg)) { + auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0}; + auto RetVal = InterfaceValue{0, 0}; + ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0}); + } + } + + // Below is the core summary construction logic. + // A naive solution of adding only the value aliases that are parameters or + // return values in ReachSet to the summary won't work: It is possible that a + // parameter P is written into an intermediate value I, and the function + // subsequently returns *I. In that case, *I is does not value alias anything + // in ReachSet, and the naive solution will miss a summary edge from (P, 1) to + // (I, 1). + // To account for the aforementioned case, we need to check each non-parameter + // and non-return value for the possibility of acting as an intermediate. + // 'ValueMap' here records, for each value, which InterfaceValues read from or + // write into it. If both the read list and the write list of a given value + // are non-empty, we know that a particular value is an intermidate and we + // need to add summary edges from the writes to the reads. 
+ DenseMap<Value *, ValueSummary> ValueMap; + for (const auto &OuterMapping : ReachSet.value_mappings()) { + if (auto Dst = getInterfaceValue(OuterMapping.first, RetVals)) { + for (const auto &InnerMapping : OuterMapping.second) { + // If Src is a param/return value, we get a same-level assignment. + if (auto Src = getInterfaceValue(InnerMapping.first, RetVals)) { + // This may happen if both Dst and Src are return values + if (*Dst == *Src) + continue; + + if (hasReadOnlyState(InnerMapping.second)) + ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset}); + // No need to check for WriteOnly state, since ReachSet is symmetric + } else { + // If Src is not a param/return, add it to ValueMap + auto SrcIVal = InnerMapping.first; + if (hasReadOnlyState(InnerMapping.second)) + ValueMap[SrcIVal.Val].FromRecords.push_back( + ValueSummary::Record{*Dst, SrcIVal.DerefLevel}); + if (hasWriteOnlyState(InnerMapping.second)) + ValueMap[SrcIVal.Val].ToRecords.push_back( + ValueSummary::Record{*Dst, SrcIVal.DerefLevel}); + } + } + } + } + + for (const auto &Mapping : ValueMap) { + for (const auto &FromRecord : Mapping.second.FromRecords) { + for (const auto &ToRecord : Mapping.second.ToRecords) { + auto ToLevel = ToRecord.DerefLevel; + auto FromLevel = FromRecord.DerefLevel; + // Same-level assignments should have already been processed by now + if (ToLevel == FromLevel) + continue; + + auto SrcIndex = FromRecord.IValue.Index; + auto SrcLevel = FromRecord.IValue.DerefLevel; + auto DstIndex = ToRecord.IValue.Index; + auto DstLevel = ToRecord.IValue.DerefLevel; + if (ToLevel > FromLevel) + SrcLevel += ToLevel - FromLevel; + else + DstLevel += FromLevel - ToLevel; + + ExtRelations.push_back(ExternalRelation{ + InterfaceValue{SrcIndex, SrcLevel}, + InterfaceValue{DstIndex, DstLevel}, UnknownOffset}); + } + } + } + + // Remove duplicates in ExtRelations + llvm::sort(ExtRelations); + ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()), + ExtRelations.end()); +} + +static void populateExternalAttributes( + SmallVectorImpl<ExternalAttribute> &ExtAttributes, const Function &Fn, + const SmallVectorImpl<Value *> &RetVals, const AliasAttrMap &AMap) { + for (const auto &Mapping : AMap.mappings()) { + if (auto IVal = getInterfaceValue(Mapping.first, RetVals)) { + auto Attr = getExternallyVisibleAttrs(Mapping.second); + if (Attr.any()) + ExtAttributes.push_back(ExternalAttribute{*IVal, Attr}); + } + } +} + +CFLAndersAAResult::FunctionInfo::FunctionInfo( + const Function &Fn, const SmallVectorImpl<Value *> &RetVals, + const ReachabilitySet &ReachSet, const AliasAttrMap &AMap) { + populateAttrMap(AttrMap, AMap); + populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap); + populateAliasMap(AliasMap, ReachSet); + populateExternalRelations(Summary.RetParamRelations, Fn, RetVals, ReachSet); +} + +Optional<AliasAttrs> +CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const { + assert(V != nullptr); + + auto Itr = AttrMap.find(V); + if (Itr != AttrMap.end()) + return Itr->second; + return None; +} + +bool CFLAndersAAResult::FunctionInfo::mayAlias( + const Value *LHS, LocationSize MaybeLHSSize, const Value *RHS, + LocationSize MaybeRHSSize) const { + assert(LHS && RHS); + + // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created + // after the analysis gets executed, and we want to be conservative in those + // cases. 
+ auto MaybeAttrsA = getAttrs(LHS); + auto MaybeAttrsB = getAttrs(RHS); + if (!MaybeAttrsA || !MaybeAttrsB) + return true; + + // Check AliasAttrs before AliasMap lookup since it's cheaper + auto AttrsA = *MaybeAttrsA; + auto AttrsB = *MaybeAttrsB; + if (hasUnknownOrCallerAttr(AttrsA)) + return AttrsB.any(); + if (hasUnknownOrCallerAttr(AttrsB)) + return AttrsA.any(); + if (isGlobalOrArgAttr(AttrsA)) + return isGlobalOrArgAttr(AttrsB); + if (isGlobalOrArgAttr(AttrsB)) + return isGlobalOrArgAttr(AttrsA); + + // At this point both LHS and RHS should point to locally allocated objects + + auto Itr = AliasMap.find(LHS); + if (Itr != AliasMap.end()) { + + // Find out all (X, Offset) where X == RHS + auto Comparator = [](OffsetValue LHS, OffsetValue RHS) { + return std::less<const Value *>()(LHS.Val, RHS.Val); + }; +#ifdef EXPENSIVE_CHECKS + assert(std::is_sorted(Itr->second.begin(), Itr->second.end(), Comparator)); +#endif + auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(), + OffsetValue{RHS, 0}, Comparator); + + if (RangePair.first != RangePair.second) { + // Be conservative about unknown sizes + if (MaybeLHSSize == LocationSize::unknown() || + MaybeRHSSize == LocationSize::unknown()) + return true; + + const uint64_t LHSSize = MaybeLHSSize.getValue(); + const uint64_t RHSSize = MaybeRHSSize.getValue(); + + for (const auto &OVal : make_range(RangePair)) { + // Be conservative about UnknownOffset + if (OVal.Offset == UnknownOffset) + return true; + + // We know that LHS aliases (RHS + OVal.Offset) if the control flow + // reaches here. The may-alias query essentially becomes integer + // range-overlap queries over two ranges [OVal.Offset, OVal.Offset + + // LHSSize) and [0, RHSSize). + + // Try to be conservative on super large offsets + if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX)) + return true; + + auto LHSStart = OVal.Offset; + // FIXME: Do we need to guard against integer overflow? 
+ auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize); + auto RHSStart = 0; + auto RHSEnd = static_cast<int64_t>(RHSSize); + if (LHSEnd > RHSStart && LHSStart < RHSEnd) + return true; + } + } + } + + return false; +} + +static void propagate(InstantiatedValue From, InstantiatedValue To, + MatchState State, ReachabilitySet &ReachSet, + std::vector<WorkListItem> &WorkList) { + if (From == To) + return; + if (ReachSet.insert(From, To, State)) + WorkList.push_back(WorkListItem{From, To, State}); +} + +static void initializeWorkList(std::vector<WorkListItem> &WorkList, + ReachabilitySet &ReachSet, + const CFLGraph &Graph) { + for (const auto &Mapping : Graph.value_mappings()) { + auto Val = Mapping.first; + auto &ValueInfo = Mapping.second; + assert(ValueInfo.getNumLevels() > 0); + + // Insert all immediate assignment neighbors to the worklist + for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { + auto Src = InstantiatedValue{Val, I}; + // If there's an assignment edge from X to Y, it means Y is reachable from + // X at S3 and X is reachable from Y at S1 + for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) { + propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet, + WorkList); + propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet, + WorkList); + } + } + } +} + +static Optional<InstantiatedValue> getNodeBelow(const CFLGraph &Graph, + InstantiatedValue V) { + auto NodeBelow = InstantiatedValue{V.Val, V.DerefLevel + 1}; + if (Graph.getNode(NodeBelow)) + return NodeBelow; + return None; +} + +static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph, + ReachabilitySet &ReachSet, AliasMemSet &MemSet, + std::vector<WorkListItem> &WorkList) { + auto FromNode = Item.From; + auto ToNode = Item.To; + + auto NodeInfo = Graph.getNode(ToNode); + assert(NodeInfo != nullptr); + + // TODO: propagate field offsets + + // FIXME: Here is a neat trick we can do: since both ReachSet and MemSet holds + // relations that are symmetric, we could actually cut the storage by half by + // sorting FromNode and ToNode before insertion happens. + + // The newly added value alias pair may potentially generate more memory + // alias pairs. Check for them here. + auto FromNodeBelow = getNodeBelow(Graph, FromNode); + auto ToNodeBelow = getNodeBelow(Graph, ToNode); + if (FromNodeBelow && ToNodeBelow && + MemSet.insert(*FromNodeBelow, *ToNodeBelow)) { + propagate(*FromNodeBelow, *ToNodeBelow, + MatchState::FlowFromMemAliasNoReadWrite, ReachSet, WorkList); + for (const auto &Mapping : ReachSet.reachableValueAliases(*FromNodeBelow)) { + auto Src = Mapping.first; + auto MemAliasPropagate = [&](MatchState FromState, MatchState ToState) { + if (Mapping.second.test(static_cast<size_t>(FromState))) + propagate(Src, *ToNodeBelow, ToState, ReachSet, WorkList); + }; + + MemAliasPropagate(MatchState::FlowFromReadOnly, + MatchState::FlowFromMemAliasReadOnly); + MemAliasPropagate(MatchState::FlowToWriteOnly, + MatchState::FlowToMemAliasWriteOnly); + MemAliasPropagate(MatchState::FlowToReadWrite, + MatchState::FlowToMemAliasReadWrite); + } + } + + // This is the core of the state machine walking algorithm. 
We expand ReachSet + // based on which state we are at (which in turn dictates what edges we + // should examine) + // From a high-level point of view, the state machine here guarantees two + // properties: + // - If *X and *Y are memory aliases, then X and Y are value aliases + // - If Y is an alias of X, then reverse assignment edges (if there is any) + // should precede any assignment edges on the path from X to Y. + auto NextAssignState = [&](MatchState State) { + for (const auto &AssignEdge : NodeInfo->Edges) + propagate(FromNode, AssignEdge.Other, State, ReachSet, WorkList); + }; + auto NextRevAssignState = [&](MatchState State) { + for (const auto &RevAssignEdge : NodeInfo->ReverseEdges) + propagate(FromNode, RevAssignEdge.Other, State, ReachSet, WorkList); + }; + auto NextMemState = [&](MatchState State) { + if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) { + for (const auto &MemAlias : *AliasSet) + propagate(FromNode, MemAlias, State, ReachSet, WorkList); + } + }; + + switch (Item.State) { + case MatchState::FlowFromReadOnly: + NextRevAssignState(MatchState::FlowFromReadOnly); + NextAssignState(MatchState::FlowToReadWrite); + NextMemState(MatchState::FlowFromMemAliasReadOnly); + break; + + case MatchState::FlowFromMemAliasNoReadWrite: + NextRevAssignState(MatchState::FlowFromReadOnly); + NextAssignState(MatchState::FlowToWriteOnly); + break; + + case MatchState::FlowFromMemAliasReadOnly: + NextRevAssignState(MatchState::FlowFromReadOnly); + NextAssignState(MatchState::FlowToReadWrite); + break; + + case MatchState::FlowToWriteOnly: + NextAssignState(MatchState::FlowToWriteOnly); + NextMemState(MatchState::FlowToMemAliasWriteOnly); + break; + + case MatchState::FlowToReadWrite: + NextAssignState(MatchState::FlowToReadWrite); + NextMemState(MatchState::FlowToMemAliasReadWrite); + break; + + case MatchState::FlowToMemAliasWriteOnly: + NextAssignState(MatchState::FlowToWriteOnly); + break; + + case MatchState::FlowToMemAliasReadWrite: + NextAssignState(MatchState::FlowToReadWrite); + break; + } +} + +static AliasAttrMap buildAttrMap(const CFLGraph &Graph, + const ReachabilitySet &ReachSet) { + AliasAttrMap AttrMap; + std::vector<InstantiatedValue> WorkList, NextList; + + // Initialize each node with its original AliasAttrs in CFLGraph + for (const auto &Mapping : Graph.value_mappings()) { + auto Val = Mapping.first; + auto &ValueInfo = Mapping.second; + for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { + auto Node = InstantiatedValue{Val, I}; + AttrMap.add(Node, ValueInfo.getNodeInfoAtLevel(I).Attr); + WorkList.push_back(Node); + } + } + + while (!WorkList.empty()) { + for (const auto &Dst : WorkList) { + auto DstAttr = AttrMap.getAttrs(Dst); + if (DstAttr.none()) + continue; + + // Propagate attr on the same level + for (const auto &Mapping : ReachSet.reachableValueAliases(Dst)) { + auto Src = Mapping.first; + if (AttrMap.add(Src, DstAttr)) + NextList.push_back(Src); + } + + // Propagate attr to the levels below + auto DstBelow = getNodeBelow(Graph, Dst); + while (DstBelow) { + if (AttrMap.add(*DstBelow, DstAttr)) { + NextList.push_back(*DstBelow); + break; + } + DstBelow = getNodeBelow(Graph, *DstBelow); + } + } + WorkList.swap(NextList); + NextList.clear(); + } + + return AttrMap; +} + +CFLAndersAAResult::FunctionInfo +CFLAndersAAResult::buildInfoFrom(const Function &Fn) { + CFLGraphBuilder<CFLAndersAAResult> GraphBuilder( + *this, GetTLI(const_cast<Function &>(Fn)), + // Cast away the constness here due to GraphBuilder's API requirement + const_cast<Function 
&>(Fn)); + auto &Graph = GraphBuilder.getCFLGraph(); + + ReachabilitySet ReachSet; + AliasMemSet MemSet; + + std::vector<WorkListItem> WorkList, NextList; + initializeWorkList(WorkList, ReachSet, Graph); + // TODO: make sure we don't stop before the fix point is reached + while (!WorkList.empty()) { + for (const auto &Item : WorkList) + processWorkListItem(Item, Graph, ReachSet, MemSet, NextList); + + NextList.swap(WorkList); + NextList.clear(); + } + + // Now that we have all the reachability info, propagate AliasAttrs according + // to it + auto IValueAttrMap = buildAttrMap(Graph, ReachSet); + + return FunctionInfo(Fn, GraphBuilder.getReturnValues(), ReachSet, + std::move(IValueAttrMap)); +} + +void CFLAndersAAResult::scan(const Function &Fn) { + auto InsertPair = Cache.insert(std::make_pair(&Fn, Optional<FunctionInfo>())); + (void)InsertPair; + assert(InsertPair.second && + "Trying to scan a function that has already been cached"); + + // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call + // may get evaluated after operator[], potentially triggering a DenseMap + // resize and invalidating the reference returned by operator[] + auto FunInfo = buildInfoFrom(Fn); + Cache[&Fn] = std::move(FunInfo); + Handles.emplace_front(const_cast<Function *>(&Fn), this); +} + +void CFLAndersAAResult::evict(const Function *Fn) { Cache.erase(Fn); } + +const Optional<CFLAndersAAResult::FunctionInfo> & +CFLAndersAAResult::ensureCached(const Function &Fn) { + auto Iter = Cache.find(&Fn); + if (Iter == Cache.end()) { + scan(Fn); + Iter = Cache.find(&Fn); + assert(Iter != Cache.end()); + assert(Iter->second.hasValue()); + } + return Iter->second; +} + +const AliasSummary *CFLAndersAAResult::getAliasSummary(const Function &Fn) { + auto &FunInfo = ensureCached(Fn); + if (FunInfo.hasValue()) + return &FunInfo->getAliasSummary(); + else + return nullptr; +} + +AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA, + const MemoryLocation &LocB) { + auto *ValA = LocA.Ptr; + auto *ValB = LocB.Ptr; + + if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy()) + return NoAlias; + + auto *Fn = parentFunctionOfValue(ValA); + if (!Fn) { + Fn = parentFunctionOfValue(ValB); + if (!Fn) { + // The only times this is known to happen are when globals + InlineAsm are + // involved + LLVM_DEBUG( + dbgs() + << "CFLAndersAA: could not extract parent function information.\n"); + return MayAlias; + } + } else { + assert(!parentFunctionOfValue(ValB) || parentFunctionOfValue(ValB) == Fn); + } + + assert(Fn != nullptr); + auto &FunInfo = ensureCached(*Fn); + + // AliasMap lookup + if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size)) + return MayAlias; + return NoAlias; +} + +AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB, + AAQueryInfo &AAQI) { + if (LocA.Ptr == LocB.Ptr) + return MustAlias; + + // Comparisons between global variables and other constants should be + // handled by BasicAA. + // CFLAndersAA may report NoAlias when comparing a GlobalValue and + // ConstantExpr, but every query needs to have at least one Value tied to a + // Function, and neither GlobalValues nor ConstantExprs are. 
+ if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) + return AAResultBase::alias(LocA, LocB, AAQI); + + AliasResult QueryResult = query(LocA, LocB); + if (QueryResult == MayAlias) + return AAResultBase::alias(LocA, LocB, AAQI); + + return QueryResult; +} + +AnalysisKey CFLAndersAA::Key; + +CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) { + auto GetTLI = [&AM](Function &F) -> TargetLibraryInfo & { + return AM.getResult<TargetLibraryAnalysis>(F); + }; + return CFLAndersAAResult(GetTLI); +} + +char CFLAndersAAWrapperPass::ID = 0; +INITIALIZE_PASS(CFLAndersAAWrapperPass, "cfl-anders-aa", + "Inclusion-Based CFL Alias Analysis", false, true) + +ImmutablePass *llvm::createCFLAndersAAWrapperPass() { + return new CFLAndersAAWrapperPass(); +} + +CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) { + initializeCFLAndersAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +void CFLAndersAAWrapperPass::initializePass() { + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; + Result.reset(new CFLAndersAAResult(GetTLI)); +} + +void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); +} |
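For readers who want to experiment with the new analysis outside of the patch itself, the sketch below shows how a new-pass-manager client could query CFLAndersAAResult through the CFLAndersAA::run entry point added above. This is an editor's illustration, not part of the patch: the helper name mayPointersAlias is hypothetical, and it assumes CFLAndersAA has been registered with the FunctionAnalysisManager (e.g. via PassBuilder::registerFunctionAnalyses).

// Editor's sketch (not part of this patch): conservatively ask whether two
// pointer values may alias according to CFLAndersAA.
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

static bool mayPointersAlias(Function &F, FunctionAnalysisManager &FAM,
                             const Value *A, const Value *B) {
  // Assumes CFLAndersAA was registered with FAM beforehand.
  CFLAndersAAResult &AA = FAM.getResult<CFLAndersAA>(F);
  // Use unknown location sizes to stay conservative; the result object
  // answers MemoryLocation-based queries via alias().
  AAQueryInfo AAQI;
  return AA.alias(MemoryLocation(A, LocationSize::unknown()),
                  MemoryLocation(B, LocationSize::unknown()),
                  AAQI) != NoAlias;
}

Under the legacy pass manager the same result object is exposed through CFLAndersAAWrapperPass, registered above under the "cfl-anders-aa" pass name.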