diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-07-26 19:03:47 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-07-26 19:04:23 +0000 |
commit | 7fa27ce4a07f19b07799a767fc29416f3b625afb (patch) | |
tree | 27825c83636c4de341eb09a74f49f5d38a15d165 /clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp | |
parent | e3b557809604d036af6e00c60f012c2025b59a5e (diff) |
Diffstat (limited to 'clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp')
-rw-r--r-- | clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp | 401 |
1 files changed, 153 insertions, 248 deletions
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index 480606bdac8d..9f72dc8f6ab3 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -15,54 +15,68 @@ #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/AST/ExprCXX.h" #include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" #include <cassert> #include <memory> +#include <string> #include <utility> +#include <vector> + +static llvm::cl::opt<std::string> DataflowLog( + "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, + llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " + "log to stderr. With an arg, writes HTML logs under the " + "specified directory (one per analyzed function).")); namespace clang { namespace dataflow { -void DataflowAnalysisContext::addModeledFields( - const llvm::DenseSet<const FieldDecl *> &Fields) { - llvm::set_union(ModeledFields, Fields); +FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { + // During context-sensitive analysis, a struct may be allocated in one + // function, but its field accessed in a function lower in the stack than + // the allocation. Since we only collect fields used in the function where + // the allocation occurs, we can't apply that filter when performing + // context-sensitive analysis. But, this only applies to storage locations, + // since field access it not allowed to fail. In contrast, field *values* + // don't need this allowance, since the API allows for uninitialized fields. + if (Opts.ContextSensitiveOpts) + return getObjectFields(Type); + + return llvm::set_intersection(getObjectFields(Type), ModeledFields); } -llvm::DenseSet<const FieldDecl *> -DataflowAnalysisContext::getReferencedFields(QualType Type) { - llvm::DenseSet<const FieldDecl *> Fields = getObjectFields(Type); - llvm::set_intersect(Fields, ModeledFields); - return Fields; +void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { + ModeledFields.set_union(Fields); } StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { - if (!Type.isNull() && - (Type->isStructureOrClassType() || Type->isUnionType())) { + if (!Type.isNull() && Type->isRecordType()) { llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; - // During context-sensitive analysis, a struct may be allocated in one - // function, but its field accessed in a function lower in the stack than - // the allocation. Since we only collect fields used in the function where - // the allocation occurs, we can't apply that filter when performing - // context-sensitive analysis. But, this only applies to storage locations, - // since field access it not allowed to fail. In contrast, field *values* - // don't need this allowance, since the API allows for uninitialized fields. - auto Fields = Opts.ContextSensitiveOpts ? getObjectFields(Type) - : getReferencedFields(Type); - for (const FieldDecl *Field : Fields) - FieldLocs.insert({Field, &createStorageLocation(Field->getType())}); - return takeOwnership( - std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs))); + for (const FieldDecl *Field : getModeledFields(Type)) + if (Field->getType()->isReferenceType()) + FieldLocs.insert({Field, nullptr}); + else + FieldLocs.insert({Field, &createStorageLocation( + Field->getType().getNonReferenceType())}); + return arena().create<AggregateStorageLocation>(Type, std::move(FieldLocs)); } - return takeOwnership(std::make_unique<ScalarStorageLocation>(Type)); + return arena().create<ScalarStorageLocation>(Type); } StorageLocation & DataflowAnalysisContext::getStableStorageLocation(const VarDecl &D) { if (auto *Loc = getStorageLocation(D)) return *Loc; - auto &Loc = createStorageLocation(D.getType()); + auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); setStorageLocation(D, Loc); return Loc; } @@ -83,275 +97,120 @@ DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); if (Res.second) { auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); - Res.first->second = - &takeOwnership(std::make_unique<PointerValue>(PointeeLoc)); + Res.first->second = &arena().create<PointerValue>(PointeeLoc); } return *Res.first->second; } -static std::pair<BoolValue *, BoolValue *> -makeCanonicalBoolValuePair(BoolValue &LHS, BoolValue &RHS) { - auto Res = std::make_pair(&LHS, &RHS); - if (&RHS < &LHS) - std::swap(Res.first, Res.second); - return Res; -} - -BoolValue &DataflowAnalysisContext::getOrCreateConjunction(BoolValue &LHS, - BoolValue &RHS) { - if (&LHS == &RHS) - return LHS; - - auto Res = ConjunctionVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS), - nullptr); - if (Res.second) - Res.first->second = - &takeOwnership(std::make_unique<ConjunctionValue>(LHS, RHS)); - return *Res.first->second; -} - -BoolValue &DataflowAnalysisContext::getOrCreateDisjunction(BoolValue &LHS, - BoolValue &RHS) { - if (&LHS == &RHS) - return LHS; - - auto Res = DisjunctionVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS), - nullptr); - if (Res.second) - Res.first->second = - &takeOwnership(std::make_unique<DisjunctionValue>(LHS, RHS)); - return *Res.first->second; -} - -BoolValue &DataflowAnalysisContext::getOrCreateNegation(BoolValue &Val) { - auto Res = NegationVals.try_emplace(&Val, nullptr); - if (Res.second) - Res.first->second = &takeOwnership(std::make_unique<NegationValue>(Val)); - return *Res.first->second; -} - -BoolValue &DataflowAnalysisContext::getOrCreateImplication(BoolValue &LHS, - BoolValue &RHS) { - if (&LHS == &RHS) - return getBoolLiteralValue(true); - - auto Res = ImplicationVals.try_emplace(std::make_pair(&LHS, &RHS), nullptr); - if (Res.second) - Res.first->second = - &takeOwnership(std::make_unique<ImplicationValue>(LHS, RHS)); - return *Res.first->second; -} - -BoolValue &DataflowAnalysisContext::getOrCreateIff(BoolValue &LHS, - BoolValue &RHS) { - if (&LHS == &RHS) - return getBoolLiteralValue(true); - - auto Res = BiconditionalVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS), - nullptr); - if (Res.second) - Res.first->second = - &takeOwnership(std::make_unique<BiconditionalValue>(LHS, RHS)); - return *Res.first->second; -} - -AtomicBoolValue &DataflowAnalysisContext::makeFlowConditionToken() { - return createAtomicBoolValue(); -} - void DataflowAnalysisContext::addFlowConditionConstraint( - AtomicBoolValue &Token, BoolValue &Constraint) { - auto Res = FlowConditionConstraints.try_emplace(&Token, &Constraint); + Atom Token, const Formula &Constraint) { + auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); if (!Res.second) { - Res.first->second = &getOrCreateConjunction(*Res.first->second, Constraint); + Res.first->second = + &arena().makeAnd(*Res.first->second, Constraint); } } -AtomicBoolValue & -DataflowAnalysisContext::forkFlowCondition(AtomicBoolValue &Token) { - auto &ForkToken = makeFlowConditionToken(); - FlowConditionDeps[&ForkToken].insert(&Token); - addFlowConditionConstraint(ForkToken, Token); +Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { + Atom ForkToken = arena().makeFlowConditionToken(); + FlowConditionDeps[ForkToken].insert(Token); + addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); return ForkToken; } -AtomicBoolValue & -DataflowAnalysisContext::joinFlowConditions(AtomicBoolValue &FirstToken, - AtomicBoolValue &SecondToken) { - auto &Token = makeFlowConditionToken(); - FlowConditionDeps[&Token].insert(&FirstToken); - FlowConditionDeps[&Token].insert(&SecondToken); +Atom +DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, + Atom SecondToken) { + Atom Token = arena().makeFlowConditionToken(); + FlowConditionDeps[Token].insert(FirstToken); + FlowConditionDeps[Token].insert(SecondToken); addFlowConditionConstraint(Token, - getOrCreateDisjunction(FirstToken, SecondToken)); + arena().makeOr(arena().makeAtomRef(FirstToken), + arena().makeAtomRef(SecondToken))); return Token; } -Solver::Result -DataflowAnalysisContext::querySolver(llvm::DenseSet<BoolValue *> Constraints) { - Constraints.insert(&getBoolLiteralValue(true)); - Constraints.insert(&getOrCreateNegation(getBoolLiteralValue(false))); - return S->solve(std::move(Constraints)); +Solver::Result DataflowAnalysisContext::querySolver( + llvm::SetVector<const Formula *> Constraints) { + Constraints.insert(&arena().makeLiteral(true)); + Constraints.insert(&arena().makeNot(arena().makeLiteral(false))); + return S->solve(Constraints.getArrayRef()); } -bool DataflowAnalysisContext::flowConditionImplies(AtomicBoolValue &Token, - BoolValue &Val) { +bool DataflowAnalysisContext::flowConditionImplies(Atom Token, + const Formula &Val) { // Returns true if and only if truth assignment of the flow condition implies // that `Val` is also true. We prove whether or not this property holds by // reducing the problem to satisfiability checking. In other words, we attempt // to show that assuming `Val` is false makes the constraints induced by the // flow condition unsatisfiable. - llvm::DenseSet<BoolValue *> Constraints = {&Token, &getOrCreateNegation(Val)}; - llvm::DenseSet<AtomicBoolValue *> VisitedTokens; + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeAtomRef(Token)); + Constraints.insert(&arena().makeNot(Val)); + llvm::DenseSet<Atom> VisitedTokens; addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); return isUnsatisfiable(std::move(Constraints)); } -bool DataflowAnalysisContext::flowConditionIsTautology(AtomicBoolValue &Token) { +bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) { // Returns true if and only if we cannot prove that the flow condition can // ever be false. - llvm::DenseSet<BoolValue *> Constraints = {&getOrCreateNegation(Token)}; - llvm::DenseSet<AtomicBoolValue *> VisitedTokens; + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token))); + llvm::DenseSet<Atom> VisitedTokens; addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); return isUnsatisfiable(std::move(Constraints)); } -bool DataflowAnalysisContext::equivalentBoolValues(BoolValue &Val1, - BoolValue &Val2) { - llvm::DenseSet<BoolValue *> Constraints = { - &getOrCreateNegation(getOrCreateIff(Val1, Val2))}; - return isUnsatisfiable(Constraints); +bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, + const Formula &Val2) { + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); + return isUnsatisfiable(std::move(Constraints)); } void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( - AtomicBoolValue &Token, llvm::DenseSet<BoolValue *> &Constraints, - llvm::DenseSet<AtomicBoolValue *> &VisitedTokens) { - auto Res = VisitedTokens.insert(&Token); + Atom Token, llvm::SetVector<const Formula *> &Constraints, + llvm::DenseSet<Atom> &VisitedTokens) { + auto Res = VisitedTokens.insert(Token); if (!Res.second) return; - auto ConstraintsIt = FlowConditionConstraints.find(&Token); + auto ConstraintsIt = FlowConditionConstraints.find(Token); if (ConstraintsIt == FlowConditionConstraints.end()) { - Constraints.insert(&Token); + Constraints.insert(&arena().makeAtomRef(Token)); } else { // Bind flow condition token via `iff` to its set of constraints: // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints - Constraints.insert(&getOrCreateIff(Token, *ConstraintsIt->second)); + Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), + *ConstraintsIt->second)); } - auto DepsIt = FlowConditionDeps.find(&Token); + auto DepsIt = FlowConditionDeps.find(Token); if (DepsIt != FlowConditionDeps.end()) { - for (AtomicBoolValue *DepToken : DepsIt->second) { - addTransitiveFlowConditionConstraints(*DepToken, Constraints, + for (Atom DepToken : DepsIt->second) { + addTransitiveFlowConditionConstraints(DepToken, Constraints, VisitedTokens); } } } -BoolValue &DataflowAnalysisContext::substituteBoolValue( - BoolValue &Val, - llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache) { - auto It = SubstitutionsCache.find(&Val); - if (It != SubstitutionsCache.end()) { - // Return memoized result of substituting this boolean value. - return *It->second; - } - - // Handle substitution on the boolean value (and its subvalues), saving the - // result into `SubstitutionsCache`. - BoolValue *Result; - switch (Val.getKind()) { - case Value::Kind::AtomicBool: { - Result = &Val; - break; - } - case Value::Kind::Negation: { - auto &Negation = *cast<NegationValue>(&Val); - auto &Sub = substituteBoolValue(Negation.getSubVal(), SubstitutionsCache); - Result = &getOrCreateNegation(Sub); - break; - } - case Value::Kind::Disjunction: { - auto &Disjunct = *cast<DisjunctionValue>(&Val); - auto &LeftSub = - substituteBoolValue(Disjunct.getLeftSubValue(), SubstitutionsCache); - auto &RightSub = - substituteBoolValue(Disjunct.getRightSubValue(), SubstitutionsCache); - Result = &getOrCreateDisjunction(LeftSub, RightSub); - break; - } - case Value::Kind::Conjunction: { - auto &Conjunct = *cast<ConjunctionValue>(&Val); - auto &LeftSub = - substituteBoolValue(Conjunct.getLeftSubValue(), SubstitutionsCache); - auto &RightSub = - substituteBoolValue(Conjunct.getRightSubValue(), SubstitutionsCache); - Result = &getOrCreateConjunction(LeftSub, RightSub); - break; - } - case Value::Kind::Implication: { - auto &IV = *cast<ImplicationValue>(&Val); - auto &LeftSub = - substituteBoolValue(IV.getLeftSubValue(), SubstitutionsCache); - auto &RightSub = - substituteBoolValue(IV.getRightSubValue(), SubstitutionsCache); - Result = &getOrCreateImplication(LeftSub, RightSub); - break; - } - case Value::Kind::Biconditional: { - auto &BV = *cast<BiconditionalValue>(&Val); - auto &LeftSub = - substituteBoolValue(BV.getLeftSubValue(), SubstitutionsCache); - auto &RightSub = - substituteBoolValue(BV.getRightSubValue(), SubstitutionsCache); - Result = &getOrCreateIff(LeftSub, RightSub); - break; - } - default: - llvm_unreachable("Unhandled Value Kind"); - } - SubstitutionsCache[&Val] = Result; - return *Result; -} +void DataflowAnalysisContext::dumpFlowCondition(Atom Token, + llvm::raw_ostream &OS) { + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeAtomRef(Token)); + llvm::DenseSet<Atom> VisitedTokens; + addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); -BoolValue &DataflowAnalysisContext::buildAndSubstituteFlowCondition( - AtomicBoolValue &Token, - llvm::DenseMap<AtomicBoolValue *, BoolValue *> Substitutions) { - assert( - Substitutions.find(&getBoolLiteralValue(true)) == Substitutions.end() && - Substitutions.find(&getBoolLiteralValue(false)) == Substitutions.end() && - "Do not substitute true/false boolean literals"); - llvm::DenseMap<BoolValue *, BoolValue *> SubstitutionsCache( - Substitutions.begin(), Substitutions.end()); - return buildAndSubstituteFlowConditionWithCache(Token, SubstitutionsCache); -} + // TODO: have formulas know about true/false directly instead + Atom True = arena().makeLiteral(true).getAtom(); + Atom False = arena().makeLiteral(false).getAtom(); + Formula::AtomNames Names = {{False, "false"}, {True, "true"}}; -BoolValue &DataflowAnalysisContext::buildAndSubstituteFlowConditionWithCache( - AtomicBoolValue &Token, - llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache) { - auto ConstraintsIt = FlowConditionConstraints.find(&Token); - if (ConstraintsIt == FlowConditionConstraints.end()) { - return getBoolLiteralValue(true); + for (const auto *Constraint : Constraints) { + Constraint->print(OS, &Names); + OS << "\n"; } - auto DepsIt = FlowConditionDeps.find(&Token); - if (DepsIt != FlowConditionDeps.end()) { - for (AtomicBoolValue *DepToken : DepsIt->second) { - auto &NewDep = buildAndSubstituteFlowConditionWithCache( - *DepToken, SubstitutionsCache); - SubstitutionsCache[DepToken] = &NewDep; - } - } - return substituteBoolValue(*ConstraintsIt->second, SubstitutionsCache); -} - -void DataflowAnalysisContext::dumpFlowCondition(AtomicBoolValue &Token) { - llvm::DenseSet<BoolValue *> Constraints = {&Token}; - llvm::DenseSet<AtomicBoolValue *> VisitedTokens; - addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); - - llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = { - {&getBoolLiteralValue(false), "False"}, - {&getBoolLiteralValue(true), "True"}}; - llvm::dbgs() << debugString(Constraints, AtomNames); } const ControlFlowContext * @@ -364,8 +223,8 @@ DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { if (It != FunctionContexts.end()) return &It->second; - if (Stmt *Body = F->getBody()) { - auto CFCtx = ControlFlowContext::build(F, *Body, F->getASTContext()); + if (F->hasBody()) { + auto CFCtx = ControlFlowContext::build(*F); // FIXME: Handle errors. assert(CFCtx); auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); @@ -375,6 +234,54 @@ DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { return nullptr; } +static std::unique_ptr<Logger> makeLoggerFromCommandLine() { + if (DataflowLog.empty()) + return Logger::textual(llvm::errs()); + + llvm::StringRef Dir = DataflowLog; + if (auto EC = llvm::sys::fs::create_directories(Dir)) + llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; + // All analysis runs within a process will log to the same directory. + // Share a counter so they don't all overwrite each other's 0.html. + // (Don't share a logger, it's not threadsafe). + static std::atomic<unsigned> Counter = {0}; + auto StreamFactory = + [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { + llvm::SmallString<256> File(Dir); + llvm::sys::path::append(File, + std::to_string(Counter.fetch_add(1)) + ".html"); + std::error_code EC; + auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); + if (EC) { + llvm::errs() << "Failed to create log " << File << ": " << EC.message() + << "\n"; + return std::make_unique<llvm::raw_null_ostream>(); + } + return OS; + }; + return Logger::html(std::move(StreamFactory)); +} + +DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S, + Options Opts) + : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) { + assert(this->S != nullptr); + // If the -dataflow-log command-line flag was set, synthesize a logger. + // This is ugly but provides a uniform method for ad-hoc debugging dataflow- + // based tools. + if (Opts.Log == nullptr) { + if (DataflowLog.getNumOccurrences() > 0) { + LogOwner = makeLoggerFromCommandLine(); + this->Opts.Log = LogOwner.get(); + // FIXME: if the flag is given a value, write an HTML log to a file. + } else { + this->Opts.Log = &Logger::null(); + } + } +} + +DataflowAnalysisContext::~DataflowAnalysisContext() = default; + } // namespace dataflow } // namespace clang @@ -399,9 +306,8 @@ const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { // FIXME: Does not precisely handle non-virtual diamond inheritance. A single // field decl will be modeled for all instances of the inherited field. -static void -getFieldsFromClassHierarchy(QualType Type, - llvm::DenseSet<const FieldDecl *> &Fields) { +static void getFieldsFromClassHierarchy(QualType Type, + clang::dataflow::FieldSet &Fields) { if (Type->isIncompleteType() || Type->isDependentType() || !Type->isRecordType()) return; @@ -414,9 +320,8 @@ getFieldsFromClassHierarchy(QualType Type, } /// Gets the set of all fields in the type. -llvm::DenseSet<const FieldDecl *> -clang::dataflow::getObjectFields(QualType Type) { - llvm::DenseSet<const FieldDecl *> Fields; +clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { + FieldSet Fields; getFieldsFromClassHierarchy(Type, Fields); return Fields; } |