diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-09-02 21:17:18 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-08 17:34:50 +0000 |
| commit | 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch) | |
| tree | 62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/clang/lib/Analysis/FlowSensitive | |
| parent | cf037972ea8863e2bab7461d77345367d2c1e054 (diff) | |
| parent | 7fa27ce4a07f19b07799a767fc29416f3b625afb (diff) | |
Diffstat (limited to 'contrib/llvm-project/clang/lib/Analysis/FlowSensitive')
17 files changed, 3068 insertions, 1526 deletions
diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Arena.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Arena.cpp new file mode 100644 index 000000000000..a12da2d9b555 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Arena.cpp @@ -0,0 +1,98 @@ +//===-- Arena.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Arena.h" +#include "clang/Analysis/FlowSensitive/Value.h" + +namespace clang::dataflow { + +static std::pair<const Formula *, const Formula *> +canonicalFormulaPair(const Formula &LHS, const Formula &RHS) { + auto Res = std::make_pair(&LHS, &RHS); + if (&RHS < &LHS) // FIXME: use a deterministic order instead + std::swap(Res.first, Res.second); + return Res; +} + +const Formula &Arena::makeAtomRef(Atom A) { + auto [It, Inserted] = AtomRefs.try_emplace(A); + if (Inserted) + It->second = + &Formula::create(Alloc, Formula::AtomRef, {}, static_cast<unsigned>(A)); + return *It->second; +} + +const Formula &Arena::makeAnd(const Formula &LHS, const Formula &RHS) { + if (&LHS == &RHS) + return LHS; + + auto [It, Inserted] = + Ands.try_emplace(canonicalFormulaPair(LHS, RHS), nullptr); + if (Inserted) + It->second = &Formula::create(Alloc, Formula::And, {&LHS, &RHS}); + return *It->second; +} + +const Formula &Arena::makeOr(const Formula &LHS, const Formula &RHS) { + if (&LHS == &RHS) + return LHS; + + auto [It, Inserted] = + Ors.try_emplace(canonicalFormulaPair(LHS, RHS), nullptr); + if (Inserted) + It->second = &Formula::create(Alloc, Formula::Or, {&LHS, &RHS}); + return *It->second; +} + +const Formula &Arena::makeNot(const Formula &Val) { + auto [It, Inserted] = Nots.try_emplace(&Val, nullptr); + if (Inserted) + It->second = &Formula::create(Alloc, Formula::Not, {&Val}); + return *It->second; +} + +const Formula &Arena::makeImplies(const Formula &LHS, const Formula &RHS) { + if (&LHS == &RHS) + return makeLiteral(true); + + auto [It, Inserted] = + Implies.try_emplace(std::make_pair(&LHS, &RHS), nullptr); + if (Inserted) + It->second = &Formula::create(Alloc, Formula::Implies, {&LHS, &RHS}); + return *It->second; +} + +const Formula &Arena::makeEquals(const Formula &LHS, const Formula &RHS) { + if (&LHS == &RHS) + return makeLiteral(true); + + auto [It, Inserted] = + Equals.try_emplace(canonicalFormulaPair(LHS, RHS), nullptr); + if (Inserted) + It->second = &Formula::create(Alloc, Formula::Equal, {&LHS, &RHS}); + return *It->second; +} + +IntegerValue &Arena::makeIntLiteral(llvm::APInt Value) { + auto [It, Inserted] = IntegerLiterals.try_emplace(Value, nullptr); + + if (Inserted) + It->second = &create<IntegerValue>(); + return *It->second; +} + +BoolValue &Arena::makeBoolValue(const Formula &F) { + auto [It, Inserted] = FormulaValues.try_emplace(&F); + if (Inserted) + It->second = (F.kind() == Formula::AtomRef) + ? (BoolValue *)&create<AtomicBoolValue>(F) + : &create<FormulaBoolValue>(F); + return *It->second; +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp index 2492b5203724..c80525dc4f34 100644 --- a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp @@ -16,6 +16,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Stmt.h" #include "clang/Analysis/CFG.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Error.h" #include <utility> @@ -44,10 +45,47 @@ buildStmtToBasicBlockMap(const CFG &Cfg) { return StmtToBlock; } +static llvm::BitVector findReachableBlocks(const CFG &Cfg) { + llvm::BitVector BlockReachable(Cfg.getNumBlockIDs(), false); + + llvm::SmallVector<const CFGBlock *> BlocksToVisit; + BlocksToVisit.push_back(&Cfg.getEntry()); + while (!BlocksToVisit.empty()) { + const CFGBlock *Block = BlocksToVisit.back(); + BlocksToVisit.pop_back(); + + if (BlockReachable[Block->getBlockID()]) + continue; + + BlockReachable[Block->getBlockID()] = true; + + for (const CFGBlock *Succ : Block->succs()) + if (Succ) + BlocksToVisit.push_back(Succ); + } + + return BlockReachable; +} + llvm::Expected<ControlFlowContext> -ControlFlowContext::build(const Decl *D, Stmt &S, ASTContext &C) { +ControlFlowContext::build(const FunctionDecl &Func) { + if (!Func.hasBody()) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot analyze function without a body"); + + return build(Func, *Func.getBody(), Func.getASTContext()); +} + +llvm::Expected<ControlFlowContext> +ControlFlowContext::build(const Decl &D, Stmt &S, ASTContext &C) { + if (D.isTemplated()) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot analyze templated declarations"); + CFG::BuildOptions Options; - Options.PruneTriviallyFalseEdges = false; + Options.PruneTriviallyFalseEdges = true; Options.AddImplicitDtors = true; Options.AddTemporaryDtors = true; Options.AddInitializers = true; @@ -56,7 +94,7 @@ ControlFlowContext::build(const Decl *D, Stmt &S, ASTContext &C) { // Ensure that all sub-expressions in basic blocks are evaluated. Options.setAllAlwaysAdd(); - auto Cfg = CFG::buildCFG(D, &S, &C, Options); + auto Cfg = CFG::buildCFG(&D, &S, &C, Options); if (Cfg == nullptr) return llvm::createStringError( std::make_error_code(std::errc::invalid_argument), @@ -64,7 +102,21 @@ ControlFlowContext::build(const Decl *D, Stmt &S, ASTContext &C) { llvm::DenseMap<const Stmt *, const CFGBlock *> StmtToBlock = buildStmtToBasicBlockMap(*Cfg); - return ControlFlowContext(D, std::move(Cfg), std::move(StmtToBlock)); + + llvm::BitVector BlockReachable = findReachableBlocks(*Cfg); + + return ControlFlowContext(&D, std::move(Cfg), std::move(StmtToBlock), + std::move(BlockReachable)); +} + +llvm::Expected<ControlFlowContext> +ControlFlowContext::build(const Decl *D, Stmt &S, ASTContext &C) { + if (D == nullptr) + return llvm::createStringError( + std::make_error_code(std::errc::invalid_argument), + "Declaration must not be null"); + + return build(*D, S, C); } } // namespace dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index 480606bdac8d..9f72dc8f6ab3 100644 --- a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -15,54 +15,68 @@ #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/AST/ExprCXX.h" #include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" #include <cassert> #include <memory> +#include <string> #include <utility> +#include <vector> + +static llvm::cl::opt<std::string> DataflowLog( + "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, + llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " + "log to stderr. With an arg, writes HTML logs under the " + "specified directory (one per analyzed function).")); namespace clang { namespace dataflow { -void DataflowAnalysisContext::addModeledFields( - const llvm::DenseSet<const FieldDecl *> &Fields) { - llvm::set_union(ModeledFields, Fields); +FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { + // During context-sensitive analysis, a struct may be allocated in one + // function, but its field accessed in a function lower in the stack than + // the allocation. Since we only collect fields used in the function where + // the allocation occurs, we can't apply that filter when performing + // context-sensitive analysis. But, this only applies to storage locations, + // since field access it not allowed to fail. In contrast, field *values* + // don't need this allowance, since the API allows for uninitialized fields. + if (Opts.ContextSensitiveOpts) + return getObjectFields(Type); + + return llvm::set_intersection(getObjectFields(Type), ModeledFields); } -llvm::DenseSet<const FieldDecl *> -DataflowAnalysisContext::getReferencedFields(QualType Type) { - llvm::DenseSet<const FieldDecl *> Fields = getObjectFields(Type); - llvm::set_intersect(Fields, ModeledFields); - return Fields; +void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { + ModeledFields.set_union(Fields); } StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { - if (!Type.isNull() && - (Type->isStructureOrClassType() || Type->isUnionType())) { + if (!Type.isNull() && Type->isRecordType()) { llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; - // During context-sensitive analysis, a struct may be allocated in one - // function, but its field accessed in a function lower in the stack than - // the allocation. Since we only collect fields used in the function where - // the allocation occurs, we can't apply that filter when performing - // context-sensitive analysis. But, this only applies to storage locations, - // since field access it not allowed to fail. In contrast, field *values* - // don't need this allowance, since the API allows for uninitialized fields. - auto Fields = Opts.ContextSensitiveOpts ? getObjectFields(Type) - : getReferencedFields(Type); - for (const FieldDecl *Field : Fields) - FieldLocs.insert({Field, &createStorageLocation(Field->getType())}); - return takeOwnership( - std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs))); + for (const FieldDecl *Field : getModeledFields(Type)) + if (Field->getType()->isReferenceType()) + FieldLocs.insert({Field, nullptr}); + else + FieldLocs.insert({Field, &createStorageLocation( + Field->getType().getNonReferenceType())}); + return arena().create<AggregateStorageLocation>(Type, std::move(FieldLocs)); } - return takeOwnership(std::make_unique<ScalarStorageLocation>(Type)); + return arena().create<ScalarStorageLocation>(Type); } StorageLocation & DataflowAnalysisContext::getStableStorageLocation(const VarDecl &D) { if (auto *Loc = getStorageLocation(D)) return *Loc; - auto &Loc = createStorageLocation(D.getType()); + auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); setStorageLocation(D, Loc); return Loc; } @@ -83,275 +97,120 @@ DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); if (Res.second) { auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); - Res.first->second = - &takeOwnership(std::make_unique<PointerValue>(PointeeLoc)); + Res.first->second = &arena().create<PointerValue>(PointeeLoc); } return *Res.first->second; } -static std::pair<BoolValue *, BoolValue *> -makeCanonicalBoolValuePair(BoolValue &LHS, BoolValue &RHS) { - auto Res = std::make_pair(&LHS, &RHS); - if (&RHS < &LHS) - std::swap(Res.first, Res.second); - return Res; -} - -BoolValue &DataflowAnalysisContext::getOrCreateConjunction(BoolValue &LHS, - BoolValue &RHS) { - if (&LHS == &RHS) - return LHS; - - auto Res = ConjunctionVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS), - nullptr); - if (Res.second) - Res.first->second = - &takeOwnership(std::make_unique<ConjunctionValue>(LHS, RHS)); - return *Res.first->second; -} - -BoolValue &DataflowAnalysisContext::getOrCreateDisjunction(BoolValue &LHS, - BoolValue &RHS) { - if (&LHS == &RHS) - return LHS; - - auto Res = DisjunctionVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS), - nullptr); - if (Res.second) - Res.first->second = - &takeOwnership(std::make_unique<DisjunctionValue>(LHS, RHS)); - return *Res.first->second; -} - -BoolValue &DataflowAnalysisContext::getOrCreateNegation(BoolValue &Val) { - auto Res = NegationVals.try_emplace(&Val, nullptr); - if (Res.second) - Res.first->second = &takeOwnership(std::make_unique<NegationValue>(Val)); - return *Res.first->second; -} - -BoolValue &DataflowAnalysisContext::getOrCreateImplication(BoolValue &LHS, - BoolValue &RHS) { - if (&LHS == &RHS) - return getBoolLiteralValue(true); - - auto Res = ImplicationVals.try_emplace(std::make_pair(&LHS, &RHS), nullptr); - if (Res.second) - Res.first->second = - &takeOwnership(std::make_unique<ImplicationValue>(LHS, RHS)); - return *Res.first->second; -} - -BoolValue &DataflowAnalysisContext::getOrCreateIff(BoolValue &LHS, - BoolValue &RHS) { - if (&LHS == &RHS) - return getBoolLiteralValue(true); - - auto Res = BiconditionalVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS), - nullptr); - if (Res.second) - Res.first->second = - &takeOwnership(std::make_unique<BiconditionalValue>(LHS, RHS)); - return *Res.first->second; -} - -AtomicBoolValue &DataflowAnalysisContext::makeFlowConditionToken() { - return createAtomicBoolValue(); -} - void DataflowAnalysisContext::addFlowConditionConstraint( - AtomicBoolValue &Token, BoolValue &Constraint) { - auto Res = FlowConditionConstraints.try_emplace(&Token, &Constraint); + Atom Token, const Formula &Constraint) { + auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); if (!Res.second) { - Res.first->second = &getOrCreateConjunction(*Res.first->second, Constraint); + Res.first->second = + &arena().makeAnd(*Res.first->second, Constraint); } } -AtomicBoolValue & -DataflowAnalysisContext::forkFlowCondition(AtomicBoolValue &Token) { - auto &ForkToken = makeFlowConditionToken(); - FlowConditionDeps[&ForkToken].insert(&Token); - addFlowConditionConstraint(ForkToken, Token); +Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { + Atom ForkToken = arena().makeFlowConditionToken(); + FlowConditionDeps[ForkToken].insert(Token); + addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); return ForkToken; } -AtomicBoolValue & -DataflowAnalysisContext::joinFlowConditions(AtomicBoolValue &FirstToken, - AtomicBoolValue &SecondToken) { - auto &Token = makeFlowConditionToken(); - FlowConditionDeps[&Token].insert(&FirstToken); - FlowConditionDeps[&Token].insert(&SecondToken); +Atom +DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, + Atom SecondToken) { + Atom Token = arena().makeFlowConditionToken(); + FlowConditionDeps[Token].insert(FirstToken); + FlowConditionDeps[Token].insert(SecondToken); addFlowConditionConstraint(Token, - getOrCreateDisjunction(FirstToken, SecondToken)); + arena().makeOr(arena().makeAtomRef(FirstToken), + arena().makeAtomRef(SecondToken))); return Token; } -Solver::Result -DataflowAnalysisContext::querySolver(llvm::DenseSet<BoolValue *> Constraints) { - Constraints.insert(&getBoolLiteralValue(true)); - Constraints.insert(&getOrCreateNegation(getBoolLiteralValue(false))); - return S->solve(std::move(Constraints)); +Solver::Result DataflowAnalysisContext::querySolver( + llvm::SetVector<const Formula *> Constraints) { + Constraints.insert(&arena().makeLiteral(true)); + Constraints.insert(&arena().makeNot(arena().makeLiteral(false))); + return S->solve(Constraints.getArrayRef()); } -bool DataflowAnalysisContext::flowConditionImplies(AtomicBoolValue &Token, - BoolValue &Val) { +bool DataflowAnalysisContext::flowConditionImplies(Atom Token, + const Formula &Val) { // Returns true if and only if truth assignment of the flow condition implies // that `Val` is also true. We prove whether or not this property holds by // reducing the problem to satisfiability checking. In other words, we attempt // to show that assuming `Val` is false makes the constraints induced by the // flow condition unsatisfiable. - llvm::DenseSet<BoolValue *> Constraints = {&Token, &getOrCreateNegation(Val)}; - llvm::DenseSet<AtomicBoolValue *> VisitedTokens; + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeAtomRef(Token)); + Constraints.insert(&arena().makeNot(Val)); + llvm::DenseSet<Atom> VisitedTokens; addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); return isUnsatisfiable(std::move(Constraints)); } -bool DataflowAnalysisContext::flowConditionIsTautology(AtomicBoolValue &Token) { +bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) { // Returns true if and only if we cannot prove that the flow condition can // ever be false. - llvm::DenseSet<BoolValue *> Constraints = {&getOrCreateNegation(Token)}; - llvm::DenseSet<AtomicBoolValue *> VisitedTokens; + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token))); + llvm::DenseSet<Atom> VisitedTokens; addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); return isUnsatisfiable(std::move(Constraints)); } -bool DataflowAnalysisContext::equivalentBoolValues(BoolValue &Val1, - BoolValue &Val2) { - llvm::DenseSet<BoolValue *> Constraints = { - &getOrCreateNegation(getOrCreateIff(Val1, Val2))}; - return isUnsatisfiable(Constraints); +bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, + const Formula &Val2) { + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); + return isUnsatisfiable(std::move(Constraints)); } void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( - AtomicBoolValue &Token, llvm::DenseSet<BoolValue *> &Constraints, - llvm::DenseSet<AtomicBoolValue *> &VisitedTokens) { - auto Res = VisitedTokens.insert(&Token); + Atom Token, llvm::SetVector<const Formula *> &Constraints, + llvm::DenseSet<Atom> &VisitedTokens) { + auto Res = VisitedTokens.insert(Token); if (!Res.second) return; - auto ConstraintsIt = FlowConditionConstraints.find(&Token); + auto ConstraintsIt = FlowConditionConstraints.find(Token); if (ConstraintsIt == FlowConditionConstraints.end()) { - Constraints.insert(&Token); + Constraints.insert(&arena().makeAtomRef(Token)); } else { // Bind flow condition token via `iff` to its set of constraints: // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints - Constraints.insert(&getOrCreateIff(Token, *ConstraintsIt->second)); + Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), + *ConstraintsIt->second)); } - auto DepsIt = FlowConditionDeps.find(&Token); + auto DepsIt = FlowConditionDeps.find(Token); if (DepsIt != FlowConditionDeps.end()) { - for (AtomicBoolValue *DepToken : DepsIt->second) { - addTransitiveFlowConditionConstraints(*DepToken, Constraints, + for (Atom DepToken : DepsIt->second) { + addTransitiveFlowConditionConstraints(DepToken, Constraints, VisitedTokens); } } } -BoolValue &DataflowAnalysisContext::substituteBoolValue( - BoolValue &Val, - llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache) { - auto It = SubstitutionsCache.find(&Val); - if (It != SubstitutionsCache.end()) { - // Return memoized result of substituting this boolean value. - return *It->second; - } - - // Handle substitution on the boolean value (and its subvalues), saving the - // result into `SubstitutionsCache`. - BoolValue *Result; - switch (Val.getKind()) { - case Value::Kind::AtomicBool: { - Result = &Val; - break; - } - case Value::Kind::Negation: { - auto &Negation = *cast<NegationValue>(&Val); - auto &Sub = substituteBoolValue(Negation.getSubVal(), SubstitutionsCache); - Result = &getOrCreateNegation(Sub); - break; - } - case Value::Kind::Disjunction: { - auto &Disjunct = *cast<DisjunctionValue>(&Val); - auto &LeftSub = - substituteBoolValue(Disjunct.getLeftSubValue(), SubstitutionsCache); - auto &RightSub = - substituteBoolValue(Disjunct.getRightSubValue(), SubstitutionsCache); - Result = &getOrCreateDisjunction(LeftSub, RightSub); - break; - } - case Value::Kind::Conjunction: { - auto &Conjunct = *cast<ConjunctionValue>(&Val); - auto &LeftSub = - substituteBoolValue(Conjunct.getLeftSubValue(), SubstitutionsCache); - auto &RightSub = - substituteBoolValue(Conjunct.getRightSubValue(), SubstitutionsCache); - Result = &getOrCreateConjunction(LeftSub, RightSub); - break; - } - case Value::Kind::Implication: { - auto &IV = *cast<ImplicationValue>(&Val); - auto &LeftSub = - substituteBoolValue(IV.getLeftSubValue(), SubstitutionsCache); - auto &RightSub = - substituteBoolValue(IV.getRightSubValue(), SubstitutionsCache); - Result = &getOrCreateImplication(LeftSub, RightSub); - break; - } - case Value::Kind::Biconditional: { - auto &BV = *cast<BiconditionalValue>(&Val); - auto &LeftSub = - substituteBoolValue(BV.getLeftSubValue(), SubstitutionsCache); - auto &RightSub = - substituteBoolValue(BV.getRightSubValue(), SubstitutionsCache); - Result = &getOrCreateIff(LeftSub, RightSub); - break; - } - default: - llvm_unreachable("Unhandled Value Kind"); - } - SubstitutionsCache[&Val] = Result; - return *Result; -} +void DataflowAnalysisContext::dumpFlowCondition(Atom Token, + llvm::raw_ostream &OS) { + llvm::SetVector<const Formula *> Constraints; + Constraints.insert(&arena().makeAtomRef(Token)); + llvm::DenseSet<Atom> VisitedTokens; + addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); -BoolValue &DataflowAnalysisContext::buildAndSubstituteFlowCondition( - AtomicBoolValue &Token, - llvm::DenseMap<AtomicBoolValue *, BoolValue *> Substitutions) { - assert( - Substitutions.find(&getBoolLiteralValue(true)) == Substitutions.end() && - Substitutions.find(&getBoolLiteralValue(false)) == Substitutions.end() && - "Do not substitute true/false boolean literals"); - llvm::DenseMap<BoolValue *, BoolValue *> SubstitutionsCache( - Substitutions.begin(), Substitutions.end()); - return buildAndSubstituteFlowConditionWithCache(Token, SubstitutionsCache); -} + // TODO: have formulas know about true/false directly instead + Atom True = arena().makeLiteral(true).getAtom(); + Atom False = arena().makeLiteral(false).getAtom(); + Formula::AtomNames Names = {{False, "false"}, {True, "true"}}; -BoolValue &DataflowAnalysisContext::buildAndSubstituteFlowConditionWithCache( - AtomicBoolValue &Token, - llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache) { - auto ConstraintsIt = FlowConditionConstraints.find(&Token); - if (ConstraintsIt == FlowConditionConstraints.end()) { - return getBoolLiteralValue(true); + for (const auto *Constraint : Constraints) { + Constraint->print(OS, &Names); + OS << "\n"; } - auto DepsIt = FlowConditionDeps.find(&Token); - if (DepsIt != FlowConditionDeps.end()) { - for (AtomicBoolValue *DepToken : DepsIt->second) { - auto &NewDep = buildAndSubstituteFlowConditionWithCache( - *DepToken, SubstitutionsCache); - SubstitutionsCache[DepToken] = &NewDep; - } - } - return substituteBoolValue(*ConstraintsIt->second, SubstitutionsCache); -} - -void DataflowAnalysisContext::dumpFlowCondition(AtomicBoolValue &Token) { - llvm::DenseSet<BoolValue *> Constraints = {&Token}; - llvm::DenseSet<AtomicBoolValue *> VisitedTokens; - addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); - - llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = { - {&getBoolLiteralValue(false), "False"}, - {&getBoolLiteralValue(true), "True"}}; - llvm::dbgs() << debugString(Constraints, AtomNames); } const ControlFlowContext * @@ -364,8 +223,8 @@ DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { if (It != FunctionContexts.end()) return &It->second; - if (Stmt *Body = F->getBody()) { - auto CFCtx = ControlFlowContext::build(F, *Body, F->getASTContext()); + if (F->hasBody()) { + auto CFCtx = ControlFlowContext::build(*F); // FIXME: Handle errors. assert(CFCtx); auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); @@ -375,6 +234,54 @@ DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { return nullptr; } +static std::unique_ptr<Logger> makeLoggerFromCommandLine() { + if (DataflowLog.empty()) + return Logger::textual(llvm::errs()); + + llvm::StringRef Dir = DataflowLog; + if (auto EC = llvm::sys::fs::create_directories(Dir)) + llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; + // All analysis runs within a process will log to the same directory. + // Share a counter so they don't all overwrite each other's 0.html. + // (Don't share a logger, it's not threadsafe). + static std::atomic<unsigned> Counter = {0}; + auto StreamFactory = + [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { + llvm::SmallString<256> File(Dir); + llvm::sys::path::append(File, + std::to_string(Counter.fetch_add(1)) + ".html"); + std::error_code EC; + auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); + if (EC) { + llvm::errs() << "Failed to create log " << File << ": " << EC.message() + << "\n"; + return std::make_unique<llvm::raw_null_ostream>(); + } + return OS; + }; + return Logger::html(std::move(StreamFactory)); +} + +DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S, + Options Opts) + : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) { + assert(this->S != nullptr); + // If the -dataflow-log command-line flag was set, synthesize a logger. + // This is ugly but provides a uniform method for ad-hoc debugging dataflow- + // based tools. + if (Opts.Log == nullptr) { + if (DataflowLog.getNumOccurrences() > 0) { + LogOwner = makeLoggerFromCommandLine(); + this->Opts.Log = LogOwner.get(); + // FIXME: if the flag is given a value, write an HTML log to a file. + } else { + this->Opts.Log = &Logger::null(); + } + } +} + +DataflowAnalysisContext::~DataflowAnalysisContext() = default; + } // namespace dataflow } // namespace clang @@ -399,9 +306,8 @@ const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { // FIXME: Does not precisely handle non-virtual diamond inheritance. A single // field decl will be modeled for all instances of the inherited field. -static void -getFieldsFromClassHierarchy(QualType Type, - llvm::DenseSet<const FieldDecl *> &Fields) { +static void getFieldsFromClassHierarchy(QualType Type, + clang::dataflow::FieldSet &Fields) { if (Type->isIncompleteType() || Type->isDependentType() || !Type->isRecordType()) return; @@ -414,9 +320,8 @@ getFieldsFromClassHierarchy(QualType Type, } /// Gets the set of all fields in the type. -llvm::DenseSet<const FieldDecl *> -clang::dataflow::getObjectFields(QualType Type) { - llvm::DenseSet<const FieldDecl *> Fields; +clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { + FieldSet Fields; getFieldsFromClassHierarchy(Type, Fields); return Fields; } diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index cc3992805cc7..3a91025df6e1 100644 --- a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -20,6 +20,7 @@ #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -107,22 +108,44 @@ static Value *mergeDistinctValues(QualType Type, Value &Val1, // if (o.has_value()) // x = o.value(); // ``` - auto *Expr1 = cast<BoolValue>(&Val1); - auto *Expr2 = cast<BoolValue>(&Val2); - auto &MergedVal = MergedEnv.makeAtomicBoolValue(); - MergedEnv.addToFlowCondition(MergedEnv.makeOr( - MergedEnv.makeAnd(Env1.getFlowConditionToken(), - MergedEnv.makeIff(MergedVal, *Expr1)), - MergedEnv.makeAnd(Env2.getFlowConditionToken(), - MergedEnv.makeIff(MergedVal, *Expr2)))); - return &MergedVal; + auto &Expr1 = cast<BoolValue>(Val1).formula(); + auto &Expr2 = cast<BoolValue>(Val2).formula(); + auto &A = MergedEnv.arena(); + auto &MergedVal = A.makeAtomRef(A.makeAtom()); + MergedEnv.addToFlowCondition( + A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), + A.makeEquals(MergedVal, Expr1)), + A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), + A.makeEquals(MergedVal, Expr2)))); + return &A.makeBoolValue(MergedVal); + } + + Value *MergedVal = nullptr; + if (auto *StructVal1 = dyn_cast<StructValue>(&Val1)) { + [[maybe_unused]] auto *StructVal2 = cast<StructValue>(&Val2); + + // Values to be merged are always associated with the same location in + // `LocToVal`. The location stored in `StructVal` should therefore also + // be the same. + assert(&StructVal1->getAggregateLoc() == &StructVal2->getAggregateLoc()); + + // `StructVal1` and `StructVal2` may have different properties associated + // with them. Create a new `StructValue` without any properties so that we + // soundly approximate both values. If a particular analysis needs to merge + // properties, it should do so in `DataflowAnalysis::merge()`. + MergedVal = &MergedEnv.create<StructValue>(StructVal1->getAggregateLoc()); + } else { + MergedVal = MergedEnv.createValue(Type); } // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge` // returns false to avoid storing unneeded values in `DACtx`. // FIXME: Creating the value based on the type alone creates misshapen values // for lvalues, since the type does not reflect the need for `ReferenceValue`. - if (Value *MergedVal = MergedEnv.createValue(Type)) + // This issue will be resolved when `ReferenceValue` is eliminated as part + // of the ongoing migration to strict handling of value categories (see + // https://discourse.llvm.org/t/70086 for details). + if (MergedVal) if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv)) return MergedVal; @@ -156,17 +179,24 @@ static Value &widenDistinctValues(QualType Type, Value &Prev, /// Initializes a global storage value. static void insertIfGlobal(const Decl &D, - llvm::DenseSet<const FieldDecl *> &Fields, llvm::DenseSet<const VarDecl *> &Vars) { if (auto *V = dyn_cast<VarDecl>(&D)) if (V->hasGlobalStorage()) Vars.insert(V); } -static void getFieldsAndGlobalVars(const Decl &D, - llvm::DenseSet<const FieldDecl *> &Fields, - llvm::DenseSet<const VarDecl *> &Vars) { - insertIfGlobal(D, Fields, Vars); +static void insertIfFunction(const Decl &D, + llvm::DenseSet<const FunctionDecl *> &Funcs) { + if (auto *FD = dyn_cast<FunctionDecl>(&D)) + Funcs.insert(FD); +} + +static void +getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields, + llvm::DenseSet<const VarDecl *> &Vars, + llvm::DenseSet<const FunctionDecl *> &Funcs) { + insertIfGlobal(D, Vars); + insertIfFunction(D, Funcs); if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) for (const auto *B : Decomp->bindings()) if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) @@ -175,60 +205,99 @@ static void getFieldsAndGlobalVars(const Decl &D, Fields.insert(FD); } -/// Traverses `S` and inserts into `Vars` any global storage values that are -/// declared in or referenced from sub-statements. -static void getFieldsAndGlobalVars(const Stmt &S, - llvm::DenseSet<const FieldDecl *> &Fields, - llvm::DenseSet<const VarDecl *> &Vars) { +/// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, +/// global variables and functions that are declared in or referenced from +/// sub-statements. +static void +getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, + llvm::DenseSet<const VarDecl *> &Vars, + llvm::DenseSet<const FunctionDecl *> &Funcs) { for (auto *Child : S.children()) if (Child != nullptr) - getFieldsAndGlobalVars(*Child, Fields, Vars); + getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); + if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S)) + getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); if (auto *DS = dyn_cast<DeclStmt>(&S)) { if (DS->isSingleDecl()) - getFieldsAndGlobalVars(*DS->getSingleDecl(), Fields, Vars); + getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); else for (auto *D : DS->getDeclGroup()) - getFieldsAndGlobalVars(*D, Fields, Vars); + getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) { - insertIfGlobal(*E->getDecl(), Fields, Vars); + insertIfGlobal(*E->getDecl(), Vars); + insertIfFunction(*E->getDecl(), Funcs); } else if (auto *E = dyn_cast<MemberExpr>(&S)) { // FIXME: should we be using `E->getFoundDecl()`? const ValueDecl *VD = E->getMemberDecl(); - insertIfGlobal(*VD, Fields, Vars); + insertIfGlobal(*VD, Vars); + insertIfFunction(*VD, Funcs); if (const auto *FD = dyn_cast<FieldDecl>(VD)) Fields.insert(FD); + } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) { + if (RecordDecl *RD = InitList->getType()->getAsRecordDecl()) + for (const auto *FD : getFieldsForInitListExpr(RD)) + Fields.insert(FD); } } // FIXME: Add support for resetting globals after function calls to enable // the implementation of sound analyses. -void Environment::initVars(llvm::DenseSet<const VarDecl *> Vars) { +void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { + assert(FuncDecl->getBody() != nullptr); + + FieldSet Fields; + llvm::DenseSet<const VarDecl *> Vars; + llvm::DenseSet<const FunctionDecl *> Funcs; + + // Look for global variable and field references in the + // constructor-initializers. + if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { + for (const auto *Init : CtorDecl->inits()) { + if (Init->isMemberInitializer()) { + Fields.insert(Init->getMember()); + } else if (Init->isIndirectMemberInitializer()) { + for (const auto *I : Init->getIndirectMember()->chain()) + Fields.insert(cast<FieldDecl>(I)); + } + const Expr *E = Init->getInit(); + assert(E != nullptr); + getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); + } + // Add all fields mentioned in default member initializers. + for (const FieldDecl *F : CtorDecl->getParent()->fields()) + if (const auto *I = F->getInClassInitializer()) + getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); + } + getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); + + // These have to be added before the lines that follow to ensure that + // `create*` work correctly for structs. + DACtx->addModeledFields(Fields); + for (const VarDecl *D : Vars) { - if (getStorageLocation(*D, SkipPast::None) != nullptr) + if (getStorageLocation(*D) != nullptr) continue; - auto &Loc = createStorageLocation(*D); - setStorageLocation(*D, Loc); - if (auto *Val = createValue(D->getType())) - setValue(Loc, *Val); + + setStorageLocation(*D, createObject(*D)); + } + + for (const FunctionDecl *FD : Funcs) { + if (getStorageLocation(*FD) != nullptr) + continue; + auto &Loc = createStorageLocation(FD->getType()); + setStorageLocation(*FD, Loc); } } Environment::Environment(DataflowAnalysisContext &DACtx) - : DACtx(&DACtx), FlowConditionToken(&DACtx.makeFlowConditionToken()) {} - -Environment::Environment(const Environment &Other) - : DACtx(Other.DACtx), CallStack(Other.CallStack), - ReturnLoc(Other.ReturnLoc), ThisPointeeLoc(Other.ThisPointeeLoc), - DeclToLoc(Other.DeclToLoc), ExprToLoc(Other.ExprToLoc), - LocToVal(Other.LocToVal), MemberLocToStruct(Other.MemberLocToStruct), - FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) { -} + : DACtx(&DACtx), + FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} -Environment &Environment::operator=(const Environment &Other) { - Environment Copy(Other); - *this = std::move(Copy); - return *this; +Environment Environment::fork() const { + Environment Copy(*this); + Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); + return Copy; } Environment::Environment(DataflowAnalysisContext &DACtx, @@ -239,37 +308,12 @@ Environment::Environment(DataflowAnalysisContext &DACtx, if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) { assert(FuncDecl->getBody() != nullptr); - llvm::DenseSet<const FieldDecl *> Fields; - llvm::DenseSet<const VarDecl *> Vars; - - // Look for global variable references in the constructor-initializers. - if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(&DeclCtx)) { - for (const auto *Init : CtorDecl->inits()) { - if (const auto *M = Init->getAnyMember()) - Fields.insert(M); - const Expr *E = Init->getInit(); - assert(E != nullptr); - getFieldsAndGlobalVars(*E, Fields, Vars); - } - } - getFieldsAndGlobalVars(*FuncDecl->getBody(), Fields, Vars); - - // These have to be added before the lines that follow to ensure that - // `create*` work correctly for structs. - DACtx.addModeledFields(Fields); - - initVars(Vars); + initFieldsGlobalsAndFuncs(FuncDecl); for (const auto *ParamDecl : FuncDecl->parameters()) { assert(ParamDecl != nullptr); - auto &ParamLoc = createStorageLocation(*ParamDecl); - setStorageLocation(*ParamDecl, ParamLoc); - if (Value *ParamVal = createValue(ParamDecl->getType())) - setValue(ParamLoc, *ParamVal); + setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); } - - QualType ReturnType = FuncDecl->getReturnType(); - ReturnLoc = &createStorageLocation(ReturnType); } if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) { @@ -281,9 +325,8 @@ Environment::Environment(DataflowAnalysisContext &DACtx, // FIXME: Initialize the ThisPointeeLoc of lambdas too. if (MethodDecl && !MethodDecl->isStatic()) { QualType ThisPointeeType = MethodDecl->getThisObjectType(); - ThisPointeeLoc = &createStorageLocation(ThisPointeeType); - if (Value *ThisPointeeVal = createValue(ThisPointeeType)) - setValue(*ThisPointeeLoc, *ThisPointeeVal); + ThisPointeeLoc = + &cast<StructValue>(createValue(ThisPointeeType))->getAggregateLoc(); } } } @@ -296,13 +339,11 @@ bool Environment::canDescend(unsigned MaxDepth, Environment Environment::pushCall(const CallExpr *Call) const { Environment Env(*this); - // FIXME: Support references here. - Env.ReturnLoc = getStorageLocation(*Call, SkipPast::Reference); - if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { if (!isa<CXXThisExpr>(Arg)) - Env.ThisPointeeLoc = getStorageLocation(*Arg, SkipPast::Reference); + Env.ThisPointeeLoc = cast<AggregateStorageLocation>( + getStorageLocation(*Arg, SkipPast::Reference)); // Otherwise (when the argument is `this`), retain the current // environment's `ThisPointeeLoc`. } @@ -317,10 +358,7 @@ Environment Environment::pushCall(const CallExpr *Call) const { Environment Environment::pushCall(const CXXConstructExpr *Call) const { Environment Env(*this); - // FIXME: Support references here. - Env.ReturnLoc = getStorageLocation(*Call, SkipPast::Reference); - - Env.ThisPointeeLoc = Env.ReturnLoc; + Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); Env.pushCallInternal(Call->getConstructor(), llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); @@ -330,28 +368,15 @@ Environment Environment::pushCall(const CXXConstructExpr *Call) const { void Environment::pushCallInternal(const FunctionDecl *FuncDecl, ArrayRef<const Expr *> Args) { - CallStack.push_back(FuncDecl); + // Canonicalize to the definition of the function. This ensures that we're + // putting arguments into the same `ParamVarDecl`s` that the callee will later + // be retrieving them from. + assert(FuncDecl->getDefinition() != nullptr); + FuncDecl = FuncDecl->getDefinition(); - // FIXME: Share this code with the constructor, rather than duplicating it. - llvm::DenseSet<const FieldDecl *> Fields; - llvm::DenseSet<const VarDecl *> Vars; - // Look for global variable references in the constructor-initializers. - if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { - for (const auto *Init : CtorDecl->inits()) { - if (const auto *M = Init->getAnyMember()) - Fields.insert(M); - const Expr *E = Init->getInit(); - assert(E != nullptr); - getFieldsAndGlobalVars(*E, Fields, Vars); - } - } - getFieldsAndGlobalVars(*FuncDecl->getBody(), Fields, Vars); - - // These have to be added before the lines that follow to ensure that - // `create*` work correctly for structs. - DACtx->addModeledFields(Fields); + CallStack.push_back(FuncDecl); - initVars(Vars); + initFieldsGlobalsAndFuncs(FuncDecl); const auto *ParamIt = FuncDecl->param_begin(); @@ -359,45 +384,49 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl, // overloaded operators implemented as member functions, and parameter packs. for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { assert(ParamIt != FuncDecl->param_end()); - - const Expr *Arg = Args[ArgIndex]; - auto *ArgLoc = getStorageLocation(*Arg, SkipPast::Reference); - if (ArgLoc == nullptr) - continue; - const VarDecl *Param = *ParamIt; - auto &Loc = createStorageLocation(*Param); - setStorageLocation(*Param, Loc); - - QualType ParamType = Param->getType(); - if (ParamType->isReferenceType()) { - auto &Val = takeOwnership(std::make_unique<ReferenceValue>(*ArgLoc)); - setValue(Loc, Val); - } else if (auto *ArgVal = getValue(*ArgLoc)) { - setValue(Loc, *ArgVal); - } else if (Value *Val = createValue(ParamType)) { - setValue(Loc, *Val); - } + setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); } } -void Environment::popCall(const Environment &CalleeEnv) { +void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { // We ignore `DACtx` because it's already the same in both. We don't want the - // callee's `DeclCtx`, `ReturnLoc` or `ThisPointeeLoc`. We don't bring back - // `DeclToLoc` and `ExprToLoc` because we want to be able to later analyze the - // same callee in a different context, and `setStorageLocation` requires there - // to not already be a storage location assigned. Conceptually, these maps - // capture information from the local scope, so when popping that scope, we do - // not propagate the maps. + // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't + // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later + // analyze the same callee in a different context, and `setStorageLocation` + // requires there to not already be a storage location assigned. Conceptually, + // these maps capture information from the local scope, so when popping that + // scope, we do not propagate the maps. this->LocToVal = std::move(CalleeEnv.LocToVal); - this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct); this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); + + if (Call->isGLValue()) { + if (CalleeEnv.ReturnLoc != nullptr) + setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc); + } else if (!Call->getType()->isVoidType()) { + if (CalleeEnv.ReturnVal != nullptr) + setValueStrict(*Call, *CalleeEnv.ReturnVal); + } +} + +void Environment::popCall(const CXXConstructExpr *Call, + const Environment &CalleeEnv) { + // See also comment in `popCall(const CallExpr *, const Environment &)` above. + this->LocToVal = std::move(CalleeEnv.LocToVal); + this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); + + if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { + setValueStrict(*Call, *Val); + } } bool Environment::equivalentTo(const Environment &Other, Environment::ValueModel &Model) const { assert(DACtx == Other.DACtx); + if (ReturnVal != Other.ReturnVal) + return false; + if (ReturnLoc != Other.ReturnLoc) return false; @@ -435,6 +464,7 @@ bool Environment::equivalentTo(const Environment &Other, LatticeJoinEffect Environment::widen(const Environment &PrevEnv, Environment::ValueModel &Model) { assert(DACtx == PrevEnv.DACtx); + assert(ReturnVal == PrevEnv.ReturnVal); assert(ReturnLoc == PrevEnv.ReturnLoc); assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); assert(CallStack == PrevEnv.CallStack); @@ -447,17 +477,10 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv, // block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these maps are // subsets of the maps in `PrevEnv`. So, as long as we maintain this property // here, we don't need change their current values to widen. - // - // FIXME: `MemberLocToStruct` does not share the above property, because - // `join` can cause the map size to increase (when we add fresh data in places - // of conflict). Once this issue with join is resolved, re-enable the - // assertion below or replace with something that captures the desired - // invariant. assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); - // assert(MemberLocToStruct.size() <= PrevEnv.MemberLocToStruct.size()); - llvm::DenseMap<const StorageLocation *, Value *> WidenedLocToVal; + llvm::MapVector<const StorageLocation *, Value *> WidenedLocToVal; for (auto &Entry : LocToVal) { const StorageLocation *Loc = Entry.first; assert(Loc != nullptr); @@ -482,60 +505,75 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv, Effect = LatticeJoinEffect::Changed; } LocToVal = std::move(WidenedLocToVal); - // FIXME: update the equivalence calculation for `MemberLocToStruct`, once we - // have a systematic way of soundly comparing this map. if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || ExprToLoc.size() != PrevEnv.ExprToLoc.size() || - LocToVal.size() != PrevEnv.LocToVal.size() || - MemberLocToStruct.size() != PrevEnv.MemberLocToStruct.size()) + LocToVal.size() != PrevEnv.LocToVal.size()) Effect = LatticeJoinEffect::Changed; return Effect; } -LatticeJoinEffect Environment::join(const Environment &Other, - Environment::ValueModel &Model) { - assert(DACtx == Other.DACtx); - assert(ReturnLoc == Other.ReturnLoc); - assert(ThisPointeeLoc == Other.ThisPointeeLoc); - assert(CallStack == Other.CallStack); - - auto Effect = LatticeJoinEffect::Unchanged; - - Environment JoinedEnv(*DACtx); +Environment Environment::join(const Environment &EnvA, const Environment &EnvB, + Environment::ValueModel &Model) { + assert(EnvA.DACtx == EnvB.DACtx); + assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); + assert(EnvA.CallStack == EnvB.CallStack); + + Environment JoinedEnv(*EnvA.DACtx); + + JoinedEnv.CallStack = EnvA.CallStack; + JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; + + if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { + // `ReturnVal` might not always get set -- for example if we have a return + // statement of the form `return some_other_func()` and we decide not to + // analyze `some_other_func()`. + // In this case, we can't say anything about the joined return value -- we + // don't simply want to propagate the return value that we do have, because + // it might not be the correct one. + // This occurs for example in the test `ContextSensitiveMutualRecursion`. + JoinedEnv.ReturnVal = nullptr; + } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) { + JoinedEnv.ReturnVal = EnvA.ReturnVal; + } else { + assert(!EnvA.CallStack.empty()); + // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this + // cast. + auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back()); + assert(Func != nullptr); + if (Value *MergedVal = + mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA, + *EnvB.ReturnVal, EnvB, JoinedEnv, Model)) + JoinedEnv.ReturnVal = MergedVal; + } - JoinedEnv.CallStack = CallStack; - JoinedEnv.ReturnLoc = ReturnLoc; - JoinedEnv.ThisPointeeLoc = ThisPointeeLoc; + if (EnvA.ReturnLoc == EnvB.ReturnLoc) + JoinedEnv.ReturnLoc = EnvA.ReturnLoc; + else + JoinedEnv.ReturnLoc = nullptr; - JoinedEnv.DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc); - if (DeclToLoc.size() != JoinedEnv.DeclToLoc.size()) - Effect = LatticeJoinEffect::Changed; + // FIXME: Once we're able to remove declarations from `DeclToLoc` when their + // lifetime ends, add an assertion that there aren't any entries in + // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to + // different storage locations. + JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc); - JoinedEnv.ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc); - if (ExprToLoc.size() != JoinedEnv.ExprToLoc.size()) - Effect = LatticeJoinEffect::Changed; + JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); - JoinedEnv.MemberLocToStruct = - intersectDenseMaps(MemberLocToStruct, Other.MemberLocToStruct); - if (MemberLocToStruct.size() != JoinedEnv.MemberLocToStruct.size()) - Effect = LatticeJoinEffect::Changed; - - // FIXME: set `Effect` as needed. // FIXME: update join to detect backedges and simplify the flow condition // accordingly. - JoinedEnv.FlowConditionToken = &DACtx->joinFlowConditions( - *FlowConditionToken, *Other.FlowConditionToken); + JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( + EnvA.FlowConditionToken, EnvB.FlowConditionToken); - for (auto &Entry : LocToVal) { + for (auto &Entry : EnvA.LocToVal) { const StorageLocation *Loc = Entry.first; assert(Loc != nullptr); Value *Val = Entry.second; assert(Val != nullptr); - auto It = Other.LocToVal.find(Loc); - if (It == Other.LocToVal.end()) + auto It = EnvB.LocToVal.find(Loc); + if (It == EnvB.LocToVal.end()) continue; assert(It->second != nullptr); @@ -544,19 +582,13 @@ LatticeJoinEffect Environment::join(const Environment &Other, continue; } - if (Value *MergedVal = - mergeDistinctValues(Loc->getType(), *Val, *this, *It->second, Other, - JoinedEnv, Model)) { + if (Value *MergedVal = mergeDistinctValues( + Loc->getType(), *Val, EnvA, *It->second, EnvB, JoinedEnv, Model)) { JoinedEnv.LocToVal.insert({Loc, MergedVal}); - Effect = LatticeJoinEffect::Changed; } } - if (LocToVal.size() != JoinedEnv.LocToVal.size()) - Effect = LatticeJoinEffect::Changed; - *this = std::move(JoinedEnv); - - return Effect; + return JoinedEnv; } StorageLocation &Environment::createStorageLocation(QualType Type) { @@ -578,22 +610,39 @@ StorageLocation &Environment::createStorageLocation(const Expr &E) { } void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { - assert(DeclToLoc.find(&D) == DeclToLoc.end()); + assert(!DeclToLoc.contains(&D)); + assert(!isa_and_nonnull<ReferenceValue>(getValue(Loc))); DeclToLoc[&D] = &Loc; } -StorageLocation *Environment::getStorageLocation(const ValueDecl &D, - SkipPast SP) const { +StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { auto It = DeclToLoc.find(&D); - return It == DeclToLoc.end() ? nullptr : &skip(*It->second, SP); + if (It == DeclToLoc.end()) + return nullptr; + + StorageLocation *Loc = It->second; + + assert(!isa_and_nonnull<ReferenceValue>(getValue(*Loc))); + + return Loc; } void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { const Expr &CanonE = ignoreCFGOmittedNodes(E); - assert(ExprToLoc.find(&CanonE) == ExprToLoc.end()); + assert(!ExprToLoc.contains(&CanonE)); ExprToLoc[&CanonE] = &Loc; } +void Environment::setStorageLocationStrict(const Expr &E, + StorageLocation &Loc) { + // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, + // but we still want to be able to associate a `StorageLocation` with them, + // so allow these as an exception. + assert(E.isGLValue() || + E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); + setStorageLocation(E, Loc); +} + StorageLocation *Environment::getStorageLocation(const Expr &E, SkipPast SP) const { // FIXME: Add a test with parens. @@ -601,12 +650,37 @@ StorageLocation *Environment::getStorageLocation(const Expr &E, return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP); } -StorageLocation *Environment::getThisPointeeStorageLocation() const { +StorageLocation *Environment::getStorageLocationStrict(const Expr &E) const { + // See comment in `setStorageLocationStrict()`. + assert(E.isGLValue() || + E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); + StorageLocation *Loc = getStorageLocation(E, SkipPast::None); + + if (Loc == nullptr) + return nullptr; + + if (auto *RefVal = dyn_cast_or_null<ReferenceValue>(getValue(*Loc))) + return &RefVal->getReferentLoc(); + + return Loc; +} + +AggregateStorageLocation *Environment::getThisPointeeStorageLocation() const { return ThisPointeeLoc; } -StorageLocation *Environment::getReturnStorageLocation() const { - return ReturnLoc; +AggregateStorageLocation & +Environment::getResultObjectLocation(const Expr &RecordPRValue) { + assert(RecordPRValue.getType()->isRecordType()); + assert(RecordPRValue.isPRValue()); + + if (StorageLocation *ExistingLoc = + getStorageLocation(RecordPRValue, SkipPast::None)) + return *cast<AggregateStorageLocation>(ExistingLoc); + auto &Loc = cast<AggregateStorageLocation>( + DACtx->getStableStorageLocation(RecordPRValue)); + setStorageLocation(RecordPRValue, Loc); + return Loc; } PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { @@ -614,45 +688,41 @@ PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { } void Environment::setValue(const StorageLocation &Loc, Value &Val) { - LocToVal[&Loc] = &Val; + assert(!isa<StructValue>(&Val) || + &cast<StructValue>(&Val)->getAggregateLoc() == &Loc); - if (auto *StructVal = dyn_cast<StructValue>(&Val)) { - auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc); + LocToVal[&Loc] = &Val; +} - const QualType Type = AggregateLoc.getType(); - assert(Type->isStructureOrClassType() || Type->isUnionType()); +void Environment::setValueStrict(const Expr &E, Value &Val) { + assert(E.isPRValue()); + assert(!isa<ReferenceValue>(Val)); - for (const FieldDecl *Field : DACtx->getReferencedFields(Type)) { - assert(Field != nullptr); - StorageLocation &FieldLoc = AggregateLoc.getChild(*Field); - MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field); - if (auto *FieldVal = StructVal->getChild(*Field)) - setValue(FieldLoc, *FieldVal); - } + if (auto *StructVal = dyn_cast<StructValue>(&Val)) { + if (auto *ExistingVal = cast_or_null<StructValue>(getValueStrict(E))) + assert(&ExistingVal->getAggregateLoc() == &StructVal->getAggregateLoc()); + if (StorageLocation *ExistingLoc = getStorageLocation(E, SkipPast::None)) + assert(ExistingLoc == &StructVal->getAggregateLoc()); + else + setStorageLocation(E, StructVal->getAggregateLoc()); + setValue(StructVal->getAggregateLoc(), Val); + return; } - auto It = MemberLocToStruct.find(&Loc); - if (It != MemberLocToStruct.end()) { - // `Loc` is the location of a struct member so we need to also update the - // value of the member in the corresponding `StructValue`. - - assert(It->second.first != nullptr); - StructValue &StructVal = *It->second.first; - - assert(It->second.second != nullptr); - const ValueDecl &Member = *It->second.second; - - StructVal.setChild(Member, Val); + StorageLocation *Loc = getStorageLocation(E, SkipPast::None); + if (Loc == nullptr) { + Loc = &createStorageLocation(E); + setStorageLocation(E, *Loc); } + setValue(*Loc, Val); } Value *Environment::getValue(const StorageLocation &Loc) const { - auto It = LocToVal.find(&Loc); - return It == LocToVal.end() ? nullptr : It->second; + return LocToVal.lookup(&Loc); } -Value *Environment::getValue(const ValueDecl &D, SkipPast SP) const { - auto *Loc = getStorageLocation(D, SP); +Value *Environment::getValue(const ValueDecl &D) const { + auto *Loc = getStorageLocation(D); if (Loc == nullptr) return nullptr; return getValue(*Loc); @@ -665,6 +735,15 @@ Value *Environment::getValue(const Expr &E, SkipPast SP) const { return getValue(*Loc); } +Value *Environment::getValueStrict(const Expr &E) const { + assert(E.isPRValue()); + Value *Val = getValue(E, SkipPast::None); + + assert(Val == nullptr || !isa<ReferenceValue>(Val)); + + return Val; +} + Value *Environment::createValue(QualType Type) { llvm::DenseSet<QualType> Visited; int CreatedValuesCount = 0; @@ -697,65 +776,120 @@ Value *Environment::createValueUnlessSelfReferential( // with integers, and so distinguishing them serves no purpose, but could // prevent convergence. CreatedValuesCount++; - return &takeOwnership(std::make_unique<IntegerValue>()); + return &arena().create<IntegerValue>(); } - if (Type->isReferenceType()) { + if (Type->isReferenceType() || Type->isPointerType()) { CreatedValuesCount++; - QualType PointeeType = Type->castAs<ReferenceType>()->getPointeeType(); - auto &PointeeLoc = createStorageLocation(PointeeType); + QualType PointeeType = Type->getPointeeType(); + StorageLocation &PointeeLoc = + createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount); - if (Visited.insert(PointeeType.getCanonicalType()).second) { - Value *PointeeVal = createValueUnlessSelfReferential( - PointeeType, Visited, Depth, CreatedValuesCount); - Visited.erase(PointeeType.getCanonicalType()); - - if (PointeeVal != nullptr) - setValue(PointeeLoc, *PointeeVal); - } - - return &takeOwnership(std::make_unique<ReferenceValue>(PointeeLoc)); + if (Type->isReferenceType()) + return &arena().create<ReferenceValue>(PointeeLoc); + else + return &arena().create<PointerValue>(PointeeLoc); } - if (Type->isPointerType()) { + if (Type->isRecordType()) { CreatedValuesCount++; - QualType PointeeType = Type->castAs<PointerType>()->getPointeeType(); - auto &PointeeLoc = createStorageLocation(PointeeType); + llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; + for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { + assert(Field != nullptr); - if (Visited.insert(PointeeType.getCanonicalType()).second) { - Value *PointeeVal = createValueUnlessSelfReferential( - PointeeType, Visited, Depth, CreatedValuesCount); - Visited.erase(PointeeType.getCanonicalType()); + QualType FieldType = Field->getType(); - if (PointeeVal != nullptr) - setValue(PointeeLoc, *PointeeVal); + FieldLocs.insert( + {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1, + CreatedValuesCount)}); } - return &takeOwnership(std::make_unique<PointerValue>(PointeeLoc)); + AggregateStorageLocation &Loc = + arena().create<AggregateStorageLocation>(Type, std::move(FieldLocs)); + StructValue &StructVal = create<StructValue>(Loc); + + // As we already have a storage location for the `StructValue`, we can and + // should associate them in the environment. + setValue(Loc, StructVal); + + return &StructVal; } - if (Type->isStructureOrClassType() || Type->isUnionType()) { - CreatedValuesCount++; - llvm::DenseMap<const ValueDecl *, Value *> FieldValues; - for (const FieldDecl *Field : DACtx->getReferencedFields(Type)) { - assert(Field != nullptr); + return nullptr; +} - QualType FieldType = Field->getType(); - if (Visited.contains(FieldType.getCanonicalType())) - continue; - - Visited.insert(FieldType.getCanonicalType()); - if (auto *FieldValue = createValueUnlessSelfReferential( - FieldType, Visited, Depth + 1, CreatedValuesCount)) - FieldValues.insert({Field, FieldValue}); - Visited.erase(FieldType.getCanonicalType()); +StorageLocation & +Environment::createLocAndMaybeValue(QualType Ty, + llvm::DenseSet<QualType> &Visited, + int Depth, int &CreatedValuesCount) { + if (!Visited.insert(Ty.getCanonicalType()).second) + return createStorageLocation(Ty.getNonReferenceType()); + Value *Val = createValueUnlessSelfReferential( + Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount); + Visited.erase(Ty.getCanonicalType()); + + Ty = Ty.getNonReferenceType(); + + if (Val == nullptr) + return createStorageLocation(Ty); + + if (Ty->isRecordType()) + return cast<StructValue>(Val)->getAggregateLoc(); + + StorageLocation &Loc = createStorageLocation(Ty); + setValue(Loc, *Val); + return Loc; +} + +StorageLocation &Environment::createObjectInternal(const VarDecl *D, + QualType Ty, + const Expr *InitExpr) { + if (Ty->isReferenceType()) { + // Although variables of reference type always need to be initialized, it + // can happen that we can't see the initializer, so `InitExpr` may still + // be null. + if (InitExpr) { + if (auto *InitExprLoc = + getStorageLocation(*InitExpr, SkipPast::Reference)) + return *InitExprLoc; } - return &takeOwnership( - std::make_unique<StructValue>(std::move(FieldValues))); + // Even though we have an initializer, we might not get an + // InitExprLoc, for example if the InitExpr is a CallExpr for which we + // don't have a function body. In this case, we just invent a storage + // location and value -- it's the best we can do. + return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); } - return nullptr; + Value *Val = nullptr; + if (InitExpr) + // In the (few) cases where an expression is intentionally + // "uninterpreted", `InitExpr` is not associated with a value. There are + // two ways to handle this situation: propagate the status, so that + // uninterpreted initializers result in uninterpreted variables, or + // provide a default value. We choose the latter so that later refinements + // of the variable can be used for reasoning about the surrounding code. + // For this reason, we let this case be handled by the `createValue()` + // call below. + // + // FIXME. If and when we interpret all language cases, change this to + // assert that `InitExpr` is interpreted, rather than supplying a + // default value (assuming we don't update the environment API to return + // references). + Val = getValueStrict(*InitExpr); + if (!Val) + Val = createValue(Ty); + + if (Ty->isRecordType()) + return cast<StructValue>(Val)->getAggregateLoc(); + + StorageLocation &Loc = + D ? createStorageLocation(*D) : createStorageLocation(Ty); + + if (Val) + setValue(Loc, *Val); + + return Loc; } StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const { @@ -768,11 +902,6 @@ StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const { if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc))) return Val->getReferentLoc(); return Loc; - case SkipPast::ReferenceThenPointer: - StorageLocation &LocPastRef = skip(Loc, SkipPast::Reference); - if (auto *Val = dyn_cast_or_null<PointerValue>(getValue(LocPastRef))) - return Val->getPointeeLoc(); - return LocPastRef; } llvm_unreachable("bad SkipPast kind"); } @@ -782,12 +911,12 @@ const StorageLocation &Environment::skip(const StorageLocation &Loc, return skip(*const_cast<StorageLocation *>(&Loc), SP); } -void Environment::addToFlowCondition(BoolValue &Val) { - DACtx->addFlowConditionConstraint(*FlowConditionToken, Val); +void Environment::addToFlowCondition(const Formula &Val) { + DACtx->addFlowConditionConstraint(FlowConditionToken, Val); } -bool Environment::flowConditionImplies(BoolValue &Val) const { - return DACtx->flowConditionImplies(*FlowConditionToken, Val); +bool Environment::flowConditionImplies(const Formula &Val) const { + return DACtx->flowConditionImplies(FlowConditionToken, Val); } void Environment::dump(raw_ostream &OS) const { @@ -795,7 +924,7 @@ void Environment::dump(raw_ostream &OS) const { // fields are printed. OS << "DeclToLoc:\n"; for (auto [D, L] : DeclToLoc) - OS << " [" << D->getName() << ", " << L << "]\n"; + OS << " [" << D->getNameAsString() << ", " << L << "]\n"; OS << "ExprToLoc:\n"; for (auto [E, L] : ExprToLoc) @@ -807,12 +936,93 @@ void Environment::dump(raw_ostream &OS) const { } OS << "FlowConditionToken:\n"; - DACtx->dumpFlowCondition(*FlowConditionToken); + DACtx->dumpFlowCondition(FlowConditionToken, OS); } void Environment::dump() const { dump(llvm::dbgs()); } +AggregateStorageLocation * +getImplicitObjectLocation(const CXXMemberCallExpr &MCE, + const Environment &Env) { + Expr *ImplicitObject = MCE.getImplicitObjectArgument(); + if (ImplicitObject == nullptr) + return nullptr; + StorageLocation *Loc = + Env.getStorageLocation(*ImplicitObject, SkipPast::Reference); + if (Loc == nullptr) + return nullptr; + if (ImplicitObject->getType()->isPointerType()) { + if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc))) + return &cast<AggregateStorageLocation>(Val->getPointeeLoc()); + return nullptr; + } + return cast<AggregateStorageLocation>(Loc); +} + +AggregateStorageLocation *getBaseObjectLocation(const MemberExpr &ME, + const Environment &Env) { + Expr *Base = ME.getBase(); + if (Base == nullptr) + return nullptr; + StorageLocation *Loc = Env.getStorageLocation(*Base, SkipPast::Reference); + if (Loc == nullptr) + return nullptr; + if (ME.isArrow()) { + if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc))) + return &cast<AggregateStorageLocation>(Val->getPointeeLoc()); + return nullptr; + } + return cast<AggregateStorageLocation>(Loc); +} + +std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) { + // Unnamed bitfields are only used for padding and do not appear in + // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s + // field list, and we thus need to remove them before mapping inits to + // fields to avoid mapping inits to the wrongs fields. + std::vector<FieldDecl *> Fields; + llvm::copy_if( + RD->fields(), std::back_inserter(Fields), + [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); + return Fields; +} + +StructValue &refreshStructValue(AggregateStorageLocation &Loc, + Environment &Env) { + auto &NewVal = Env.create<StructValue>(Loc); + Env.setValue(Loc, NewVal); + return NewVal; +} + +StructValue &refreshStructValue(const Expr &Expr, Environment &Env) { + assert(Expr.getType()->isRecordType()); + + if (Expr.isPRValue()) { + if (auto *ExistingVal = + cast_or_null<StructValue>(Env.getValueStrict(Expr))) { + auto &NewVal = Env.create<StructValue>(ExistingVal->getAggregateLoc()); + Env.setValueStrict(Expr, NewVal); + return NewVal; + } + + auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType())); + Env.setValueStrict(Expr, NewVal); + return NewVal; + } + + if (auto *Loc = cast_or_null<AggregateStorageLocation>( + Env.getStorageLocationStrict(Expr))) { + auto &NewVal = Env.create<StructValue>(*Loc); + Env.setValue(*Loc, NewVal); + return NewVal; + } + + auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType())); + Env.setStorageLocationStrict(Expr, NewVal.getAggregateLoc()); + return NewVal; +} + } // namespace dataflow } // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp index d4886f154b33..f8a049adea5e 100644 --- a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp @@ -16,22 +16,12 @@ #include "clang/Analysis/FlowSensitive/DebugSupport.h" #include "clang/Analysis/FlowSensitive/Solver.h" #include "clang/Analysis/FlowSensitive/Value.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormatAdapters.h" -#include "llvm/Support/FormatCommon.h" -#include "llvm/Support/FormatVariadic.h" namespace clang { namespace dataflow { -using llvm::AlignStyle; -using llvm::fmt_pad; -using llvm::formatv; - llvm::StringRef debugString(Value::Kind Kind) { switch (Kind) { case Value::Kind::Integer: @@ -46,26 +36,19 @@ llvm::StringRef debugString(Value::Kind Kind) { return "AtomicBool"; case Value::Kind::TopBool: return "TopBool"; - case Value::Kind::Conjunction: - return "Conjunction"; - case Value::Kind::Disjunction: - return "Disjunction"; - case Value::Kind::Negation: - return "Negation"; - case Value::Kind::Implication: - return "Implication"; - case Value::Kind::Biconditional: - return "Biconditional"; + case Value::Kind::FormulaBool: + return "FormulaBool"; } llvm_unreachable("Unhandled value kind"); } -llvm::StringRef debugString(Solver::Result::Assignment Assignment) { +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + Solver::Result::Assignment Assignment) { switch (Assignment) { case Solver::Result::Assignment::AssignedFalse: - return "False"; + return OS << "False"; case Solver::Result::Assignment::AssignedTrue: - return "True"; + return OS << "True"; } llvm_unreachable("Booleans can only be assigned true/false"); } @@ -82,177 +65,16 @@ llvm::StringRef debugString(Solver::Result::Status Status) { llvm_unreachable("Unhandled SAT check result status"); } -namespace { - -class DebugStringGenerator { -public: - explicit DebugStringGenerator( - llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNamesArg) - : Counter(0), AtomNames(std::move(AtomNamesArg)) { -#ifndef NDEBUG - llvm::StringSet<> Names; - for (auto &N : AtomNames) { - assert(Names.insert(N.second).second && - "The same name must not assigned to different atoms"); - } -#endif - } - - /// Returns a string representation of a boolean value `B`. - std::string debugString(const BoolValue &B, size_t Depth = 0) { - std::string S; - switch (B.getKind()) { - case Value::Kind::AtomicBool: { - S = getAtomName(&cast<AtomicBoolValue>(B)); - break; - } - case Value::Kind::Conjunction: { - auto &C = cast<ConjunctionValue>(B); - auto L = debugString(C.getLeftSubValue(), Depth + 1); - auto R = debugString(C.getRightSubValue(), Depth + 1); - S = formatv("(and\n{0}\n{1})", L, R); - break; - } - case Value::Kind::Disjunction: { - auto &D = cast<DisjunctionValue>(B); - auto L = debugString(D.getLeftSubValue(), Depth + 1); - auto R = debugString(D.getRightSubValue(), Depth + 1); - S = formatv("(or\n{0}\n{1})", L, R); - break; - } - case Value::Kind::Negation: { - auto &N = cast<NegationValue>(B); - S = formatv("(not\n{0})", debugString(N.getSubVal(), Depth + 1)); - break; - } - case Value::Kind::Implication: { - auto &IV = cast<ImplicationValue>(B); - auto L = debugString(IV.getLeftSubValue(), Depth + 1); - auto R = debugString(IV.getRightSubValue(), Depth + 1); - S = formatv("(=>\n{0}\n{1})", L, R); - break; - } - case Value::Kind::Biconditional: { - auto &BV = cast<BiconditionalValue>(B); - auto L = debugString(BV.getLeftSubValue(), Depth + 1); - auto R = debugString(BV.getRightSubValue(), Depth + 1); - S = formatv("(=\n{0}\n{1})", L, R); - break; - } - default: - llvm_unreachable("Unhandled value kind"); - } - auto Indent = Depth * 4; - return formatv("{0}", fmt_pad(S, Indent, 0)); - } - - std::string debugString(const llvm::DenseSet<BoolValue *> &Constraints) { - std::vector<std::string> ConstraintsStrings; - ConstraintsStrings.reserve(Constraints.size()); - for (BoolValue *Constraint : Constraints) { - ConstraintsStrings.push_back(debugString(*Constraint)); - } - llvm::sort(ConstraintsStrings); - - std::string Result; - for (const std::string &S : ConstraintsStrings) { - Result += S; - Result += '\n'; - } - return Result; +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Solver::Result &R) { + OS << debugString(R.getStatus()) << "\n"; + if (auto Solution = R.getSolution()) { + std::vector<std::pair<Atom, Solver::Result::Assignment>> Sorted = { + Solution->begin(), Solution->end()}; + llvm::sort(Sorted); + for (const auto &Entry : Sorted) + OS << Entry.first << " = " << Entry.second << "\n"; } - - /// Returns a string representation of a set of boolean `Constraints` and the - /// `Result` of satisfiability checking on the `Constraints`. - std::string debugString(ArrayRef<BoolValue *> &Constraints, - const Solver::Result &Result) { - auto Template = R"( -Constraints ------------- -{0:$[ - -]} ------------- -{1}. -{2} -)"; - - std::vector<std::string> ConstraintsStrings; - ConstraintsStrings.reserve(Constraints.size()); - for (auto &Constraint : Constraints) { - ConstraintsStrings.push_back(debugString(*Constraint)); - } - - auto StatusString = clang::dataflow::debugString(Result.getStatus()); - auto Solution = Result.getSolution(); - auto SolutionString = Solution ? "\n" + debugString(*Solution) : ""; - - return formatv( - Template, - llvm::make_range(ConstraintsStrings.begin(), ConstraintsStrings.end()), - StatusString, SolutionString); - } - -private: - /// Returns a string representation of a truth assignment to atom booleans. - std::string debugString( - const llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment> - &AtomAssignments) { - size_t MaxNameLength = 0; - for (auto &AtomName : AtomNames) { - MaxNameLength = std::max(MaxNameLength, AtomName.second.size()); - } - - std::vector<std::string> Lines; - for (auto &AtomAssignment : AtomAssignments) { - auto Line = formatv("{0} = {1}", - fmt_align(getAtomName(AtomAssignment.first), - AlignStyle::Left, MaxNameLength), - clang::dataflow::debugString(AtomAssignment.second)); - Lines.push_back(Line); - } - llvm::sort(Lines); - - return formatv("{0:$[\n]}", llvm::make_range(Lines.begin(), Lines.end())); - } - - /// Returns the name assigned to `Atom`, either user-specified or created by - /// default rules (B0, B1, ...). - std::string getAtomName(const AtomicBoolValue *Atom) { - auto Entry = AtomNames.try_emplace(Atom, formatv("B{0}", Counter)); - if (Entry.second) { - Counter++; - } - return Entry.first->second; - } - - // Keep track of number of atoms without a user-specified name, used to assign - // non-repeating default names to such atoms. - size_t Counter; - - // Keep track of names assigned to atoms. - llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames; -}; - -} // namespace - -std::string -debugString(const BoolValue &B, - llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) { - return DebugStringGenerator(std::move(AtomNames)).debugString(B); -} - -std::string -debugString(const llvm::DenseSet<BoolValue *> &Constraints, - llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) { - return DebugStringGenerator(std::move(AtomNames)).debugString(Constraints); -} - -std::string -debugString(ArrayRef<BoolValue *> Constraints, const Solver::Result &Result, - llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) { - return DebugStringGenerator(std::move(AtomNames)) - .debugString(Constraints, Result); + return OS; } } // namespace dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Formula.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Formula.cpp new file mode 100644 index 000000000000..504ad2fb7938 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Formula.cpp @@ -0,0 +1,82 @@ +//===- Formula.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> + +namespace clang::dataflow { + +Formula &Formula::create(llvm::BumpPtrAllocator &Alloc, Kind K, + ArrayRef<const Formula *> Operands, unsigned Value) { + assert(Operands.size() == numOperands(K)); + if (Value != 0) // Currently, formulas have values or operands, not both. + assert(numOperands(K) == 0); + void *Mem = Alloc.Allocate(sizeof(Formula) + + Operands.size() * sizeof(Operands.front()), + alignof(Formula)); + Formula *Result = new (Mem) Formula(); + Result->FormulaKind = K; + Result->Value = Value; + // Operands are stored as `const Formula *`s after the formula itself. + // We don't need to construct an object as pointers are trivial types. + // Formula is alignas(const Formula *), so alignment is satisfied. + llvm::copy(Operands, reinterpret_cast<const Formula **>(Result + 1)); + return *Result; +} + +static llvm::StringLiteral sigil(Formula::Kind K) { + switch (K) { + case Formula::AtomRef: + return ""; + case Formula::Not: + return "!"; + case Formula::And: + return " & "; + case Formula::Or: + return " | "; + case Formula::Implies: + return " => "; + case Formula::Equal: + return " = "; + } + llvm_unreachable("unhandled formula kind"); +} + +void Formula::print(llvm::raw_ostream &OS, const AtomNames *Names) const { + if (Names && kind() == AtomRef) + if (auto It = Names->find(getAtom()); It != Names->end()) { + OS << It->second; + return; + } + + switch (numOperands(kind())) { + case 0: + OS << getAtom(); + break; + case 1: + OS << sigil(kind()); + operands()[0]->print(OS, Names); + break; + case 2: + OS << '('; + operands()[0]->print(OS, Names); + OS << sigil(kind()); + operands()[1]->print(OS, Names); + OS << ')'; + break; + default: + llvm_unreachable("unhandled formula arity"); + } +} + +} // namespace clang::dataflow
\ No newline at end of file diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp new file mode 100644 index 000000000000..ee89e074f846 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp @@ -0,0 +1,536 @@ +//===-- HTMLLogger.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the HTML logger. Given a directory dir/, we write +// dir/0.html for the first analysis, etc. +// These files contain a visualization that allows inspecting the CFG and the +// state of the analysis at each point. +// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded +// so each output file is self-contained. +// +// VIEWS +// +// The timeline and function view are always shown. These allow selecting basic +// blocks, statements within them, and processing iterations (BBs are visited +// multiple times when e.g. loops are involved). +// These are written directly into the HTML body. +// +// There are also listings of particular basic blocks, and dumps of the state +// at particular analysis points (i.e. BB2 iteration 3 statement 2). +// These are only shown when the relevant BB/analysis point is *selected*. +// +// DATA AND TEMPLATES +// +// The HTML proper is mostly static. +// The analysis data is in a JSON object HTMLLoggerData which is embedded as +// a <script> in the <head>. +// This gets rendered into DOM by a simple template processor which substitutes +// the data into <template> tags embedded in the HTML. (see inflate() in JS). +// +// SELECTION +// +// This is the only real interactive mechanism. +// +// At any given time, there are several named selections, e.g.: +// bb: B2 (basic block 0 is selected) +// elt: B2.4 (statement 4 is selected) +// iter: B2:1 (iteration 1 of the basic block is selected) +// hover: B3 (hovering over basic block 3) +// +// The selection is updated by mouse events: hover by moving the mouse and +// others by clicking. Elements that are click targets generally have attributes +// (id or data-foo) that define what they should select. +// See watchSelection() in JS for the exact logic. +// +// When the "bb" selection is set to "B2": +// - sections <section data-selection="bb"> get shown +// - templates under such sections get re-rendered +// - elements with class/id "B2" get class "bb-select" +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" +#include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Logger.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/raw_ostream.h" +// Defines assets: HTMLLogger_{html_js,css} +#include "HTMLLogger.inc" + +namespace clang::dataflow { +namespace { + +// Render a graphviz graph specification to SVG using the `dot` tool. +llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph); + +using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>; + +// Recursively dumps Values/StorageLocations as JSON +class ModelDumper { +public: + ModelDumper(llvm::json::OStream &JOS, const Environment &Env) + : JOS(JOS), Env(Env) {} + + void dump(Value &V) { + JOS.attribute("value_id", llvm::to_string(&V)); + if (!Visited.insert(&V).second) + return; + + JOS.attribute("kind", debugString(V.getKind())); + + switch (V.getKind()) { + case Value::Kind::Integer: + case Value::Kind::TopBool: + case Value::Kind::AtomicBool: + case Value::Kind::FormulaBool: + break; + case Value::Kind::Reference: + JOS.attributeObject( + "referent", [&] { dump(cast<ReferenceValue>(V).getReferentLoc()); }); + break; + case Value::Kind::Pointer: + JOS.attributeObject( + "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); }); + break; + case Value::Kind::Struct: + for (const auto &Child : + cast<StructValue>(V).getAggregateLoc().children()) + JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] { + if (Child.second) + if (Value *Val = Env.getValue(*Child.second)) + dump(*Val); + }); + break; + } + + for (const auto& Prop : V.properties()) + JOS.attributeObject(("p:" + Prop.first()).str(), + [&] { dump(*Prop.second); }); + + // Running the SAT solver is expensive, but knowing which booleans are + // guaranteed true/false here is valuable and hard to determine by hand. + if (auto *B = llvm::dyn_cast<BoolValue>(&V)) { + JOS.attribute("formula", llvm::to_string(B->formula())); + JOS.attribute( + "truth", Env.flowConditionImplies(B->formula()) ? "true" + : Env.flowConditionImplies(Env.arena().makeNot(B->formula())) + ? "false" + : "unknown"); + } + } + void dump(const StorageLocation &L) { + JOS.attribute("location", llvm::to_string(&L)); + if (!Visited.insert(&L).second) + return; + + JOS.attribute("type", L.getType().getAsString()); + if (auto *V = Env.getValue(L)) + dump(*V); + } + + llvm::DenseSet<const void*> Visited; + llvm::json::OStream &JOS; + const Environment &Env; +}; + +class HTMLLogger : public Logger { + StreamFactory Streams; + std::unique_ptr<llvm::raw_ostream> OS; + std::optional<llvm::json::OStream> JOS; + + const ControlFlowContext *CFG; + // Timeline of iterations of CFG block visitation. + std::vector<std::pair<const CFGBlock *, unsigned>> Iters; + // Number of times each CFG block has been seen. + llvm::DenseMap<const CFGBlock *, unsigned> BlockIters; + // The messages logged in the current context but not yet written. + std::string ContextLogs; + // The number of elements we have visited within the current CFG block. + unsigned ElementIndex; + +public: + explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {} + void beginAnalysis(const ControlFlowContext &CFG, + TypeErasedDataflowAnalysis &A) override { + OS = Streams(); + this->CFG = &CFG; + *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first; + + if (const auto *D = CFG.getDecl()) { + const auto &SM = A.getASTContext().getSourceManager(); + *OS << "<title>"; + if (const auto *ND = dyn_cast<NamedDecl>(D)) + *OS << ND->getNameAsString() << " at "; + *OS << SM.getFilename(D->getLocation()) << ":" + << SM.getSpellingLineNumber(D->getLocation()); + *OS << "</title>\n"; + }; + + *OS << "<style>" << HTMLLogger_css << "</style>\n"; + *OS << "<script>" << HTMLLogger_js << "</script>\n"; + + writeCode(); + writeCFG(); + + *OS << "<script>var HTMLLoggerData = \n"; + JOS.emplace(*OS, /*Indent=*/2); + JOS->objectBegin(); + JOS->attributeBegin("states"); + JOS->objectBegin(); + } + // Between beginAnalysis() and endAnalysis() we write all the states for + // particular analysis points into the `timeline` array. + void endAnalysis() override { + JOS->objectEnd(); + JOS->attributeEnd(); + + JOS->attributeArray("timeline", [&] { + for (const auto &E : Iters) { + JOS->object([&] { + JOS->attribute("block", blockID(E.first->getBlockID())); + JOS->attribute("iter", E.second); + }); + } + }); + JOS->attributeObject("cfg", [&] { + for (const auto &E : BlockIters) + writeBlock(*E.first, E.second); + }); + + JOS->objectEnd(); + JOS.reset(); + *OS << ";\n</script>\n"; + *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second; + } + + void enterBlock(const CFGBlock &B) override { + Iters.emplace_back(&B, ++BlockIters[&B]); + ElementIndex = 0; + } + void enterElement(const CFGElement &E) override { + ++ElementIndex; + } + + static std::string blockID(unsigned Block) { + return llvm::formatv("B{0}", Block); + } + static std::string eltID(unsigned Block, unsigned Element) { + return llvm::formatv("B{0}.{1}", Block, Element); + } + static std::string iterID(unsigned Block, unsigned Iter) { + return llvm::formatv("B{0}:{1}", Block, Iter); + } + static std::string elementIterID(unsigned Block, unsigned Iter, + unsigned Element) { + return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element); + } + + // Write the analysis state associated with a particular analysis point. + // FIXME: this dump is fairly opaque. We should show: + // - values associated with the current Stmt + // - values associated with its children + // - meaningful names for values + // - which boolean values are implied true/false by the flow condition + void recordState(TypeErasedDataflowAnalysisState &State) override { + unsigned Block = Iters.back().first->getBlockID(); + unsigned Iter = Iters.back().second; + JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] { + JOS->attribute("block", blockID(Block)); + JOS->attribute("iter", Iter); + JOS->attribute("element", ElementIndex); + + // If this state immediately follows an Expr, show its built-in model. + if (ElementIndex > 0) { + auto S = + Iters.back().first->Elements[ElementIndex - 1].getAs<CFGStmt>(); + if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) + if (auto *Loc = State.Env.getStorageLocation(*E, SkipPast::None)) + JOS->attributeObject( + "value", [&] { ModelDumper(*JOS, State.Env).dump(*Loc); }); + } + if (!ContextLogs.empty()) { + JOS->attribute("logs", ContextLogs); + ContextLogs.clear(); + } + { + std::string BuiltinLattice; + llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice); + State.Env.dump(BuiltinLatticeS); + JOS->attribute("builtinLattice", BuiltinLattice); + } + }); + } + void blockConverged() override { logText("Block converged"); } + + void logText(llvm::StringRef S) override { + ContextLogs.append(S.begin(), S.end()); + ContextLogs.push_back('\n'); + } + +private: + // Write the CFG block details. + // Currently this is just the list of elements in execution order. + // FIXME: an AST dump would be a useful view, too. + void writeBlock(const CFGBlock &B, unsigned Iters) { + JOS->attributeObject(blockID(B.getBlockID()), [&] { + JOS->attribute("iters", Iters); + JOS->attributeArray("elements", [&] { + for (const auto &Elt : B.Elements) { + std::string Dump; + llvm::raw_string_ostream DumpS(Dump); + Elt.dumpToStream(DumpS); + JOS->value(Dump); + } + }); + }); + } + + // Write the code of function being examined. + // We want to overlay the code with <span>s that mark which BB particular + // tokens are associated with, and even which BB element (so that clicking + // can select the right element). + void writeCode() { + if (!CFG->getDecl()) + return; + const auto &AST = CFG->getDecl()->getASTContext(); + bool Invalid = false; + + // Extract the source code from the original file. + // Pretty-printing from the AST would probably be nicer (no macros or + // indentation to worry about), but we need the boundaries of particular + // AST nodes and the printer doesn't provide this. + auto Range = clang::Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(CFG->getDecl()->getSourceRange()), + AST.getSourceManager(), AST.getLangOpts()); + if (Range.isInvalid()) + return; + llvm::StringRef Code = clang::Lexer::getSourceText( + Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid); + if (Invalid) + return; + + static constexpr unsigned Missing = -1; + // TokenInfo stores the BB and set of elements that a token is part of. + struct TokenInfo { + // The basic block this is part of. + // This is the BB of the stmt with the smallest containing range. + unsigned BB = Missing; + unsigned BBPriority = 0; + // The most specific stmt this is part of (smallest range). + unsigned Elt = Missing; + unsigned EltPriority = 0; + // All stmts this is part of. + SmallVector<unsigned> Elts; + + // Mark this token as being part of BB.Elt. + // RangeLen is the character length of the element's range, used to + // distinguish inner vs outer statements. + // For example in `a==0`, token "a" is part of the stmts "a" and "a==0". + // However "a" has a smaller range, so is more specific. Clicking on the + // token "a" should select the stmt "a". + void assign(unsigned BB, unsigned Elt, unsigned RangeLen) { + // A worse BB (larger range) => ignore. + if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen) + return; + if (BB != this->BB) { + this->BB = BB; + Elts.clear(); + BBPriority = RangeLen; + } + BBPriority = std::min(BBPriority, RangeLen); + Elts.push_back(Elt); + if (this->Elt == Missing || EltPriority > RangeLen) + this->Elt = Elt; + } + bool operator==(const TokenInfo &Other) const { + return std::tie(BB, Elt, Elts) == + std::tie(Other.BB, Other.Elt, Other.Elts); + } + // Write the attributes for the <span> on this token. + void write(llvm::raw_ostream &OS) const { + OS << "class='c"; + if (BB != Missing) + OS << " " << blockID(BB); + for (unsigned Elt : Elts) + OS << " " << eltID(BB, Elt); + OS << "'"; + + if (Elt != Missing) + OS << " data-elt='" << eltID(BB, Elt) << "'"; + if (BB != Missing) + OS << " data-bb='" << blockID(BB) << "'"; + } + }; + + // Construct one TokenInfo per character in a flat array. + // This is inefficient (chars in a token all have the same info) but simple. + std::vector<TokenInfo> State(Code.size()); + for (const auto *Block : CFG->getCFG()) { + unsigned EltIndex = 0; + for (const auto& Elt : *Block) { + ++EltIndex; + if (const auto S = Elt.getAs<CFGStmt>()) { + auto EltRange = clang::Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()), + AST.getSourceManager(), AST.getLangOpts()); + if (EltRange.isInvalid()) + continue; + if (EltRange.getBegin() < Range.getBegin() || + EltRange.getEnd() >= Range.getEnd() || + EltRange.getEnd() < Range.getBegin() || + EltRange.getEnd() >= Range.getEnd()) + continue; + unsigned Off = EltRange.getBegin().getRawEncoding() - + Range.getBegin().getRawEncoding(); + unsigned Len = EltRange.getEnd().getRawEncoding() - + EltRange.getBegin().getRawEncoding(); + for (unsigned I = 0; I < Len; ++I) + State[Off + I].assign(Block->getBlockID(), EltIndex, Len); + } + } + } + + // Finally, write the code with the correct <span>s. + unsigned Line = + AST.getSourceManager().getSpellingLineNumber(Range.getBegin()); + *OS << "<template data-copy='code'>\n"; + *OS << "<code class='filename'>"; + llvm::printHTMLEscaped( + llvm::sys::path::filename( + AST.getSourceManager().getFilename(Range.getBegin())), + *OS); + *OS << "</code>"; + *OS << "<code class='line' data-line='" << Line++ << "'>"; + for (unsigned I = 0; I < Code.size(); ++I) { + // Don't actually write a <span> around each character, only break spans + // when the TokenInfo changes. + bool NeedOpen = I == 0 || !(State[I] == State[I-1]); + bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]); + if (NeedOpen) { + *OS << "<span "; + State[I].write(*OS); + *OS << ">"; + } + if (Code[I] == '\n') + *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>"; + else + llvm::printHTMLEscaped(Code.substr(I, 1), *OS); + if (NeedClose) *OS << "</span>"; + } + *OS << "</code>\n"; + *OS << "</template>"; + } + + // Write the CFG diagram, a graph of basic blocks. + // Laying out graphs is hard, so we construct a graphviz description and shell + // out to `dot` to turn it into an SVG. + void writeCFG() { + *OS << "<template data-copy='cfg'>\n"; + if (auto SVG = renderSVG(buildCFGDot(CFG->getCFG()))) + *OS << *SVG; + else + *OS << "Can't draw CFG: " << toString(SVG.takeError()); + *OS << "</template>\n"; + } + + // Produce a graphviz description of a CFG. + static std::string buildCFGDot(const clang::CFG &CFG) { + std::string Graph; + llvm::raw_string_ostream GraphS(Graph); + // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses. + GraphS << R"(digraph { + tooltip=" " + node[class=bb, shape=square, fontname="sans-serif", tooltip=" "] + edge[tooltip = " "] +)"; + for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) + GraphS << " " << blockID(I) << " [id=" << blockID(I) << "]\n"; + for (const auto *Block : CFG) { + for (const auto &Succ : Block->succs()) { + GraphS << " " << blockID(Block->getBlockID()) << " -> " + << blockID(Succ.getReachableBlock()->getBlockID()) << "\n"; + } + } + GraphS << "}\n"; + return Graph; + } +}; + +// Nothing interesting here, just subprocess/temp-file plumbing. +llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) { + std::string DotPath; + if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT")) + DotPath = FromEnv; + else { + auto FromPath = llvm::sys::findProgramByName("dot"); + if (!FromPath) + return llvm::createStringError(FromPath.getError(), + "'dot' not found on PATH"); + DotPath = FromPath.get(); + } + + // Create input and output files for `dot` subprocess. + // (We create the output file as empty, to reserve the temp filename). + llvm::SmallString<256> Input, Output; + int InputFD; + if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD, + Input)) + return llvm::createStringError(EC, "failed to create `dot` temp input"); + llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph; + auto DeleteInput = + llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); }); + if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output)) + return llvm::createStringError(EC, "failed to create `dot` temp output"); + auto DeleteOutput = + llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); }); + + std::vector<std::optional<llvm::StringRef>> Redirects = { + Input, Output, + /*stderr=*/std::nullopt}; + std::string ErrMsg; + int Code = llvm::sys::ExecuteAndWait( + DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects, + /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); + if (!ErrMsg.empty()) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "'dot' failed: " + ErrMsg); + if (Code != 0) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "'dot' failed (" + llvm::Twine(Code) + ")"); + + auto Buf = llvm::MemoryBuffer::getFile(Output); + if (!Buf) + return llvm::createStringError(Buf.getError(), "Can't read `dot` output"); + + // Output has <?xml> prefix we don't want. Skip to <svg> tag. + llvm::StringRef Result = Buf.get()->getBuffer(); + auto Pos = Result.find("<svg"); + if (Pos == llvm::StringRef::npos) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "Can't find <svg> tag in `dot` output"); + return Result.substr(Pos).str(); +} + +} // namespace + +std::unique_ptr<Logger> +Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) { + return std::make_unique<HTMLLogger>(std::move(Streams)); +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.css b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.css new file mode 100644 index 000000000000..c8212df1f94b --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.css @@ -0,0 +1,142 @@ +/*===-- HTMLLogger.css ----------------------------------------------------=== +* +* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +* See https://llvm.org/LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +* +*===----------------------------------------------------------------------===*/ +html { font-family: sans-serif; } +body { margin: 0; display: flex; justify-content: left; } +body > * { box-sizing: border-box; } +body > section { + border: 1px solid black; + min-width: 20em; + overflow: auto; + max-height: 100vh; +} +section header { + background-color: #008; + color: white; + font-weight: bold; + font-size: large; +} +section h2 { + font-size: medium; + margin-bottom: 0.5em; + padding-top: 0.5em; + border-top: 1px solid #aaa; +} +#timeline { + min-width: 0; +} +#timeline .entry.hover { + background-color: #aaa; +} +#timeline .entry.iter-select { + background-color: #aac; +} + +#bb-elements { + font-family: monospace; + font-size: x-small; + border-collapse: collapse; +} +#bb-elements td:nth-child(1) { + text-align: right; + width: 4em; + border-right: 1px solid #008; + padding: 0.3em 0.5em; + + font-weight: bold; + color: #888; +}; +#bb-elements tr.hover { + background-color: #abc; +} +#bb-elements tr.elt-select { + background-color: #acf; +} +#iterations { + display: flex; +} +#iterations .chooser { + flex-grow: 1; + text-align: center; +} +#iterations .chooser:not(.iter-select).hover { + background-color: #aaa; +} +#iterations .iter-select { + font-weight: bold; + background-color: #ccc; +} +#iterations .chooser:not(.iter-select) { + text-decoration: underline; + color: blue; +} + +code.filename { + font-weight: bold; + color: black; + background-color: #ccc; + display: block; + text-align: center; +} +code.line { + display: block; + white-space: pre; +} +code.line:before { /* line numbers */ + content: attr(data-line); + display: inline-block; + width: 2em; + text-align: right; + padding-right: 2px; + background-color: #ccc; + border-right: 1px solid #888; + margin-right: 8px; +} +code.line:has(.bb-select):before { + border-right: 4px solid black; + margin-right: 5px; +} +.c.hover, .bb.hover { + filter: saturate(200%) brightness(90%); +} +.c.elt-select { + box-shadow: inset 0 -4px 2px -2px #a00; +} +.bb.bb-select polygon { + stroke-width: 4px; + filter: brightness(70%) saturate(150%); +} +.bb { user-select: none; } +.bb polygon { fill: white; } +#cfg { + position: relative; + margin-left: 0.5em; +} + +.value { + border: 1px solid #888; + font-size: x-small; + flex-grow: 1; +} +.value summary { + background-color: #ace; + display: flex; + justify-content: space-between; +} +.value .address { + font-size: xx-small; + font-family: monospace; + color: #888; +} +.value .property { + display: flex; + margin-top: 0.5em; +} +.value .property .key { + font-weight: bold; + min-width: 5em; +} diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.html b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.html new file mode 100644 index 000000000000..a60259a99cce --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.html @@ -0,0 +1,107 @@ +<!doctype html> +<html> +<!-- HTMLLogger.cpp ---------------------------------------------------- + + Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + See https://llvm.org/LICENSE.txt for license information. + SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +//===------------------------------------------------------------------------> + +<head> +<?INJECT?> + +<template id="value-template"> + <details class="value" open> + <summary> + <span>{{v.kind}} + <template data-if="v.value_id"><span class="address">#{{v.value_id}}</span></template> + </span> + <template data-if="v.location"> + <span>{{v.type}} <span class="address">@{{v.location}}</span></span> + </template> + </summary> + <template + data-for="kv in Object.entries(v)" + data-if="['kind', 'value_id', 'type', 'location'].indexOf(kv[0]) < 0"> + <div class="property"><span class="key">{{kv[0]}}</span> + <template data-if="typeof(kv[1]) != 'object'">{{kv[1]}}</template> + <template data-if="typeof(kv[1]) == 'object'" data-let="v = kv[1]"> + <template data-use="value-template"></template> + </template> + </div> + </template> + </details> +</template> + +</head> + +<body> + +<section id="timeline" data-selection=""> +<header>Timeline</header> +<template data-for="entry in timeline"> + <div id="{{entry.block}}:{{entry.iter}}" data-bb="{{entry.block}}" class="entry">{{entry.block}} ({{entry.iter}})</div> +</template> +</section> + +<section id="function" data-selection=""> +<header>Function</header> +<div id="code"></div> +<div id="cfg"></div> +</section> + +<section id="block" data-selection="bb"> +<header><template>Block {{selection.bb}}</template></header> +<div id="iterations"> + <template data-for="i in Array(cfg[selection.bb].iters).keys()"> + <a class="chooser {{selection.bb}}:{{i+1}}" data-iter="{{selection.bb}}:{{i+1}}">Iteration {{i+1}}</a> + </template> +</div> +<table id="bb-elements"> +<template> + <tr id="{{selection.bb}}.0"> + <td class="{{selection.bb}}">{{selection.bb}}.0</td> + <td>(initial state)</td> + </tr> +</template> +<template data-for="elt in cfg[selection.bb].elements"> + <tr id="{{selection.bb}}.{{elt_index+1}}"> + <td class="{{selection.bb}}">{{selection.bb}}.{{elt_index+1}}</td> + <td>{{elt}}</td> + </tr> +</template> +</table> +</section> + +<section id="element" data-selection="iter,elt"> +<template data-let="state = states[selection.iter + '_' + selection.elt]"> +<header> + <template data-if="state.element == 0">{{state.block}} (iteration {{state.iter}}) initial state</template> + <template data-if="state.element != 0">Element {{selection.elt}} (iteration {{state.iter}})</template> +</header> +<template data-if="state.value" data-let="v = state.value"> + <h2>Value</h2> + <template data-use="value-template"></template> +</template> +<template data-if="state.logs"> + <h2>Logs</h2> + <pre>{{state.logs}}</pre> +</template> +<h2>Built-in lattice</h2> +<pre>{{state.builtinLattice}}</pre> +</template> +</section> + +<script> +addBBColors(Object.keys(HTMLLoggerData.cfg).length); +watchSelection(HTMLLoggerData); +updateSelection({}, HTMLLoggerData); +// Copy code and cfg from <template>s into the body. +for (tmpl of document.querySelectorAll('template[data-copy]')) + document.getElementById(tmpl.dataset.copy).replaceChildren( + ...tmpl.content.cloneNode(/*deep=*/true).childNodes); +</script> + +</body> +</html> diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.js b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.js new file mode 100644 index 000000000000..6e04bc00f663 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.js @@ -0,0 +1,219 @@ +//===-- HTMLLogger.js -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Based on selected objects, hide/show sections & populate data from templates. +// +// For example, if the selection is {bb="BB4", elt="BB4.6" iter="BB4:2"}: +// - show the "block" and "element" sections +// - re-render templates within these sections (if selection changed) +// - apply "bb-select" to items with class class "BB4", etc +let selection = {}; +function updateSelection(changes, data) { + Object.assign(selection, changes); + + data = Object.create(data); + data.selection = selection; + for (root of document.querySelectorAll('[data-selection]')) + updateSection(root, data); + + for (var k in changes) + applyClassIf(k + '-select', classSelector(changes[k])); +} + +// Given <section data-selection="x,y">: +// - hide section if selections x or y are null +// - re-render templates if x or y have changed +function updateSection(root, data) { + let changed = root.selection == null; + root.selection ||= {}; + for (key of root.dataset.selection.split(',')) { + if (!key) continue; + if (data.selection[key] != root.selection[key]) { + root.selection[key] = data.selection[key]; + changed = true; + } + if (data.selection[key] == null) { + root.hidden = true; + return; + } + } + if (changed) { + root.hidden = false; + for (tmpl of root.getElementsByTagName('template')) + reinflate(tmpl, data); + } +} + +// Expands template `tmpl` based on input `data`: +// - interpolates {{expressions}} in text and attributes +// - <template> tags can modify expansion: if, for etc +// Outputs to `parent` element, inserting before `next`. +function inflate(tmpl, data, parent, next) { + // We use eval() as our expression language in templates! + // The templates are static and trusted. + let evalExpr = (expr, data) => eval('with (data) { ' + expr + ' }'); + let interpolate = (str, data) => + str.replace(/\{\{(.*?)\}\}/g, (_, expr) => evalExpr(expr, data)) + // Anything other than <template> tag: copy, interpolate, recursively inflate. + if (tmpl.nodeName != 'TEMPLATE') { + let clone = tmpl.cloneNode(); + clone.inflated = true; + if (clone instanceof Text) + clone.textContent = interpolate(clone.textContent, data); + if (clone instanceof Element) { + for (attr of clone.attributes) + attr.value = interpolate(attr.value, data); + for (c of tmpl.childNodes) + inflate(c, data, clone, /*next=*/null); + } + return parent.insertBefore(clone, next); + } + // data-use="xyz": use <template id="xyz"> instead. (Allows recursion.) + if ('use' in tmpl.dataset) + return inflate(document.getElementById(tmpl.dataset.use), data, parent, next); + // <template> tag handling. Base case: recursively inflate. + function handle(data) { + for (c of tmpl.content.childNodes) + inflate(c, data, parent, next); + } + // Directives on <template> tags modify behavior. + const directives = { + // data-for="x in expr": expr is enumerable, bind x to each in turn + 'for': (nameInExpr, data, proceed) => { + let [name, expr] = nameInExpr.split(' in '); + let newData = Object.create(data); + let index = 0; + for (val of evalExpr(expr, data) || []) { + newData[name] = val; + newData[name + '_index'] = index++; + proceed(newData); + } + }, + // data-if="expr": only include contents if expression is truthy + 'if': (expr, data, proceed) => { if (evalExpr(expr, data)) proceed(data); }, + // data-let="x = expr": bind x to value of expr + 'let': (nameEqExpr, data, proceed) => { + let [name, expr] = nameEqExpr.split(' = '); + let newData = Object.create(data); + newData[name] = evalExpr(expr, data); + proceed(newData); + }, + } + // Compose directive handlers on top of the base handler. + for (let [dir, value] of Object.entries(tmpl.dataset).reverse()) { + if (dir in directives) { + let proceed = handle; + handle = (data) => directives[dir](value, data, proceed); + } + } + handle(data); +} +// Expand a template, after first removing any prior expansion of it. +function reinflate(tmpl, data) { + // Clear previously rendered template contents. + while (tmpl.nextSibling && tmpl.nextSibling.inflated) + tmpl.parentNode.removeChild(tmpl.nextSibling); + inflate(tmpl, data, tmpl.parentNode, tmpl.nextSibling); +} + +// Handle a mouse event on a region containing selectable items. +// This might end up changing the hover state or the selection state. +// +// targetSelector describes what target HTML element is selectable. +// targetToID specifies how to determine the selection from it: +// hover: a function from target to the class name to highlight +// bb: a function from target to the basic-block name to select (BB4) +// elt: a function from target to the CFG element name to select (BB4.5) +// iter: a function from target to the BB iteration to select (BB4:2) +// If an entry is missing, the selection is unmodified. +// If an entry is null, the selection is always cleared. +function mouseEventHandler(event, targetSelector, targetToID, data) { + var target = event.type == "mouseout" ? null : event.target.closest(targetSelector); + let selTarget = k => (target && targetToID[k]) ? targetToID[k](target) : null; + if (event.type == "click") { + let newSel = {}; + for (var k in targetToID) { + if (k == 'hover') continue; + let t = selTarget(k); + newSel[k] = t; + } + updateSelection(newSel, data); + } else if ("hover" in targetToID) { + applyClassIf("hover", classSelector(selTarget("hover"))); + } +} +function watch(rootSelector, targetSelector, targetToID, data) { + var root = document.querySelector(rootSelector); + for (event of ['mouseout', 'mousemove', 'click']) + root.addEventListener(event, e => mouseEventHandler(e, targetSelector, targetToID, data)); +} +function watchSelection(data) { + let lastIter = (bb) => `${bb}:${data.cfg[bb].iters}`; + watch('#code', '.c', { + hover: e => e.dataset.elt, + bb: e => e.dataset.bb, + elt: e => e.dataset.elt, + // If we're already viewing an iteration of this BB, stick with the same. + iter: e => (selection.iter && selection.bb == e.dataset.bb) ? selection.iter : lastIter(e.dataset.bb), + }, data); + watch('#cfg', '.bb', { + hover: e => e.id, + bb: e => e.id, + elt: e => e.id + ".0", + iter: e => lastIter(e.id), + }, data); + watch('#timeline', '.entry', { + hover: e => [e.id, e.dataset.bb], + bb: e => e.dataset.bb, + elt: e => e.dataset.bb + ".0", + iter: e => e.id, + }, data); + watch('#bb-elements', 'tr', { + hover: e => e.id, + elt: e => e.id, + }, data); + watch('#iterations', '.chooser', { + hover: e => e.dataset.iter, + iter: e => e.dataset.iter, + }, data); + updateSelection({}, data); +} +function applyClassIf(cls, query) { + document.querySelectorAll('.' + cls).forEach(elt => elt.classList.remove(cls)); + document.querySelectorAll(query).forEach(elt => elt.classList.add(cls)); +} +// Turns a class name into a CSS selector matching it, with some wrinkles: +// - we treat id="foo" just like class="foo" to avoid repetition in the HTML +// - cls can be an array of strings, we match them all +function classSelector(cls) { + if (cls == null) return null; + if (Array.isArray(cls)) return cls.map(classSelector).join(', '); + var escaped = cls.replace('.', '\\.').replace(':', '\\:'); + // don't require id="foo" class="foo" + return '.' + escaped + ", #" + escaped; +} + +// Add a stylesheet defining colors for n basic blocks. +function addBBColors(n) { + let sheet = new CSSStyleSheet(); + // hex values to subtract from fff to get a base color + options = [0x001, 0x010, 0x011, 0x100, 0x101, 0x110, 0x111]; + function color(hex) { + return "#" + hex.toString(16).padStart(3, "0"); + } + function add(selector, property, hex) { + sheet.insertRule(`${selector} { ${property}: ${color(hex)}; }`) + } + for (var i = 0; i < n; ++i) { + let opt = options[i%options.length]; + add(`.B${i}`, 'background-color', 0xfff - 2*opt); + add(`#B${i} polygon`, 'fill', 0xfff - 2*opt); + add(`#B${i} polygon`, 'stroke', 0x888 - 4*opt); + } + document.adoptedStyleSheets.push(sheet); +} diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Logger.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Logger.cpp new file mode 100644 index 000000000000..469fea338e45 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Logger.cpp @@ -0,0 +1,108 @@ +//===-- Logger.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Logger.h" +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "llvm/Support/WithColor.h" + +namespace clang::dataflow { + +Logger &Logger::null() { + struct NullLogger final : Logger {}; + static auto *Instance = new NullLogger(); + return *Instance; +} + +namespace { +struct TextualLogger final : Logger { + llvm::raw_ostream &OS; + const CFG *CurrentCFG; + const CFGBlock *CurrentBlock; + const CFGElement *CurrentElement; + unsigned CurrentElementIndex; + bool ShowColors; + llvm::DenseMap<const CFGBlock *, unsigned> VisitCount; + TypeErasedDataflowAnalysis *CurrentAnalysis; + + TextualLogger(llvm::raw_ostream &OS) + : OS(OS), ShowColors(llvm::WithColor::defaultAutoDetectFunction()(OS)) {} + + virtual void beginAnalysis(const ControlFlowContext &CFG, + TypeErasedDataflowAnalysis &Analysis) override { + { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + OS << "=== Beginning data flow analysis ===\n"; + } + if (auto *D = CFG.getDecl()) { + D->print(OS); + OS << "\n"; + D->dump(OS); + } + CurrentCFG = &CFG.getCFG(); + CurrentCFG->print(OS, Analysis.getASTContext().getLangOpts(), ShowColors); + CurrentAnalysis = &Analysis; + } + virtual void endAnalysis() override { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + unsigned Blocks = 0, Steps = 0; + for (const auto &E : VisitCount) { + ++Blocks; + Steps += E.second; + } + llvm::errs() << "=== Finished analysis: " << Blocks << " blocks in " + << Steps << " total steps ===\n"; + } + virtual void enterBlock(const CFGBlock &Block) override { + unsigned Count = ++VisitCount[&Block]; + { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + OS << "=== Entering block B" << Block.getBlockID() << " (iteration " + << Count << ") ===\n"; + } + Block.print(OS, CurrentCFG, CurrentAnalysis->getASTContext().getLangOpts(), + ShowColors); + CurrentBlock = &Block; + CurrentElement = nullptr; + CurrentElementIndex = 0; + } + virtual void enterElement(const CFGElement &Element) override { + ++CurrentElementIndex; + CurrentElement = ∈ + { + llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN, + /*Bold=*/true); + OS << "Processing element B" << CurrentBlock->getBlockID() << "." + << CurrentElementIndex << ": "; + Element.dumpToStream(OS); + } + } + void recordState(TypeErasedDataflowAnalysisState &State) override { + { + llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN, + /*Bold=*/true); + OS << "Computed state for B" << CurrentBlock->getBlockID() << "." + << CurrentElementIndex << ":\n"; + } + // FIXME: currently the environment dump is verbose and unenlightening. + // FIXME: dump the user-defined lattice, too. + State.Env.dump(OS); + OS << "\n"; + } + void blockConverged() override { + OS << "B" << CurrentBlock->getBlockID() << " has converged!\n"; + } + virtual void logText(llvm::StringRef S) override { OS << S << "\n"; } +}; +} // namespace + +std::unique_ptr<Logger> Logger::textual(llvm::raw_ostream &OS) { + return std::make_unique<TextualLogger>(OS); +} + +} // namespace clang::dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp index f457964fb132..895f4ff04a17 100644 --- a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp @@ -50,8 +50,8 @@ bool isCheckLikeMethod(llvm::SmallDenseSet<const CXXMethodDecl *> &CheckDecls, return CheckDecls.contains(&D); } -bool ChromiumCheckModel::transfer(const CFGElement *Element, Environment &Env) { - auto CS = Element->getAs<CFGStmt>(); +bool ChromiumCheckModel::transfer(const CFGElement &Element, Environment &Env) { + auto CS = Element.getAs<CFGStmt>(); if (!CS) return false; auto Stmt = CS->getStmt(); @@ -59,7 +59,7 @@ bool ChromiumCheckModel::transfer(const CFGElement *Element, Environment &Env) { if (const auto *M = dyn_cast<CXXMethodDecl>(Call->getDirectCallee())) { if (isCheckLikeMethod(CheckDecls, *M)) { // Mark this branch as unreachable. - Env.addToFlowCondition(Env.getBoolLiteralValue(false)); + Env.addToFlowCondition(Env.arena().makeLiteral(false)); return true; } } diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index 308dc25dad1f..b0a8667f3fe5 100644 --- a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -18,9 +18,11 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/Stmt.h" #include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchersMacros.h" #include "clang/Analysis/CFG.h" #include "clang/Analysis/FlowSensitive/CFGMatchSwitch.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/Formula.h" #include "clang/Analysis/FlowSensitive/NoopLattice.h" #include "clang/Analysis/FlowSensitive/StorageLocation.h" #include "clang/Analysis/FlowSensitive/Value.h" @@ -36,16 +38,45 @@ namespace clang { namespace dataflow { + +static bool isTopLevelNamespaceWithName(const NamespaceDecl &NS, + llvm::StringRef Name) { + return NS.getDeclName().isIdentifier() && NS.getName() == Name && + NS.getParent() != nullptr && NS.getParent()->isTranslationUnit(); +} + +static bool hasOptionalClassName(const CXXRecordDecl &RD) { + if (!RD.getDeclName().isIdentifier()) + return false; + + if (RD.getName() == "optional") { + if (const auto *N = dyn_cast_or_null<NamespaceDecl>(RD.getDeclContext())) + return N->isStdNamespace() || isTopLevelNamespaceWithName(*N, "absl"); + return false; + } + + if (RD.getName() == "Optional") { + // Check whether namespace is "::base" or "::folly". + const auto *N = dyn_cast_or_null<NamespaceDecl>(RD.getDeclContext()); + return N != nullptr && (isTopLevelNamespaceWithName(*N, "base") || + isTopLevelNamespaceWithName(*N, "folly")); + } + + return false; +} + namespace { using namespace ::clang::ast_matchers; using LatticeTransferState = TransferState<NoopLattice>; +AST_MATCHER(CXXRecordDecl, hasOptionalClassNameMatcher) { + return hasOptionalClassName(Node); +} + DeclarationMatcher optionalClass() { return classTemplateSpecializationDecl( - anyOf(hasName("std::optional"), hasName("std::__optional_storage_base"), - hasName("__optional_destruct_base"), hasName("absl::optional"), - hasName("base::Optional")), + hasOptionalClassNameMatcher(), hasTemplateArgument(0, refersToType(type().bind("T")))); } @@ -57,14 +88,16 @@ auto optionalOrAliasType() { /// Matches any of the spellings of the optional types and sugar, aliases, etc. auto hasOptionalType() { return hasType(optionalOrAliasType()); } -auto isOptionalMemberCallWithName( - llvm::StringRef MemberName, +auto isOptionalMemberCallWithNameMatcher( + ast_matchers::internal::Matcher<NamedDecl> matcher, const std::optional<StatementMatcher> &Ignorable = std::nullopt) { auto Exception = unless(Ignorable ? expr(anyOf(*Ignorable, cxxThisExpr())) : cxxThisExpr()); return cxxMemberCallExpr( - on(expr(Exception)), - callee(cxxMethodDecl(hasName(MemberName), ofClass(optionalClass())))); + on(expr(Exception, + anyOf(hasOptionalType(), + hasType(pointerType(pointee(optionalOrAliasType())))))), + callee(cxxMethodDecl(matcher))); } auto isOptionalOperatorCallWithName( @@ -77,15 +110,15 @@ auto isOptionalOperatorCallWithName( } auto isMakeOptionalCall() { - return callExpr( - callee(functionDecl(hasAnyName( - "std::make_optional", "base::make_optional", "absl::make_optional"))), - hasOptionalType()); + return callExpr(callee(functionDecl(hasAnyName( + "std::make_optional", "base::make_optional", + "absl::make_optional", "folly::make_optional"))), + hasOptionalType()); } auto nulloptTypeDecl() { - return namedDecl( - hasAnyName("std::nullopt_t", "absl::nullopt_t", "base::nullopt_t")); + return namedDecl(hasAnyName("std::nullopt_t", "absl::nullopt_t", + "base::nullopt_t", "folly::None")); } auto hasNulloptType() { return hasType(nulloptTypeDecl()); } @@ -96,10 +129,9 @@ auto hasAnyOptionalType() { recordType(hasDeclaration(anyOf(nulloptTypeDecl(), optionalClass()))))); } - auto inPlaceClass() { - return recordDecl( - hasAnyName("std::in_place_t", "absl::in_place_t", "base::in_place_t")); + return recordDecl(hasAnyName("std::in_place_t", "absl::in_place_t", + "base::in_place_t", "folly::in_place_t")); } auto isOptionalNulloptConstructor() { @@ -149,6 +181,11 @@ auto isStdSwapCall() { hasArgument(1, hasOptionalType())); } +auto isStdForwardCall() { + return callExpr(callee(functionDecl(hasName("std::forward"))), + argumentCountIs(1), hasArgument(0, hasOptionalType())); +} + constexpr llvm::StringLiteral ValueOrCallID = "ValueOrCall"; auto isValueOrStringEmptyCall() { @@ -199,17 +236,17 @@ auto isComparisonOperatorCall(L lhs_arg_matcher, R rhs_arg_matcher) { hasArgument(1, rhs_arg_matcher)); } -// Ensures that `Expr` is mapped to a `BoolValue` and returns it. -BoolValue &forceBoolValue(Environment &Env, const Expr &Expr) { +/// Ensures that `Expr` is mapped to a `BoolValue` and returns its formula. +const Formula &forceBoolValue(Environment &Env, const Expr &Expr) { auto *Value = cast_or_null<BoolValue>(Env.getValue(Expr, SkipPast::None)); if (Value != nullptr) - return *Value; + return Value->formula(); auto &Loc = Env.createStorageLocation(Expr); Value = &Env.makeAtomicBoolValue(); Env.setValue(Loc, *Value); Env.setStorageLocation(Expr, Loc); - return *Value; + return Value->formula(); } /// Sets `HasValueVal` as the symbolic value that represents the "has_value" @@ -218,12 +255,15 @@ void setHasValue(Value &OptionalVal, BoolValue &HasValueVal) { OptionalVal.setProperty("has_value", HasValueVal); } -/// Creates a symbolic value for an `optional` value using `HasValueVal` as the -/// symbolic value of its "has_value" property. -StructValue &createOptionalValue(Environment &Env, BoolValue &HasValueVal) { - auto OptionalVal = std::make_unique<StructValue>(); - setHasValue(*OptionalVal, HasValueVal); - return Env.takeOwnership(std::move(OptionalVal)); +/// Creates a symbolic value for an `optional` value at an existing storage +/// location. Uses `HasValueVal` as the symbolic value of the "has_value" +/// property. +StructValue &createOptionalValue(AggregateStorageLocation &Loc, + BoolValue &HasValueVal, Environment &Env) { + auto &OptionalVal = Env.create<StructValue>(Loc); + Env.setValue(Loc, OptionalVal); + setHasValue(OptionalVal, HasValueVal); + return OptionalVal; } /// Returns the symbolic value that represents the "has_value" property of the @@ -241,20 +281,12 @@ BoolValue *getHasValue(Environment &Env, Value *OptionalVal) { return nullptr; } -/// If `Type` is a reference type, returns the type of its pointee. Otherwise, -/// returns `Type` itself. -QualType stripReference(QualType Type) { - return Type->isReferenceType() ? Type->getPointeeType() : Type; -} - /// Returns true if and only if `Type` is an optional type. bool isOptionalType(QualType Type) { if (!Type->isRecordType()) return false; - // FIXME: Optimize this by avoiding the `getQualifiedNameAsString` call. - auto TypeName = Type->getAsCXXRecordDecl()->getQualifiedNameAsString(); - return TypeName == "std::optional" || TypeName == "absl::optional" || - TypeName == "base::Optional"; + const CXXRecordDecl *D = Type->getAsCXXRecordDecl(); + return D != nullptr && hasOptionalClassName(*D); } /// Returns the number of optional wrappers in `Type`. @@ -280,40 +312,91 @@ StorageLocation *maybeInitializeOptionalValueMember(QualType Q, Environment &Env) { // The "value" property represents a synthetic field. As such, it needs // `StorageLocation`, like normal fields (and other variables). So, we model - // it with a `ReferenceValue`, since that includes a storage location. Once + // it with a `PointerValue`, since that includes a storage location. Once // the property is set, it will be shared by all environments that access the // `Value` representing the optional (here, `OptionalVal`). if (auto *ValueProp = OptionalVal.getProperty("value")) { - auto *ValueRef = clang::cast<ReferenceValue>(ValueProp); - auto &ValueLoc = ValueRef->getReferentLoc(); - if (Env.getValue(ValueLoc) == nullptr) { - // The property was previously set, but the value has been lost. This can - // happen, for example, because of an environment merge (where the two - // environments mapped the property to different values, which resulted in - // them both being discarded), or when two blocks in the CFG, with neither - // a dominator of the other, visit the same optional value, or even when a - // block is revisited during testing to collect per-statement state. - // FIXME: This situation means that the optional contents are not shared - // between branches and the like. Practically, this lack of sharing - // reduces the precision of the model when the contents are relevant to - // the check, like another optional or a boolean that influences control - // flow. + auto *ValuePtr = clang::cast<PointerValue>(ValueProp); + auto &ValueLoc = ValuePtr->getPointeeLoc(); + if (Env.getValue(ValueLoc) != nullptr) + return &ValueLoc; + + // The property was previously set, but the value has been lost. This can + // happen in various situations, for example: + // - Because of an environment merge (where the two environments mapped the + // property to different values, which resulted in them both being + // discarded). + // - When two blocks in the CFG, with neither a dominator of the other, + // visit the same optional value. (FIXME: This is something we can and + // should fix -- see also the lengthy FIXME below.) + // - Or even when a block is revisited during testing to collect + // per-statement state. + // FIXME: This situation means that the optional contents are not shared + // between branches and the like. Practically, this lack of sharing + // reduces the precision of the model when the contents are relevant to + // the check, like another optional or a boolean that influences control + // flow. + if (ValueLoc.getType()->isRecordType()) { + refreshStructValue(cast<AggregateStorageLocation>(ValueLoc), Env); + return &ValueLoc; + } else { auto *ValueVal = Env.createValue(ValueLoc.getType()); if (ValueVal == nullptr) return nullptr; Env.setValue(ValueLoc, *ValueVal); + return &ValueLoc; } - return &ValueLoc; } - auto Ty = stripReference(Q); - auto *ValueVal = Env.createValue(Ty); - if (ValueVal == nullptr) - return nullptr; - auto &ValueLoc = Env.createStorageLocation(Ty); - Env.setValue(ValueLoc, *ValueVal); - auto ValueRef = std::make_unique<ReferenceValue>(ValueLoc); - OptionalVal.setProperty("value", Env.takeOwnership(std::move(ValueRef))); + auto Ty = Q.getNonReferenceType(); + auto &ValueLoc = Env.createObject(Ty); + auto &ValuePtr = Env.create<PointerValue>(ValueLoc); + // FIXME: + // The change we make to the `value` property below may become visible to + // other blocks that aren't successors of the current block and therefore + // don't see the change we made above mapping `ValueLoc` to `ValueVal`. For + // example: + // + // void target(optional<int> oo, bool b) { + // // `oo` is associated with a `StructValue` here, which we will call + // // `OptionalVal`. + // + // // The `has_value` property is set on `OptionalVal` (but not the + // // `value` property yet). + // if (!oo.has_value()) return; + // + // if (b) { + // // Let's assume we transfer the `if` branch first. + // // + // // This causes us to call `maybeInitializeOptionalValueMember()`, + // // which causes us to set the `value` property on `OptionalVal` + // // (which had not been set until this point). This `value` property + // // refers to a `PointerValue`, which in turn refers to a + // // StorageLocation` that is associated to an `IntegerValue`. + // oo.value(); + // } else { + // // Let's assume we transfer the `else` branch after the `if` branch. + // // + // // We see the `value` property that the `if` branch set on + // // `OptionalVal`, but in the environment for this block, the + // // `StorageLocation` in the `PointerValue` is not associated with any + // // `Value`. + // oo.value(); + // } + // } + // + // This situation is currently "saved" by the code above that checks whether + // the `value` property is already set, and if, the `ValueLoc` is not + // associated with a `ValueVal`, creates a new `ValueVal`. + // + // However, what we should really do is to make sure that the change to the + // `value` property does not "leak" to other blocks that are not successors + // of this block. To do this, instead of simply setting the `value` property + // on the existing `OptionalVal`, we should create a new `Value` for the + // optional, set the property on that, and associate the storage location that + // is currently associated with the existing `OptionalVal` with the newly + // created `Value` instead. + OptionalVal.setProperty("value", ValuePtr); return &ValueLoc; } @@ -329,26 +412,34 @@ void initializeOptionalReference(const Expr *OptionalExpr, } /// Returns true if and only if `OptionalVal` is initialized and known to be -/// empty in `Env. +/// empty in `Env`. bool isEmptyOptional(const Value &OptionalVal, const Environment &Env) { auto *HasValueVal = cast_or_null<BoolValue>(OptionalVal.getProperty("has_value")); return HasValueVal != nullptr && - Env.flowConditionImplies(Env.makeNot(*HasValueVal)); + Env.flowConditionImplies(Env.arena().makeNot(HasValueVal->formula())); } /// Returns true if and only if `OptionalVal` is initialized and known to be -/// non-empty in `Env. +/// non-empty in `Env`. bool isNonEmptyOptional(const Value &OptionalVal, const Environment &Env) { auto *HasValueVal = cast_or_null<BoolValue>(OptionalVal.getProperty("has_value")); - return HasValueVal != nullptr && Env.flowConditionImplies(*HasValueVal); + return HasValueVal != nullptr && + Env.flowConditionImplies(HasValueVal->formula()); +} + +Value *getValueBehindPossiblePointer(const Expr &E, const Environment &Env) { + Value *Val = Env.getValue(E, SkipPast::Reference); + if (auto *PointerVal = dyn_cast_or_null<PointerValue>(Val)) + return Env.getValue(PointerVal->getPointeeLoc()); + return Val; } void transferUnwrapCall(const Expr *UnwrapExpr, const Expr *ObjectExpr, LatticeTransferState &State) { if (auto *OptionalVal = - State.Env.getValue(*ObjectExpr, SkipPast::ReferenceThenPointer)) { + getValueBehindPossiblePointer(*ObjectExpr, State.Env)) { if (State.Env.getStorageLocation(*UnwrapExpr, SkipPast::None) == nullptr) if (auto *Loc = maybeInitializeOptionalValueMember( UnwrapExpr->getType(), *OptionalVal, State.Env)) @@ -356,21 +447,31 @@ void transferUnwrapCall(const Expr *UnwrapExpr, const Expr *ObjectExpr, } } +void transferArrowOpCall(const Expr *UnwrapExpr, const Expr *ObjectExpr, + LatticeTransferState &State) { + if (auto *OptionalVal = + getValueBehindPossiblePointer(*ObjectExpr, State.Env)) { + if (auto *Loc = maybeInitializeOptionalValueMember( + UnwrapExpr->getType()->getPointeeType(), *OptionalVal, State.Env)) { + State.Env.setValueStrict(*UnwrapExpr, + State.Env.create<PointerValue>(*Loc)); + } + } +} + void transferMakeOptionalCall(const CallExpr *E, const MatchFinder::MatchResult &, LatticeTransferState &State) { - auto &Loc = State.Env.createStorageLocation(*E); - State.Env.setStorageLocation(*E, Loc); - State.Env.setValue( - Loc, createOptionalValue(State.Env, State.Env.getBoolLiteralValue(true))); + createOptionalValue(State.Env.getResultObjectLocation(*E), + State.Env.getBoolLiteralValue(true), State.Env); } void transferOptionalHasValueCall(const CXXMemberCallExpr *CallExpr, const MatchFinder::MatchResult &, LatticeTransferState &State) { if (auto *HasValueVal = getHasValue( - State.Env, State.Env.getValue(*CallExpr->getImplicitObjectArgument(), - SkipPast::ReferenceThenPointer))) { + State.Env, getValueBehindPossiblePointer( + *CallExpr->getImplicitObjectArgument(), State.Env))) { auto &CallExprLoc = State.Env.createStorageLocation(*CallExpr); State.Env.setValue(CallExprLoc, *HasValueVal); State.Env.setStorageLocation(*CallExpr, CallExprLoc); @@ -379,12 +480,11 @@ void transferOptionalHasValueCall(const CXXMemberCallExpr *CallExpr, /// `ModelPred` builds a logical formula relating the predicate in /// `ValueOrPredExpr` to the optional's `has_value` property. -void transferValueOrImpl(const clang::Expr *ValueOrPredExpr, - const MatchFinder::MatchResult &Result, - LatticeTransferState &State, - BoolValue &(*ModelPred)(Environment &Env, - BoolValue &ExprVal, - BoolValue &HasValueVal)) { +void transferValueOrImpl( + const clang::Expr *ValueOrPredExpr, const MatchFinder::MatchResult &Result, + LatticeTransferState &State, + const Formula &(*ModelPred)(Environment &Env, const Formula &ExprVal, + const Formula &HasValueVal)) { auto &Env = State.Env; const auto *ObjectArgumentExpr = @@ -392,29 +492,29 @@ void transferValueOrImpl(const clang::Expr *ValueOrPredExpr, ->getImplicitObjectArgument(); auto *HasValueVal = getHasValue( - State.Env, - State.Env.getValue(*ObjectArgumentExpr, SkipPast::ReferenceThenPointer)); + State.Env, getValueBehindPossiblePointer(*ObjectArgumentExpr, State.Env)); if (HasValueVal == nullptr) return; - Env.addToFlowCondition( - ModelPred(Env, forceBoolValue(Env, *ValueOrPredExpr), *HasValueVal)); + Env.addToFlowCondition(ModelPred(Env, forceBoolValue(Env, *ValueOrPredExpr), + HasValueVal->formula())); } void transferValueOrStringEmptyCall(const clang::Expr *ComparisonExpr, const MatchFinder::MatchResult &Result, LatticeTransferState &State) { return transferValueOrImpl(ComparisonExpr, Result, State, - [](Environment &Env, BoolValue &ExprVal, - BoolValue &HasValueVal) -> BoolValue & { + [](Environment &Env, const Formula &ExprVal, + const Formula &HasValueVal) -> const Formula & { + auto &A = Env.arena(); // If the result is *not* empty, then we know the // optional must have been holding a value. If // `ExprVal` is true, though, we don't learn // anything definite about `has_value`, so we // don't add any corresponding implications to // the flow condition. - return Env.makeImplication(Env.makeNot(ExprVal), - HasValueVal); + return A.makeImplies(A.makeNot(ExprVal), + HasValueVal); }); } @@ -422,12 +522,13 @@ void transferValueOrNotEqX(const Expr *ComparisonExpr, const MatchFinder::MatchResult &Result, LatticeTransferState &State) { transferValueOrImpl(ComparisonExpr, Result, State, - [](Environment &Env, BoolValue &ExprVal, - BoolValue &HasValueVal) -> BoolValue & { + [](Environment &Env, const Formula &ExprVal, + const Formula &HasValueVal) -> const Formula & { + auto &A = Env.arena(); // We know that if `(opt.value_or(X) != X)` then // `opt.hasValue()`, even without knowing further // details about the contents of `opt`. - return Env.makeImplication(ExprVal, HasValueVal); + return A.makeImplies(ExprVal, HasValueVal); }); } @@ -437,18 +538,21 @@ void transferCallReturningOptional(const CallExpr *E, if (State.Env.getStorageLocation(*E, SkipPast::None) != nullptr) return; - auto &Loc = State.Env.createStorageLocation(*E); - State.Env.setStorageLocation(*E, Loc); - State.Env.setValue( - Loc, createOptionalValue(State.Env, State.Env.makeAtomicBoolValue())); + AggregateStorageLocation *Loc = nullptr; + if (E->isPRValue()) { + Loc = &State.Env.getResultObjectLocation(*E); + } else { + Loc = &cast<AggregateStorageLocation>(State.Env.createStorageLocation(*E)); + State.Env.setStorageLocationStrict(*E, *Loc); + } + + createOptionalValue(*Loc, State.Env.makeAtomicBoolValue(), State.Env); } -void assignOptionalValue(const Expr &E, Environment &Env, - BoolValue &HasValueVal) { - if (auto *OptionalLoc = - Env.getStorageLocation(E, SkipPast::ReferenceThenPointer)) { - Env.setValue(*OptionalLoc, createOptionalValue(Env, HasValueVal)); - } +void constructOptionalValue(const Expr &E, Environment &Env, + BoolValue &HasValueVal) { + AggregateStorageLocation &Loc = Env.getResultObjectLocation(E); + Env.setValueStrict(E, createOptionalValue(Loc, HasValueVal, Env)); } /// Returns a symbolic value for the "has_value" property of an `optional<T>` @@ -460,11 +564,13 @@ BoolValue &valueOrConversionHasValue(const FunctionDecl &F, const Expr &E, assert(F.getTemplateSpecializationArgs() != nullptr); assert(F.getTemplateSpecializationArgs()->size() > 0); - const int TemplateParamOptionalWrappersCount = countOptionalWrappers( - *MatchRes.Context, - stripReference(F.getTemplateSpecializationArgs()->get(0).getAsType())); - const int ArgTypeOptionalWrappersCount = - countOptionalWrappers(*MatchRes.Context, stripReference(E.getType())); + const int TemplateParamOptionalWrappersCount = + countOptionalWrappers(*MatchRes.Context, F.getTemplateSpecializationArgs() + ->get(0) + .getAsType() + .getNonReferenceType()); + const int ArgTypeOptionalWrappersCount = countOptionalWrappers( + *MatchRes.Context, E.getType().getNonReferenceType()); // Check if this is a constructor/assignment call for `optional<T>` with // argument of type `U` such that `T` is constructible from `U`. @@ -484,25 +590,23 @@ void transferValueOrConversionConstructor( LatticeTransferState &State) { assert(E->getNumArgs() > 0); - assignOptionalValue(*E, State.Env, - valueOrConversionHasValue(*E->getConstructor(), - *E->getArg(0), MatchRes, - State)); + constructOptionalValue(*E, State.Env, + valueOrConversionHasValue(*E->getConstructor(), + *E->getArg(0), MatchRes, + State)); } void transferAssignment(const CXXOperatorCallExpr *E, BoolValue &HasValueVal, LatticeTransferState &State) { assert(E->getNumArgs() > 0); - auto *OptionalLoc = - State.Env.getStorageLocation(*E->getArg(0), SkipPast::Reference); - if (OptionalLoc == nullptr) - return; + if (auto *Loc = cast<AggregateStorageLocation>( + State.Env.getStorageLocationStrict(*E->getArg(0)))) { + createOptionalValue(*Loc, HasValueVal, State.Env); - State.Env.setValue(*OptionalLoc, createOptionalValue(State.Env, HasValueVal)); - - // Assign a storage location for the whole expression. - State.Env.setStorageLocation(*E, *OptionalLoc); + // Assign a storage location for the whole expression. + State.Env.setStorageLocationStrict(*E, *Loc); + } } void transferValueOrConversionAssignment( @@ -521,52 +625,69 @@ void transferNulloptAssignment(const CXXOperatorCallExpr *E, transferAssignment(E, State.Env.getBoolLiteralValue(false), State); } -void transferSwap(const StorageLocation &OptionalLoc1, - const StorageLocation &OptionalLoc2, - LatticeTransferState &State) { - auto *OptionalVal1 = State.Env.getValue(OptionalLoc1); - assert(OptionalVal1 != nullptr); +void transferSwap(AggregateStorageLocation *Loc1, + AggregateStorageLocation *Loc2, Environment &Env) { + // We account for cases where one or both of the optionals are not modeled, + // either lacking associated storage locations, or lacking values associated + // to such storage locations. - auto *OptionalVal2 = State.Env.getValue(OptionalLoc2); - assert(OptionalVal2 != nullptr); + if (Loc1 == nullptr) { + if (Loc2 != nullptr) + createOptionalValue(*Loc2, Env.makeAtomicBoolValue(), Env); + return; + } + if (Loc2 == nullptr) { + createOptionalValue(*Loc1, Env.makeAtomicBoolValue(), Env); + return; + } - State.Env.setValue(OptionalLoc1, *OptionalVal2); - State.Env.setValue(OptionalLoc2, *OptionalVal1); + // Both expressions have locations, though they may not have corresponding + // values. In that case, we create a fresh value at this point. Note that if + // two branches both do this, they will not share the value, but it at least + // allows for local reasoning about the value. To avoid the above, we would + // need *lazy* value allocation. + // FIXME: allocate values lazily, instead of just creating a fresh value. + BoolValue *BoolVal1 = getHasValue(Env, Env.getValue(*Loc1)); + if (BoolVal1 == nullptr) + BoolVal1 = &Env.makeAtomicBoolValue(); + + BoolValue *BoolVal2 = getHasValue(Env, Env.getValue(*Loc2)); + if (BoolVal2 == nullptr) + BoolVal2 = &Env.makeAtomicBoolValue(); + + createOptionalValue(*Loc1, *BoolVal2, Env); + createOptionalValue(*Loc2, *BoolVal1, Env); } void transferSwapCall(const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, LatticeTransferState &State) { assert(E->getNumArgs() == 1); - - auto *OptionalLoc1 = State.Env.getStorageLocation( - *E->getImplicitObjectArgument(), SkipPast::ReferenceThenPointer); - assert(OptionalLoc1 != nullptr); - - auto *OptionalLoc2 = - State.Env.getStorageLocation(*E->getArg(0), SkipPast::Reference); - assert(OptionalLoc2 != nullptr); - - transferSwap(*OptionalLoc1, *OptionalLoc2, State); + auto *OtherLoc = cast_or_null<AggregateStorageLocation>( + State.Env.getStorageLocationStrict(*E->getArg(0))); + transferSwap(getImplicitObjectLocation(*E, State.Env), OtherLoc, State.Env); } void transferStdSwapCall(const CallExpr *E, const MatchFinder::MatchResult &, LatticeTransferState &State) { assert(E->getNumArgs() == 2); + auto *Arg0Loc = cast_or_null<AggregateStorageLocation>( + State.Env.getStorageLocationStrict(*E->getArg(0))); + auto *Arg1Loc = cast_or_null<AggregateStorageLocation>( + State.Env.getStorageLocationStrict(*E->getArg(1))); + transferSwap(Arg0Loc, Arg1Loc, State.Env); +} - auto *OptionalLoc1 = - State.Env.getStorageLocation(*E->getArg(0), SkipPast::Reference); - assert(OptionalLoc1 != nullptr); - - auto *OptionalLoc2 = - State.Env.getStorageLocation(*E->getArg(1), SkipPast::Reference); - assert(OptionalLoc2 != nullptr); +void transferStdForwardCall(const CallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assert(E->getNumArgs() == 1); - transferSwap(*OptionalLoc1, *OptionalLoc2, State); + if (auto *Loc = State.Env.getStorageLocationStrict(*E->getArg(0))) + State.Env.setStorageLocationStrict(*E, *Loc); } -BoolValue &evaluateEquality(Environment &Env, BoolValue &EqVal, BoolValue &LHS, - BoolValue &RHS) { +const Formula &evaluateEquality(Arena &A, const Formula &EqVal, + const Formula &LHS, const Formula &RHS) { // Logically, an optional<T> object is composed of two values - a `has_value` // bit and a value of type T. Equality of optional objects compares both // values. Therefore, merely comparing the `has_value` bits isn't sufficient: @@ -581,37 +702,38 @@ BoolValue &evaluateEquality(Environment &Env, BoolValue &EqVal, BoolValue &LHS, // b) (!LHS & !RHS) => EqVal // If neither is set, then they are equal. // We rewrite b) as !EqVal => (LHS v RHS), for a more compact formula. - return Env.makeAnd( - Env.makeImplication( - EqVal, Env.makeOr(Env.makeAnd(LHS, RHS), - Env.makeAnd(Env.makeNot(LHS), Env.makeNot(RHS)))), - Env.makeImplication(Env.makeNot(EqVal), Env.makeOr(LHS, RHS))); + return A.makeAnd( + A.makeImplies(EqVal, A.makeOr(A.makeAnd(LHS, RHS), + A.makeAnd(A.makeNot(LHS), A.makeNot(RHS)))), + A.makeImplies(A.makeNot(EqVal), A.makeOr(LHS, RHS))); } void transferOptionalAndOptionalCmp(const clang::CXXOperatorCallExpr *CmpExpr, const MatchFinder::MatchResult &, LatticeTransferState &State) { Environment &Env = State.Env; + auto &A = Env.arena(); auto *CmpValue = &forceBoolValue(Env, *CmpExpr); if (auto *LHasVal = getHasValue( Env, Env.getValue(*CmpExpr->getArg(0), SkipPast::Reference))) if (auto *RHasVal = getHasValue( Env, Env.getValue(*CmpExpr->getArg(1), SkipPast::Reference))) { if (CmpExpr->getOperator() == clang::OO_ExclaimEqual) - CmpValue = &State.Env.makeNot(*CmpValue); - Env.addToFlowCondition( - evaluateEquality(Env, *CmpValue, *LHasVal, *RHasVal)); + CmpValue = &A.makeNot(*CmpValue); + Env.addToFlowCondition(evaluateEquality(A, *CmpValue, LHasVal->formula(), + RHasVal->formula())); } } void transferOptionalAndValueCmp(const clang::CXXOperatorCallExpr *CmpExpr, const clang::Expr *E, Environment &Env) { + auto &A = Env.arena(); auto *CmpValue = &forceBoolValue(Env, *CmpExpr); if (auto *HasVal = getHasValue(Env, Env.getValue(*E, SkipPast::Reference))) { if (CmpExpr->getOperator() == clang::OO_ExclaimEqual) - CmpValue = &Env.makeNot(*CmpValue); - Env.addToFlowCondition(evaluateEquality(Env, *CmpValue, *HasVal, - Env.getBoolLiteralValue(true))); + CmpValue = &A.makeNot(*CmpValue); + Env.addToFlowCondition( + evaluateEquality(A, *CmpValue, HasVal->formula(), A.makeLiteral(true))); } } @@ -629,7 +751,8 @@ ignorableOptional(const UncheckedOptionalAccessModelOptions &Options) { StatementMatcher valueCall(const std::optional<StatementMatcher> &IgnorableOptional) { - return isOptionalMemberCallWithName("value", IgnorableOptional); + return isOptionalMemberCallWithNameMatcher(hasName("value"), + IgnorableOptional); } StatementMatcher @@ -657,30 +780,29 @@ auto buildTransferMatchSwitch() { isOptionalInPlaceConstructor(), [](const CXXConstructExpr *E, const MatchFinder::MatchResult &, LatticeTransferState &State) { - assignOptionalValue(*E, State.Env, - State.Env.getBoolLiteralValue(true)); + constructOptionalValue(*E, State.Env, + State.Env.getBoolLiteralValue(true)); }) // nullopt_t::nullopt_t .CaseOfCFGStmt<CXXConstructExpr>( isNulloptConstructor(), [](const CXXConstructExpr *E, const MatchFinder::MatchResult &, LatticeTransferState &State) { - assignOptionalValue(*E, State.Env, - State.Env.getBoolLiteralValue(false)); + constructOptionalValue(*E, State.Env, + State.Env.getBoolLiteralValue(false)); }) // optional::optional(nullopt_t) .CaseOfCFGStmt<CXXConstructExpr>( isOptionalNulloptConstructor(), [](const CXXConstructExpr *E, const MatchFinder::MatchResult &, LatticeTransferState &State) { - assignOptionalValue(*E, State.Env, - State.Env.getBoolLiteralValue(false)); + constructOptionalValue(*E, State.Env, + State.Env.getBoolLiteralValue(false)); }) // optional::optional (value/conversion) .CaseOfCFGStmt<CXXConstructExpr>(isOptionalValueOrConversionConstructor(), transferValueOrConversionConstructor) - // optional::operator= .CaseOfCFGStmt<CXXOperatorCallExpr>( isOptionalValueOrConversionAssignment(), @@ -696,49 +818,70 @@ auto buildTransferMatchSwitch() { transferUnwrapCall(E, E->getImplicitObjectArgument(), State); }) - // optional::operator*, optional::operator-> - .CaseOfCFGStmt<CallExpr>(valueOperatorCall(std::nullopt), + // optional::operator* + .CaseOfCFGStmt<CallExpr>(isOptionalOperatorCallWithName("*"), [](const CallExpr *E, const MatchFinder::MatchResult &, LatticeTransferState &State) { transferUnwrapCall(E, E->getArg(0), State); }) - // optional::has_value + // optional::operator-> + .CaseOfCFGStmt<CallExpr>(isOptionalOperatorCallWithName("->"), + [](const CallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferArrowOpCall(E, E->getArg(0), State); + }) + + // optional::has_value, optional::hasValue + // Of the supported optionals only folly::Optional uses hasValue, but this + // will also pass for other types .CaseOfCFGStmt<CXXMemberCallExpr>( - isOptionalMemberCallWithName("has_value"), + isOptionalMemberCallWithNameMatcher( + hasAnyName("has_value", "hasValue")), transferOptionalHasValueCall) // optional::operator bool .CaseOfCFGStmt<CXXMemberCallExpr>( - isOptionalMemberCallWithName("operator bool"), + isOptionalMemberCallWithNameMatcher(hasName("operator bool")), transferOptionalHasValueCall) // optional::emplace .CaseOfCFGStmt<CXXMemberCallExpr>( - isOptionalMemberCallWithName("emplace"), + isOptionalMemberCallWithNameMatcher(hasName("emplace")), [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, LatticeTransferState &State) { - assignOptionalValue(*E->getImplicitObjectArgument(), State.Env, - State.Env.getBoolLiteralValue(true)); + if (AggregateStorageLocation *Loc = + getImplicitObjectLocation(*E, State.Env)) { + createOptionalValue(*Loc, State.Env.getBoolLiteralValue(true), + State.Env); + } }) // optional::reset .CaseOfCFGStmt<CXXMemberCallExpr>( - isOptionalMemberCallWithName("reset"), + isOptionalMemberCallWithNameMatcher(hasName("reset")), [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, LatticeTransferState &State) { - assignOptionalValue(*E->getImplicitObjectArgument(), State.Env, - State.Env.getBoolLiteralValue(false)); + if (AggregateStorageLocation *Loc = + getImplicitObjectLocation(*E, State.Env)) { + createOptionalValue(*Loc, State.Env.getBoolLiteralValue(false), + State.Env); + } }) // optional::swap - .CaseOfCFGStmt<CXXMemberCallExpr>(isOptionalMemberCallWithName("swap"), - transferSwapCall) + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithNameMatcher(hasName("swap")), + transferSwapCall) // std::swap .CaseOfCFGStmt<CallExpr>(isStdSwapCall(), transferStdSwapCall) + // std::forward + .CaseOfCFGStmt<CallExpr>(isStdForwardCall(), transferStdForwardCall) + // opt.value_or("").empty() .CaseOfCFGStmt<Expr>(isValueOrStringEmptyCall(), transferValueOrStringEmptyCall) @@ -772,14 +915,12 @@ auto buildTransferMatchSwitch() { .Build(); } -std::vector<SourceLocation> diagnoseUnwrapCall(const Expr *UnwrapExpr, - const Expr *ObjectExpr, +std::vector<SourceLocation> diagnoseUnwrapCall(const Expr *ObjectExpr, const Environment &Env) { - if (auto *OptionalVal = - Env.getValue(*ObjectExpr, SkipPast::ReferenceThenPointer)) { + if (auto *OptionalVal = getValueBehindPossiblePointer(*ObjectExpr, Env)) { auto *Prop = OptionalVal->getProperty("has_value"); if (auto *HasValueVal = cast_or_null<BoolValue>(Prop)) { - if (Env.flowConditionImplies(*HasValueVal)) + if (Env.flowConditionImplies(HasValueVal->formula())) return {}; } } @@ -802,16 +943,16 @@ auto buildDiagnoseMatchSwitch( valueCall(IgnorableOptional), [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, const Environment &Env) { - return diagnoseUnwrapCall(E, E->getImplicitObjectArgument(), Env); + return diagnoseUnwrapCall(E->getImplicitObjectArgument(), Env); }) // optional::operator*, optional::operator-> - .CaseOfCFGStmt<CallExpr>( - valueOperatorCall(IgnorableOptional), - [](const CallExpr *E, const MatchFinder::MatchResult &, - const Environment &Env) { - return diagnoseUnwrapCall(E, E->getArg(0), Env); - }) + .CaseOfCFGStmt<CallExpr>(valueOperatorCall(IgnorableOptional), + [](const CallExpr *E, + const MatchFinder::MatchResult &, + const Environment &Env) { + return diagnoseUnwrapCall(E->getArg(0), Env); + }) .Build(); } @@ -826,10 +967,10 @@ UncheckedOptionalAccessModel::UncheckedOptionalAccessModel(ASTContext &Ctx) : DataflowAnalysis<UncheckedOptionalAccessModel, NoopLattice>(Ctx), TransferMatchSwitch(buildTransferMatchSwitch()) {} -void UncheckedOptionalAccessModel::transfer(const CFGElement *Elt, +void UncheckedOptionalAccessModel::transfer(const CFGElement &Elt, NoopLattice &L, Environment &Env) { LatticeTransferState State(L, Env); - TransferMatchSwitch(*Elt, getASTContext(), State); + TransferMatchSwitch(Elt, getASTContext(), State); } ComparisonResult UncheckedOptionalAccessModel::compare( @@ -839,10 +980,12 @@ ComparisonResult UncheckedOptionalAccessModel::compare( return ComparisonResult::Unknown; bool MustNonEmpty1 = isNonEmptyOptional(Val1, Env1); bool MustNonEmpty2 = isNonEmptyOptional(Val2, Env2); - if (MustNonEmpty1 && MustNonEmpty2) return ComparisonResult::Same; + if (MustNonEmpty1 && MustNonEmpty2) + return ComparisonResult::Same; // If exactly one is true, then they're different, no reason to check whether // they're definitely empty. - if (MustNonEmpty1 || MustNonEmpty2) return ComparisonResult::Different; + if (MustNonEmpty1 || MustNonEmpty2) + return ComparisonResult::Different; // Check if they're both definitely empty. return (isEmptyOptional(Val1, Env1) && isEmptyOptional(Val2, Env2)) ? ComparisonResult::Same @@ -863,13 +1006,14 @@ bool UncheckedOptionalAccessModel::merge(QualType Type, const Value &Val1, bool MustNonEmpty1 = isNonEmptyOptional(Val1, Env1); bool MustNonEmpty2 = isNonEmptyOptional(Val2, Env2); if (MustNonEmpty1 && MustNonEmpty2) - MergedEnv.addToFlowCondition(HasValueVal); + MergedEnv.addToFlowCondition(HasValueVal.formula()); else if ( // Only make the costly calls to `isEmptyOptional` if we got "unknown" // (false) for both calls to `isNonEmptyOptional`. !MustNonEmpty1 && !MustNonEmpty2 && isEmptyOptional(Val1, Env1) && isEmptyOptional(Val2, Env2)) - MergedEnv.addToFlowCondition(MergedEnv.makeNot(HasValueVal)); + MergedEnv.addToFlowCondition( + MergedEnv.arena().makeNot(HasValueVal.formula())); setHasValue(MergedVal, HasValueVal); return true; } @@ -892,7 +1036,8 @@ Value *UncheckedOptionalAccessModel::widen(QualType Type, Value &Prev, if (isa<TopBoolValue>(CurrentHasVal)) return &Current; } - return &createOptionalValue(CurrentEnv, CurrentEnv.makeTopBoolValue()); + return &createOptionalValue(cast<StructValue>(Current).getAggregateLoc(), + CurrentEnv.makeTopBoolValue(), CurrentEnv); case ComparisonResult::Unknown: return nullptr; } diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/RecordOps.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/RecordOps.cpp new file mode 100644 index 000000000000..60144531c251 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/RecordOps.cpp @@ -0,0 +1,117 @@ +//===-- RecordOps.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Operations on records (structs, classes, and unions). +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/RecordOps.h" + +#define DEBUG_TYPE "dataflow" + +void clang::dataflow::copyRecord(AggregateStorageLocation &Src, + AggregateStorageLocation &Dst, + Environment &Env) { + LLVM_DEBUG({ + if (Dst.getType().getCanonicalType().getUnqualifiedType() != + Src.getType().getCanonicalType().getUnqualifiedType()) { + llvm::dbgs() << "Source type " << Src.getType() << "\n"; + llvm::dbgs() << "Destination type " << Dst.getType() << "\n"; + } + }); + assert(Dst.getType().getCanonicalType().getUnqualifiedType() == + Src.getType().getCanonicalType().getUnqualifiedType()); + + for (auto [Field, SrcFieldLoc] : Src.children()) { + StorageLocation *DstFieldLoc = Dst.getChild(*Field); + + assert(Field->getType()->isReferenceType() || + (SrcFieldLoc != nullptr && DstFieldLoc != nullptr)); + + if (Field->getType()->isRecordType()) { + copyRecord(cast<AggregateStorageLocation>(*SrcFieldLoc), + cast<AggregateStorageLocation>(*DstFieldLoc), Env); + } else if (Field->getType()->isReferenceType()) { + Dst.setChild(*Field, SrcFieldLoc); + } else { + if (Value *Val = Env.getValue(*SrcFieldLoc)) + Env.setValue(*DstFieldLoc, *Val); + else + Env.clearValue(*DstFieldLoc); + } + } + + StructValue *SrcVal = cast_or_null<StructValue>(Env.getValue(Src)); + StructValue *DstVal = cast_or_null<StructValue>(Env.getValue(Dst)); + + DstVal = &Env.create<StructValue>(Dst); + Env.setValue(Dst, *DstVal); + + if (SrcVal == nullptr) + return; + + for (const auto &[Name, Value] : SrcVal->properties()) { + if (Value != nullptr) + DstVal->setProperty(Name, *Value); + } +} + +bool clang::dataflow::recordsEqual(const AggregateStorageLocation &Loc1, + const Environment &Env1, + const AggregateStorageLocation &Loc2, + const Environment &Env2) { + LLVM_DEBUG({ + if (Loc2.getType().getCanonicalType().getUnqualifiedType() != + Loc1.getType().getCanonicalType().getUnqualifiedType()) { + llvm::dbgs() << "Loc1 type " << Loc1.getType() << "\n"; + llvm::dbgs() << "Loc2 type " << Loc2.getType() << "\n"; + } + }); + assert(Loc2.getType().getCanonicalType().getUnqualifiedType() == + Loc1.getType().getCanonicalType().getUnqualifiedType()); + + for (auto [Field, FieldLoc1] : Loc1.children()) { + StorageLocation *FieldLoc2 = Loc2.getChild(*Field); + + assert(Field->getType()->isReferenceType() || + (FieldLoc1 != nullptr && FieldLoc2 != nullptr)); + + if (Field->getType()->isRecordType()) { + if (!recordsEqual(cast<AggregateStorageLocation>(*FieldLoc1), Env1, + cast<AggregateStorageLocation>(*FieldLoc2), Env2)) + return false; + } else if (Field->getType()->isReferenceType()) { + if (FieldLoc1 != FieldLoc2) + return false; + } else if (Env1.getValue(*FieldLoc1) != Env2.getValue(*FieldLoc2)) { + return false; + } + } + + llvm::StringMap<Value *> Props1, Props2; + + if (StructValue *Val1 = cast_or_null<StructValue>(Env1.getValue(Loc1))) + for (const auto &[Name, Value] : Val1->properties()) + Props1[Name] = Value; + if (StructValue *Val2 = cast_or_null<StructValue>(Env2.getValue(Loc2))) + for (const auto &[Name, Value] : Val2->properties()) + Props2[Name] = Value; + + if (Props1.size() != Props2.size()) + return false; + + for (const auto &[Name, Value] : Props1) { + auto It = Props2.find(Name); + if (It == Props2.end()) + return false; + if (Value != It->second) + return false; + } + + return true; +} diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp index 0e6c484b67e7..39faeca4b45c 100644 --- a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -23,6 +23,7 @@ #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/NoopAnalysis.h" +#include "clang/Analysis/FlowSensitive/RecordOps.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/OperatorKinds.h" @@ -36,83 +37,44 @@ namespace clang { namespace dataflow { -static BoolValue &evaluateBooleanEquality(const Expr &LHS, const Expr &RHS, - Environment &Env) { - if (auto *LHSValue = - dyn_cast_or_null<BoolValue>(Env.getValue(LHS, SkipPast::Reference))) - if (auto *RHSValue = - dyn_cast_or_null<BoolValue>(Env.getValue(RHS, SkipPast::Reference))) - return Env.makeIff(*LHSValue, *RHSValue); - - return Env.makeAtomicBoolValue(); +const Environment *StmtToEnvMap::getEnvironment(const Stmt &S) const { + auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S)); + assert(BlockIt != CFCtx.getStmtToBlock().end()); + if (!CFCtx.isBlockReachable(*BlockIt->getSecond())) + return nullptr; + const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()]; + assert(State); + return &State->Env; } -// Functionally updates `V` such that any instances of `TopBool` are replaced -// with fresh atomic bools. Note: This implementation assumes that `B` is a -// tree; if `B` is a DAG, it will lose any sharing between subvalues that was -// present in the original . -static BoolValue &unpackValue(BoolValue &V, Environment &Env); +static BoolValue &evaluateBooleanEquality(const Expr &LHS, const Expr &RHS, + Environment &Env) { + Value *LHSValue = Env.getValueStrict(LHS); + Value *RHSValue = Env.getValueStrict(RHS); -template <typename Derived, typename M> -BoolValue &unpackBinaryBoolValue(Environment &Env, BoolValue &B, M build) { - auto &V = *cast<Derived>(&B); - BoolValue &Left = V.getLeftSubValue(); - BoolValue &Right = V.getRightSubValue(); - BoolValue &ULeft = unpackValue(Left, Env); - BoolValue &URight = unpackValue(Right, Env); + if (LHSValue == RHSValue) + return Env.getBoolLiteralValue(true); - if (&ULeft == &Left && &URight == &Right) - return V; + if (auto *LHSBool = dyn_cast_or_null<BoolValue>(LHSValue)) + if (auto *RHSBool = dyn_cast_or_null<BoolValue>(RHSValue)) + return Env.makeIff(*LHSBool, *RHSBool); - return (Env.*build)(ULeft, URight); + return Env.makeAtomicBoolValue(); } static BoolValue &unpackValue(BoolValue &V, Environment &Env) { - switch (V.getKind()) { - case Value::Kind::Integer: - case Value::Kind::Reference: - case Value::Kind::Pointer: - case Value::Kind::Struct: - llvm_unreachable("BoolValue cannot have any of these kinds."); - - case Value::Kind::AtomicBool: - return V; - - case Value::Kind::TopBool: - // Unpack `TopBool` into a fresh atomic bool. - return Env.makeAtomicBoolValue(); - - case Value::Kind::Negation: { - auto &N = *cast<NegationValue>(&V); - BoolValue &Sub = N.getSubVal(); - BoolValue &USub = unpackValue(Sub, Env); - - if (&USub == &Sub) - return V; - return Env.makeNot(USub); - } - case Value::Kind::Conjunction: - return unpackBinaryBoolValue<ConjunctionValue>(Env, V, - &Environment::makeAnd); - case Value::Kind::Disjunction: - return unpackBinaryBoolValue<DisjunctionValue>(Env, V, - &Environment::makeOr); - case Value::Kind::Implication: - return unpackBinaryBoolValue<ImplicationValue>( - Env, V, &Environment::makeImplication); - case Value::Kind::Biconditional: - return unpackBinaryBoolValue<BiconditionalValue>(Env, V, - &Environment::makeIff); + if (auto *Top = llvm::dyn_cast<TopBoolValue>(&V)) { + auto &A = Env.getDataflowAnalysisContext().arena(); + return A.makeBoolValue(A.makeAtomRef(Top->getAtom())); } - llvm_unreachable("All reachable cases in switch return"); + return V; } // Unpacks the value (if any) associated with `E` and updates `E` to the new -// value, if any unpacking occured. +// value, if any unpacking occured. Also, does the lvalue-to-rvalue conversion, +// by skipping past the reference. static Value *maybeUnpackLValueExpr(const Expr &E, Environment &Env) { - // FIXME: this is too flexible: it _allows_ a reference, while it should - // _require_ one, since lvalues should always be wrapped in `ReferenceValue`. - auto *Loc = Env.getStorageLocation(E, SkipPast::Reference); + auto *Loc = Env.getStorageLocationStrict(E); if (Loc == nullptr) return nullptr; auto *Val = Env.getValue(*Loc); @@ -128,6 +90,31 @@ static Value *maybeUnpackLValueExpr(const Expr &E, Environment &Env) { return &UnpackedVal; } +static void propagateValue(const Expr &From, const Expr &To, Environment &Env) { + if (auto *Val = Env.getValueStrict(From)) + Env.setValueStrict(To, *Val); +} + +static void propagateStorageLocation(const Expr &From, const Expr &To, + Environment &Env) { + if (auto *Loc = Env.getStorageLocationStrict(From)) + Env.setStorageLocationStrict(To, *Loc); +} + +// Propagates the value or storage location of `From` to `To` in cases where +// `From` may be either a glvalue or a prvalue. `To` must be a glvalue iff +// `From` is a glvalue. +static void propagateValueOrStorageLocation(const Expr &From, const Expr &To, + Environment &Env) { + assert(From.isGLValue() == To.isGLValue()); + if (From.isGLValue()) + propagateStorageLocation(From, To, Env); + else + propagateValue(From, To, Env); +} + +namespace { + class TransferVisitor : public ConstStmtVisitor<TransferVisitor> { public: TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env) @@ -142,11 +129,11 @@ public: switch (S->getOpcode()) { case BO_Assign: { - auto *LHSLoc = Env.getStorageLocation(*LHS, SkipPast::Reference); + auto *LHSLoc = Env.getStorageLocationStrict(*LHS); if (LHSLoc == nullptr) break; - auto *RHSVal = Env.getValue(*RHS, SkipPast::Reference); + auto *RHSVal = Env.getValueStrict(*RHS); if (RHSVal == nullptr) break; @@ -159,11 +146,12 @@ public: } case BO_LAnd: case BO_LOr: { + auto &Loc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, Loc); + BoolValue &LHSVal = getLogicOperatorSubExprValue(*LHS); BoolValue &RHSVal = getLogicOperatorSubExprValue(*RHS); - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); if (S->getOpcode() == BO_LAnd) Env.setValue(Loc, Env.makeAnd(LHSVal, RHSVal)); else @@ -180,8 +168,7 @@ public: break; } case BO_Comma: { - if (auto *Loc = Env.getStorageLocation(*RHS, SkipPast::None)) - Env.setStorageLocation(*S, *Loc); + propagateValueOrStorageLocation(*RHS, *S, Env); break; } default: @@ -192,20 +179,20 @@ public: void VisitDeclRefExpr(const DeclRefExpr *S) { const ValueDecl *VD = S->getDecl(); assert(VD != nullptr); - auto *DeclLoc = Env.getStorageLocation(*VD, SkipPast::None); + + // `DeclRefExpr`s to fields and non-static methods aren't glvalues, and + // there's also no sensible `Value` we can assign to them, so skip them. + if (isa<FieldDecl>(VD)) + return; + if (auto *Method = dyn_cast<CXXMethodDecl>(VD); + Method && !Method->isStatic()) + return; + + auto *DeclLoc = Env.getStorageLocation(*VD); if (DeclLoc == nullptr) return; - if (VD->getType()->isReferenceType()) { - assert(isa_and_nonnull<ReferenceValue>(Env.getValue((*DeclLoc))) && - "reference-typed declarations map to `ReferenceValue`s"); - Env.setStorageLocation(*S, *DeclLoc); - } else { - auto &Loc = Env.createStorageLocation(*S); - auto &Val = Env.takeOwnership(std::make_unique<ReferenceValue>(*DeclLoc)); - Env.setStorageLocation(*S, Loc); - Env.setValue(Loc, Val); - } + Env.setStorageLocationStrict(*S, *DeclLoc); } void VisitDeclStmt(const DeclStmt *S) { @@ -213,57 +200,27 @@ public: // is safe. const auto &D = *cast<VarDecl>(S->getSingleDecl()); + ProcessVarDecl(D); + } + + void ProcessVarDecl(const VarDecl &D) { // Static local vars are already initialized in `Environment`. if (D.hasGlobalStorage()) return; - // The storage location for `D` could have been created earlier, before the - // variable's declaration statement (for example, in the case of - // BindingDecls). - auto *MaybeLoc = Env.getStorageLocation(D, SkipPast::None); - if (MaybeLoc == nullptr) { - MaybeLoc = &Env.createStorageLocation(D); - Env.setStorageLocation(D, *MaybeLoc); - } - auto &Loc = *MaybeLoc; - - const Expr *InitExpr = D.getInit(); - if (InitExpr == nullptr) { - // No initializer expression - associate `Loc` with a new value. - if (Value *Val = Env.createValue(D.getType())) - Env.setValue(Loc, *Val); + // If this is the holding variable for a `BindingDecl`, we may already + // have a storage location set up -- so check. (See also explanation below + // where we process the `BindingDecl`.) + if (D.getType()->isReferenceType() && Env.getStorageLocation(D) != nullptr) return; - } - if (D.getType()->isReferenceType()) { - // Initializing a reference variable - do not create a reference to - // reference. - if (auto *InitExprLoc = - Env.getStorageLocation(*InitExpr, SkipPast::Reference)) { - auto &Val = - Env.takeOwnership(std::make_unique<ReferenceValue>(*InitExprLoc)); - Env.setValue(Loc, Val); - } - } else if (auto *InitExprVal = Env.getValue(*InitExpr, SkipPast::None)) { - Env.setValue(Loc, *InitExprVal); - } + assert(Env.getStorageLocation(D) == nullptr); - if (Env.getValue(Loc) == nullptr) { - // We arrive here in (the few) cases where an expression is intentionally - // "uninterpreted". There are two ways to handle this situation: propagate - // the status, so that uninterpreted initializers result in uninterpreted - // variables, or provide a default value. We choose the latter so that - // later refinements of the variable can be used for reasoning about the - // surrounding code. - // - // FIXME. If and when we interpret all language cases, change this to - // assert that `InitExpr` is interpreted, rather than supplying a default - // value (assuming we don't update the environment API to return - // references). - if (Value *Val = Env.createValue(D.getType())) - Env.setValue(Loc, *Val); - } + Env.setStorageLocation(D, Env.createObject(D)); + // `DecompositionDecl` must be handled after we've interpreted the loc + // itself, because the binding expression refers back to the + // `DecompositionDecl` (even though it has no written name). if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) { // If VarDecl is a DecompositionDecl, evaluate each of its bindings. This // needs to be evaluated after initializing the values in the storage for @@ -284,14 +241,16 @@ public: if (auto *Loc = Env.getStorageLocation(*ME, SkipPast::Reference)) Env.setStorageLocation(*B, *Loc); } else if (auto *VD = B->getHoldingVar()) { - // Holding vars are used to back the BindingDecls of tuple-like - // types. The holding var declarations appear *after* this statement, - // so we have to create a location for them here to share with `B`. We - // don't visit the binding, because we know it will be a DeclRefExpr - // to `VD`. - auto &VDLoc = Env.createStorageLocation(*VD); - Env.setStorageLocation(*VD, VDLoc); - Env.setStorageLocation(*B, VDLoc); + // Holding vars are used to back the `BindingDecl`s of tuple-like + // types. The holding var declarations appear after the + // `DecompositionDecl`, so we have to explicitly process them here + // to know their storage location. They will be processed a second + // time when we visit their `VarDecl`s, so we have code that protects + // against this above. + ProcessVarDecl(*VD); + auto *VDLoc = Env.getStorageLocation(*VD); + assert(VDLoc != nullptr); + Env.setStorageLocation(*B, *VDLoc); } } } @@ -306,21 +265,20 @@ public: // This cast creates a new, boolean value from the integral value. We // model that with a fresh value in the environment, unless it's already a // boolean. - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); - if (auto *SubExprVal = dyn_cast_or_null<BoolValue>( - Env.getValue(*SubExpr, SkipPast::Reference))) - Env.setValue(Loc, *SubExprVal); + if (auto *SubExprVal = + dyn_cast_or_null<BoolValue>(Env.getValueStrict(*SubExpr))) + Env.setValueStrict(*S, *SubExprVal); else // FIXME: If integer modeling is added, then update this code to create // the boolean based on the integer model. - Env.setValue(Loc, Env.makeAtomicBoolValue()); + Env.setValueStrict(*S, Env.makeAtomicBoolValue()); break; } case CK_LValueToRValue: { // When an L-value is used as an R-value, it may result in sharing, so we - // need to unpack any nested `Top`s. + // need to unpack any nested `Top`s. We also need to strip off the + // `ReferenceValue` associated with the lvalue. auto *SubExprVal = maybeUnpackLValueExpr(*SubExpr, Env); if (SubExprVal == nullptr) break; @@ -344,17 +302,12 @@ public: // CK_ConstructorConversion, and CK_UserDefinedConversion. case CK_NoOp: { // FIXME: Consider making `Environment::getStorageLocation` skip noop - // expressions (this and other similar expressions in the file) instead of - // assigning them storage locations. - auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); - if (SubExprLoc == nullptr) - break; - - Env.setStorageLocation(*S, *SubExprLoc); + // expressions (this and other similar expressions in the file) instead + // of assigning them storage locations. + propagateValueOrStorageLocation(*SubExpr, *S, Env); break; } - case CK_NullToPointer: - case CK_NullToMemberPointer: { + case CK_NullToPointer: { auto &Loc = Env.createStorageLocation(S->getType()); Env.setStorageLocation(*S, Loc); @@ -363,6 +316,28 @@ public: Env.setValue(Loc, NullPointerVal); break; } + case CK_NullToMemberPointer: + // FIXME: Implement pointers to members. For now, don't associate a value + // with this expression. + break; + case CK_FunctionToPointerDecay: { + StorageLocation *PointeeLoc = + Env.getStorageLocation(*SubExpr, SkipPast::Reference); + if (PointeeLoc == nullptr) + break; + + auto &PointerLoc = Env.createStorageLocation(*S); + auto &PointerVal = Env.create<PointerValue>(*PointeeLoc); + Env.setStorageLocation(*S, PointerLoc); + Env.setValue(PointerLoc, PointerVal); + break; + } + case CK_BuiltinFnToFnPtr: + // Despite its name, the result type of `BuiltinFnToFnPtr` is a function, + // not a function pointer. In addition, builtin functions can only be + // called directly; it is not legal to take their address. We therefore + // don't need to create a value or storage location for them. + break; default: break; } @@ -374,37 +349,26 @@ public: switch (S->getOpcode()) { case UO_Deref: { - // Skip past a reference to handle dereference of a dependent pointer. - const auto *SubExprVal = cast_or_null<PointerValue>( - Env.getValue(*SubExpr, SkipPast::Reference)); + const auto *SubExprVal = + cast_or_null<PointerValue>(Env.getValueStrict(*SubExpr)); if (SubExprVal == nullptr) break; - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); - Env.setValue(Loc, Env.takeOwnership(std::make_unique<ReferenceValue>( - SubExprVal->getPointeeLoc()))); + Env.setStorageLocationStrict(*S, SubExprVal->getPointeeLoc()); break; } case UO_AddrOf: { - // Do not form a pointer to a reference. If `SubExpr` is assigned a - // `ReferenceValue` then form a value that points to the location of its - // pointee. - StorageLocation *PointeeLoc = - Env.getStorageLocation(*SubExpr, SkipPast::Reference); - if (PointeeLoc == nullptr) + // FIXME: Model pointers to members. + if (S->getType()->isMemberPointerType()) break; - auto &PointerLoc = Env.createStorageLocation(*S); - auto &PointerVal = - Env.takeOwnership(std::make_unique<PointerValue>(*PointeeLoc)); - Env.setStorageLocation(*S, PointerLoc); - Env.setValue(PointerLoc, PointerVal); + if (StorageLocation *PointeeLoc = Env.getStorageLocationStrict(*SubExpr)) + Env.setValueStrict(*S, Env.create<PointerValue>(*PointeeLoc)); break; } case UO_LNot: { auto *SubExprVal = - dyn_cast_or_null<BoolValue>(Env.getValue(*SubExpr, SkipPast::None)); + dyn_cast_or_null<BoolValue>(Env.getValueStrict(*SubExpr)); if (SubExprVal == nullptr) break; @@ -427,34 +391,46 @@ public: auto &Loc = Env.createStorageLocation(*S); Env.setStorageLocation(*S, Loc); - Env.setValue(Loc, Env.takeOwnership( - std::make_unique<PointerValue>(*ThisPointeeLoc))); + Env.setValue(Loc, Env.create<PointerValue>(*ThisPointeeLoc)); + } + + void VisitCXXNewExpr(const CXXNewExpr *S) { + auto &Loc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, Loc); + if (Value *Val = Env.createValue(S->getType())) + Env.setValue(Loc, *Val); + } + + void VisitCXXDeleteExpr(const CXXDeleteExpr *S) { + // Empty method. + // We consciously don't do anything on deletes. Diagnosing double deletes + // (for example) should be done by a specific analysis, not by the + // framework. } void VisitReturnStmt(const ReturnStmt *S) { - if (!Env.getAnalysisOptions().ContextSensitiveOpts) + if (!Env.getDataflowAnalysisContext().getOptions().ContextSensitiveOpts) return; auto *Ret = S->getRetValue(); if (Ret == nullptr) return; - auto *Val = Env.getValue(*Ret, SkipPast::None); - if (Val == nullptr) - return; - - // FIXME: Support reference-type returns. - if (Val->getKind() == Value::Kind::Reference) - return; + if (Ret->isPRValue()) { + auto *Val = Env.getValueStrict(*Ret); + if (Val == nullptr) + return; - auto *Loc = Env.getReturnStorageLocation(); - assert(Loc != nullptr); - // FIXME: Support reference-type returns. - if (Loc->getType()->isReferenceType()) - return; + // FIXME: Model NRVO. + Env.setReturnValue(Val); + } else { + auto *Loc = Env.getStorageLocationStrict(*Ret); + if (Loc == nullptr) + return; - // FIXME: Model NRVO. - Env.setValue(*Loc, *Val); + // FIXME: Model NRVO. + Env.setReturnStorageLocation(Loc); + } } void VisitMemberExpr(const MemberExpr *S) { @@ -471,72 +447,29 @@ public: if (auto *D = dyn_cast<VarDecl>(Member)) { if (D->hasGlobalStorage()) { - auto *VarDeclLoc = Env.getStorageLocation(*D, SkipPast::None); + auto *VarDeclLoc = Env.getStorageLocation(*D); if (VarDeclLoc == nullptr) return; - if (VarDeclLoc->getType()->isReferenceType()) { - assert(isa_and_nonnull<ReferenceValue>(Env.getValue((*VarDeclLoc))) && - "reference-typed declarations map to `ReferenceValue`s"); - Env.setStorageLocation(*S, *VarDeclLoc); - } else { - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); - Env.setValue(Loc, Env.takeOwnership( - std::make_unique<ReferenceValue>(*VarDeclLoc))); - } + Env.setStorageLocation(*S, *VarDeclLoc); return; } } - // The receiver can be either a value or a pointer to a value. Skip past the - // indirection to handle both cases. - auto *BaseLoc = cast_or_null<AggregateStorageLocation>( - Env.getStorageLocation(*S->getBase(), SkipPast::ReferenceThenPointer)); + AggregateStorageLocation *BaseLoc = getBaseObjectLocation(*S, Env); if (BaseLoc == nullptr) return; - auto &MemberLoc = BaseLoc->getChild(*Member); - if (MemberLoc.getType()->isReferenceType()) { - // Based on its type, `MemberLoc` must be mapped either to nothing or to a - // `ReferenceValue`. For the former, we won't set a storage location for - // this expression, so as to maintain an invariant lvalue expressions; - // namely, that their location maps to a `ReferenceValue`. In this, - // lvalues are unlike other expressions, where it is valid for their - // location to map to nothing (because they are not modeled). - // - // Note: we need this invariant for lvalues so that, when accessing a - // value, we can distinguish an rvalue from an lvalue. An alternative - // design, which takes the expression's value category into account, would - // avoid the need for this invariant. - if (auto *V = Env.getValue(MemberLoc)) { - assert(isa<ReferenceValue>(V) && - "reference-typed declarations map to `ReferenceValue`s"); - Env.setStorageLocation(*S, MemberLoc); - } - } else { - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); - Env.setValue( - Loc, Env.takeOwnership(std::make_unique<ReferenceValue>(MemberLoc))); - } + auto *MemberLoc = BaseLoc->getChild(*Member); + if (MemberLoc == nullptr) + return; + Env.setStorageLocationStrict(*S, *MemberLoc); } void VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *S) { const Expr *InitExpr = S->getExpr(); assert(InitExpr != nullptr); - - Value *InitExprVal = Env.getValue(*InitExpr, SkipPast::None); - if (InitExprVal == nullptr) - return; - - const FieldDecl *Field = S->getField(); - assert(Field != nullptr); - - auto &ThisLoc = - *cast<AggregateStorageLocation>(Env.getThisPointeeStorageLocation()); - auto &FieldLoc = ThisLoc.getChild(*Field); - Env.setValue(FieldLoc, *InitExprVal); + propagateValueOrStorageLocation(*InitExpr, *S, Env); } void VisitCXXConstructExpr(const CXXConstructExpr *S) { @@ -544,29 +477,32 @@ public: assert(ConstructorDecl != nullptr); if (ConstructorDecl->isCopyOrMoveConstructor()) { - assert(S->getNumArgs() == 1); + // It is permissible for a copy/move constructor to have additional + // parameters as long as they have default arguments defined for them. + assert(S->getNumArgs() != 0); const Expr *Arg = S->getArg(0); assert(Arg != nullptr); - if (S->isElidable()) { - auto *ArgLoc = Env.getStorageLocation(*Arg, SkipPast::Reference); - if (ArgLoc == nullptr) - return; + auto *ArgLoc = cast_or_null<AggregateStorageLocation>( + Env.getStorageLocation(*Arg, SkipPast::Reference)); + if (ArgLoc == nullptr) + return; + if (S->isElidable()) { Env.setStorageLocation(*S, *ArgLoc); - } else if (auto *ArgVal = Env.getValue(*Arg, SkipPast::Reference)) { - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); - Env.setValue(Loc, *ArgVal); + } else if (auto *ArgVal = cast_or_null<StructValue>( + Env.getValue(*Arg, SkipPast::Reference))) { + auto &Val = *cast<StructValue>(Env.createValue(S->getType())); + Env.setValueStrict(*S, Val); + copyRecord(ArgVal->getAggregateLoc(), Val.getAggregateLoc(), Env); } return; } - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); - if (Value *Val = Env.createValue(S->getType())) - Env.setValue(Loc, *Val); + auto &InitialVal = *cast<StructValue>(Env.createValue(S->getType())); + copyRecord(InitialVal.getAggregateLoc(), Env.getResultObjectLocation(*S), + Env); transferInlineCall(S, ConstructorDecl); } @@ -582,25 +518,23 @@ public: assert(Arg1 != nullptr); // Evaluate only copy and move assignment operators. - auto *Arg0Type = Arg0->getType()->getUnqualifiedDesugaredType(); - auto *Arg1Type = Arg1->getType()->getUnqualifiedDesugaredType(); - if (Arg0Type != Arg1Type) + const auto *Method = + dyn_cast_or_null<CXXMethodDecl>(S->getDirectCallee()); + if (!Method) return; - - auto *ObjectLoc = Env.getStorageLocation(*Arg0, SkipPast::Reference); - if (ObjectLoc == nullptr) + if (!Method->isCopyAssignmentOperator() && + !Method->isMoveAssignmentOperator()) return; - auto *Val = Env.getValue(*Arg1, SkipPast::Reference); - if (Val == nullptr) - return; - - // Assign a value to the storage location of the object. - Env.setValue(*ObjectLoc, *Val); + auto *LocSrc = cast_or_null<AggregateStorageLocation>( + Env.getStorageLocationStrict(*Arg1)); + auto *LocDst = cast_or_null<AggregateStorageLocation>( + Env.getStorageLocationStrict(*Arg0)); - // FIXME: Add a test for the value of the whole expression. - // Assign a storage location for the whole expression. - Env.setStorageLocation(*S, *ObjectLoc); + if (LocSrc != nullptr && LocDst != nullptr) { + copyRecord(*LocSrc, *LocDst, Env); + Env.setStorageLocationStrict(*S, *LocDst); + } } } @@ -609,19 +543,13 @@ public: const Expr *SubExpr = S->getSubExpr(); assert(SubExpr != nullptr); - auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); - if (SubExprLoc == nullptr) - return; - - Env.setStorageLocation(*S, *SubExprLoc); + propagateValue(*SubExpr, *S, Env); } } void VisitCXXTemporaryObjectExpr(const CXXTemporaryObjectExpr *S) { - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); if (Value *Val = Env.createValue(S->getType())) - Env.setValue(Loc, *Val); + Env.setValueStrict(*S, *Val); } void VisitCallExpr(const CallExpr *S) { @@ -659,22 +587,25 @@ public: const Expr *SubExpr = S->getSubExpr(); assert(SubExpr != nullptr); - auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); - if (SubExprLoc == nullptr) + Value *SubExprVal = Env.getValueStrict(*SubExpr); + if (SubExprVal == nullptr) return; - Env.setStorageLocation(*S, *SubExprLoc); + if (StructValue *StructVal = dyn_cast<StructValue>(SubExprVal)) { + Env.setStorageLocation(*S, StructVal->getAggregateLoc()); + return; + } + + StorageLocation &Loc = Env.createStorageLocation(*S); + Env.setValue(Loc, *SubExprVal); + Env.setStorageLocation(*S, Loc); } void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *S) { const Expr *SubExpr = S->getSubExpr(); assert(SubExpr != nullptr); - auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); - if (SubExprLoc == nullptr) - return; - - Env.setStorageLocation(*S, *SubExprLoc); + propagateValue(*SubExpr, *S, Env); } void VisitCXXStaticCastExpr(const CXXStaticCastExpr *S) { @@ -682,11 +613,7 @@ public: const Expr *SubExpr = S->getSubExpr(); assert(SubExpr != nullptr); - auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); - if (SubExprLoc == nullptr) - return; - - Env.setStorageLocation(*S, *SubExprLoc); + propagateValueOrStorageLocation(*SubExpr, *S, Env); } } @@ -694,43 +621,52 @@ public: // FIXME: Revisit this once flow conditions are added to the framework. For // `a = b ? c : d` we can add `b => a == c && !b => a == d` to the flow // condition. - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); - if (Value *Val = Env.createValue(S->getType())) - Env.setValue(Loc, *Val); + if (S->isGLValue()) + Env.setStorageLocationStrict(*S, Env.createObject(S->getType())); + else if (Value *Val = Env.createValue(S->getType())) + Env.setValueStrict(*S, *Val); } void VisitInitListExpr(const InitListExpr *S) { QualType Type = S->getType(); - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); + if (!Type->isStructureOrClassType()) { + if (auto *Val = Env.createValue(Type)) + Env.setValueStrict(*S, *Val); - auto *Val = Env.createValue(Type); - if (Val == nullptr) return; + } - Env.setValue(Loc, *Val); - - if (Type->isStructureOrClassType()) { - for (auto It : llvm::zip(Type->getAsRecordDecl()->fields(), S->inits())) { - const FieldDecl *Field = std::get<0>(It); - assert(Field != nullptr); + std::vector<FieldDecl *> Fields = + getFieldsForInitListExpr(Type->getAsRecordDecl()); + llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; - const Expr *Init = std::get<1>(It); - assert(Init != nullptr); + for (auto [Field, Init] : llvm::zip(Fields, S->inits())) { + assert(Field != nullptr); + assert(Init != nullptr); - if (Value *InitVal = Env.getValue(*Init, SkipPast::None)) - cast<StructValue>(Val)->setChild(*Field, *InitVal); - } + FieldLocs.insert({Field, &Env.createObject(Field->getType(), Init)}); } + + auto &Loc = + Env.getDataflowAnalysisContext() + .arena() + .create<AggregateStorageLocation>(Type, std::move(FieldLocs)); + StructValue &StructVal = Env.create<StructValue>(Loc); + + Env.setValue(Loc, StructVal); + + Env.setValueStrict(*S, StructVal); + // FIXME: Implement array initialization. } void VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *S) { - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); - Env.setValue(Loc, Env.getBoolLiteralValue(S->getValue())); + Env.setValueStrict(*S, Env.getBoolLiteralValue(S->getValue())); + } + + void VisitIntegerLiteral(const IntegerLiteral *S) { + Env.setValueStrict(*S, Env.getIntLiteralValue(S->getValue())); } void VisitParenExpr(const ParenExpr *S) { @@ -752,27 +688,24 @@ public: } private: + /// Returns the value for the sub-expression `SubExpr` of a logic operator. BoolValue &getLogicOperatorSubExprValue(const Expr &SubExpr) { // `SubExpr` and its parent logic operator might be part of different basic // blocks. We try to access the value that is assigned to `SubExpr` in the // corresponding environment. - if (const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr)) { - if (auto *Val = dyn_cast_or_null<BoolValue>( - SubExprEnv->getValue(SubExpr, SkipPast::Reference))) + if (const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr)) + if (auto *Val = + dyn_cast_or_null<BoolValue>(SubExprEnv->getValueStrict(SubExpr))) return *Val; - } - if (Env.getStorageLocation(SubExpr, SkipPast::None) == nullptr) { - // Sub-expressions that are logic operators are not added in basic blocks - // (e.g. see CFG for `bool d = a && (b || c);`). If `SubExpr` is a logic - // operator, it may not have been evaluated and assigned a value yet. In - // that case, we need to first visit `SubExpr` and then try to get the - // value that gets assigned to it. + // The sub-expression may lie within a basic block that isn't reachable, + // even if we need it to evaluate the current (reachable) expression + // (see https://discourse.llvm.org/t/70775). In this case, visit `SubExpr` + // within the current environment and then try to get the value that gets + // assigned to it. + if (Env.getValueStrict(SubExpr) == nullptr) Visit(&SubExpr); - } - - if (auto *Val = dyn_cast_or_null<BoolValue>( - Env.getValue(SubExpr, SkipPast::Reference))) + if (auto *Val = dyn_cast_or_null<BoolValue>(Env.getValueStrict(SubExpr))) return *Val; // If the value of `SubExpr` is still unknown, we create a fresh symbolic @@ -784,12 +717,13 @@ private: // `F` of `S`. The type `E` must be either `CallExpr` or `CXXConstructExpr`. template <typename E> void transferInlineCall(const E *S, const FunctionDecl *F) { - const auto &Options = Env.getAnalysisOptions(); + const auto &Options = Env.getDataflowAnalysisContext().getOptions(); if (!(Options.ContextSensitiveOpts && Env.canDescend(Options.ContextSensitiveOpts->Depth, F))) return; - const ControlFlowContext *CFCtx = Env.getControlFlowContext(F); + const ControlFlowContext *CFCtx = + Env.getDataflowAnalysisContext().getControlFlowContext(F); if (!CFCtx) return; @@ -799,13 +733,6 @@ private: auto ExitBlock = CFCtx->getCFG().getExit().getBlockID(); - if (const auto *NonConstructExpr = dyn_cast<CallExpr>(S)) { - // Note that it is important for the storage location of `S` to be set - // before `pushCall`, because the latter uses it to set the storage - // location for `return`. - auto &ReturnLoc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, ReturnLoc); - } auto CalleeEnv = Env.pushCall(S); // FIXME: Use the same analysis as the caller for the callee. Note, @@ -821,16 +748,18 @@ private: assert(BlockToOutputState); assert(ExitBlock < BlockToOutputState->size()); - auto ExitState = (*BlockToOutputState)[ExitBlock]; + auto &ExitState = (*BlockToOutputState)[ExitBlock]; assert(ExitState); - Env.popCall(ExitState->Env); + Env.popCall(S, ExitState->Env); } const StmtToEnvMap &StmtToEnv; Environment &Env; }; +} // namespace + void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env) { TransferVisitor(StmtToEnv, Env).Visit(&S); } diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index b125701212c9..1b68d76ffc8c 100644 --- a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -18,6 +18,7 @@ #include <utility> #include <vector> +#include "clang/AST/ASTDumper.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/OperationKinds.h" #include "clang/AST/StmtVisitor.h" @@ -26,6 +27,7 @@ #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "clang/Analysis/FlowSensitive/RecordOps.h" #include "clang/Analysis/FlowSensitive/Transfer.h" #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" #include "clang/Analysis/FlowSensitive/Value.h" @@ -40,27 +42,6 @@ namespace clang { namespace dataflow { -class StmtToEnvMapImpl : public StmtToEnvMap { -public: - StmtToEnvMapImpl( - const ControlFlowContext &CFCtx, - llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> - BlockToState) - : CFCtx(CFCtx), BlockToState(BlockToState) {} - - const Environment *getEnvironment(const Stmt &S) const override { - auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S)); - assert(BlockIt != CFCtx.getStmtToBlock().end()); - const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()]; - assert(State); - return &State->Env; - } - -private: - const ControlFlowContext &CFCtx; - llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockToState; -}; - /// Returns the index of `Block` in the successors of `Pred`. static int blockIndexInPredecessor(const CFGBlock &Pred, const CFGBlock &Block) { @@ -85,6 +66,8 @@ static bool isLoopHead(const CFGBlock &B) { return false; } +namespace { + // The return type of the visit functions in TerminatorVisitor. The first // element represents the terminator expression (that is the conditional // expression in case of a path split in the CFG). The second element @@ -142,26 +125,17 @@ public: private: TerminatorVisitorRetTy extendFlowCondition(const Expr &Cond) { // The terminator sub-expression might not be evaluated. - if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr) + if (Env.getValueStrict(Cond) == nullptr) transfer(StmtToEnv, Cond, Env); - // FIXME: The flow condition must be an r-value, so `SkipPast::None` should - // suffice. - auto *Val = - cast_or_null<BoolValue>(Env.getValue(Cond, SkipPast::Reference)); + auto *Val = cast_or_null<BoolValue>(Env.getValueStrict(Cond)); // Value merging depends on flow conditions from different environments // being mutually exclusive -- that is, they cannot both be true in their // entirety (even if they may share some clauses). So, we need *some* value // for the condition expression, even if just an atom. if (Val == nullptr) { - // FIXME: Consider introducing a helper for this get-or-create pattern. - auto *Loc = Env.getStorageLocation(Cond, SkipPast::None); - if (Loc == nullptr) { - Loc = &Env.createStorageLocation(Cond); - Env.setStorageLocation(Cond, *Loc); - } Val = &Env.makeAtomicBoolValue(); - Env.setValue(*Loc, *Val); + Env.setValueStrict(Cond, *Val); } bool ConditionValue = true; @@ -172,7 +146,7 @@ private: ConditionValue = false; } - Env.addToFlowCondition(*Val); + Env.addToFlowCondition(Val->formula()); return {&Cond, ConditionValue}; } @@ -189,7 +163,11 @@ struct AnalysisContext { llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates) : CFCtx(CFCtx), Analysis(Analysis), InitEnv(InitEnv), - BlockStates(BlockStates) {} + Log(*InitEnv.getDataflowAnalysisContext().getOptions().Log), + BlockStates(BlockStates) { + Log.beginAnalysis(CFCtx, Analysis); + } + ~AnalysisContext() { Log.endAnalysis(); } /// Contains the CFG being analyzed. const ControlFlowContext &CFCtx; @@ -197,11 +175,97 @@ struct AnalysisContext { TypeErasedDataflowAnalysis &Analysis; /// Initial state to start the analysis. const Environment &InitEnv; + Logger &Log; /// Stores the state of a CFG block if it has been evaluated by the analysis. /// The indices correspond to the block IDs. llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates; }; +class PrettyStackTraceAnalysis : public llvm::PrettyStackTraceEntry { +public: + PrettyStackTraceAnalysis(const ControlFlowContext &CFCtx, const char *Message) + : CFCtx(CFCtx), Message(Message) {} + + void print(raw_ostream &OS) const override { + OS << Message << "\n"; + OS << "Decl:\n"; + CFCtx.getDecl()->dump(OS); + OS << "CFG:\n"; + CFCtx.getCFG().print(OS, LangOptions(), false); + } + +private: + const ControlFlowContext &CFCtx; + const char *Message; +}; + +class PrettyStackTraceCFGElement : public llvm::PrettyStackTraceEntry { +public: + PrettyStackTraceCFGElement(const CFGElement &Element, int BlockIdx, + int ElementIdx, const char *Message) + : Element(Element), BlockIdx(BlockIdx), ElementIdx(ElementIdx), + Message(Message) {} + + void print(raw_ostream &OS) const override { + OS << Message << ": Element [B" << BlockIdx << "." << ElementIdx << "]\n"; + if (auto Stmt = Element.getAs<CFGStmt>()) { + OS << "Stmt:\n"; + ASTDumper Dumper(OS, false); + Dumper.Visit(Stmt->getStmt()); + } + } + +private: + const CFGElement ∈ + int BlockIdx; + int ElementIdx; + const char *Message; +}; + +// Builds a joined TypeErasedDataflowAnalysisState from 0 or more sources, +// each of which may be owned (built as part of the join) or external (a +// reference to an Environment that will outlive the builder). +// Avoids unneccesary copies of the environment. +class JoinedStateBuilder { + AnalysisContext &AC; + std::vector<const TypeErasedDataflowAnalysisState *> All; + std::deque<TypeErasedDataflowAnalysisState> Owned; + + TypeErasedDataflowAnalysisState + join(const TypeErasedDataflowAnalysisState &L, + const TypeErasedDataflowAnalysisState &R) { + return {AC.Analysis.joinTypeErased(L.Lattice, R.Lattice), + Environment::join(L.Env, R.Env, AC.Analysis)}; + } + +public: + JoinedStateBuilder(AnalysisContext &AC) : AC(AC) {} + + void addOwned(TypeErasedDataflowAnalysisState State) { + Owned.push_back(std::move(State)); + All.push_back(&Owned.back()); + } + void addUnowned(const TypeErasedDataflowAnalysisState &State) { + All.push_back(&State); + } + TypeErasedDataflowAnalysisState take() && { + if (All.empty()) + // FIXME: Consider passing `Block` to Analysis.typeErasedInitialElement + // to enable building analyses like computation of dominators that + // initialize the state of each basic block differently. + return {AC.Analysis.typeErasedInitialElement(), AC.InitEnv.fork()}; + if (All.size() == 1) + return Owned.empty() ? All.front()->fork() : std::move(Owned.front()); + + auto Result = join(*All[0], *All[1]); + for (unsigned I = 2; I < All.size(); ++I) + Result = join(Result, *All[I]); + return Result; + } +}; + +} // namespace + /// Computes the input state for a given basic block by joining the output /// states of its predecessors. /// @@ -212,8 +276,7 @@ struct AnalysisContext { /// `std::nullopt` represent basic blocks that are not evaluated yet. static TypeErasedDataflowAnalysisState computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) { - llvm::DenseSet<const CFGBlock *> Preds; - Preds.insert(Block.pred_begin(), Block.pred_end()); + std::vector<const CFGBlock *> Preds(Block.pred_begin(), Block.pred_end()); if (Block.getTerminator().isTemporaryDtorsBranch()) { // This handles a special case where the code that produced the CFG includes // a conditional operator with a branch that constructs a temporary and @@ -237,17 +300,16 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) { // operator includes a branch that contains a noreturn destructor call. // // See `NoreturnDestructorTest` for concrete examples. - if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) { + if (Block.succ_begin()->getReachableBlock() != nullptr && + Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) { auto &StmtToBlock = AC.CFCtx.getStmtToBlock(); auto StmtBlock = StmtToBlock.find(Block.getTerminatorStmt()); assert(StmtBlock != StmtToBlock.end()); - Preds.erase(StmtBlock->getSecond()); + llvm::erase_value(Preds, StmtBlock->getSecond()); } } - std::optional<TypeErasedDataflowAnalysisState> MaybeState; - - auto &Analysis = AC.Analysis; + JoinedStateBuilder Builder(AC); for (const CFGBlock *Pred : Preds) { // Skip if the `Block` is unreachable or control flow cannot get past it. if (!Pred || Pred->hasNoReturnElement()) @@ -260,86 +322,109 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) { if (!MaybePredState) continue; - TypeErasedDataflowAnalysisState PredState = *MaybePredState; - if (Analysis.builtinOptions()) { + if (AC.Analysis.builtinOptions()) { if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) { - const StmtToEnvMapImpl StmtToEnv(AC.CFCtx, AC.BlockStates); + // We have a terminator: we need to mutate an environment to describe + // when the terminator is taken. Copy now. + TypeErasedDataflowAnalysisState Copy = MaybePredState->fork(); + + const StmtToEnvMap StmtToEnv(AC.CFCtx, AC.BlockStates); auto [Cond, CondValue] = - TerminatorVisitor(StmtToEnv, PredState.Env, + TerminatorVisitor(StmtToEnv, Copy.Env, blockIndexInPredecessor(*Pred, Block)) .Visit(PredTerminatorStmt); if (Cond != nullptr) // FIXME: Call transferBranchTypeErased even if BuiltinTransferOpts // are not set. - Analysis.transferBranchTypeErased(CondValue, Cond, PredState.Lattice, - PredState.Env); + AC.Analysis.transferBranchTypeErased(CondValue, Cond, Copy.Lattice, + Copy.Env); + Builder.addOwned(std::move(Copy)); + continue; } } - - if (MaybeState) { - Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice); - MaybeState->Env.join(PredState.Env, Analysis); - } else { - MaybeState = std::move(PredState); - } - } - if (!MaybeState) { - // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()` - // to enable building analyses like computation of dominators that - // initialize the state of each basic block differently. - MaybeState.emplace(Analysis.typeErasedInitialElement(), AC.InitEnv); + Builder.addUnowned(*MaybePredState); + continue; } - return *MaybeState; + return std::move(Builder).take(); } /// Built-in transfer function for `CFGStmt`. -void builtinTransferStatement(const CFGStmt &Elt, - TypeErasedDataflowAnalysisState &InputState, - AnalysisContext &AC) { +static void +builtinTransferStatement(const CFGStmt &Elt, + TypeErasedDataflowAnalysisState &InputState, + AnalysisContext &AC) { const Stmt *S = Elt.getStmt(); assert(S != nullptr); - transfer(StmtToEnvMapImpl(AC.CFCtx, AC.BlockStates), *S, InputState.Env); + transfer(StmtToEnvMap(AC.CFCtx, AC.BlockStates), *S, InputState.Env); } /// Built-in transfer function for `CFGInitializer`. -void builtinTransferInitializer(const CFGInitializer &Elt, - TypeErasedDataflowAnalysisState &InputState) { +static void +builtinTransferInitializer(const CFGInitializer &Elt, + TypeErasedDataflowAnalysisState &InputState) { const CXXCtorInitializer *Init = Elt.getInitializer(); assert(Init != nullptr); auto &Env = InputState.Env; - const auto &ThisLoc = - *cast<AggregateStorageLocation>(Env.getThisPointeeStorageLocation()); + auto &ThisLoc = *Env.getThisPointeeStorageLocation(); - const FieldDecl *Member = Init->getMember(); - if (Member == nullptr) - // Not a field initializer. + if (!Init->isAnyMemberInitializer()) + // FIXME: Handle base initialization return; - auto *InitStmt = Init->getInit(); - assert(InitStmt != nullptr); + auto *InitExpr = Init->getInit(); + assert(InitExpr != nullptr); - auto *InitStmtLoc = Env.getStorageLocation(*InitStmt, SkipPast::Reference); - if (InitStmtLoc == nullptr) - return; - - auto *InitStmtVal = Env.getValue(*InitStmtLoc); - if (InitStmtVal == nullptr) - return; - - if (Member->getType()->isReferenceType()) { - auto &MemberLoc = ThisLoc.getChild(*Member); - Env.setValue(MemberLoc, Env.takeOwnership(std::make_unique<ReferenceValue>( - *InitStmtLoc))); + const FieldDecl *Member = nullptr; + AggregateStorageLocation *ParentLoc = &ThisLoc; + StorageLocation *MemberLoc = nullptr; + if (Init->isMemberInitializer()) { + Member = Init->getMember(); + MemberLoc = ThisLoc.getChild(*Member); } else { - auto &MemberLoc = ThisLoc.getChild(*Member); - Env.setValue(MemberLoc, *InitStmtVal); + IndirectFieldDecl *IndirectField = Init->getIndirectMember(); + assert(IndirectField != nullptr); + MemberLoc = &ThisLoc; + for (const auto *I : IndirectField->chain()) { + Member = cast<FieldDecl>(I); + ParentLoc = cast<AggregateStorageLocation>(MemberLoc); + MemberLoc = ParentLoc->getChild(*Member); + } + } + assert(Member != nullptr); + assert(MemberLoc != nullptr); + + // FIXME: Instead of these case distinctions, we would ideally want to be able + // to simply use `Environment::createObject()` here, the same way that we do + // this in `TransferVisitor::VisitInitListExpr()`. However, this would require + // us to be able to build a list of fields that we then use to initialize an + // `AggregateStorageLocation` -- and the problem is that, when we get here, + // the `AggregateStorageLocation` already exists. We should explore if there's + // anything that we can do to change this. + if (Member->getType()->isReferenceType()) { + auto *InitExprLoc = Env.getStorageLocationStrict(*InitExpr); + if (InitExprLoc == nullptr) + return; + + ParentLoc->setChild(*Member, InitExprLoc); + } else if (auto *InitExprVal = Env.getValueStrict(*InitExpr)) { + if (Member->getType()->isRecordType()) { + auto *InitValStruct = cast<StructValue>(InitExprVal); + // FIXME: Rather than performing a copy here, we should really be + // initializing the field in place. This would require us to propagate the + // storage location of the field to the AST node that creates the + // `StructValue`. + copyRecord(InitValStruct->getAggregateLoc(), + *cast<AggregateStorageLocation>(MemberLoc), Env); + } else { + Env.setValue(*MemberLoc, *InitExprVal); + } } } -void builtinTransfer(const CFGElement &Elt, - TypeErasedDataflowAnalysisState &State, - AnalysisContext &AC) { +static void builtinTransfer(const CFGElement &Elt, + TypeErasedDataflowAnalysisState &State, + AnalysisContext &AC) { switch (Elt.getKind()) { case CFGElement::Statement: builtinTransferStatement(Elt.castAs<CFGStmt>(), State, AC); @@ -348,7 +433,18 @@ void builtinTransfer(const CFGElement &Elt, builtinTransferInitializer(Elt.castAs<CFGInitializer>(), State); break; default: - // FIXME: Evaluate other kinds of `CFGElement`. + // FIXME: Evaluate other kinds of `CFGElement`, including: + // - When encountering `CFGLifetimeEnds`, remove the declaration from + // `Environment::DeclToLoc`. This would serve two purposes: + // a) Eliminate unnecessary clutter from `Environment::DeclToLoc` + // b) Allow us to implement an assertion that, when joining two + // `Environments`, the two `DeclToLoc` maps never contain entries that + // map the same declaration to different storage locations. + // Unfortunately, however, we can't currently process `CFGLifetimeEnds` + // because the corresponding CFG option `AddLifetime` is incompatible with + // the option 'AddImplicitDtors`, which we already use. We will first + // need to modify the CFG implementation to make these two options + // compatible before we can process `CFGLifetimeEnds`. break; } } @@ -361,25 +457,33 @@ void builtinTransfer(const CFGElement &Elt, /// user-specified analysis. /// `PostVisitCFG` (if provided) will be applied to the element after evaluation /// by the user-specified analysis. -TypeErasedDataflowAnalysisState +static TypeErasedDataflowAnalysisState transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC, std::function<void(const CFGElement &, const TypeErasedDataflowAnalysisState &)> PostVisitCFG = nullptr) { + AC.Log.enterBlock(Block); auto State = computeBlockInputState(Block, AC); + AC.Log.recordState(State); + int ElementIdx = 1; for (const auto &Element : Block) { + PrettyStackTraceCFGElement CrashInfo(Element, Block.getBlockID(), + ElementIdx++, "transferCFGBlock"); + + AC.Log.enterElement(Element); // Built-in analysis if (AC.Analysis.builtinOptions()) { builtinTransfer(Element, State, AC); } // User-provided analysis - AC.Analysis.transferTypeErased(&Element, State.Lattice, State.Env); + AC.Analysis.transferTypeErased(Element, State.Lattice, State.Env); // Post processing if (PostVisitCFG) { PostVisitCFG(Element, State); } + AC.Log.recordState(State); } return State; } @@ -403,16 +507,18 @@ runTypeErasedDataflowAnalysis( std::function<void(const CFGElement &, const TypeErasedDataflowAnalysisState &)> PostVisitCFG) { + PrettyStackTraceAnalysis CrashInfo(CFCtx, "runTypeErasedDataflowAnalysis"); + PostOrderCFGView POV(&CFCtx.getCFG()); ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV); std::vector<std::optional<TypeErasedDataflowAnalysisState>> BlockStates( - CFCtx.getCFG().size(), std::nullopt); + CFCtx.getCFG().size()); // The entry basic block doesn't contain statements so it can be skipped. const CFGBlock &Entry = CFCtx.getCFG().getEntry(); BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(), - InitEnv}; + InitEnv.fork()}; Worklist.enqueueSuccessors(&Entry); AnalysisContext AC(CFCtx, Analysis, InitEnv, BlockStates); @@ -460,15 +566,18 @@ runTypeErasedDataflowAnalysis( LatticeJoinEffect Effect2 = NewBlockState.Env.widen(OldBlockState->Env, Analysis); if (Effect1 == LatticeJoinEffect::Unchanged && - Effect2 == LatticeJoinEffect::Unchanged) + Effect2 == LatticeJoinEffect::Unchanged) { // The state of `Block` didn't change from widening so there's no need // to revisit its successors. + AC.Log.blockConverged(); continue; + } } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice, NewBlockState.Lattice) && OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { // The state of `Block` didn't change after transfer so there's no need // to revisit its successors. + AC.Log.blockConverged(); continue; } } @@ -493,7 +602,7 @@ runTypeErasedDataflowAnalysis( } } - return BlockStates; + return std::move(BlockStates); } } // namespace dataflow diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp index caa1ed266c5f..037886d09c4f 100644 --- a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp @@ -17,9 +17,10 @@ #include <queue> #include <vector> +#include "clang/Analysis/FlowSensitive/Formula.h" #include "clang/Analysis/FlowSensitive/Solver.h" -#include "clang/Analysis/FlowSensitive/Value.h" #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" @@ -78,7 +79,7 @@ using ClauseID = uint32_t; static constexpr ClauseID NullClause = 0; /// A boolean formula in conjunctive normal form. -struct BooleanFormula { +struct CNFFormula { /// `LargestVar` is equal to the largest positive integer that represents a /// variable in the formula. const Variable LargestVar; @@ -120,12 +121,12 @@ struct BooleanFormula { /// clauses in the formula start from the element at index 1. std::vector<ClauseID> NextWatched; - /// Stores the variable identifier and value location for atomic booleans in - /// the formula. - llvm::DenseMap<Variable, AtomicBoolValue *> Atomics; + /// Stores the variable identifier and Atom for atomic booleans in the + /// formula. + llvm::DenseMap<Variable, Atom> Atomics; - explicit BooleanFormula(Variable LargestVar, - llvm::DenseMap<Variable, AtomicBoolValue *> Atomics) + explicit CNFFormula(Variable LargestVar, + llvm::DenseMap<Variable, Atom> Atomics) : LargestVar(LargestVar), Atomics(std::move(Atomics)) { Clauses.push_back(0); ClauseStarts.push_back(0); @@ -143,8 +144,8 @@ struct BooleanFormula { /// /// All literals in the input that are not `NullLit` must be distinct. void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) { - // The literals are guaranteed to be distinct from properties of BoolValue - // and the construction in `buildBooleanFormula`. + // The literals are guaranteed to be distinct from properties of Formula + // and the construction in `buildCNF`. assert(L1 != NullLit && L1 != L2 && L1 != L3 && (L2 != L3 || L2 == NullLit)); @@ -177,98 +178,59 @@ struct BooleanFormula { /// Converts the conjunction of `Vals` into a formula in conjunctive normal /// form where each clause has at least one and at most three literals. -BooleanFormula buildBooleanFormula(const llvm::DenseSet<BoolValue *> &Vals) { +CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) { // The general strategy of the algorithm implemented below is to map each // of the sub-values in `Vals` to a unique variable and use these variables in // the resulting CNF expression to avoid exponential blow up. The number of // literals in the resulting formula is guaranteed to be linear in the number - // of sub-values in `Vals`. + // of sub-formulas in `Vals`. - // Map each sub-value in `Vals` to a unique variable. - llvm::DenseMap<BoolValue *, Variable> SubValsToVar; - // Store variable identifiers and value location of atomic booleans. - llvm::DenseMap<Variable, AtomicBoolValue *> Atomics; + // Map each sub-formula in `Vals` to a unique variable. + llvm::DenseMap<const Formula *, Variable> SubValsToVar; + // Store variable identifiers and Atom of atomic booleans. + llvm::DenseMap<Variable, Atom> Atomics; Variable NextVar = 1; { - std::queue<BoolValue *> UnprocessedSubVals; - for (BoolValue *Val : Vals) + std::queue<const Formula *> UnprocessedSubVals; + for (const Formula *Val : Vals) UnprocessedSubVals.push(Val); while (!UnprocessedSubVals.empty()) { Variable Var = NextVar; - BoolValue *Val = UnprocessedSubVals.front(); + const Formula *Val = UnprocessedSubVals.front(); UnprocessedSubVals.pop(); if (!SubValsToVar.try_emplace(Val, Var).second) continue; ++NextVar; - // Visit the sub-values of `Val`. - switch (Val->getKind()) { - case Value::Kind::Conjunction: { - auto *C = cast<ConjunctionValue>(Val); - UnprocessedSubVals.push(&C->getLeftSubValue()); - UnprocessedSubVals.push(&C->getRightSubValue()); - break; - } - case Value::Kind::Disjunction: { - auto *D = cast<DisjunctionValue>(Val); - UnprocessedSubVals.push(&D->getLeftSubValue()); - UnprocessedSubVals.push(&D->getRightSubValue()); - break; - } - case Value::Kind::Negation: { - auto *N = cast<NegationValue>(Val); - UnprocessedSubVals.push(&N->getSubVal()); - break; - } - case Value::Kind::Implication: { - auto *I = cast<ImplicationValue>(Val); - UnprocessedSubVals.push(&I->getLeftSubValue()); - UnprocessedSubVals.push(&I->getRightSubValue()); - break; - } - case Value::Kind::Biconditional: { - auto *B = cast<BiconditionalValue>(Val); - UnprocessedSubVals.push(&B->getLeftSubValue()); - UnprocessedSubVals.push(&B->getRightSubValue()); - break; - } - case Value::Kind::TopBool: - // Nothing more to do. This `TopBool` instance has already been mapped - // to a fresh solver variable (`NextVar`, above) and is thereafter - // anonymous. The solver never sees `Top`. - break; - case Value::Kind::AtomicBool: { - Atomics[Var] = cast<AtomicBoolValue>(Val); - break; - } - default: - llvm_unreachable("buildBooleanFormula: unhandled value kind"); - } + for (const Formula *F : Val->operands()) + UnprocessedSubVals.push(F); + if (Val->kind() == Formula::AtomRef) + Atomics[Var] = Val->getAtom(); } } - auto GetVar = [&SubValsToVar](const BoolValue *Val) { + auto GetVar = [&SubValsToVar](const Formula *Val) { auto ValIt = SubValsToVar.find(Val); assert(ValIt != SubValsToVar.end()); return ValIt->second; }; - BooleanFormula Formula(NextVar - 1, std::move(Atomics)); + CNFFormula CNF(NextVar - 1, std::move(Atomics)); std::vector<bool> ProcessedSubVals(NextVar, false); - // Add a conjunct for each variable that represents a top-level conjunction - // value in `Vals`. - for (BoolValue *Val : Vals) - Formula.addClause(posLit(GetVar(Val))); + // Add a conjunct for each variable that represents a top-level formula in + // `Vals`. + for (const Formula *Val : Vals) + CNF.addClause(posLit(GetVar(Val))); // Add conjuncts that represent the mapping between newly-created variables - // and their corresponding sub-values. - std::queue<BoolValue *> UnprocessedSubVals; - for (BoolValue *Val : Vals) + // and their corresponding sub-formulas. + std::queue<const Formula *> UnprocessedSubVals; + for (const Formula *Val : Vals) UnprocessedSubVals.push(Val); while (!UnprocessedSubVals.empty()) { - const BoolValue *Val = UnprocessedSubVals.front(); + const Formula *Val = UnprocessedSubVals.front(); UnprocessedSubVals.pop(); const Variable Var = GetVar(Val); @@ -276,117 +238,107 @@ BooleanFormula buildBooleanFormula(const llvm::DenseSet<BoolValue *> &Vals) { continue; ProcessedSubVals[Var] = true; - if (auto *C = dyn_cast<ConjunctionValue>(Val)) { - const Variable LeftSubVar = GetVar(&C->getLeftSubValue()); - const Variable RightSubVar = GetVar(&C->getRightSubValue()); + switch (Val->kind()) { + case Formula::AtomRef: + break; + case Formula::And: { + const Variable LHS = GetVar(Val->operands()[0]); + const Variable RHS = GetVar(Val->operands()[1]); - if (LeftSubVar == RightSubVar) { + if (LHS == RHS) { // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is // already in conjunctive normal form. Below we add each of the // conjuncts of the latter expression to the result. - Formula.addClause(negLit(Var), posLit(LeftSubVar)); - Formula.addClause(posLit(Var), negLit(LeftSubVar)); - - // Visit a sub-value of `Val` (pick any, they are identical). - UnprocessedSubVals.push(&C->getLeftSubValue()); + CNF.addClause(negLit(Var), posLit(LHS)); + CNF.addClause(posLit(Var), negLit(LHS)); } else { // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v !B)` // which is already in conjunctive normal form. Below we add each of the // conjuncts of the latter expression to the result. - Formula.addClause(negLit(Var), posLit(LeftSubVar)); - Formula.addClause(negLit(Var), posLit(RightSubVar)); - Formula.addClause(posLit(Var), negLit(LeftSubVar), negLit(RightSubVar)); - - // Visit the sub-values of `Val`. - UnprocessedSubVals.push(&C->getLeftSubValue()); - UnprocessedSubVals.push(&C->getRightSubValue()); + CNF.addClause(negLit(Var), posLit(LHS)); + CNF.addClause(negLit(Var), posLit(RHS)); + CNF.addClause(posLit(Var), negLit(LHS), negLit(RHS)); } - } else if (auto *D = dyn_cast<DisjunctionValue>(Val)) { - const Variable LeftSubVar = GetVar(&D->getLeftSubValue()); - const Variable RightSubVar = GetVar(&D->getRightSubValue()); + break; + } + case Formula::Or: { + const Variable LHS = GetVar(Val->operands()[0]); + const Variable RHS = GetVar(Val->operands()[1]); - if (LeftSubVar == RightSubVar) { + if (LHS == RHS) { // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is // already in conjunctive normal form. Below we add each of the // conjuncts of the latter expression to the result. - Formula.addClause(negLit(Var), posLit(LeftSubVar)); - Formula.addClause(posLit(Var), negLit(LeftSubVar)); - - // Visit a sub-value of `Val` (pick any, they are identical). - UnprocessedSubVals.push(&D->getLeftSubValue()); + CNF.addClause(negLit(Var), posLit(LHS)); + CNF.addClause(posLit(Var), negLit(LHS)); } else { - // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v !B)` - // which is already in conjunctive normal form. Below we add each of the - // conjuncts of the latter expression to the result. - Formula.addClause(negLit(Var), posLit(LeftSubVar), posLit(RightSubVar)); - Formula.addClause(posLit(Var), negLit(LeftSubVar)); - Formula.addClause(posLit(Var), negLit(RightSubVar)); - - // Visit the sub-values of `Val`. - UnprocessedSubVals.push(&D->getLeftSubValue()); - UnprocessedSubVals.push(&D->getRightSubValue()); + // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v + // !B)` which is already in conjunctive normal form. Below we add each + // of the conjuncts of the latter expression to the result. + CNF.addClause(negLit(Var), posLit(LHS), posLit(RHS)); + CNF.addClause(posLit(Var), negLit(LHS)); + CNF.addClause(posLit(Var), negLit(RHS)); } - } else if (auto *N = dyn_cast<NegationValue>(Val)) { - const Variable SubVar = GetVar(&N->getSubVal()); - - // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is already in - // conjunctive normal form. Below we add each of the conjuncts of the - // latter expression to the result. - Formula.addClause(negLit(Var), negLit(SubVar)); - Formula.addClause(posLit(Var), posLit(SubVar)); - - // Visit the sub-values of `Val`. - UnprocessedSubVals.push(&N->getSubVal()); - } else if (auto *I = dyn_cast<ImplicationValue>(Val)) { - const Variable LeftSubVar = GetVar(&I->getLeftSubValue()); - const Variable RightSubVar = GetVar(&I->getRightSubValue()); + break; + } + case Formula::Not: { + const Variable Operand = GetVar(Val->operands()[0]); + + // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is + // already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + CNF.addClause(negLit(Var), negLit(Operand)); + CNF.addClause(posLit(Var), posLit(Operand)); + break; + } + case Formula::Implies: { + const Variable LHS = GetVar(Val->operands()[0]); + const Variable RHS = GetVar(Val->operands()[1]); // `X <=> (A => B)` is equivalent to // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in - // conjunctive normal form. Below we add each of the conjuncts of the - // latter expression to the result. - Formula.addClause(posLit(Var), posLit(LeftSubVar)); - Formula.addClause(posLit(Var), negLit(RightSubVar)); - Formula.addClause(negLit(Var), negLit(LeftSubVar), posLit(RightSubVar)); - - // Visit the sub-values of `Val`. - UnprocessedSubVals.push(&I->getLeftSubValue()); - UnprocessedSubVals.push(&I->getRightSubValue()); - } else if (auto *B = dyn_cast<BiconditionalValue>(Val)) { - const Variable LeftSubVar = GetVar(&B->getLeftSubValue()); - const Variable RightSubVar = GetVar(&B->getRightSubValue()); - - if (LeftSubVar == RightSubVar) { + // conjunctive normal form. Below we add each of the conjuncts of + // the latter expression to the result. + CNF.addClause(posLit(Var), posLit(LHS)); + CNF.addClause(posLit(Var), negLit(RHS)); + CNF.addClause(negLit(Var), negLit(LHS), posLit(RHS)); + break; + } + case Formula::Equal: { + const Variable LHS = GetVar(Val->operands()[0]); + const Variable RHS = GetVar(Val->operands()[1]); + + if (LHS == RHS) { // `X <=> (A <=> A)` is equvalent to `X` which is already in // conjunctive normal form. Below we add each of the conjuncts of the // latter expression to the result. - Formula.addClause(posLit(Var)); + CNF.addClause(posLit(Var)); // No need to visit the sub-values of `Val`. - } else { - // `X <=> (A <=> B)` is equivalent to - // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which is - // already in conjunctive normal form. Below we add each of the conjuncts - // of the latter expression to the result. - Formula.addClause(posLit(Var), posLit(LeftSubVar), posLit(RightSubVar)); - Formula.addClause(posLit(Var), negLit(LeftSubVar), negLit(RightSubVar)); - Formula.addClause(negLit(Var), posLit(LeftSubVar), negLit(RightSubVar)); - Formula.addClause(negLit(Var), negLit(LeftSubVar), posLit(RightSubVar)); - - // Visit the sub-values of `Val`. - UnprocessedSubVals.push(&B->getLeftSubValue()); - UnprocessedSubVals.push(&B->getRightSubValue()); + continue; } + // `X <=> (A <=> B)` is equivalent to + // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which + // is already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + CNF.addClause(posLit(Var), posLit(LHS), posLit(RHS)); + CNF.addClause(posLit(Var), negLit(LHS), negLit(RHS)); + CNF.addClause(negLit(Var), posLit(LHS), negLit(RHS)); + CNF.addClause(negLit(Var), negLit(LHS), posLit(RHS)); + break; } + } + for (const Formula *Child : Val->operands()) + UnprocessedSubVals.push(Child); } - return Formula; + return CNF; } class WatchedLiteralsSolverImpl { /// A boolean formula in conjunctive normal form that the solver will attempt /// to prove satisfiable. The formula will be modified in the process. - BooleanFormula Formula; + CNFFormula CNF; /// The search for a satisfying assignment of the variables in `Formula` will /// proceed in levels, starting from 1 and going up to `Formula.LargestVar` @@ -438,9 +390,10 @@ class WatchedLiteralsSolverImpl { std::vector<Variable> ActiveVars; public: - explicit WatchedLiteralsSolverImpl(const llvm::DenseSet<BoolValue *> &Vals) - : Formula(buildBooleanFormula(Vals)), LevelVars(Formula.LargestVar + 1), - LevelStates(Formula.LargestVar + 1) { + explicit WatchedLiteralsSolverImpl( + const llvm::ArrayRef<const Formula *> &Vals) + : CNF(buildCNF(Vals)), LevelVars(CNF.LargestVar + 1), + LevelStates(CNF.LargestVar + 1) { assert(!Vals.empty()); // Initialize the state at the root level to a decision so that in @@ -449,25 +402,31 @@ public: LevelStates[0] = State::Decision; // Initialize all variables as unassigned. - VarAssignments.resize(Formula.LargestVar + 1, Assignment::Unassigned); + VarAssignments.resize(CNF.LargestVar + 1, Assignment::Unassigned); // Initialize the active variables. - for (Variable Var = Formula.LargestVar; Var != NullVar; --Var) { + for (Variable Var = CNF.LargestVar; Var != NullVar; --Var) { if (isWatched(posLit(Var)) || isWatched(negLit(Var))) ActiveVars.push_back(Var); } } - Solver::Result solve() && { + // Returns the `Result` and the number of iterations "remaining" from + // `MaxIterations` (that is, `MaxIterations` - iterations in this call). + std::pair<Solver::Result, std::int64_t> solve(std::int64_t MaxIterations) && { size_t I = 0; while (I < ActiveVars.size()) { + if (MaxIterations == 0) + return std::make_pair(Solver::Result::TimedOut(), 0); + --MaxIterations; + // Assert that the following invariants hold: // 1. All active variables are unassigned. // 2. All active variables form watched literals. // 3. Unassigned variables that form watched literals are active. // FIXME: Consider replacing these with test cases that fail if the any // of the invariants is broken. That might not be easy due to the - // transformations performed by `buildBooleanFormula`. + // transformations performed by `buildCNF`. assert(activeVarsAreUnassigned()); assert(activeVarsFormWatchedLiterals()); assert(unassignedVarsFormingWatchedLiteralsAreActive()); @@ -487,7 +446,7 @@ public: // If the root level is reached, then all possible assignments lead to // a conflict. if (Level == 0) - return Solver::Result::Unsatisfiable(); + return std::make_pair(Solver::Result::Unsatisfiable(), MaxIterations); // Otherwise, take the other branch at the most recent level where a // decision was made. @@ -544,16 +503,14 @@ public: ++I; } } - return Solver::Result::Satisfiable(buildSolution()); + return std::make_pair(Solver::Result::Satisfiable(buildSolution()), MaxIterations); } private: - /// Returns a satisfying truth assignment to the atomic values in the boolean - /// formula. - llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment> - buildSolution() { - llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment> Solution; - for (auto &Atomic : Formula.Atomics) { + /// Returns a satisfying truth assignment to the atoms in the boolean formula. + llvm::DenseMap<Atom, Solver::Result::Assignment> buildSolution() { + llvm::DenseMap<Atom, Solver::Result::Assignment> Solution; + for (auto &Atomic : CNF.Atomics) { // A variable may have a definite true/false assignment, or it may be // unassigned indicating its truth value does not affect the result of // the formula. Unassigned variables are assigned to true as a default. @@ -589,24 +546,24 @@ private: const Literal FalseLit = VarAssignments[Var] == Assignment::AssignedTrue ? negLit(Var) : posLit(Var); - ClauseID FalseLitWatcher = Formula.WatchedHead[FalseLit]; - Formula.WatchedHead[FalseLit] = NullClause; + ClauseID FalseLitWatcher = CNF.WatchedHead[FalseLit]; + CNF.WatchedHead[FalseLit] = NullClause; while (FalseLitWatcher != NullClause) { - const ClauseID NextFalseLitWatcher = Formula.NextWatched[FalseLitWatcher]; + const ClauseID NextFalseLitWatcher = CNF.NextWatched[FalseLitWatcher]; // Pick the first non-false literal as the new watched literal. - const size_t FalseLitWatcherStart = Formula.ClauseStarts[FalseLitWatcher]; + const size_t FalseLitWatcherStart = CNF.ClauseStarts[FalseLitWatcher]; size_t NewWatchedLitIdx = FalseLitWatcherStart + 1; - while (isCurrentlyFalse(Formula.Clauses[NewWatchedLitIdx])) + while (isCurrentlyFalse(CNF.Clauses[NewWatchedLitIdx])) ++NewWatchedLitIdx; - const Literal NewWatchedLit = Formula.Clauses[NewWatchedLitIdx]; + const Literal NewWatchedLit = CNF.Clauses[NewWatchedLitIdx]; const Variable NewWatchedLitVar = var(NewWatchedLit); // Swap the old watched literal for the new one in `FalseLitWatcher` to // maintain the invariant that the watched literal is at the beginning of // the clause. - Formula.Clauses[NewWatchedLitIdx] = FalseLit; - Formula.Clauses[FalseLitWatcherStart] = NewWatchedLit; + CNF.Clauses[NewWatchedLitIdx] = FalseLit; + CNF.Clauses[FalseLitWatcherStart] = NewWatchedLit; // If the new watched literal isn't watched by any other clause and its // variable isn't assigned we need to add it to the active variables. @@ -614,8 +571,8 @@ private: VarAssignments[NewWatchedLitVar] == Assignment::Unassigned) ActiveVars.push_back(NewWatchedLitVar); - Formula.NextWatched[FalseLitWatcher] = Formula.WatchedHead[NewWatchedLit]; - Formula.WatchedHead[NewWatchedLit] = FalseLitWatcher; + CNF.NextWatched[FalseLitWatcher] = CNF.WatchedHead[NewWatchedLit]; + CNF.WatchedHead[NewWatchedLit] = FalseLitWatcher; // Go to the next clause that watches `FalseLit`. FalseLitWatcher = NextFalseLitWatcher; @@ -625,16 +582,15 @@ private: /// Returns true if and only if one of the clauses that watch `Lit` is a unit /// clause. bool watchedByUnitClause(Literal Lit) const { - for (ClauseID LitWatcher = Formula.WatchedHead[Lit]; - LitWatcher != NullClause; - LitWatcher = Formula.NextWatched[LitWatcher]) { - llvm::ArrayRef<Literal> Clause = Formula.clauseLiterals(LitWatcher); + for (ClauseID LitWatcher = CNF.WatchedHead[Lit]; LitWatcher != NullClause; + LitWatcher = CNF.NextWatched[LitWatcher]) { + llvm::ArrayRef<Literal> Clause = CNF.clauseLiterals(LitWatcher); // Assert the invariant that the watched literal is always the first one // in the clause. // FIXME: Consider replacing this with a test case that fails if the // invariant is broken by `updateWatchedLiterals`. That might not be easy - // due to the transformations performed by `buildBooleanFormula`. + // due to the transformations performed by `buildCNF`. assert(Clause.front() == Lit); if (isUnit(Clause)) @@ -658,7 +614,7 @@ private: /// Returns true if and only if `Lit` is watched by a clause in `Formula`. bool isWatched(Literal Lit) const { - return Formula.WatchedHead[Lit] != NullClause; + return CNF.WatchedHead[Lit] != NullClause; } /// Returns an assignment for an unassigned variable. @@ -671,8 +627,8 @@ private: /// Returns a set of all watched literals. llvm::DenseSet<Literal> watchedLiterals() const { llvm::DenseSet<Literal> WatchedLiterals; - for (Literal Lit = 2; Lit < Formula.WatchedHead.size(); Lit++) { - if (Formula.WatchedHead[Lit] == NullClause) + for (Literal Lit = 2; Lit < CNF.WatchedHead.size(); Lit++) { + if (CNF.WatchedHead[Lit] == NullClause) continue; WatchedLiterals.insert(Lit); } @@ -712,9 +668,14 @@ private: } }; -Solver::Result WatchedLiteralsSolver::solve(llvm::DenseSet<BoolValue *> Vals) { - return Vals.empty() ? Solver::Result::Satisfiable({{}}) - : WatchedLiteralsSolverImpl(Vals).solve(); +Solver::Result +WatchedLiteralsSolver::solve(llvm::ArrayRef<const Formula *> Vals) { + if (Vals.empty()) + return Solver::Result::Satisfiable({{}}); + auto [Res, Iterations] = + WatchedLiteralsSolverImpl(Vals).solve(MaxIterations); + MaxIterations = Iterations; + return Res; } } // namespace dataflow |
