Diffstat (limited to 'contrib/llvm-project/clang/lib/Analysis')
45 files changed, 28323 insertions, 0 deletions
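The bulk of the diff below is clang's analysis-context machinery (AnalysisDeclContext, BodyFarm, CFG). As a rough orientation, a consumer typically drives these classes as in the following illustrative sketch, which is not part of the commit; 'ASTCtx' and 'FD' stand for an ASTContext and a FunctionDecl the caller already has:

  // Build and cache analysis state for one declaration.
  AnalysisDeclContextManager Mgr(ASTCtx);
  AnalysisDeclContext *ADC = Mgr.getContext(FD);
  if (CFG *G = ADC->getCFG())  // built once, then memoized
    G->dump(ASTCtx.getLangOpts(), /*ShowColors=*/false);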
diff --git a/contrib/llvm-project/clang/lib/Analysis/AnalysisDeclContext.cpp b/contrib/llvm-project/clang/lib/Analysis/AnalysisDeclContext.cpp new file mode 100644 index 000000000000..d3a1a993711f --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/AnalysisDeclContext.cpp @@ -0,0 +1,705 @@ +//===- AnalysisDeclContext.cpp - Analysis context for Path Sens analysis --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines AnalysisDeclContext, a class that manages the analysis +// context data for path sensitive analysis. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/Expr.h" +#include "clang/AST/LambdaCapture.h" +#include "clang/AST/ParentMap.h" +#include "clang/AST/PrettyPrinter.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" +#include "clang/Analysis/BodyFarm.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/CFGStmtMap.h" +#include "clang/Analysis/Support/BumpVector.h" +#include "clang/Basic/JsonSupport.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <memory> + +using namespace clang; + +using ManagedAnalysisMap = llvm::DenseMap<const void *, std::unique_ptr<ManagedAnalysis>>; + +AnalysisDeclContext::AnalysisDeclContext(AnalysisDeclContextManager *ADCMgr, + const Decl *D, + const CFG::BuildOptions &Options) + : ADCMgr(ADCMgr), D(D), cfgBuildOptions(Options) { + cfgBuildOptions.forcedBlkExprs = &forcedBlkExprs; +} + +AnalysisDeclContext::AnalysisDeclContext(AnalysisDeclContextManager *ADCMgr, + const Decl *D) + : ADCMgr(ADCMgr), D(D) { + cfgBuildOptions.forcedBlkExprs = &forcedBlkExprs; +} + +AnalysisDeclContextManager::AnalysisDeclContextManager( + ASTContext &ASTCtx, bool useUnoptimizedCFG, bool addImplicitDtors, + bool addInitializers, bool addTemporaryDtors, bool addLifetime, + bool addLoopExit, bool addScopes, bool synthesizeBodies, + bool addStaticInitBranch, bool addCXXNewAllocator, + bool addRichCXXConstructors, bool markElidedCXXConstructors, + bool addVirtualBaseBranches, CodeInjector *injector) + : Injector(injector), FunctionBodyFarm(ASTCtx, injector), + SynthesizeBodies(synthesizeBodies) { + cfgBuildOptions.PruneTriviallyFalseEdges = !useUnoptimizedCFG; + cfgBuildOptions.AddImplicitDtors = addImplicitDtors; + cfgBuildOptions.AddInitializers = addInitializers; + cfgBuildOptions.AddTemporaryDtors = addTemporaryDtors; + cfgBuildOptions.AddLifetime = addLifetime; + cfgBuildOptions.AddLoopExit 
= addLoopExit; + cfgBuildOptions.AddScopes = addScopes; + cfgBuildOptions.AddStaticInitBranches = addStaticInitBranch; + cfgBuildOptions.AddCXXNewAllocator = addCXXNewAllocator; + cfgBuildOptions.AddRichCXXConstructors = addRichCXXConstructors; + cfgBuildOptions.MarkElidedCXXConstructors = markElidedCXXConstructors; + cfgBuildOptions.AddVirtualBaseBranches = addVirtualBaseBranches; +} + +void AnalysisDeclContextManager::clear() { Contexts.clear(); } + +Stmt *AnalysisDeclContext::getBody(bool &IsAutosynthesized) const { + IsAutosynthesized = false; + if (const auto *FD = dyn_cast<FunctionDecl>(D)) { + Stmt *Body = FD->getBody(); + if (auto *CoroBody = dyn_cast_or_null<CoroutineBodyStmt>(Body)) + Body = CoroBody->getBody(); + if (ADCMgr && ADCMgr->synthesizeBodies()) { + Stmt *SynthesizedBody = ADCMgr->getBodyFarm().getBody(FD); + if (SynthesizedBody) { + Body = SynthesizedBody; + IsAutosynthesized = true; + } + } + return Body; + } + else if (const auto *MD = dyn_cast<ObjCMethodDecl>(D)) { + Stmt *Body = MD->getBody(); + if (ADCMgr && ADCMgr->synthesizeBodies()) { + Stmt *SynthesizedBody = ADCMgr->getBodyFarm().getBody(MD); + if (SynthesizedBody) { + Body = SynthesizedBody; + IsAutosynthesized = true; + } + } + return Body; + } else if (const auto *BD = dyn_cast<BlockDecl>(D)) + return BD->getBody(); + else if (const auto *FunTmpl = dyn_cast_or_null<FunctionTemplateDecl>(D)) + return FunTmpl->getTemplatedDecl()->getBody(); + + llvm_unreachable("unknown code decl"); +} + +Stmt *AnalysisDeclContext::getBody() const { + bool Tmp; + return getBody(Tmp); +} + +bool AnalysisDeclContext::isBodyAutosynthesized() const { + bool Tmp; + getBody(Tmp); + return Tmp; +} + +bool AnalysisDeclContext::isBodyAutosynthesizedFromModelFile() const { + bool Tmp; + Stmt *Body = getBody(Tmp); + return Tmp && Body->getBeginLoc().isValid(); +} + +/// Returns true if \param VD is an Objective-C implicit 'self' parameter. +static bool isSelfDecl(const VarDecl *VD) { + return isa_and_nonnull<ImplicitParamDecl>(VD) && VD->getName() == "self"; +} + +const ImplicitParamDecl *AnalysisDeclContext::getSelfDecl() const { + if (const auto *MD = dyn_cast<ObjCMethodDecl>(D)) + return MD->getSelfDecl(); + if (const auto *BD = dyn_cast<BlockDecl>(D)) { + // See if 'self' was captured by the block. + for (const auto &I : BD->captures()) { + const VarDecl *VD = I.getVariable(); + if (isSelfDecl(VD)) + return dyn_cast<ImplicitParamDecl>(VD); + } + } + + auto *CXXMethod = dyn_cast<CXXMethodDecl>(D); + if (!CXXMethod) + return nullptr; + + const CXXRecordDecl *parent = CXXMethod->getParent(); + if (!parent->isLambda()) + return nullptr; + + for (const auto &LC : parent->captures()) { + if (!LC.capturesVariable()) + continue; + + ValueDecl *VD = LC.getCapturedVar(); + if (isSelfDecl(dyn_cast<VarDecl>(VD))) + return dyn_cast<ImplicitParamDecl>(VD); + } + + return nullptr; +} + +void AnalysisDeclContext::registerForcedBlockExpression(const Stmt *stmt) { + if (!forcedBlkExprs) + forcedBlkExprs = new CFG::BuildOptions::ForcedBlkExprs(); + // Default construct an entry for 'stmt'. 
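// (Added note, not in the commit: the subscript below value-initializes the
// DenseMap entry to a null CFGBlock*; the CFG builder later fills in the
// block that was forced to contain this expression, which is what
// getBlockForRegisteredExpression() reads back.)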
+ if (const auto *e = dyn_cast<Expr>(stmt)) + stmt = e->IgnoreParens(); + (void) (*forcedBlkExprs)[stmt]; +} + +const CFGBlock * +AnalysisDeclContext::getBlockForRegisteredExpression(const Stmt *stmt) { + assert(forcedBlkExprs); + if (const auto *e = dyn_cast<Expr>(stmt)) + stmt = e->IgnoreParens(); + CFG::BuildOptions::ForcedBlkExprs::const_iterator itr = + forcedBlkExprs->find(stmt); + assert(itr != forcedBlkExprs->end()); + return itr->second; +} + +/// Add each synthetic statement in the CFG to the parent map, using the +/// source statement's parent. +static void addParentsForSyntheticStmts(const CFG *TheCFG, ParentMap &PM) { + if (!TheCFG) + return; + + for (CFG::synthetic_stmt_iterator I = TheCFG->synthetic_stmt_begin(), + E = TheCFG->synthetic_stmt_end(); + I != E; ++I) { + PM.setParent(I->first, PM.getParent(I->second)); + } +} + +CFG *AnalysisDeclContext::getCFG() { + if (!cfgBuildOptions.PruneTriviallyFalseEdges) + return getUnoptimizedCFG(); + + if (!builtCFG) { + cfg = CFG::buildCFG(D, getBody(), &D->getASTContext(), cfgBuildOptions); + // Even when the cfg is not successfully built, we don't + // want to try building it again. + builtCFG = true; + + if (PM) + addParentsForSyntheticStmts(cfg.get(), *PM); + + // The Observer should only observe one build of the CFG. + getCFGBuildOptions().Observer = nullptr; + } + return cfg.get(); +} + +CFG *AnalysisDeclContext::getUnoptimizedCFG() { + if (!builtCompleteCFG) { + SaveAndRestore NotPrune(cfgBuildOptions.PruneTriviallyFalseEdges, false); + completeCFG = + CFG::buildCFG(D, getBody(), &D->getASTContext(), cfgBuildOptions); + // Even when the cfg is not successfully built, we don't + // want to try building it again. + builtCompleteCFG = true; + + if (PM) + addParentsForSyntheticStmts(completeCFG.get(), *PM); + + // The Observer should only observe one build of the CFG. + getCFGBuildOptions().Observer = nullptr; + } + return completeCFG.get(); +} + +CFGStmtMap *AnalysisDeclContext::getCFGStmtMap() { + if (cfgStmtMap) + return cfgStmtMap.get(); + + if (CFG *c = getCFG()) { + cfgStmtMap.reset(CFGStmtMap::Build(c, &getParentMap())); + return cfgStmtMap.get(); + } + + return nullptr; +} + +CFGReverseBlockReachabilityAnalysis *AnalysisDeclContext::getCFGReachablityAnalysis() { + if (CFA) + return CFA.get(); + + if (CFG *c = getCFG()) { + CFA.reset(new CFGReverseBlockReachabilityAnalysis(*c)); + return CFA.get(); + } + + return nullptr; +} + +void AnalysisDeclContext::dumpCFG(bool ShowColors) { + getCFG()->dump(getASTContext().getLangOpts(), ShowColors); +} + +ParentMap &AnalysisDeclContext::getParentMap() { + if (!PM) { + PM.reset(new ParentMap(getBody())); + if (const auto *C = dyn_cast<CXXConstructorDecl>(getDecl())) { + for (const auto *I : C->inits()) { + PM->addStmt(I->getInit()); + } + } + if (builtCFG) + addParentsForSyntheticStmts(getCFG(), *PM); + if (builtCompleteCFG) + addParentsForSyntheticStmts(getUnoptimizedCFG(), *PM); + } + return *PM; +} + +AnalysisDeclContext *AnalysisDeclContextManager::getContext(const Decl *D) { + if (const auto *FD = dyn_cast<FunctionDecl>(D)) { + // Calling 'hasBody' replaces 'FD' in place with the FunctionDecl + // that has the body. 
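// (Added note, not in the commit: hasBody(FD) rebinds FD to whichever
// redeclaration actually carries the body, so every redeclaration of a
// defined function resolves to the same AnalysisDeclContext entry below.)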
+ FD->hasBody(FD); + D = FD; + } + + std::unique_ptr<AnalysisDeclContext> &AC = Contexts[D]; + if (!AC) + AC = std::make_unique<AnalysisDeclContext>(this, D, cfgBuildOptions); + return AC.get(); +} + +BodyFarm &AnalysisDeclContextManager::getBodyFarm() { return FunctionBodyFarm; } + +const StackFrameContext * +AnalysisDeclContext::getStackFrame(const LocationContext *ParentLC, + const Stmt *S, const CFGBlock *Blk, + unsigned BlockCount, unsigned Index) { + return getLocationContextManager().getStackFrame(this, ParentLC, S, Blk, + BlockCount, Index); +} + +const BlockInvocationContext *AnalysisDeclContext::getBlockInvocationContext( + const LocationContext *ParentLC, const BlockDecl *BD, const void *Data) { + return getLocationContextManager().getBlockInvocationContext(this, ParentLC, + BD, Data); +} + +bool AnalysisDeclContext::isInStdNamespace(const Decl *D) { + const DeclContext *DC = D->getDeclContext()->getEnclosingNamespaceContext(); + const auto *ND = dyn_cast<NamespaceDecl>(DC); + if (!ND) + return false; + + while (const DeclContext *Parent = ND->getParent()) { + if (!isa<NamespaceDecl>(Parent)) + break; + ND = cast<NamespaceDecl>(Parent); + } + + return ND->isStdNamespace(); +} + +std::string AnalysisDeclContext::getFunctionName(const Decl *D) { + std::string Str; + llvm::raw_string_ostream OS(Str); + const ASTContext &Ctx = D->getASTContext(); + + if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + OS << FD->getQualifiedNameAsString(); + + // In C++, there are overloads. + + if (Ctx.getLangOpts().CPlusPlus) { + OS << '('; + for (const auto &P : FD->parameters()) { + if (P != *FD->param_begin()) + OS << ", "; + OS << P->getType(); + } + OS << ')'; + } + + } else if (isa<BlockDecl>(D)) { + PresumedLoc Loc = Ctx.getSourceManager().getPresumedLoc(D->getLocation()); + + if (Loc.isValid()) { + OS << "block (line: " << Loc.getLine() << ", col: " << Loc.getColumn() + << ')'; + } + + } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) { + + // FIXME: copy-pasted from CGDebugInfo.cpp. + OS << (OMD->isInstanceMethod() ? '-' : '+') << '['; + const DeclContext *DC = OMD->getDeclContext(); + if (const auto *OID = dyn_cast<ObjCImplementationDecl>(DC)) { + OS << OID->getName(); + } else if (const auto *OID = dyn_cast<ObjCInterfaceDecl>(DC)) { + OS << OID->getName(); + } else if (const auto *OC = dyn_cast<ObjCCategoryDecl>(DC)) { + if (OC->IsClassExtension()) { + OS << OC->getClassInterface()->getName(); + } else { + OS << OC->getIdentifier()->getNameStart() << '(' + << OC->getIdentifier()->getNameStart() << ')'; + } + } else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) { + OS << OCD->getClassInterface()->getName() << '(' << OCD->getName() << ')'; + } + OS << ' ' << OMD->getSelector().getAsString() << ']'; + } + + return Str; +} + +LocationContextManager &AnalysisDeclContext::getLocationContextManager() { + assert( + ADCMgr && + "Cannot create LocationContexts without an AnalysisDeclContextManager!"); + return ADCMgr->getLocationContextManager(); +} + +//===----------------------------------------------------------------------===// +// FoldingSet profiling. 
+//===----------------------------------------------------------------------===// + +void LocationContext::ProfileCommon(llvm::FoldingSetNodeID &ID, + ContextKind ck, + AnalysisDeclContext *ctx, + const LocationContext *parent, + const void *data) { + ID.AddInteger(ck); + ID.AddPointer(ctx); + ID.AddPointer(parent); + ID.AddPointer(data); +} + +void StackFrameContext::Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getAnalysisDeclContext(), getParent(), CallSite, Block, + BlockCount, Index); +} + +void BlockInvocationContext::Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getAnalysisDeclContext(), getParent(), BD, Data); +} + +//===----------------------------------------------------------------------===// +// LocationContext creation. +//===----------------------------------------------------------------------===// + +const StackFrameContext *LocationContextManager::getStackFrame( + AnalysisDeclContext *ctx, const LocationContext *parent, const Stmt *s, + const CFGBlock *blk, unsigned blockCount, unsigned idx) { + llvm::FoldingSetNodeID ID; + StackFrameContext::Profile(ID, ctx, parent, s, blk, blockCount, idx); + void *InsertPos; + auto *L = + cast_or_null<StackFrameContext>(Contexts.FindNodeOrInsertPos(ID, InsertPos)); + if (!L) { + L = new StackFrameContext(ctx, parent, s, blk, blockCount, idx, ++NewID); + Contexts.InsertNode(L, InsertPos); + } + return L; +} + +const BlockInvocationContext *LocationContextManager::getBlockInvocationContext( + AnalysisDeclContext *ADC, const LocationContext *ParentLC, + const BlockDecl *BD, const void *Data) { + llvm::FoldingSetNodeID ID; + BlockInvocationContext::Profile(ID, ADC, ParentLC, BD, Data); + void *InsertPos; + auto *L = + cast_or_null<BlockInvocationContext>(Contexts.FindNodeOrInsertPos(ID, + InsertPos)); + if (!L) { + L = new BlockInvocationContext(ADC, ParentLC, BD, Data, ++NewID); + Contexts.InsertNode(L, InsertPos); + } + return L; +} + +//===----------------------------------------------------------------------===// +// LocationContext methods. 
+//===----------------------------------------------------------------------===// + +const StackFrameContext *LocationContext::getStackFrame() const { + const LocationContext *LC = this; + while (LC) { + if (const auto *SFC = dyn_cast<StackFrameContext>(LC)) + return SFC; + LC = LC->getParent(); + } + return nullptr; +} + +bool LocationContext::inTopFrame() const { + return getStackFrame()->inTopFrame(); +} + +bool LocationContext::isParentOf(const LocationContext *LC) const { + do { + const LocationContext *Parent = LC->getParent(); + if (Parent == this) + return true; + else + LC = Parent; + } while (LC); + + return false; +} + +static void printLocation(raw_ostream &Out, const SourceManager &SM, + SourceLocation Loc) { + if (Loc.isFileID() && SM.isInMainFile(Loc)) + Out << SM.getExpansionLineNumber(Loc); + else + Loc.print(Out, SM); +} + +void LocationContext::dumpStack(raw_ostream &Out) const { + ASTContext &Ctx = getAnalysisDeclContext()->getASTContext(); + PrintingPolicy PP(Ctx.getLangOpts()); + PP.TerseOutput = 1; + + const SourceManager &SM = + getAnalysisDeclContext()->getASTContext().getSourceManager(); + + unsigned Frame = 0; + for (const LocationContext *LCtx = this; LCtx; LCtx = LCtx->getParent()) { + switch (LCtx->getKind()) { + case StackFrame: + Out << "\t#" << Frame << ' '; + ++Frame; + if (const auto *D = dyn_cast<NamedDecl>(LCtx->getDecl())) + Out << "Calling " << AnalysisDeclContext::getFunctionName(D); + else + Out << "Calling anonymous code"; + if (const Stmt *S = cast<StackFrameContext>(LCtx)->getCallSite()) { + Out << " at line "; + printLocation(Out, SM, S->getBeginLoc()); + } + break; + case Block: + Out << "Invoking block"; + if (const Decl *D = cast<BlockInvocationContext>(LCtx)->getDecl()) { + Out << " defined at line "; + printLocation(Out, SM, D->getBeginLoc()); + } + break; + } + Out << '\n'; + } +} + +void LocationContext::printJson(raw_ostream &Out, const char *NL, + unsigned int Space, bool IsDot, + std::function<void(const LocationContext *)> + printMoreInfoPerContext) const { + ASTContext &Ctx = getAnalysisDeclContext()->getASTContext(); + PrintingPolicy PP(Ctx.getLangOpts()); + PP.TerseOutput = 1; + + const SourceManager &SM = + getAnalysisDeclContext()->getASTContext().getSourceManager(); + + unsigned Frame = 0; + for (const LocationContext *LCtx = this; LCtx; LCtx = LCtx->getParent()) { + Indent(Out, Space, IsDot) + << "{ \"lctx_id\": " << LCtx->getID() << ", \"location_context\": \""; + switch (LCtx->getKind()) { + case StackFrame: + Out << '#' << Frame << " Call\", \"calling\": \""; + ++Frame; + if (const auto *D = dyn_cast<NamedDecl>(LCtx->getDecl())) + Out << D->getQualifiedNameAsString(); + else + Out << "anonymous code"; + + Out << "\", \"location\": "; + if (const Stmt *S = cast<StackFrameContext>(LCtx)->getCallSite()) { + printSourceLocationAsJson(Out, S->getBeginLoc(), SM); + } else { + Out << "null"; + } + + Out << ", \"items\": "; + break; + case Block: + Out << "Invoking block\" "; + if (const Decl *D = cast<BlockInvocationContext>(LCtx)->getDecl()) { + Out << ", \"location\": "; + printSourceLocationAsJson(Out, D->getBeginLoc(), SM); + Out << ' '; + } + break; + } + + printMoreInfoPerContext(LCtx); + + Out << '}'; + if (LCtx->getParent()) + Out << ','; + Out << NL; + } +} + +LLVM_DUMP_METHOD void LocationContext::dump() const { printJson(llvm::errs()); } + +//===----------------------------------------------------------------------===// +// Lazily generated map to query the external variables referenced by a Block. 
+//===----------------------------------------------------------------------===// + +namespace { + +class FindBlockDeclRefExprsVals : public StmtVisitor<FindBlockDeclRefExprsVals>{ + BumpVector<const VarDecl *> &BEVals; + BumpVectorContext &BC; + llvm::SmallPtrSet<const VarDecl *, 4> Visited; + llvm::SmallPtrSet<const DeclContext *, 4> IgnoredContexts; + +public: + FindBlockDeclRefExprsVals(BumpVector<const VarDecl*> &bevals, + BumpVectorContext &bc) + : BEVals(bevals), BC(bc) {} + + void VisitStmt(Stmt *S) { + for (auto *Child : S->children()) + if (Child) + Visit(Child); + } + + void VisitDeclRefExpr(DeclRefExpr *DR) { + // Non-local variables are also directly modified. + if (const auto *VD = dyn_cast<VarDecl>(DR->getDecl())) { + if (!VD->hasLocalStorage()) { + if (Visited.insert(VD).second) + BEVals.push_back(VD, BC); + } + } + } + + void VisitBlockExpr(BlockExpr *BR) { + // Blocks containing blocks can transitively capture more variables. + IgnoredContexts.insert(BR->getBlockDecl()); + Visit(BR->getBlockDecl()->getBody()); + } + + void VisitPseudoObjectExpr(PseudoObjectExpr *PE) { + for (PseudoObjectExpr::semantics_iterator it = PE->semantics_begin(), + et = PE->semantics_end(); it != et; ++it) { + Expr *Semantic = *it; + if (auto *OVE = dyn_cast<OpaqueValueExpr>(Semantic)) + Semantic = OVE->getSourceExpr(); + Visit(Semantic); + } + } +}; + +} // namespace + +using DeclVec = BumpVector<const VarDecl *>; + +static DeclVec* LazyInitializeReferencedDecls(const BlockDecl *BD, + void *&Vec, + llvm::BumpPtrAllocator &A) { + if (Vec) + return (DeclVec*) Vec; + + BumpVectorContext BC(A); + DeclVec *BV = (DeclVec*) A.Allocate<DeclVec>(); + new (BV) DeclVec(BC, 10); + + // Go through the capture list. + for (const auto &CI : BD->captures()) { + BV->push_back(CI.getVariable(), BC); + } + + // Find the referenced global/static variables. + FindBlockDeclRefExprsVals F(*BV, BC); + F.Visit(BD->getBody()); + + Vec = BV; + return BV; +} + +llvm::iterator_range<AnalysisDeclContext::referenced_decls_iterator> +AnalysisDeclContext::getReferencedBlockVars(const BlockDecl *BD) { + if (!ReferencedBlockVars) + ReferencedBlockVars = new llvm::DenseMap<const BlockDecl*,void*>(); + + const DeclVec *V = + LazyInitializeReferencedDecls(BD, (*ReferencedBlockVars)[BD], A); + return llvm::make_range(V->begin(), V->end()); +} + +std::unique_ptr<ManagedAnalysis> &AnalysisDeclContext::getAnalysisImpl(const void *tag) { + if (!ManagedAnalyses) + ManagedAnalyses = new ManagedAnalysisMap(); + ManagedAnalysisMap *M = (ManagedAnalysisMap*) ManagedAnalyses; + return (*M)[tag]; +} + +//===----------------------------------------------------------------------===// +// Cleanup. 
+//===----------------------------------------------------------------------===// + +ManagedAnalysis::~ManagedAnalysis() = default; + +AnalysisDeclContext::~AnalysisDeclContext() { + delete forcedBlkExprs; + delete ReferencedBlockVars; + delete (ManagedAnalysisMap*) ManagedAnalyses; +} + +LocationContext::~LocationContext() = default; + +LocationContextManager::~LocationContextManager() { + clear(); +} + +void LocationContextManager::clear() { + for (llvm::FoldingSet<LocationContext>::iterator I = Contexts.begin(), + E = Contexts.end(); I != E; ) { + LocationContext *LC = &*I; + ++I; + delete LC; + } + Contexts.clear(); +} diff --git a/contrib/llvm-project/clang/lib/Analysis/BodyFarm.cpp b/contrib/llvm-project/clang/lib/Analysis/BodyFarm.cpp new file mode 100644 index 000000000000..c05534886cb5 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/BodyFarm.cpp @@ -0,0 +1,906 @@ +//== BodyFarm.cpp - Factory for conjuring up fake bodies ----------*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// BodyFarm is a factory for creating faux implementations for functions/methods +// for analysis purposes. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/BodyFarm.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/CXXInheritance.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprObjC.h" +#include "clang/AST/NestedNameSpecifier.h" +#include "clang/Analysis/CodeInjector.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/OperatorKinds.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Debug.h" +#include <optional> + +#define DEBUG_TYPE "body-farm" + +using namespace clang; + +//===----------------------------------------------------------------------===// +// Helper creation functions for constructing faux ASTs. +//===----------------------------------------------------------------------===// + +static bool isDispatchBlock(QualType Ty) { + // Is it a block pointer? + const BlockPointerType *BPT = Ty->getAs<BlockPointerType>(); + if (!BPT) + return false; + + // Check if the block pointer type takes no arguments and + // returns void. + const FunctionProtoType *FT = + BPT->getPointeeType()->getAs<FunctionProtoType>(); + return FT && FT->getReturnType()->isVoidType() && FT->getNumParams() == 0; +} + +namespace { +class ASTMaker { +public: + ASTMaker(ASTContext &C) : C(C) {} + + /// Create a new BinaryOperator representing a simple assignment. + BinaryOperator *makeAssignment(const Expr *LHS, const Expr *RHS, QualType Ty); + + /// Create a new BinaryOperator representing a comparison. + BinaryOperator *makeComparison(const Expr *LHS, const Expr *RHS, + BinaryOperator::Opcode Op); + + /// Create a new compound stmt using the provided statements. + CompoundStmt *makeCompound(ArrayRef<Stmt*>); + + /// Create a new DeclRefExpr for the referenced variable. + DeclRefExpr *makeDeclRefExpr(const VarDecl *D, + bool RefersToEnclosingVariableOrCapture = false); + + /// Create a new UnaryOperator representing a dereference. + UnaryOperator *makeDereference(const Expr *Arg, QualType Ty); + + /// Create an implicit cast for an integer conversion. 
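/// (Added note: returns the argument unchanged when its type already
/// matches Ty, so no cast node is created in that case.)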
+ Expr *makeIntegralCast(const Expr *Arg, QualType Ty); + + /// Create an implicit cast to a builtin boolean type. + ImplicitCastExpr *makeIntegralCastToBoolean(const Expr *Arg); + + /// Create an implicit cast for lvalue-to-rvalue conversions. + ImplicitCastExpr *makeLvalueToRvalue(const Expr *Arg, QualType Ty); + + /// Make an rvalue out of a variable declaration, creating a temporary + /// DeclRefExpr in the process. + ImplicitCastExpr * + makeLvalueToRvalue(const VarDecl *Decl, + bool RefersToEnclosingVariableOrCapture = false); + + /// Create an implicit cast of the given type. + ImplicitCastExpr *makeImplicitCast(const Expr *Arg, QualType Ty, + CastKind CK = CK_LValueToRValue); + + /// Create a cast to reference type. + CastExpr *makeReferenceCast(const Expr *Arg, QualType Ty); + + /// Create an Objective-C bool literal. + ObjCBoolLiteralExpr *makeObjCBool(bool Val); + + /// Create an Objective-C ivar reference. + ObjCIvarRefExpr *makeObjCIvarRef(const Expr *Base, const ObjCIvarDecl *IVar); + + /// Create a Return statement. + ReturnStmt *makeReturn(const Expr *RetVal); + + /// Create an integer literal expression of the given type. + IntegerLiteral *makeIntegerLiteral(uint64_t Value, QualType Ty); + + /// Create a member expression. + MemberExpr *makeMemberExpression(Expr *base, ValueDecl *MemberDecl, + bool IsArrow = false, + ExprValueKind ValueKind = VK_LValue); + + /// Returns the *first* member field of a record declaration with a given name. + /// \return nullptr if no member with such a name exists. + ValueDecl *findMemberField(const RecordDecl *RD, StringRef Name); + +private: + ASTContext &C; +}; +} + +BinaryOperator *ASTMaker::makeAssignment(const Expr *LHS, const Expr *RHS, + QualType Ty) { + return BinaryOperator::Create( + C, const_cast<Expr *>(LHS), const_cast<Expr *>(RHS), BO_Assign, Ty, + VK_PRValue, OK_Ordinary, SourceLocation(), FPOptionsOverride()); +} + +BinaryOperator *ASTMaker::makeComparison(const Expr *LHS, const Expr *RHS, + BinaryOperator::Opcode Op) { + assert(BinaryOperator::isLogicalOp(Op) || + BinaryOperator::isComparisonOp(Op)); + return BinaryOperator::Create( + C, const_cast<Expr *>(LHS), const_cast<Expr *>(RHS), Op, + C.getLogicalOperationType(), VK_PRValue, OK_Ordinary, SourceLocation(), + FPOptionsOverride()); +} + +CompoundStmt *ASTMaker::makeCompound(ArrayRef<Stmt *> Stmts) { + return CompoundStmt::Create(C, Stmts, FPOptionsOverride(), SourceLocation(), + SourceLocation()); +} + +DeclRefExpr *ASTMaker::makeDeclRefExpr( + const VarDecl *D, + bool RefersToEnclosingVariableOrCapture) { + QualType Type = D->getType().getNonReferenceType(); + + DeclRefExpr *DR = DeclRefExpr::Create( + C, NestedNameSpecifierLoc(), SourceLocation(), const_cast<VarDecl *>(D), + RefersToEnclosingVariableOrCapture, SourceLocation(), Type, VK_LValue); + return DR; +} + +UnaryOperator *ASTMaker::makeDereference(const Expr *Arg, QualType Ty) { + return UnaryOperator::Create(C, const_cast<Expr *>(Arg), UO_Deref, Ty, + VK_LValue, OK_Ordinary, SourceLocation(), + /*CanOverflow*/ false, FPOptionsOverride()); +} + +ImplicitCastExpr *ASTMaker::makeLvalueToRvalue(const Expr *Arg, QualType Ty) { + return makeImplicitCast(Arg, Ty, CK_LValueToRValue); +} + +ImplicitCastExpr * +ASTMaker::makeLvalueToRvalue(const VarDecl *Arg, + bool RefersToEnclosingVariableOrCapture) { + QualType Type = Arg->getType().getNonReferenceType(); + return makeLvalueToRvalue(makeDeclRefExpr(Arg, + RefersToEnclosingVariableOrCapture), + Type); +} + +ImplicitCastExpr *ASTMaker::makeImplicitCast(const Expr *Arg, 
QualType Ty, + CastKind CK) { + return ImplicitCastExpr::Create(C, Ty, + /* CastKind=*/CK, + /* Expr=*/const_cast<Expr *>(Arg), + /* CXXCastPath=*/nullptr, + /* ExprValueKind=*/VK_PRValue, + /* FPFeatures */ FPOptionsOverride()); +} + +CastExpr *ASTMaker::makeReferenceCast(const Expr *Arg, QualType Ty) { + assert(Ty->isReferenceType()); + return CXXStaticCastExpr::Create( + C, Ty.getNonReferenceType(), + Ty->isLValueReferenceType() ? VK_LValue : VK_XValue, CK_NoOp, + const_cast<Expr *>(Arg), /*CXXCastPath=*/nullptr, + /*Written=*/C.getTrivialTypeSourceInfo(Ty), FPOptionsOverride(), + SourceLocation(), SourceLocation(), SourceRange()); +} + +Expr *ASTMaker::makeIntegralCast(const Expr *Arg, QualType Ty) { + if (Arg->getType() == Ty) + return const_cast<Expr*>(Arg); + return makeImplicitCast(Arg, Ty, CK_IntegralCast); +} + +ImplicitCastExpr *ASTMaker::makeIntegralCastToBoolean(const Expr *Arg) { + return makeImplicitCast(Arg, C.BoolTy, CK_IntegralToBoolean); +} + +ObjCBoolLiteralExpr *ASTMaker::makeObjCBool(bool Val) { + QualType Ty = C.getBOOLDecl() ? C.getBOOLType() : C.ObjCBuiltinBoolTy; + return new (C) ObjCBoolLiteralExpr(Val, Ty, SourceLocation()); +} + +ObjCIvarRefExpr *ASTMaker::makeObjCIvarRef(const Expr *Base, + const ObjCIvarDecl *IVar) { + return new (C) ObjCIvarRefExpr(const_cast<ObjCIvarDecl*>(IVar), + IVar->getType(), SourceLocation(), + SourceLocation(), const_cast<Expr*>(Base), + /*arrow=*/true, /*free=*/false); +} + +ReturnStmt *ASTMaker::makeReturn(const Expr *RetVal) { + return ReturnStmt::Create(C, SourceLocation(), const_cast<Expr *>(RetVal), + /* NRVOCandidate=*/nullptr); +} + +IntegerLiteral *ASTMaker::makeIntegerLiteral(uint64_t Value, QualType Ty) { + llvm::APInt APValue = llvm::APInt(C.getTypeSize(Ty), Value); + return IntegerLiteral::Create(C, APValue, Ty, SourceLocation()); +} + +MemberExpr *ASTMaker::makeMemberExpression(Expr *base, ValueDecl *MemberDecl, + bool IsArrow, + ExprValueKind ValueKind) { + + DeclAccessPair FoundDecl = DeclAccessPair::make(MemberDecl, AS_public); + return MemberExpr::Create( + C, base, IsArrow, SourceLocation(), NestedNameSpecifierLoc(), + SourceLocation(), MemberDecl, FoundDecl, + DeclarationNameInfo(MemberDecl->getDeclName(), SourceLocation()), + /* TemplateArgumentListInfo=*/ nullptr, MemberDecl->getType(), ValueKind, + OK_Ordinary, NOUR_None); +} + +ValueDecl *ASTMaker::findMemberField(const RecordDecl *RD, StringRef Name) { + + CXXBasePaths Paths( + /* FindAmbiguities=*/false, + /* RecordPaths=*/false, + /* DetectVirtual=*/ false); + const IdentifierInfo &II = C.Idents.get(Name); + DeclarationName DeclName = C.DeclarationNames.getIdentifier(&II); + + DeclContextLookupResult Decls = RD->lookup(DeclName); + for (NamedDecl *FoundDecl : Decls) + if (!FoundDecl->getDeclContext()->isFunctionOrMethod()) + return cast<ValueDecl>(FoundDecl); + + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Creation functions for faux ASTs. 
+//===----------------------------------------------------------------------===// + +typedef Stmt *(*FunctionFarmer)(ASTContext &C, const FunctionDecl *D); + +static CallExpr *create_call_once_funcptr_call(ASTContext &C, ASTMaker M, + const ParmVarDecl *Callback, + ArrayRef<Expr *> CallArgs) { + + QualType Ty = Callback->getType(); + DeclRefExpr *Call = M.makeDeclRefExpr(Callback); + Expr *SubExpr; + if (Ty->isRValueReferenceType()) { + SubExpr = M.makeImplicitCast( + Call, Ty.getNonReferenceType(), CK_LValueToRValue); + } else if (Ty->isLValueReferenceType() && + Call->getType()->isFunctionType()) { + Ty = C.getPointerType(Ty.getNonReferenceType()); + SubExpr = M.makeImplicitCast(Call, Ty, CK_FunctionToPointerDecay); + } else if (Ty->isLValueReferenceType() + && Call->getType()->isPointerType() + && Call->getType()->getPointeeType()->isFunctionType()){ + SubExpr = Call; + } else { + llvm_unreachable("Unexpected state"); + } + + return CallExpr::Create(C, SubExpr, CallArgs, C.VoidTy, VK_PRValue, + SourceLocation(), FPOptionsOverride()); +} + +static CallExpr *create_call_once_lambda_call(ASTContext &C, ASTMaker M, + const ParmVarDecl *Callback, + CXXRecordDecl *CallbackDecl, + ArrayRef<Expr *> CallArgs) { + assert(CallbackDecl != nullptr); + assert(CallbackDecl->isLambda()); + FunctionDecl *callOperatorDecl = CallbackDecl->getLambdaCallOperator(); + assert(callOperatorDecl != nullptr); + + DeclRefExpr *callOperatorDeclRef = + DeclRefExpr::Create(/* Ctx =*/ C, + /* QualifierLoc =*/ NestedNameSpecifierLoc(), + /* TemplateKWLoc =*/ SourceLocation(), + const_cast<FunctionDecl *>(callOperatorDecl), + /* RefersToEnclosingVariableOrCapture=*/ false, + /* NameLoc =*/ SourceLocation(), + /* T =*/ callOperatorDecl->getType(), + /* VK =*/ VK_LValue); + + return CXXOperatorCallExpr::Create( + /*AstContext=*/C, OO_Call, callOperatorDeclRef, + /*Args=*/CallArgs, + /*QualType=*/C.VoidTy, + /*ExprValueType=*/VK_PRValue, + /*SourceLocation=*/SourceLocation(), + /*FPFeatures=*/FPOptionsOverride()); +} + +/// Create a fake body for 'std::move' or 'std::forward'. This is just: +/// +/// \code +/// return static_cast<return_type>(param); +/// \endcode +static Stmt *create_std_move_forward(ASTContext &C, const FunctionDecl *D) { + LLVM_DEBUG(llvm::dbgs() << "Generating body for std::move / std::forward\n"); + + ASTMaker M(C); + + QualType ReturnType = D->getType()->castAs<FunctionType>()->getReturnType(); + Expr *Param = M.makeDeclRefExpr(D->getParamDecl(0)); + Expr *Cast = M.makeReferenceCast(Param, ReturnType); + return M.makeReturn(Cast); +} + +/// Create a fake body for std::call_once. +/// Emulates the following function body: +/// +/// \code +/// typedef struct once_flag_s { +/// unsigned long __state = 0; +/// } once_flag; +/// template<class Callable> +/// void call_once(once_flag& o, Callable func) { +/// if (!o.__state) { +/// func(); +/// } +/// o.__state = 1; +/// } +/// \endcode +static Stmt *create_call_once(ASTContext &C, const FunctionDecl *D) { + LLVM_DEBUG(llvm::dbgs() << "Generating body for call_once\n"); + + // We need at least two parameters. 
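// (Added note: the two required parameters are the once-flag and the
// callable; std::call_once forwards any additional arguments to the
// callable, which the code below reconstructs.)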
+ if (D->param_size() < 2) + return nullptr; + + ASTMaker M(C); + + const ParmVarDecl *Flag = D->getParamDecl(0); + const ParmVarDecl *Callback = D->getParamDecl(1); + + if (!Callback->getType()->isReferenceType()) { + llvm::dbgs() << "libcxx03 std::call_once implementation, skipping.\n"; + return nullptr; + } + if (!Flag->getType()->isReferenceType()) { + llvm::dbgs() << "unknown std::call_once implementation, skipping.\n"; + return nullptr; + } + + QualType CallbackType = Callback->getType().getNonReferenceType(); + + // Nullable pointer, non-null iff function is a CXXRecordDecl. + CXXRecordDecl *CallbackRecordDecl = CallbackType->getAsCXXRecordDecl(); + QualType FlagType = Flag->getType().getNonReferenceType(); + auto *FlagRecordDecl = FlagType->getAsRecordDecl(); + + if (!FlagRecordDecl) { + LLVM_DEBUG(llvm::dbgs() << "Flag field is not a record: " + << "unknown std::call_once implementation, " + << "ignoring the call.\n"); + return nullptr; + } + + // We initially assume libc++ implementation of call_once, + // where the once_flag struct has a field `__state_`. + ValueDecl *FlagFieldDecl = M.findMemberField(FlagRecordDecl, "__state_"); + + // Otherwise, try libstdc++ implementation, with a field + // `_M_once` + if (!FlagFieldDecl) { + FlagFieldDecl = M.findMemberField(FlagRecordDecl, "_M_once"); + } + + if (!FlagFieldDecl) { + LLVM_DEBUG(llvm::dbgs() << "No field _M_once or __state_ found on " + << "std::once_flag struct: unknown std::call_once " + << "implementation, ignoring the call."); + return nullptr; + } + + bool isLambdaCall = CallbackRecordDecl && CallbackRecordDecl->isLambda(); + if (CallbackRecordDecl && !isLambdaCall) { + LLVM_DEBUG(llvm::dbgs() + << "Not supported: synthesizing body for functors when " + << "body farming std::call_once, ignoring the call."); + return nullptr; + } + + SmallVector<Expr *, 5> CallArgs; + const FunctionProtoType *CallbackFunctionType; + if (isLambdaCall) { + + // Lambda requires callback itself inserted as a first parameter. + CallArgs.push_back( + M.makeDeclRefExpr(Callback, + /* RefersToEnclosingVariableOrCapture=*/ true)); + CallbackFunctionType = CallbackRecordDecl->getLambdaCallOperator() + ->getType() + ->getAs<FunctionProtoType>(); + } else if (!CallbackType->getPointeeType().isNull()) { + CallbackFunctionType = + CallbackType->getPointeeType()->getAs<FunctionProtoType>(); + } else { + CallbackFunctionType = CallbackType->getAs<FunctionProtoType>(); + } + + if (!CallbackFunctionType) + return nullptr; + + // First two arguments are used for the flag and for the callback. + if (D->getNumParams() != CallbackFunctionType->getNumParams() + 2) { + LLVM_DEBUG(llvm::dbgs() << "Types of params of the callback do not match " + << "params passed to std::call_once, " + << "ignoring the call\n"); + return nullptr; + } + + // All arguments past first two ones are passed to the callback, + // and we turn lvalues into rvalues if the argument is not passed by + // reference. 
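// (Added illustration with a hypothetical signature: for
// std::call_once(flag, cb, a, b) where cb is callable as void(int, int&),
// 'a' gets an lvalue-to-rvalue load in the loop below while 'b' is passed
// through unchanged, so the callee still observes the caller's lvalue.)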
+ for (unsigned int ParamIdx = 2; ParamIdx < D->getNumParams(); ParamIdx++) { + const ParmVarDecl *PDecl = D->getParamDecl(ParamIdx); + assert(PDecl); + if (CallbackFunctionType->getParamType(ParamIdx - 2) + .getNonReferenceType() + .getCanonicalType() != + PDecl->getType().getNonReferenceType().getCanonicalType()) { + LLVM_DEBUG(llvm::dbgs() << "Types of params of the callback do not match " + << "params passed to std::call_once, " + << "ignoring the call\n"); + return nullptr; + } + Expr *ParamExpr = M.makeDeclRefExpr(PDecl); + if (!CallbackFunctionType->getParamType(ParamIdx - 2)->isReferenceType()) { + QualType PTy = PDecl->getType().getNonReferenceType(); + ParamExpr = M.makeLvalueToRvalue(ParamExpr, PTy); + } + CallArgs.push_back(ParamExpr); + } + + CallExpr *CallbackCall; + if (isLambdaCall) { + + CallbackCall = create_call_once_lambda_call(C, M, Callback, + CallbackRecordDecl, CallArgs); + } else { + + // Function pointer case. + CallbackCall = create_call_once_funcptr_call(C, M, Callback, CallArgs); + } + + DeclRefExpr *FlagDecl = + M.makeDeclRefExpr(Flag, + /* RefersToEnclosingVariableOrCapture=*/true); + + + MemberExpr *Deref = M.makeMemberExpression(FlagDecl, FlagFieldDecl); + assert(Deref->isLValue()); + QualType DerefType = Deref->getType(); + + // Negation predicate. + UnaryOperator *FlagCheck = UnaryOperator::Create( + C, + /* input=*/ + M.makeImplicitCast(M.makeLvalueToRvalue(Deref, DerefType), DerefType, + CK_IntegralToBoolean), + /* opc=*/UO_LNot, + /* QualType=*/C.IntTy, + /* ExprValueKind=*/VK_PRValue, + /* ExprObjectKind=*/OK_Ordinary, SourceLocation(), + /* CanOverflow*/ false, FPOptionsOverride()); + + // Create assignment. + BinaryOperator *FlagAssignment = M.makeAssignment( + Deref, M.makeIntegralCast(M.makeIntegerLiteral(1, C.IntTy), DerefType), + DerefType); + + auto *Out = + IfStmt::Create(C, SourceLocation(), IfStatementKind::Ordinary, + /* Init=*/nullptr, + /* Var=*/nullptr, + /* Cond=*/FlagCheck, + /* LPL=*/SourceLocation(), + /* RPL=*/SourceLocation(), + /* Then=*/M.makeCompound({CallbackCall, FlagAssignment})); + + return Out; +} + +/// Create a fake body for dispatch_once. +static Stmt *create_dispatch_once(ASTContext &C, const FunctionDecl *D) { + // Check that we have exactly two parameters. + if (D->param_size() != 2) + return nullptr; + + // Check if the first parameter is a pointer to integer type. + const ParmVarDecl *Predicate = D->getParamDecl(0); + QualType PredicateQPtrTy = Predicate->getType(); + const PointerType *PredicatePtrTy = PredicateQPtrTy->getAs<PointerType>(); + if (!PredicatePtrTy) + return nullptr; + QualType PredicateTy = PredicatePtrTy->getPointeeType(); + if (!PredicateTy->isIntegerType()) + return nullptr; + + // Check if the second parameter is the proper block type. + const ParmVarDecl *Block = D->getParamDecl(1); + QualType Ty = Block->getType(); + if (!isDispatchBlock(Ty)) + return nullptr; + + // Everything checks out. Create a fake body that checks the predicate, + // sets it, and calls the block. Basically, an AST dump of: + // + // void dispatch_once(dispatch_once_t *predicate, dispatch_block_t block) { + // if (*predicate != ~0l) { + // *predicate = ~0l; + // block(); + // } + // } + + ASTMaker M(C); + + // (1) Create the call. 
+ CallExpr *CE = CallExpr::Create( + /*ASTContext=*/C, + /*StmtClass=*/M.makeLvalueToRvalue(/*Expr=*/Block), + /*Args=*/std::nullopt, + /*QualType=*/C.VoidTy, + /*ExprValueType=*/VK_PRValue, + /*SourceLocation=*/SourceLocation(), FPOptionsOverride()); + + // (2) Create the assignment to the predicate. + Expr *DoneValue = + UnaryOperator::Create(C, M.makeIntegerLiteral(0, C.LongTy), UO_Not, + C.LongTy, VK_PRValue, OK_Ordinary, SourceLocation(), + /*CanOverflow*/ false, FPOptionsOverride()); + + BinaryOperator *B = + M.makeAssignment( + M.makeDereference( + M.makeLvalueToRvalue( + M.makeDeclRefExpr(Predicate), PredicateQPtrTy), + PredicateTy), + M.makeIntegralCast(DoneValue, PredicateTy), + PredicateTy); + + // (3) Create the compound statement. + Stmt *Stmts[] = { B, CE }; + CompoundStmt *CS = M.makeCompound(Stmts); + + // (4) Create the 'if' condition. + ImplicitCastExpr *LValToRval = + M.makeLvalueToRvalue( + M.makeDereference( + M.makeLvalueToRvalue( + M.makeDeclRefExpr(Predicate), + PredicateQPtrTy), + PredicateTy), + PredicateTy); + + Expr *GuardCondition = M.makeComparison(LValToRval, DoneValue, BO_NE); + // (5) Create the 'if' statement. + auto *If = IfStmt::Create(C, SourceLocation(), IfStatementKind::Ordinary, + /* Init=*/nullptr, + /* Var=*/nullptr, + /* Cond=*/GuardCondition, + /* LPL=*/SourceLocation(), + /* RPL=*/SourceLocation(), + /* Then=*/CS); + return If; +} + +/// Create a fake body for dispatch_sync. +static Stmt *create_dispatch_sync(ASTContext &C, const FunctionDecl *D) { + // Check if we have at least two parameters. + if (D->param_size() != 2) + return nullptr; + + // Check if the second parameter is a block. + const ParmVarDecl *PV = D->getParamDecl(1); + QualType Ty = PV->getType(); + if (!isDispatchBlock(Ty)) + return nullptr; + + // Everything checks out. Create a fake body that just calls the block. + // This is basically just an AST dump of: + // + // void dispatch_sync(dispatch_queue_t queue, void (^block)(void)) { + // block(); + // } + // + ASTMaker M(C); + DeclRefExpr *DR = M.makeDeclRefExpr(PV); + ImplicitCastExpr *ICE = M.makeLvalueToRvalue(DR, Ty); + CallExpr *CE = CallExpr::Create(C, ICE, std::nullopt, C.VoidTy, VK_PRValue, + SourceLocation(), FPOptionsOverride()); + return CE; +} + +static Stmt *create_OSAtomicCompareAndSwap(ASTContext &C, const FunctionDecl *D) +{ + // There are exactly 3 arguments. + if (D->param_size() != 3) + return nullptr; + + // Signature: + // _Bool OSAtomicCompareAndSwapPtr(void *__oldValue, + // void *__newValue, + // void * volatile *__theValue) + // Generate body: + // if (oldValue == *theValue) { + // *theValue = newValue; + // return YES; + // } + // else return NO; + + QualType ResultTy = D->getReturnType(); + bool isBoolean = ResultTy->isBooleanType(); + if (!isBoolean && !ResultTy->isIntegralType(C)) + return nullptr; + + const ParmVarDecl *OldValue = D->getParamDecl(0); + QualType OldValueTy = OldValue->getType(); + + const ParmVarDecl *NewValue = D->getParamDecl(1); + QualType NewValueTy = NewValue->getType(); + + assert(OldValueTy == NewValueTy); + + const ParmVarDecl *TheValue = D->getParamDecl(2); + QualType TheValueTy = TheValue->getType(); + const PointerType *PT = TheValueTy->getAs<PointerType>(); + if (!PT) + return nullptr; + QualType PointeeTy = PT->getPointeeType(); + + ASTMaker M(C); + // Construct the comparison. 
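// (Added note: this is the 'oldValue == *theValue' test from the signature
// comment above; both operands are wrapped in lvalue-to-rvalue loads, with
// an extra dereference around theValue.)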
+ Expr *Comparison = + M.makeComparison( + M.makeLvalueToRvalue(M.makeDeclRefExpr(OldValue), OldValueTy), + M.makeLvalueToRvalue( + M.makeDereference( + M.makeLvalueToRvalue(M.makeDeclRefExpr(TheValue), TheValueTy), + PointeeTy), + PointeeTy), + BO_EQ); + + // Construct the body of the IfStmt. + Stmt *Stmts[2]; + Stmts[0] = + M.makeAssignment( + M.makeDereference( + M.makeLvalueToRvalue(M.makeDeclRefExpr(TheValue), TheValueTy), + PointeeTy), + M.makeLvalueToRvalue(M.makeDeclRefExpr(NewValue), NewValueTy), + NewValueTy); + + Expr *BoolVal = M.makeObjCBool(true); + Expr *RetVal = isBoolean ? M.makeIntegralCastToBoolean(BoolVal) + : M.makeIntegralCast(BoolVal, ResultTy); + Stmts[1] = M.makeReturn(RetVal); + CompoundStmt *Body = M.makeCompound(Stmts); + + // Construct the else clause. + BoolVal = M.makeObjCBool(false); + RetVal = isBoolean ? M.makeIntegralCastToBoolean(BoolVal) + : M.makeIntegralCast(BoolVal, ResultTy); + Stmt *Else = M.makeReturn(RetVal); + + /// Construct the If. + auto *If = + IfStmt::Create(C, SourceLocation(), IfStatementKind::Ordinary, + /* Init=*/nullptr, + /* Var=*/nullptr, Comparison, + /* LPL=*/SourceLocation(), + /* RPL=*/SourceLocation(), Body, SourceLocation(), Else); + + return If; +} + +Stmt *BodyFarm::getBody(const FunctionDecl *D) { + std::optional<Stmt *> &Val = Bodies[D]; + if (Val) + return *Val; + + Val = nullptr; + + if (D->getIdentifier() == nullptr) + return nullptr; + + StringRef Name = D->getName(); + if (Name.empty()) + return nullptr; + + FunctionFarmer FF; + + if (unsigned BuiltinID = D->getBuiltinID()) { + switch (BuiltinID) { + case Builtin::BIas_const: + case Builtin::BIforward: + case Builtin::BImove: + case Builtin::BImove_if_noexcept: + FF = create_std_move_forward; + break; + default: + FF = nullptr; + break; + } + } else if (Name.startswith("OSAtomicCompareAndSwap") || + Name.startswith("objc_atomicCompareAndSwap")) { + FF = create_OSAtomicCompareAndSwap; + } else if (Name == "call_once" && D->getDeclContext()->isStdNamespace()) { + FF = create_call_once; + } else { + FF = llvm::StringSwitch<FunctionFarmer>(Name) + .Case("dispatch_sync", create_dispatch_sync) + .Case("dispatch_once", create_dispatch_once) + .Default(nullptr); + } + + if (FF) { Val = FF(C, D); } + else if (Injector) { Val = Injector->getBody(D); } + return *Val; +} + +static const ObjCIvarDecl *findBackingIvar(const ObjCPropertyDecl *Prop) { + const ObjCIvarDecl *IVar = Prop->getPropertyIvarDecl(); + + if (IVar) + return IVar; + + // When a readonly property is shadowed in a class extensions with a + // a readwrite property, the instance variable belongs to the shadowing + // property rather than the shadowed property. If there is no instance + // variable on a readonly property, check to see whether the property is + // shadowed and if so try to get the instance variable from shadowing + // property. 
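// (Added illustration: a primary @interface declares
//   @property (readonly) NSString *name;
// and a class extension redeclares it readwrite. The synthesized ivar
// '_name' then belongs to the extension's shadowing declaration, which the
// primary-class lookup below returns.)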
+ if (!Prop->isReadOnly()) + return nullptr; + + auto *Container = cast<ObjCContainerDecl>(Prop->getDeclContext()); + const ObjCInterfaceDecl *PrimaryInterface = nullptr; + if (auto *InterfaceDecl = dyn_cast<ObjCInterfaceDecl>(Container)) { + PrimaryInterface = InterfaceDecl; + } else if (auto *CategoryDecl = dyn_cast<ObjCCategoryDecl>(Container)) { + PrimaryInterface = CategoryDecl->getClassInterface(); + } else if (auto *ImplDecl = dyn_cast<ObjCImplDecl>(Container)) { + PrimaryInterface = ImplDecl->getClassInterface(); + } else { + return nullptr; + } + + // FindPropertyVisibleInPrimaryClass() looks first in class extensions, so it + // is guaranteed to find the shadowing property, if it exists, rather than + // the shadowed property. + auto *ShadowingProp = PrimaryInterface->FindPropertyVisibleInPrimaryClass( + Prop->getIdentifier(), Prop->getQueryKind()); + if (ShadowingProp && ShadowingProp != Prop) { + IVar = ShadowingProp->getPropertyIvarDecl(); + } + + return IVar; +} + +static Stmt *createObjCPropertyGetter(ASTContext &Ctx, + const ObjCMethodDecl *MD) { + // First, find the backing ivar. + const ObjCIvarDecl *IVar = nullptr; + const ObjCPropertyDecl *Prop = nullptr; + + // Property accessor stubs sometimes do not correspond to any property decl + // in the current interface (but in a superclass). They still have a + // corresponding property impl decl in this case. + if (MD->isSynthesizedAccessorStub()) { + const ObjCInterfaceDecl *IntD = MD->getClassInterface(); + const ObjCImplementationDecl *ImpD = IntD->getImplementation(); + for (const auto *PI : ImpD->property_impls()) { + if (const ObjCPropertyDecl *Candidate = PI->getPropertyDecl()) { + if (Candidate->getGetterName() == MD->getSelector()) { + Prop = Candidate; + IVar = Prop->getPropertyIvarDecl(); + } + } + } + } + + if (!IVar) { + Prop = MD->findPropertyDecl(); + IVar = findBackingIvar(Prop); + } + + if (!IVar || !Prop) + return nullptr; + + // Ignore weak variables, which have special behavior. + if (Prop->getPropertyAttributes() & ObjCPropertyAttribute::kind_weak) + return nullptr; + + // Look to see if Sema has synthesized a body for us. This happens in + // Objective-C++ because the return value may be a C++ class type with a + // non-trivial copy constructor. We can only do this if we can find the + // @synthesize for this property, though (or if we know it's been auto- + // synthesized). + const ObjCImplementationDecl *ImplDecl = + IVar->getContainingInterface()->getImplementation(); + if (ImplDecl) { + for (const auto *I : ImplDecl->property_impls()) { + if (I->getPropertyDecl() != Prop) + continue; + + if (I->getGetterCXXConstructor()) { + ASTMaker M(Ctx); + return M.makeReturn(I->getGetterCXXConstructor()); + } + } + } + + // We expect that the property is the same type as the ivar, or a reference to + // it, and that it is either an object pointer or trivially copyable. 
+ if (!Ctx.hasSameUnqualifiedType(IVar->getType(), + Prop->getType().getNonReferenceType())) + return nullptr; + if (!IVar->getType()->isObjCLifetimeType() && + !IVar->getType().isTriviallyCopyableType(Ctx)) + return nullptr; + + // Generate our body: + // return self->_ivar; + ASTMaker M(Ctx); + + const VarDecl *selfVar = MD->getSelfDecl(); + if (!selfVar) + return nullptr; + + Expr *loadedIVar = M.makeObjCIvarRef( + M.makeLvalueToRvalue(M.makeDeclRefExpr(selfVar), selfVar->getType()), + IVar); + + if (!MD->getReturnType()->isReferenceType()) + loadedIVar = M.makeLvalueToRvalue(loadedIVar, IVar->getType()); + + return M.makeReturn(loadedIVar); +} + +Stmt *BodyFarm::getBody(const ObjCMethodDecl *D) { + // We currently only know how to synthesize property accessors. + if (!D->isPropertyAccessor()) + return nullptr; + + D = D->getCanonicalDecl(); + + // We should not try to synthesize explicitly redefined accessors. + // We do not know for sure how they behave. + if (!D->isImplicit()) + return nullptr; + + std::optional<Stmt *> &Val = Bodies[D]; + if (Val) + return *Val; + Val = nullptr; + + // For now, we only synthesize getters. + // Synthesizing setters would cause false negatives in the + // RetainCountChecker because the method body would bind the parameter + // to an instance variable, causing it to escape. This would prevent + // warning in the following common scenario: + // + // id foo = [[NSObject alloc] init]; + // self.foo = foo; // We should warn that foo leaks here. + // + if (D->param_size() != 0) + return nullptr; + + // If the property was defined in an extension, search the extensions for + // overrides. + const ObjCInterfaceDecl *OID = D->getClassInterface(); + if (dyn_cast<ObjCInterfaceDecl>(D->getParent()) != OID) + for (auto *Ext : OID->known_extensions()) { + auto *OMD = Ext->getInstanceMethod(D->getSelector()); + if (OMD && !OMD->isImplicit()) + return nullptr; + } + + Val = createObjCPropertyGetter(C, D); + + return *Val; +} diff --git a/contrib/llvm-project/clang/lib/Analysis/CFG.cpp b/contrib/llvm-project/clang/lib/Analysis/CFG.cpp new file mode 100644 index 000000000000..ea8b73e81ea2 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/CFG.cpp @@ -0,0 +1,6354 @@ +//===- CFG.cpp - Classes for representing and building CFGs ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the CFG and CFGBuilder classes for representing and +// building Control-Flow Graphs (CFGs) from ASTs. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/CFG.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclGroup.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/PrettyPrinter.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtObjC.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/ConstructionContext.h" +#include "clang/Analysis/Support/BumpVector.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/ExceptionSpecificationType.h" +#include "clang/Basic/JsonSupport.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/Specifiers.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DOTGraphTraits.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <memory> +#include <optional> +#include <string> +#include <tuple> +#include <utility> +#include <vector> + +using namespace clang; + +static SourceLocation GetEndLoc(Decl *D) { + if (VarDecl *VD = dyn_cast<VarDecl>(D)) + if (Expr *Ex = VD->getInit()) + return Ex->getSourceRange().getEnd(); + return D->getLocation(); +} + +/// Returns true on constant values based around a single IntegerLiteral. +/// Allow for use of parentheses, integer casts, and negative signs. +/// FIXME: it would be good to unify this function with +/// getIntegerLiteralSubexpressionValue at some point given the similarity +/// between the functions. + +static bool IsIntegerLiteralConstantExpr(const Expr *E) { + // Allow parentheses + E = E->IgnoreParens(); + + // Allow conversions to different integer kind. + if (const auto *CE = dyn_cast<CastExpr>(E)) { + if (CE->getCastKind() != CK_IntegralCast) + return false; + E = CE->getSubExpr(); + } + + // Allow negative numbers. + if (const auto *UO = dyn_cast<UnaryOperator>(E)) { + if (UO->getOpcode() != UO_Minus) + return false; + E = UO->getSubExpr(); + } + + return isa<IntegerLiteral>(E); +} + +/// Helper for tryNormalizeBinaryOperator. Attempts to extract an IntegerLiteral +/// constant expression or EnumConstantDecl from the given Expr. If it fails, +/// returns nullptr. +static const Expr *tryTransformToIntOrEnumConstant(const Expr *E) { + E = E->IgnoreParens(); + if (IsIntegerLiteralConstantExpr(E)) + return E; + if (auto *DR = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) + return isa<EnumConstantDecl>(DR->getDecl()) ? DR : nullptr; + return nullptr; +} + +/// Tries to interpret a binary operator into `Expr Op NumExpr` form, if +/// NumExpr is an integer literal or an enum constant. +/// +/// If this fails, at least one of the returned DeclRefExpr or Expr will be +/// null. 
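/// For example, '5 > x' (constant on the left) normalizes to the tuple
/// (x, BO_LT, 5), i.e. 'x < 5', while 'x == Foo' with an enum constant Foo
/// comes back unchanged as (x, BO_EQ, Foo).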
+static std::tuple<const Expr *, BinaryOperatorKind, const Expr *> +tryNormalizeBinaryOperator(const BinaryOperator *B) { + BinaryOperatorKind Op = B->getOpcode(); + + const Expr *MaybeDecl = B->getLHS(); + const Expr *Constant = tryTransformToIntOrEnumConstant(B->getRHS()); + // Expr looked like `0 == Foo` instead of `Foo == 0` + if (Constant == nullptr) { + // Flip the operator + if (Op == BO_GT) + Op = BO_LT; + else if (Op == BO_GE) + Op = BO_LE; + else if (Op == BO_LT) + Op = BO_GT; + else if (Op == BO_LE) + Op = BO_GE; + + MaybeDecl = B->getRHS(); + Constant = tryTransformToIntOrEnumConstant(B->getLHS()); + } + + return std::make_tuple(MaybeDecl, Op, Constant); +} + +/// For an expression `x == Foo && x == Bar`, this determines whether the +/// `Foo` and `Bar` are either of the same enumeration type, or both integer +/// literals. +/// +/// It's an error to pass this arguments that are not either IntegerLiterals +/// or DeclRefExprs (that have decls of type EnumConstantDecl) +static bool areExprTypesCompatible(const Expr *E1, const Expr *E2) { + // User intent isn't clear if they're mixing int literals with enum + // constants. + if (isa<DeclRefExpr>(E1) != isa<DeclRefExpr>(E2)) + return false; + + // Integer literal comparisons, regardless of literal type, are acceptable. + if (!isa<DeclRefExpr>(E1)) + return true; + + // IntegerLiterals are handled above and only EnumConstantDecls are expected + // beyond this point + assert(isa<DeclRefExpr>(E1) && isa<DeclRefExpr>(E2)); + auto *Decl1 = cast<DeclRefExpr>(E1)->getDecl(); + auto *Decl2 = cast<DeclRefExpr>(E2)->getDecl(); + + assert(isa<EnumConstantDecl>(Decl1) && isa<EnumConstantDecl>(Decl2)); + const DeclContext *DC1 = Decl1->getDeclContext(); + const DeclContext *DC2 = Decl2->getDeclContext(); + + assert(isa<EnumDecl>(DC1) && isa<EnumDecl>(DC2)); + return DC1 == DC2; +} + +namespace { + +class CFGBuilder; + +/// The CFG builder uses a recursive algorithm to build the CFG. When +/// we process an expression, sometimes we know that we must add the +/// subexpressions as block-level expressions. For example: +/// +/// exp1 || exp2 +/// +/// When processing the '||' expression, we know that exp1 and exp2 +/// need to be added as block-level expressions, even though they +/// might not normally need to be. AddStmtChoice records this +/// contextual information. If AddStmtChoice is 'NotAlwaysAdd', then +/// the builder has an option not to add a subexpression as a +/// block-level expression. +class AddStmtChoice { +public: + enum Kind { NotAlwaysAdd = 0, AlwaysAdd = 1 }; + + AddStmtChoice(Kind a_kind = NotAlwaysAdd) : kind(a_kind) {} + + bool alwaysAdd(CFGBuilder &builder, + const Stmt *stmt) const; + + /// Return a copy of this object, except with the 'always-add' bit + /// set as specified. + AddStmtChoice withAlwaysAdd(bool alwaysAdd) const { + return AddStmtChoice(alwaysAdd ? AlwaysAdd : NotAlwaysAdd); + } + +private: + Kind kind; +}; + +/// LocalScope - Node in tree of local scopes created for C++ implicit +/// destructor calls generation. It contains list of automatic variables +/// declared in the scope and link to position in previous scope this scope +/// began in. +/// +/// The process of creating local scopes is as follows: +/// - Init CFGBuilder::ScopePos with invalid position (equivalent for null), +/// - Before processing statements in scope (e.g. 
+///   LocalScope object using CFGBuilder::ScopePos as link to previous scope
+///   and set CFGBuilder::ScopePos to the end of new scope,
+/// - On every occurrence of VarDecl increase CFGBuilder::ScopePos if it
+///   points at this VarDecl,
+/// - For every normal (without jump) end of scope add to CFGBlock destructors
+///   for objects in the current scope,
+/// - For every jump add to CFGBlock destructors for objects
+///   between CFGBuilder::ScopePos and the local scope position saved for the
+///   jump target. Thanks to C++ restrictions on goto jumps we can be sure
+///   that the jump target position will be on the path to root from
+///   CFGBuilder::ScopePos (adding any variable that doesn't need a
+///   constructor to be called to the LocalScope can break this assumption).
+///
+class LocalScope {
+public:
+  using AutomaticVarsTy = BumpVector<VarDecl *>;
+
+  /// const_iterator - Iterates local scope backwards and jumps to previous
+  /// scope on reaching the beginning of currently iterated scope.
+  class const_iterator {
+    const LocalScope* Scope = nullptr;
+
+    /// VarIter is guaranteed to be greater than 0 for every valid iterator.
+    /// Invalid iterator (with null Scope) has VarIter equal to 0.
+    unsigned VarIter = 0;
+
+  public:
+    /// Create invalid iterator. Dereferencing invalid iterator is not allowed.
+    /// Incrementing invalid iterator is allowed and will result in invalid
+    /// iterator.
+    const_iterator() = default;
+
+    /// Create valid iterator. In case when S.Prev is an invalid iterator and
+    /// I is equal to 0, this will create invalid iterator.
+    const_iterator(const LocalScope& S, unsigned I)
+        : Scope(&S), VarIter(I) {
+      // Iterator to "end" of scope is not allowed. Handle it by going up
+      // in scopes tree possibly up to invalid iterator in the root.
+      if (VarIter == 0 && Scope)
+        *this = Scope->Prev;
+    }
+
+    VarDecl *const* operator->() const {
+      assert(Scope && "Dereferencing invalid iterator is not allowed");
+      assert(VarIter != 0 && "Iterator has invalid value of VarIter member");
+      return &Scope->Vars[VarIter - 1];
+    }
+
+    const VarDecl *getFirstVarInScope() const {
+      assert(Scope && "Dereferencing invalid iterator is not allowed");
+      assert(VarIter != 0 && "Iterator has invalid value of VarIter member");
+      return Scope->Vars[0];
+    }
+
+    VarDecl *operator*() const {
+      return *this->operator->();
+    }
+
+    const_iterator &operator++() {
+      if (!Scope)
+        return *this;
+
+      assert(VarIter != 0 && "Iterator has invalid value of VarIter member");
+      --VarIter;
+      if (VarIter == 0)
+        *this = Scope->Prev;
+      return *this;
+    }
+    const_iterator operator++(int) {
+      const_iterator P = *this;
+      ++*this;
+      return P;
+    }
+
+    bool operator==(const const_iterator &rhs) const {
+      return Scope == rhs.Scope && VarIter == rhs.VarIter;
+    }
+    bool operator!=(const const_iterator &rhs) const {
+      return !(*this == rhs);
+    }
+
+    explicit operator bool() const {
+      return *this != const_iterator();
+    }
+
+    int distance(const_iterator L);
+    const_iterator shared_parent(const_iterator L);
+    bool pointsToFirstDeclaredVar() { return VarIter == 1; }
+  };
+
+private:
+  BumpVectorContext ctx;
+
+  /// Automatic variables in order of declaration.
+  AutomaticVarsTy Vars;
+
+  /// Iterator to variable in previous scope that was declared just before
+  /// begin of this scope.
+  const_iterator Prev;
+
+public:
+  /// Constructs an empty scope linked to the previous scope at the specified
+  /// place.
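+  ///
+  /// Rough usage sketch (see createOrReuseLocalScope and
+  /// addLocalScopeForVarDecl later in this file):
+  ///   Scope = new (alloc.Allocate<LocalScope>())
+  ///       LocalScope(BumpVectorContext(alloc), ScopePos);
+  ///   Scope->addVar(VD);
+  ///   ScopePos = Scope->begin();
+  /// after which ScopePos designates the most recently declared variable.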
+ LocalScope(BumpVectorContext ctx, const_iterator P) + : ctx(std::move(ctx)), Vars(this->ctx, 4), Prev(P) {} + + /// Begin of scope in direction of CFG building (backwards). + const_iterator begin() const { return const_iterator(*this, Vars.size()); } + + void addVar(VarDecl *VD) { + Vars.push_back(VD, ctx); + } +}; + +} // namespace + +/// distance - Calculates distance from this to L. L must be reachable from this +/// (with use of ++ operator). Cost of calculating the distance is linear w.r.t. +/// number of scopes between this and L. +int LocalScope::const_iterator::distance(LocalScope::const_iterator L) { + int D = 0; + const_iterator F = *this; + while (F.Scope != L.Scope) { + assert(F != const_iterator() && + "L iterator is not reachable from F iterator."); + D += F.VarIter; + F = F.Scope->Prev; + } + D += F.VarIter - L.VarIter; + return D; +} + +/// Calculates the closest parent of this iterator +/// that is in a scope reachable through the parents of L. +/// I.e. when using 'goto' from this to L, the lifetime of all variables +/// between this and shared_parent(L) end. +LocalScope::const_iterator +LocalScope::const_iterator::shared_parent(LocalScope::const_iterator L) { + llvm::SmallPtrSet<const LocalScope *, 4> ScopesOfL; + while (true) { + ScopesOfL.insert(L.Scope); + if (L == const_iterator()) + break; + L = L.Scope->Prev; + } + + const_iterator F = *this; + while (true) { + if (ScopesOfL.count(F.Scope)) + return F; + assert(F != const_iterator() && + "L iterator is not reachable from F iterator."); + F = F.Scope->Prev; + } +} + +namespace { + +/// Structure for specifying position in CFG during its build process. It +/// consists of CFGBlock that specifies position in CFG and +/// LocalScope::const_iterator that specifies position in LocalScope graph. +struct BlockScopePosPair { + CFGBlock *block = nullptr; + LocalScope::const_iterator scopePosition; + + BlockScopePosPair() = default; + BlockScopePosPair(CFGBlock *b, LocalScope::const_iterator scopePos) + : block(b), scopePosition(scopePos) {} +}; + +/// TryResult - a class representing a variant over the values +/// 'true', 'false', or 'unknown'. This is returned by tryEvaluateBool, +/// and is used by the CFGBuilder to decide if a branch condition +/// can be decided up front during CFG construction. +class TryResult { + int X = -1; + +public: + TryResult() = default; + TryResult(bool b) : X(b ? 1 : 0) {} + + bool isTrue() const { return X == 1; } + bool isFalse() const { return X == 0; } + bool isKnown() const { return X >= 0; } + + void negate() { + assert(isKnown()); + X ^= 0x1; + } +}; + +} // namespace + +static TryResult bothKnownTrue(TryResult R1, TryResult R2) { + if (!R1.isKnown() || !R2.isKnown()) + return TryResult(); + return TryResult(R1.isTrue() && R2.isTrue()); +} + +namespace { + +class reverse_children { + llvm::SmallVector<Stmt *, 12> childrenBuf; + ArrayRef<Stmt *> children; + +public: + reverse_children(Stmt *S); + + using iterator = ArrayRef<Stmt *>::reverse_iterator; + + iterator begin() const { return children.rbegin(); } + iterator end() const { return children.rend(); } +}; + +} // namespace + +reverse_children::reverse_children(Stmt *S) { + if (CallExpr *CE = dyn_cast<CallExpr>(S)) { + children = CE->getRawSubExprs(); + return; + } + switch (S->getStmtClass()) { + // Note: Fill in this switch with more cases we want to optimize. 
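+  // (A case added here should, like the InitListExpr case below, point
+  // `children` at an existing child array instead of copying into
+  // childrenBuf.)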
+ case Stmt::InitListExprClass: { + InitListExpr *IE = cast<InitListExpr>(S); + children = llvm::ArrayRef(reinterpret_cast<Stmt **>(IE->getInits()), + IE->getNumInits()); + return; + } + default: + break; + } + + // Default case for all other statements. + llvm::append_range(childrenBuf, S->children()); + + // This needs to be done *after* childrenBuf has been populated. + children = childrenBuf; +} + +namespace { + +/// CFGBuilder - This class implements CFG construction from an AST. +/// The builder is stateful: an instance of the builder should be used to only +/// construct a single CFG. +/// +/// Example usage: +/// +/// CFGBuilder builder; +/// std::unique_ptr<CFG> cfg = builder.buildCFG(decl, stmt1); +/// +/// CFG construction is done via a recursive walk of an AST. We actually parse +/// the AST in reverse order so that the successor of a basic block is +/// constructed prior to its predecessor. This allows us to nicely capture +/// implicit fall-throughs without extra basic blocks. +class CFGBuilder { + using JumpTarget = BlockScopePosPair; + using JumpSource = BlockScopePosPair; + + ASTContext *Context; + std::unique_ptr<CFG> cfg; + + // Current block. + CFGBlock *Block = nullptr; + + // Block after the current block. + CFGBlock *Succ = nullptr; + + JumpTarget ContinueJumpTarget; + JumpTarget BreakJumpTarget; + JumpTarget SEHLeaveJumpTarget; + CFGBlock *SwitchTerminatedBlock = nullptr; + CFGBlock *DefaultCaseBlock = nullptr; + + // This can point to either a C++ try, an Objective-C @try, or an SEH __try. + // try and @try can be mixed and generally work the same. + // The frontend forbids mixing SEH __try with either try or @try. + // So having one for all three is enough. + CFGBlock *TryTerminatedBlock = nullptr; + + // Current position in local scope. + LocalScope::const_iterator ScopePos; + + // LabelMap records the mapping from Label expressions to their jump targets. + using LabelMapTy = llvm::DenseMap<LabelDecl *, JumpTarget>; + LabelMapTy LabelMap; + + // A list of blocks that end with a "goto" that must be backpatched to their + // resolved targets upon completion of CFG construction. + using BackpatchBlocksTy = std::vector<JumpSource>; + BackpatchBlocksTy BackpatchBlocks; + + // A list of labels whose address has been taken (for indirect gotos). + using LabelSetTy = llvm::SmallSetVector<LabelDecl *, 8>; + LabelSetTy AddressTakenLabels; + + // Information about the currently visited C++ object construction site. + // This is set in the construction trigger and read when the constructor + // or a function that returns an object by value is being visited. + llvm::DenseMap<Expr *, const ConstructionContextLayer *> + ConstructionContextMap; + + using DeclsWithEndedScopeSetTy = llvm::SmallSetVector<VarDecl *, 16>; + DeclsWithEndedScopeSetTy DeclsWithEndedScope; + + bool badCFG = false; + const CFG::BuildOptions &BuildOpts; + + // State to track for building switch statements. + bool switchExclusivelyCovered = false; + Expr::EvalResult *switchCond = nullptr; + + CFG::BuildOptions::ForcedBlkExprs::value_type *cachedEntry = nullptr; + const Stmt *lastLookup = nullptr; + + // Caches boolean evaluations of expressions to avoid multiple re-evaluations + // during construction of branches for chained logical operators. 
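+  // For example, while building branches for `a && b && c`, both the outer
+  // BinaryOperator and its `a && b` subexpression are queried through
+  // tryEvaluateBool below; the cache keeps such overlapping queries cheap.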
+ using CachedBoolEvalsTy = llvm::DenseMap<Expr *, TryResult>; + CachedBoolEvalsTy CachedBoolEvals; + +public: + explicit CFGBuilder(ASTContext *astContext, + const CFG::BuildOptions &buildOpts) + : Context(astContext), cfg(new CFG()), BuildOpts(buildOpts) {} + + // buildCFG - Used by external clients to construct the CFG. + std::unique_ptr<CFG> buildCFG(const Decl *D, Stmt *Statement); + + bool alwaysAdd(const Stmt *stmt); + +private: + // Visitors to walk an AST and construct the CFG. + CFGBlock *VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc); + CFGBlock *VisitAddrLabelExpr(AddrLabelExpr *A, AddStmtChoice asc); + CFGBlock *VisitAttributedStmt(AttributedStmt *A, AddStmtChoice asc); + CFGBlock *VisitBinaryOperator(BinaryOperator *B, AddStmtChoice asc); + CFGBlock *VisitBreakStmt(BreakStmt *B); + CFGBlock *VisitCallExpr(CallExpr *C, AddStmtChoice asc); + CFGBlock *VisitCaseStmt(CaseStmt *C); + CFGBlock *VisitChooseExpr(ChooseExpr *C, AddStmtChoice asc); + CFGBlock *VisitCompoundStmt(CompoundStmt *C, bool ExternallyDestructed); + CFGBlock *VisitConditionalOperator(AbstractConditionalOperator *C, + AddStmtChoice asc); + CFGBlock *VisitContinueStmt(ContinueStmt *C); + CFGBlock *VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E, + AddStmtChoice asc); + CFGBlock *VisitCXXCatchStmt(CXXCatchStmt *S); + CFGBlock *VisitCXXConstructExpr(CXXConstructExpr *C, AddStmtChoice asc); + CFGBlock *VisitCXXNewExpr(CXXNewExpr *DE, AddStmtChoice asc); + CFGBlock *VisitCXXDeleteExpr(CXXDeleteExpr *DE, AddStmtChoice asc); + CFGBlock *VisitCXXForRangeStmt(CXXForRangeStmt *S); + CFGBlock *VisitCXXFunctionalCastExpr(CXXFunctionalCastExpr *E, + AddStmtChoice asc); + CFGBlock *VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *C, + AddStmtChoice asc); + CFGBlock *VisitCXXThrowExpr(CXXThrowExpr *T); + CFGBlock *VisitCXXTryStmt(CXXTryStmt *S); + CFGBlock *VisitCXXTypeidExpr(CXXTypeidExpr *S, AddStmtChoice asc); + CFGBlock *VisitDeclStmt(DeclStmt *DS); + CFGBlock *VisitDeclSubExpr(DeclStmt *DS); + CFGBlock *VisitDefaultStmt(DefaultStmt *D); + CFGBlock *VisitDoStmt(DoStmt *D); + CFGBlock *VisitExprWithCleanups(ExprWithCleanups *E, + AddStmtChoice asc, bool ExternallyDestructed); + CFGBlock *VisitForStmt(ForStmt *F); + CFGBlock *VisitGotoStmt(GotoStmt *G); + CFGBlock *VisitGCCAsmStmt(GCCAsmStmt *G, AddStmtChoice asc); + CFGBlock *VisitIfStmt(IfStmt *I); + CFGBlock *VisitImplicitCastExpr(ImplicitCastExpr *E, AddStmtChoice asc); + CFGBlock *VisitConstantExpr(ConstantExpr *E, AddStmtChoice asc); + CFGBlock *VisitIndirectGotoStmt(IndirectGotoStmt *I); + CFGBlock *VisitLabelStmt(LabelStmt *L); + CFGBlock *VisitBlockExpr(BlockExpr *E, AddStmtChoice asc); + CFGBlock *VisitLambdaExpr(LambdaExpr *E, AddStmtChoice asc); + CFGBlock *VisitLogicalOperator(BinaryOperator *B); + std::pair<CFGBlock *, CFGBlock *> VisitLogicalOperator(BinaryOperator *B, + Stmt *Term, + CFGBlock *TrueBlock, + CFGBlock *FalseBlock); + CFGBlock *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE, + AddStmtChoice asc); + CFGBlock *VisitMemberExpr(MemberExpr *M, AddStmtChoice asc); + CFGBlock *VisitObjCAtCatchStmt(ObjCAtCatchStmt *S); + CFGBlock *VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *S); + CFGBlock *VisitObjCAtThrowStmt(ObjCAtThrowStmt *S); + CFGBlock *VisitObjCAtTryStmt(ObjCAtTryStmt *S); + CFGBlock *VisitObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S); + CFGBlock *VisitObjCForCollectionStmt(ObjCForCollectionStmt *S); + CFGBlock *VisitObjCMessageExpr(ObjCMessageExpr *E, AddStmtChoice asc); + CFGBlock 
*VisitPseudoObjectExpr(PseudoObjectExpr *E); + CFGBlock *VisitReturnStmt(Stmt *S); + CFGBlock *VisitCoroutineSuspendExpr(CoroutineSuspendExpr *S, + AddStmtChoice asc); + CFGBlock *VisitSEHExceptStmt(SEHExceptStmt *S); + CFGBlock *VisitSEHFinallyStmt(SEHFinallyStmt *S); + CFGBlock *VisitSEHLeaveStmt(SEHLeaveStmt *S); + CFGBlock *VisitSEHTryStmt(SEHTryStmt *S); + CFGBlock *VisitStmtExpr(StmtExpr *S, AddStmtChoice asc); + CFGBlock *VisitSwitchStmt(SwitchStmt *S); + CFGBlock *VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E, + AddStmtChoice asc); + CFGBlock *VisitUnaryOperator(UnaryOperator *U, AddStmtChoice asc); + CFGBlock *VisitWhileStmt(WhileStmt *W); + CFGBlock *VisitArrayInitLoopExpr(ArrayInitLoopExpr *A, AddStmtChoice asc); + + CFGBlock *Visit(Stmt *S, AddStmtChoice asc = AddStmtChoice::NotAlwaysAdd, + bool ExternallyDestructed = false); + CFGBlock *VisitStmt(Stmt *S, AddStmtChoice asc); + CFGBlock *VisitChildren(Stmt *S); + CFGBlock *VisitNoRecurse(Expr *E, AddStmtChoice asc); + CFGBlock *VisitOMPExecutableDirective(OMPExecutableDirective *D, + AddStmtChoice asc); + + void maybeAddScopeBeginForVarDecl(CFGBlock *B, const VarDecl *VD, + const Stmt *S) { + if (ScopePos && (VD == ScopePos.getFirstVarInScope())) + appendScopeBegin(B, VD, S); + } + + /// When creating the CFG for temporary destructors, we want to mirror the + /// branch structure of the corresponding constructor calls. + /// Thus, while visiting a statement for temporary destructors, we keep a + /// context to keep track of the following information: + /// - whether a subexpression is executed unconditionally + /// - if a subexpression is executed conditionally, the first + /// CXXBindTemporaryExpr we encounter in that subexpression (which + /// corresponds to the last temporary destructor we have to call for this + /// subexpression) and the CFG block at that point (which will become the + /// successor block when inserting the decision point). + /// + /// That way, we can build the branch structure for temporary destructors as + /// follows: + /// 1. If a subexpression is executed unconditionally, we add the temporary + /// destructor calls to the current block. + /// 2. If a subexpression is executed conditionally, when we encounter a + /// CXXBindTemporaryExpr: + /// a) If it is the first temporary destructor call in the subexpression, + /// we remember the CXXBindTemporaryExpr and the current block in the + /// TempDtorContext; we start a new block, and insert the temporary + /// destructor call. + /// b) Otherwise, add the temporary destructor call to the current block. + /// 3. When we finished visiting a conditionally executed subexpression, + /// and we found at least one temporary constructor during the visitation + /// (2.a has executed), we insert a decision block that uses the + /// CXXBindTemporaryExpr as terminator, and branches to the current block + /// if the CXXBindTemporaryExpr was marked executed, and otherwise + /// branches to the stored successor. + struct TempDtorContext { + TempDtorContext() = default; + TempDtorContext(TryResult KnownExecuted) + : IsConditional(true), KnownExecuted(KnownExecuted) {} + + /// Returns whether we need to start a new branch for a temporary destructor + /// call. This is the case when the temporary destructor is + /// conditionally executed, and it is the first one we encounter while + /// visiting a subexpression - other temporary destructors at the same level + /// will be added to the same block and are executed under the same + /// condition. 
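+    ///
+    /// For instance, in `b ? A() : B()` each arm is visited with a
+    /// conditional context, so the first CXXBindTemporaryExpr of an arm
+    /// starts the branch; later temporaries in the same arm reuse it.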
+ bool needsTempDtorBranch() const { + return IsConditional && !TerminatorExpr; + } + + /// Remember the successor S of a temporary destructor decision branch for + /// the corresponding CXXBindTemporaryExpr E. + void setDecisionPoint(CFGBlock *S, CXXBindTemporaryExpr *E) { + Succ = S; + TerminatorExpr = E; + } + + const bool IsConditional = false; + const TryResult KnownExecuted = true; + CFGBlock *Succ = nullptr; + CXXBindTemporaryExpr *TerminatorExpr = nullptr; + }; + + // Visitors to walk an AST and generate destructors of temporaries in + // full expression. + CFGBlock *VisitForTemporaryDtors(Stmt *E, bool ExternallyDestructed, + TempDtorContext &Context); + CFGBlock *VisitChildrenForTemporaryDtors(Stmt *E, bool ExternallyDestructed, + TempDtorContext &Context); + CFGBlock *VisitBinaryOperatorForTemporaryDtors(BinaryOperator *E, + bool ExternallyDestructed, + TempDtorContext &Context); + CFGBlock *VisitCXXBindTemporaryExprForTemporaryDtors( + CXXBindTemporaryExpr *E, bool ExternallyDestructed, TempDtorContext &Context); + CFGBlock *VisitConditionalOperatorForTemporaryDtors( + AbstractConditionalOperator *E, bool ExternallyDestructed, + TempDtorContext &Context); + void InsertTempDtorDecisionBlock(const TempDtorContext &Context, + CFGBlock *FalseSucc = nullptr); + + // NYS == Not Yet Supported + CFGBlock *NYS() { + badCFG = true; + return Block; + } + + // Remember to apply the construction context based on the current \p Layer + // when constructing the CFG element for \p CE. + void consumeConstructionContext(const ConstructionContextLayer *Layer, + Expr *E); + + // Scan \p Child statement to find constructors in it, while keeping in mind + // that its parent statement is providing a partial construction context + // described by \p Layer. If a constructor is found, it would be assigned + // the context based on the layer. If an additional construction context layer + // is found, the function recurses into that. + void findConstructionContexts(const ConstructionContextLayer *Layer, + Stmt *Child); + + // Scan all arguments of a call expression for a construction context. + // These sorts of call expressions don't have a common superclass, + // hence strict duck-typing. + template <typename CallLikeExpr, + typename = std::enable_if_t< + std::is_base_of_v<CallExpr, CallLikeExpr> || + std::is_base_of_v<CXXConstructExpr, CallLikeExpr> || + std::is_base_of_v<ObjCMessageExpr, CallLikeExpr>>> + void findConstructionContextsForArguments(CallLikeExpr *E) { + for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { + Expr *Arg = E->getArg(i); + if (Arg->getType()->getAsCXXRecordDecl() && !Arg->isGLValue()) + findConstructionContexts( + ConstructionContextLayer::create(cfg->getBumpVectorContext(), + ConstructionContextItem(E, i)), + Arg); + } + } + + // Unset the construction context after consuming it. This is done immediately + // after adding the CFGConstructor or CFGCXXRecordTypedCall element, so + // there's no need to do this manually in every Visit... function. 
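+  // For example, appendConstructor() and appendCall() below consume the
+  // recorded context through retrieveAndCleanupConstructionContext() and
+  // fall back to a plain statement element when none was recorded.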
+ void cleanupConstructionContext(Expr *E); + + void autoCreateBlock() { if (!Block) Block = createBlock(); } + CFGBlock *createBlock(bool add_successor = true); + CFGBlock *createNoReturnBlock(); + + CFGBlock *addStmt(Stmt *S) { + return Visit(S, AddStmtChoice::AlwaysAdd); + } + + CFGBlock *addInitializer(CXXCtorInitializer *I); + void addLoopExit(const Stmt *LoopStmt); + void addAutomaticObjDtors(LocalScope::const_iterator B, + LocalScope::const_iterator E, Stmt *S); + void addLifetimeEnds(LocalScope::const_iterator B, + LocalScope::const_iterator E, Stmt *S); + void addAutomaticObjHandling(LocalScope::const_iterator B, + LocalScope::const_iterator E, Stmt *S); + void addImplicitDtorsForDestructor(const CXXDestructorDecl *DD); + void addScopesEnd(LocalScope::const_iterator B, LocalScope::const_iterator E, + Stmt *S); + + void getDeclsWithEndedScope(LocalScope::const_iterator B, + LocalScope::const_iterator E, Stmt *S); + + // Local scopes creation. + LocalScope* createOrReuseLocalScope(LocalScope* Scope); + + void addLocalScopeForStmt(Stmt *S); + LocalScope* addLocalScopeForDeclStmt(DeclStmt *DS, + LocalScope* Scope = nullptr); + LocalScope* addLocalScopeForVarDecl(VarDecl *VD, LocalScope* Scope = nullptr); + + void addLocalScopeAndDtors(Stmt *S); + + const ConstructionContext *retrieveAndCleanupConstructionContext(Expr *E) { + if (!BuildOpts.AddRichCXXConstructors) + return nullptr; + + const ConstructionContextLayer *Layer = ConstructionContextMap.lookup(E); + if (!Layer) + return nullptr; + + cleanupConstructionContext(E); + return ConstructionContext::createFromLayers(cfg->getBumpVectorContext(), + Layer); + } + + // Interface to CFGBlock - adding CFGElements. + + void appendStmt(CFGBlock *B, const Stmt *S) { + if (alwaysAdd(S) && cachedEntry) + cachedEntry->second = B; + + // All block-level expressions should have already been IgnoreParens()ed. + assert(!isa<Expr>(S) || cast<Expr>(S)->IgnoreParens() == S); + B->appendStmt(const_cast<Stmt*>(S), cfg->getBumpVectorContext()); + } + + void appendConstructor(CFGBlock *B, CXXConstructExpr *CE) { + if (const ConstructionContext *CC = + retrieveAndCleanupConstructionContext(CE)) { + B->appendConstructor(CE, CC, cfg->getBumpVectorContext()); + return; + } + + // No valid construction context found. Fall back to statement. + B->appendStmt(CE, cfg->getBumpVectorContext()); + } + + void appendCall(CFGBlock *B, CallExpr *CE) { + if (alwaysAdd(CE) && cachedEntry) + cachedEntry->second = B; + + if (const ConstructionContext *CC = + retrieveAndCleanupConstructionContext(CE)) { + B->appendCXXRecordTypedCall(CE, CC, cfg->getBumpVectorContext()); + return; + } + + // No valid construction context found. Fall back to statement. 
+ B->appendStmt(CE, cfg->getBumpVectorContext()); + } + + void appendInitializer(CFGBlock *B, CXXCtorInitializer *I) { + B->appendInitializer(I, cfg->getBumpVectorContext()); + } + + void appendNewAllocator(CFGBlock *B, CXXNewExpr *NE) { + B->appendNewAllocator(NE, cfg->getBumpVectorContext()); + } + + void appendBaseDtor(CFGBlock *B, const CXXBaseSpecifier *BS) { + B->appendBaseDtor(BS, cfg->getBumpVectorContext()); + } + + void appendMemberDtor(CFGBlock *B, FieldDecl *FD) { + B->appendMemberDtor(FD, cfg->getBumpVectorContext()); + } + + void appendObjCMessage(CFGBlock *B, ObjCMessageExpr *ME) { + if (alwaysAdd(ME) && cachedEntry) + cachedEntry->second = B; + + if (const ConstructionContext *CC = + retrieveAndCleanupConstructionContext(ME)) { + B->appendCXXRecordTypedCall(ME, CC, cfg->getBumpVectorContext()); + return; + } + + B->appendStmt(const_cast<ObjCMessageExpr *>(ME), + cfg->getBumpVectorContext()); + } + + void appendTemporaryDtor(CFGBlock *B, CXXBindTemporaryExpr *E) { + B->appendTemporaryDtor(E, cfg->getBumpVectorContext()); + } + + void appendAutomaticObjDtor(CFGBlock *B, VarDecl *VD, Stmt *S) { + B->appendAutomaticObjDtor(VD, S, cfg->getBumpVectorContext()); + } + + void appendLifetimeEnds(CFGBlock *B, VarDecl *VD, Stmt *S) { + B->appendLifetimeEnds(VD, S, cfg->getBumpVectorContext()); + } + + void appendLoopExit(CFGBlock *B, const Stmt *LoopStmt) { + B->appendLoopExit(LoopStmt, cfg->getBumpVectorContext()); + } + + void appendDeleteDtor(CFGBlock *B, CXXRecordDecl *RD, CXXDeleteExpr *DE) { + B->appendDeleteDtor(RD, DE, cfg->getBumpVectorContext()); + } + + void prependAutomaticObjDtorsWithTerminator(CFGBlock *Blk, + LocalScope::const_iterator B, LocalScope::const_iterator E); + + void prependAutomaticObjLifetimeWithTerminator(CFGBlock *Blk, + LocalScope::const_iterator B, + LocalScope::const_iterator E); + + const VarDecl * + prependAutomaticObjScopeEndWithTerminator(CFGBlock *Blk, + LocalScope::const_iterator B, + LocalScope::const_iterator E); + + void addSuccessor(CFGBlock *B, CFGBlock *S, bool IsReachable = true) { + B->addSuccessor(CFGBlock::AdjacentBlock(S, IsReachable), + cfg->getBumpVectorContext()); + } + + /// Add a reachable successor to a block, with the alternate variant that is + /// unreachable. + void addSuccessor(CFGBlock *B, CFGBlock *ReachableBlock, CFGBlock *AltBlock) { + B->addSuccessor(CFGBlock::AdjacentBlock(ReachableBlock, AltBlock), + cfg->getBumpVectorContext()); + } + + void appendScopeBegin(CFGBlock *B, const VarDecl *VD, const Stmt *S) { + if (BuildOpts.AddScopes) + B->appendScopeBegin(VD, S, cfg->getBumpVectorContext()); + } + + void prependScopeBegin(CFGBlock *B, const VarDecl *VD, const Stmt *S) { + if (BuildOpts.AddScopes) + B->prependScopeBegin(VD, S, cfg->getBumpVectorContext()); + } + + void appendScopeEnd(CFGBlock *B, const VarDecl *VD, const Stmt *S) { + if (BuildOpts.AddScopes) + B->appendScopeEnd(VD, S, cfg->getBumpVectorContext()); + } + + void prependScopeEnd(CFGBlock *B, const VarDecl *VD, const Stmt *S) { + if (BuildOpts.AddScopes) + B->prependScopeEnd(VD, S, cfg->getBumpVectorContext()); + } + + /// Find a relational comparison with an expression evaluating to a + /// boolean and a constant other than 0 and 1. + /// e.g. 
if ((x < y) == 10)
+  TryResult checkIncorrectRelationalOperator(const BinaryOperator *B) {
+    const Expr *LHSExpr = B->getLHS()->IgnoreParens();
+    const Expr *RHSExpr = B->getRHS()->IgnoreParens();
+
+    const IntegerLiteral *IntLiteral = dyn_cast<IntegerLiteral>(LHSExpr);
+    const Expr *BoolExpr = RHSExpr;
+    bool IntFirst = true;
+    if (!IntLiteral) {
+      IntLiteral = dyn_cast<IntegerLiteral>(RHSExpr);
+      BoolExpr = LHSExpr;
+      IntFirst = false;
+    }
+
+    if (!IntLiteral || !BoolExpr->isKnownToHaveBooleanValue())
+      return TryResult();
+
+    llvm::APInt IntValue = IntLiteral->getValue();
+    if ((IntValue == 1) || (IntValue == 0))
+      return TryResult();
+
+    bool IntLarger = IntLiteral->getType()->isUnsignedIntegerType() ||
+                     !IntValue.isNegative();
+
+    BinaryOperatorKind Bok = B->getOpcode();
+    if (Bok == BO_GT || Bok == BO_GE) {
+      // Always true for 10 > bool and bool > -1
+      // Always false for -1 > bool and bool > 10
+      return TryResult(IntFirst == IntLarger);
+    } else {
+      // Always true for -1 < bool and bool < 10
+      // Always false for 10 < bool and bool < -1
+      return TryResult(IntFirst != IntLarger);
+    }
+  }
+
+  /// Find an incorrect equality comparison: either an expression that
+  /// evaluates to a boolean compared against a constant other than 0 and 1,
+  /// e.g. if (!x == 10), or a bitwise and/or operation that always evaluates
+  /// to true/false, e.g. (x & 8) == 4.
+  TryResult checkIncorrectEqualityOperator(const BinaryOperator *B) {
+    const Expr *LHSExpr = B->getLHS()->IgnoreParens();
+    const Expr *RHSExpr = B->getRHS()->IgnoreParens();
+
+    std::optional<llvm::APInt> IntLiteral1 =
+        getIntegerLiteralSubexpressionValue(LHSExpr);
+    const Expr *BoolExpr = RHSExpr;
+
+    if (!IntLiteral1) {
+      IntLiteral1 = getIntegerLiteralSubexpressionValue(RHSExpr);
+      BoolExpr = LHSExpr;
+    }
+
+    if (!IntLiteral1)
+      return TryResult();
+
+    const BinaryOperator *BitOp = dyn_cast<BinaryOperator>(BoolExpr);
+    if (BitOp && (BitOp->getOpcode() == BO_And ||
+                  BitOp->getOpcode() == BO_Or)) {
+      const Expr *LHSExpr2 = BitOp->getLHS()->IgnoreParens();
+      const Expr *RHSExpr2 = BitOp->getRHS()->IgnoreParens();
+
+      std::optional<llvm::APInt> IntLiteral2 =
+          getIntegerLiteralSubexpressionValue(LHSExpr2);
+
+      if (!IntLiteral2)
+        IntLiteral2 = getIntegerLiteralSubexpressionValue(RHSExpr2);
+
+      if (!IntLiteral2)
+        return TryResult();
+
+      if ((BitOp->getOpcode() == BO_And &&
+           (*IntLiteral2 & *IntLiteral1) != *IntLiteral1) ||
+          (BitOp->getOpcode() == BO_Or &&
+           (*IntLiteral2 | *IntLiteral1) != *IntLiteral1)) {
+        if (BuildOpts.Observer)
+          BuildOpts.Observer->compareBitwiseEquality(B,
+                                                     B->getOpcode() != BO_EQ);
+        return TryResult(B->getOpcode() != BO_EQ);
+      }
+    } else if (BoolExpr->isKnownToHaveBooleanValue()) {
+      if ((*IntLiteral1 == 1) || (*IntLiteral1 == 0)) {
+        return TryResult();
+      }
+      return TryResult(B->getOpcode() != BO_EQ);
+    }
+
+    return TryResult();
+  }
+
+  // Helper function to get an APInt from an expression. Supports expressions
+  // which are an IntegerLiteral or a UnaryOperator and returns the value with
+  // all operations performed on it.
+  // FIXME: it would be good to unify this function with
+  // IsIntegerLiteralConstantExpr at some point given the similarity between
+  // the functions.
+  std::optional<llvm::APInt>
+  getIntegerLiteralSubexpressionValue(const Expr *E) {
+    // If unary.
+    if (const auto *UnOp = dyn_cast<UnaryOperator>(E->IgnoreParens())) {
+      // Get the subexpression of the unary operator and the IntegerLiteral
+      // inside it.
+ const Expr *SubExpr = UnOp->getSubExpr()->IgnoreParens(); + + if (const auto *IntLiteral = dyn_cast<IntegerLiteral>(SubExpr)) { + + llvm::APInt Value = IntLiteral->getValue(); + + // Perform the operation manually. + switch (UnOp->getOpcode()) { + case UO_Plus: + return Value; + case UO_Minus: + return -Value; + case UO_Not: + return ~Value; + case UO_LNot: + return llvm::APInt(Context->getTypeSize(Context->IntTy), !Value); + default: + assert(false && "Unexpected unary operator!"); + return std::nullopt; + } + } + } else if (const auto *IntLiteral = + dyn_cast<IntegerLiteral>(E->IgnoreParens())) + return IntLiteral->getValue(); + + return std::nullopt; + } + + TryResult analyzeLogicOperatorCondition(BinaryOperatorKind Relation, + const llvm::APSInt &Value1, + const llvm::APSInt &Value2) { + assert(Value1.isSigned() == Value2.isSigned()); + switch (Relation) { + default: + return TryResult(); + case BO_EQ: + return TryResult(Value1 == Value2); + case BO_NE: + return TryResult(Value1 != Value2); + case BO_LT: + return TryResult(Value1 < Value2); + case BO_LE: + return TryResult(Value1 <= Value2); + case BO_GT: + return TryResult(Value1 > Value2); + case BO_GE: + return TryResult(Value1 >= Value2); + } + } + + /// Find a pair of comparison expressions with or without parentheses + /// with a shared variable and constants and a logical operator between them + /// that always evaluates to either true or false. + /// e.g. if (x != 3 || x != 4) + TryResult checkIncorrectLogicOperator(const BinaryOperator *B) { + assert(B->isLogicalOp()); + const BinaryOperator *LHS = + dyn_cast<BinaryOperator>(B->getLHS()->IgnoreParens()); + const BinaryOperator *RHS = + dyn_cast<BinaryOperator>(B->getRHS()->IgnoreParens()); + if (!LHS || !RHS) + return {}; + + if (!LHS->isComparisonOp() || !RHS->isComparisonOp()) + return {}; + + const Expr *DeclExpr1; + const Expr *NumExpr1; + BinaryOperatorKind BO1; + std::tie(DeclExpr1, BO1, NumExpr1) = tryNormalizeBinaryOperator(LHS); + + if (!DeclExpr1 || !NumExpr1) + return {}; + + const Expr *DeclExpr2; + const Expr *NumExpr2; + BinaryOperatorKind BO2; + std::tie(DeclExpr2, BO2, NumExpr2) = tryNormalizeBinaryOperator(RHS); + + if (!DeclExpr2 || !NumExpr2) + return {}; + + // Check that it is the same variable on both sides. + if (!Expr::isSameComparisonOperand(DeclExpr1, DeclExpr2)) + return {}; + + // Make sure the user's intent is clear (e.g. they're comparing against two + // int literals, or two things from the same enum) + if (!areExprTypesCompatible(NumExpr1, NumExpr2)) + return {}; + + Expr::EvalResult L1Result, L2Result; + if (!NumExpr1->EvaluateAsInt(L1Result, *Context) || + !NumExpr2->EvaluateAsInt(L2Result, *Context)) + return {}; + + llvm::APSInt L1 = L1Result.Val.getInt(); + llvm::APSInt L2 = L2Result.Val.getInt(); + + // Can't compare signed with unsigned or with different bit width. + if (L1.isSigned() != L2.isSigned() || L1.getBitWidth() != L2.getBitWidth()) + return {}; + + // Values that will be used to determine if result of logical + // operator is always true/false + const llvm::APSInt Values[] = { + // Value less than both Value1 and Value2 + llvm::APSInt::getMinValue(L1.getBitWidth(), L1.isUnsigned()), + // L1 + L1, + // Value between Value1 and Value2 + ((L1 < L2) ? 
L1 : L2) + llvm::APSInt(llvm::APInt(L1.getBitWidth(), 1), + L1.isUnsigned()), + // L2 + L2, + // Value greater than both Value1 and Value2 + llvm::APSInt::getMaxValue(L1.getBitWidth(), L1.isUnsigned()), + }; + + // Check whether expression is always true/false by evaluating the following + // * variable x is less than the smallest literal. + // * variable x is equal to the smallest literal. + // * Variable x is between smallest and largest literal. + // * Variable x is equal to the largest literal. + // * Variable x is greater than largest literal. + bool AlwaysTrue = true, AlwaysFalse = true; + // Track value of both subexpressions. If either side is always + // true/false, another warning should have already been emitted. + bool LHSAlwaysTrue = true, LHSAlwaysFalse = true; + bool RHSAlwaysTrue = true, RHSAlwaysFalse = true; + for (const llvm::APSInt &Value : Values) { + TryResult Res1, Res2; + Res1 = analyzeLogicOperatorCondition(BO1, Value, L1); + Res2 = analyzeLogicOperatorCondition(BO2, Value, L2); + + if (!Res1.isKnown() || !Res2.isKnown()) + return {}; + + if (B->getOpcode() == BO_LAnd) { + AlwaysTrue &= (Res1.isTrue() && Res2.isTrue()); + AlwaysFalse &= !(Res1.isTrue() && Res2.isTrue()); + } else { + AlwaysTrue &= (Res1.isTrue() || Res2.isTrue()); + AlwaysFalse &= !(Res1.isTrue() || Res2.isTrue()); + } + + LHSAlwaysTrue &= Res1.isTrue(); + LHSAlwaysFalse &= Res1.isFalse(); + RHSAlwaysTrue &= Res2.isTrue(); + RHSAlwaysFalse &= Res2.isFalse(); + } + + if (AlwaysTrue || AlwaysFalse) { + if (!LHSAlwaysTrue && !LHSAlwaysFalse && !RHSAlwaysTrue && + !RHSAlwaysFalse && BuildOpts.Observer) + BuildOpts.Observer->compareAlwaysTrue(B, AlwaysTrue); + return TryResult(AlwaysTrue); + } + return {}; + } + + /// A bitwise-or with a non-zero constant always evaluates to true. + TryResult checkIncorrectBitwiseOrOperator(const BinaryOperator *B) { + const Expr *LHSConstant = + tryTransformToIntOrEnumConstant(B->getLHS()->IgnoreParenImpCasts()); + const Expr *RHSConstant = + tryTransformToIntOrEnumConstant(B->getRHS()->IgnoreParenImpCasts()); + + if ((LHSConstant && RHSConstant) || (!LHSConstant && !RHSConstant)) + return {}; + + const Expr *Constant = LHSConstant ? LHSConstant : RHSConstant; + + Expr::EvalResult Result; + if (!Constant->EvaluateAsInt(Result, *Context)) + return {}; + + if (Result.Val.getInt() == 0) + return {}; + + if (BuildOpts.Observer) + BuildOpts.Observer->compareBitwiseOr(B); + + return TryResult(true); + } + + /// Try and evaluate an expression to an integer constant. + bool tryEvaluate(Expr *S, Expr::EvalResult &outResult) { + if (!BuildOpts.PruneTriviallyFalseEdges) + return false; + return !S->isTypeDependent() && + !S->isValueDependent() && + S->EvaluateAsRValue(outResult, *Context); + } + + /// tryEvaluateBool - Try and evaluate the Stmt and return 0 or 1 + /// if we can evaluate to a known value, otherwise return -1. + TryResult tryEvaluateBool(Expr *S) { + if (!BuildOpts.PruneTriviallyFalseEdges || + S->isTypeDependent() || S->isValueDependent()) + return {}; + + if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(S)) { + if (Bop->isLogicalOp() || Bop->isEqualityOp()) { + // Check the cache first. + CachedBoolEvalsTy::iterator I = CachedBoolEvals.find(S); + if (I != CachedBoolEvals.end()) + return I->second; // already in map; + + // Retrieve result at first, or the map might be updated. 
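+        // (Storing through `CachedBoolEvals[S]` before the recursive
+        // evaluation would be unsafe: the recursion may insert into the map,
+        // and rehashing could invalidate the reference.)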
+ TryResult Result = evaluateAsBooleanConditionNoCache(S); + CachedBoolEvals[S] = Result; // update or insert + return Result; + } + else { + switch (Bop->getOpcode()) { + default: break; + // For 'x & 0' and 'x * 0', we can determine that + // the value is always false. + case BO_Mul: + case BO_And: { + // If either operand is zero, we know the value + // must be false. + Expr::EvalResult LHSResult; + if (Bop->getLHS()->EvaluateAsInt(LHSResult, *Context)) { + llvm::APSInt IntVal = LHSResult.Val.getInt(); + if (!IntVal.getBoolValue()) { + return TryResult(false); + } + } + Expr::EvalResult RHSResult; + if (Bop->getRHS()->EvaluateAsInt(RHSResult, *Context)) { + llvm::APSInt IntVal = RHSResult.Val.getInt(); + if (!IntVal.getBoolValue()) { + return TryResult(false); + } + } + } + break; + } + } + } + + return evaluateAsBooleanConditionNoCache(S); + } + + /// Evaluate as boolean \param E without using the cache. + TryResult evaluateAsBooleanConditionNoCache(Expr *E) { + if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(E)) { + if (Bop->isLogicalOp()) { + TryResult LHS = tryEvaluateBool(Bop->getLHS()); + if (LHS.isKnown()) { + // We were able to evaluate the LHS, see if we can get away with not + // evaluating the RHS: 0 && X -> 0, 1 || X -> 1 + if (LHS.isTrue() == (Bop->getOpcode() == BO_LOr)) + return LHS.isTrue(); + + TryResult RHS = tryEvaluateBool(Bop->getRHS()); + if (RHS.isKnown()) { + if (Bop->getOpcode() == BO_LOr) + return LHS.isTrue() || RHS.isTrue(); + else + return LHS.isTrue() && RHS.isTrue(); + } + } else { + TryResult RHS = tryEvaluateBool(Bop->getRHS()); + if (RHS.isKnown()) { + // We can't evaluate the LHS; however, sometimes the result + // is determined by the RHS: X && 0 -> 0, X || 1 -> 1. + if (RHS.isTrue() == (Bop->getOpcode() == BO_LOr)) + return RHS.isTrue(); + } else { + TryResult BopRes = checkIncorrectLogicOperator(Bop); + if (BopRes.isKnown()) + return BopRes.isTrue(); + } + } + + return {}; + } else if (Bop->isEqualityOp()) { + TryResult BopRes = checkIncorrectEqualityOperator(Bop); + if (BopRes.isKnown()) + return BopRes.isTrue(); + } else if (Bop->isRelationalOp()) { + TryResult BopRes = checkIncorrectRelationalOperator(Bop); + if (BopRes.isKnown()) + return BopRes.isTrue(); + } else if (Bop->getOpcode() == BO_Or) { + TryResult BopRes = checkIncorrectBitwiseOrOperator(Bop); + if (BopRes.isKnown()) + return BopRes.isTrue(); + } + } + + bool Result; + if (E->EvaluateAsBooleanCondition(Result, *Context)) + return Result; + + return {}; + } + + bool hasTrivialDestructor(VarDecl *VD); +}; + +} // namespace + +Expr * +clang::extractElementInitializerFromNestedAILE(const ArrayInitLoopExpr *AILE) { + if (!AILE) + return nullptr; + + Expr *AILEInit = AILE->getSubExpr(); + while (const auto *E = dyn_cast<ArrayInitLoopExpr>(AILEInit)) + AILEInit = E->getSubExpr(); + + return AILEInit; +} + +inline bool AddStmtChoice::alwaysAdd(CFGBuilder &builder, + const Stmt *stmt) const { + return builder.alwaysAdd(stmt) || kind == AlwaysAdd; +} + +bool CFGBuilder::alwaysAdd(const Stmt *stmt) { + bool shouldAdd = BuildOpts.alwaysAdd(stmt); + + if (!BuildOpts.forcedBlkExprs) + return shouldAdd; + + if (lastLookup == stmt) { + if (cachedEntry) { + assert(cachedEntry->first == stmt); + return true; + } + return shouldAdd; + } + + lastLookup = stmt; + + // Perform the lookup! + CFG::BuildOptions::ForcedBlkExprs *fb = *BuildOpts.forcedBlkExprs; + + if (!fb) { + // No need to update 'cachedEntry', since it will always be null. 
+ assert(!cachedEntry); + return shouldAdd; + } + + CFG::BuildOptions::ForcedBlkExprs::iterator itr = fb->find(stmt); + if (itr == fb->end()) { + cachedEntry = nullptr; + return shouldAdd; + } + + cachedEntry = &*itr; + return true; +} + +// FIXME: Add support for dependent-sized array types in C++? +// Does it even make sense to build a CFG for an uninstantiated template? +static const VariableArrayType *FindVA(const Type *t) { + while (const ArrayType *vt = dyn_cast<ArrayType>(t)) { + if (const VariableArrayType *vat = dyn_cast<VariableArrayType>(vt)) + if (vat->getSizeExpr()) + return vat; + + t = vt->getElementType().getTypePtr(); + } + + return nullptr; +} + +void CFGBuilder::consumeConstructionContext( + const ConstructionContextLayer *Layer, Expr *E) { + assert((isa<CXXConstructExpr>(E) || isa<CallExpr>(E) || + isa<ObjCMessageExpr>(E)) && "Expression cannot construct an object!"); + if (const ConstructionContextLayer *PreviouslyStoredLayer = + ConstructionContextMap.lookup(E)) { + (void)PreviouslyStoredLayer; + // We might have visited this child when we were finding construction + // contexts within its parents. + assert(PreviouslyStoredLayer->isStrictlyMoreSpecificThan(Layer) && + "Already within a different construction context!"); + } else { + ConstructionContextMap[E] = Layer; + } +} + +void CFGBuilder::findConstructionContexts( + const ConstructionContextLayer *Layer, Stmt *Child) { + if (!BuildOpts.AddRichCXXConstructors) + return; + + if (!Child) + return; + + auto withExtraLayer = [this, Layer](const ConstructionContextItem &Item) { + return ConstructionContextLayer::create(cfg->getBumpVectorContext(), Item, + Layer); + }; + + switch(Child->getStmtClass()) { + case Stmt::CXXConstructExprClass: + case Stmt::CXXTemporaryObjectExprClass: { + // Support pre-C++17 copy elision AST. + auto *CE = cast<CXXConstructExpr>(Child); + if (BuildOpts.MarkElidedCXXConstructors && CE->isElidable()) { + findConstructionContexts(withExtraLayer(CE), CE->getArg(0)); + } + + consumeConstructionContext(Layer, CE); + break; + } + // FIXME: This, like the main visit, doesn't support CUDAKernelCallExpr. + // FIXME: An isa<> would look much better but this whole switch is a + // workaround for an internal compiler error in MSVC 2015 (see r326021). + case Stmt::CallExprClass: + case Stmt::CXXMemberCallExprClass: + case Stmt::CXXOperatorCallExprClass: + case Stmt::UserDefinedLiteralClass: + case Stmt::ObjCMessageExprClass: { + auto *E = cast<Expr>(Child); + if (CFGCXXRecordTypedCall::isCXXRecordTypedCall(E)) + consumeConstructionContext(Layer, E); + break; + } + case Stmt::ExprWithCleanupsClass: { + auto *Cleanups = cast<ExprWithCleanups>(Child); + findConstructionContexts(Layer, Cleanups->getSubExpr()); + break; + } + case Stmt::CXXFunctionalCastExprClass: { + auto *Cast = cast<CXXFunctionalCastExpr>(Child); + findConstructionContexts(Layer, Cast->getSubExpr()); + break; + } + case Stmt::ImplicitCastExprClass: { + auto *Cast = cast<ImplicitCastExpr>(Child); + // Should we support other implicit cast kinds? 
+ switch (Cast->getCastKind()) { + case CK_NoOp: + case CK_ConstructorConversion: + findConstructionContexts(Layer, Cast->getSubExpr()); + break; + default: + break; + } + break; + } + case Stmt::CXXBindTemporaryExprClass: { + auto *BTE = cast<CXXBindTemporaryExpr>(Child); + findConstructionContexts(withExtraLayer(BTE), BTE->getSubExpr()); + break; + } + case Stmt::MaterializeTemporaryExprClass: { + // Normally we don't want to search in MaterializeTemporaryExpr because + // it indicates the beginning of a temporary object construction context, + // so it shouldn't be found in the middle. However, if it is the beginning + // of an elidable copy or move construction context, we need to include it. + if (Layer->getItem().getKind() == + ConstructionContextItem::ElidableConstructorKind) { + auto *MTE = cast<MaterializeTemporaryExpr>(Child); + findConstructionContexts(withExtraLayer(MTE), MTE->getSubExpr()); + } + break; + } + case Stmt::ConditionalOperatorClass: { + auto *CO = cast<ConditionalOperator>(Child); + if (Layer->getItem().getKind() != + ConstructionContextItem::MaterializationKind) { + // If the object returned by the conditional operator is not going to be a + // temporary object that needs to be immediately materialized, then + // it must be C++17 with its mandatory copy elision. Do not yet promise + // to support this case. + assert(!CO->getType()->getAsCXXRecordDecl() || CO->isGLValue() || + Context->getLangOpts().CPlusPlus17); + break; + } + findConstructionContexts(Layer, CO->getLHS()); + findConstructionContexts(Layer, CO->getRHS()); + break; + } + case Stmt::InitListExprClass: { + auto *ILE = cast<InitListExpr>(Child); + if (ILE->isTransparent()) { + findConstructionContexts(Layer, ILE->getInit(0)); + break; + } + // TODO: Handle other cases. For now, fail to find construction contexts. + break; + } + case Stmt::ParenExprClass: { + // If expression is placed into parenthesis we should propagate the parent + // construction context to subexpressions. + auto *PE = cast<ParenExpr>(Child); + findConstructionContexts(Layer, PE->getSubExpr()); + break; + } + default: + break; + } +} + +void CFGBuilder::cleanupConstructionContext(Expr *E) { + assert(BuildOpts.AddRichCXXConstructors && + "We should not be managing construction contexts!"); + assert(ConstructionContextMap.count(E) && + "Cannot exit construction context without the context!"); + ConstructionContextMap.erase(E); +} + + +/// BuildCFG - Constructs a CFG from an AST (a Stmt*). The AST can represent an +/// arbitrary statement. Examples include a single expression or a function +/// body (compound statement). The ownership of the returned CFG is +/// transferred to the caller. If CFG construction fails, this method returns +/// NULL. +std::unique_ptr<CFG> CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) { + assert(cfg.get()); + if (!Statement) + return nullptr; + + // Create an empty block that will serve as the exit block for the CFG. Since + // this is the first block added to the CFG, it will be implicitly registered + // as the exit block. + Succ = createBlock(); + assert(Succ == &cfg->getExit()); + Block = nullptr; // the EXIT block is empty. Create all other blocks lazily. + + assert(!(BuildOpts.AddImplicitDtors && BuildOpts.AddLifetime) && + "AddImplicitDtors and AddLifetime cannot be used at the same time"); + + if (BuildOpts.AddImplicitDtors) + if (const CXXDestructorDecl *DD = dyn_cast_or_null<CXXDestructorDecl>(D)) + addImplicitDtorsForDestructor(DD); + + // Visit the statements and create the CFG. 
+ CFGBlock *B = addStmt(Statement); + + if (badCFG) + return nullptr; + + // For C++ constructor add initializers to CFG. Constructors of virtual bases + // are ignored unless the object is of the most derived class. + // class VBase { VBase() = default; VBase(int) {} }; + // class A : virtual public VBase { A() : VBase(0) {} }; + // class B : public A {}; + // B b; // Constructor calls in order: VBase(), A(), B(). + // // VBase(0) is ignored because A isn't the most derived class. + // This may result in the virtual base(s) being already initialized at this + // point, in which case we should jump right onto non-virtual bases and + // fields. To handle this, make a CFG branch. We only need to add one such + // branch per constructor, since the Standard states that all virtual bases + // shall be initialized before non-virtual bases and direct data members. + if (const auto *CD = dyn_cast_or_null<CXXConstructorDecl>(D)) { + CFGBlock *VBaseSucc = nullptr; + for (auto *I : llvm::reverse(CD->inits())) { + if (BuildOpts.AddVirtualBaseBranches && !VBaseSucc && + I->isBaseInitializer() && I->isBaseVirtual()) { + // We've reached the first virtual base init while iterating in reverse + // order. Make a new block for virtual base initializers so that we + // could skip them. + VBaseSucc = Succ = B ? B : &cfg->getExit(); + Block = createBlock(); + } + B = addInitializer(I); + if (badCFG) + return nullptr; + } + if (VBaseSucc) { + // Make a branch block for potentially skipping virtual base initializers. + Succ = VBaseSucc; + B = createBlock(); + B->setTerminator( + CFGTerminator(nullptr, CFGTerminator::VirtualBaseBranch)); + addSuccessor(B, Block, true); + } + } + + if (B) + Succ = B; + + // Backpatch the gotos whose label -> block mappings we didn't know when we + // encountered them. + for (BackpatchBlocksTy::iterator I = BackpatchBlocks.begin(), + E = BackpatchBlocks.end(); I != E; ++I ) { + + CFGBlock *B = I->block; + if (auto *G = dyn_cast<GotoStmt>(B->getTerminator())) { + LabelMapTy::iterator LI = LabelMap.find(G->getLabel()); + // If there is no target for the goto, then we are looking at an + // incomplete AST. Handle this by not registering a successor. + if (LI == LabelMap.end()) + continue; + JumpTarget JT = LI->second; + prependAutomaticObjLifetimeWithTerminator(B, I->scopePosition, + JT.scopePosition); + prependAutomaticObjDtorsWithTerminator(B, I->scopePosition, + JT.scopePosition); + const VarDecl *VD = prependAutomaticObjScopeEndWithTerminator( + B, I->scopePosition, JT.scopePosition); + appendScopeBegin(JT.block, VD, G); + addSuccessor(B, JT.block); + }; + if (auto *G = dyn_cast<GCCAsmStmt>(B->getTerminator())) { + CFGBlock *Successor = (I+1)->block; + for (auto *L : G->labels()) { + LabelMapTy::iterator LI = LabelMap.find(L->getLabel()); + // If there is no target for the goto, then we are looking at an + // incomplete AST. Handle this by not registering a successor. + if (LI == LabelMap.end()) + continue; + JumpTarget JT = LI->second; + // Successor has been added, so skip it. + if (JT.block == Successor) + continue; + addSuccessor(B, JT.block); + } + I++; + } + } + + // Add successors to the Indirect Goto Dispatch block (if we have one). + if (CFGBlock *B = cfg->getIndirectGotoBlock()) + for (LabelSetTy::iterator I = AddressTakenLabels.begin(), + E = AddressTakenLabels.end(); I != E; ++I ) { + // Lookup the target block. + LabelMapTy::iterator LI = LabelMap.find(*I); + + // If there is no target block that contains label, then we are looking + // at an incomplete AST. 
Handle this by not registering a successor. + if (LI == LabelMap.end()) continue; + + addSuccessor(B, LI->second.block); + } + + // Create an empty entry block that has no predecessors. + cfg->setEntry(createBlock()); + + if (BuildOpts.AddRichCXXConstructors) + assert(ConstructionContextMap.empty() && + "Not all construction contexts were cleaned up!"); + + return std::move(cfg); +} + +/// createBlock - Used to lazily create blocks that are connected +/// to the current (global) successor. +CFGBlock *CFGBuilder::createBlock(bool add_successor) { + CFGBlock *B = cfg->createBlock(); + if (add_successor && Succ) + addSuccessor(B, Succ); + return B; +} + +/// createNoReturnBlock - Used to create a block is a 'noreturn' point in the +/// CFG. It is *not* connected to the current (global) successor, and instead +/// directly tied to the exit block in order to be reachable. +CFGBlock *CFGBuilder::createNoReturnBlock() { + CFGBlock *B = createBlock(false); + B->setHasNoReturnElement(); + addSuccessor(B, &cfg->getExit(), Succ); + return B; +} + +/// addInitializer - Add C++ base or member initializer element to CFG. +CFGBlock *CFGBuilder::addInitializer(CXXCtorInitializer *I) { + if (!BuildOpts.AddInitializers) + return Block; + + bool HasTemporaries = false; + + // Destructors of temporaries in initialization expression should be called + // after initialization finishes. + Expr *Init = I->getInit(); + if (Init) { + HasTemporaries = isa<ExprWithCleanups>(Init); + + if (BuildOpts.AddTemporaryDtors && HasTemporaries) { + // Generate destructors for temporaries in initialization expression. + TempDtorContext Context; + VisitForTemporaryDtors(cast<ExprWithCleanups>(Init)->getSubExpr(), + /*ExternallyDestructed=*/false, Context); + } + } + + autoCreateBlock(); + appendInitializer(Block, I); + + if (Init) { + // If the initializer is an ArrayInitLoopExpr, we want to extract the + // initializer, that's used for each element. + auto *AILEInit = extractElementInitializerFromNestedAILE( + dyn_cast<ArrayInitLoopExpr>(Init)); + + findConstructionContexts( + ConstructionContextLayer::create(cfg->getBumpVectorContext(), I), + AILEInit ? AILEInit : Init); + + if (HasTemporaries) { + // For expression with temporaries go directly to subexpression to omit + // generating destructors for the second time. + return Visit(cast<ExprWithCleanups>(Init)->getSubExpr()); + } + if (BuildOpts.AddCXXDefaultInitExprInCtors) { + if (CXXDefaultInitExpr *Default = dyn_cast<CXXDefaultInitExpr>(Init)) { + // In general, appending the expression wrapped by a CXXDefaultInitExpr + // may cause the same Expr to appear more than once in the CFG. Doing it + // here is safe because there's only one initializer per field. + autoCreateBlock(); + appendStmt(Block, Default); + if (Stmt *Child = Default->getExpr()) + if (CFGBlock *R = Visit(Child)) + Block = R; + return Block; + } + } + return Visit(Init); + } + + return Block; +} + +/// Retrieve the type of the temporary object whose lifetime was +/// extended by a local reference with the given initializer. +static QualType getReferenceInitTemporaryType(const Expr *Init, + bool *FoundMTE = nullptr) { + while (true) { + // Skip parentheses. + Init = Init->IgnoreParens(); + + // Skip through cleanups. + if (const ExprWithCleanups *EWC = dyn_cast<ExprWithCleanups>(Init)) { + Init = EWC->getSubExpr(); + continue; + } + + // Skip through the temporary-materialization expression. 
+    if (const MaterializeTemporaryExpr *MTE
+          = dyn_cast<MaterializeTemporaryExpr>(Init)) {
+      Init = MTE->getSubExpr();
+      if (FoundMTE)
+        *FoundMTE = true;
+      continue;
+    }
+
+    // Skip sub-object accesses into rvalues.
+    SmallVector<const Expr *, 2> CommaLHSs;
+    SmallVector<SubobjectAdjustment, 2> Adjustments;
+    const Expr *SkippedInit =
+        Init->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments);
+    if (SkippedInit != Init) {
+      Init = SkippedInit;
+      continue;
+    }
+
+    break;
+  }
+
+  return Init->getType();
+}
+
+// TODO: Support adding LoopExit element to the CFG in case where the loop is
+// ended by ReturnStmt, GotoStmt or ThrowExpr.
+void CFGBuilder::addLoopExit(const Stmt *LoopStmt) {
+  if (!BuildOpts.AddLoopExit)
+    return;
+  autoCreateBlock();
+  appendLoopExit(Block, LoopStmt);
+}
+
+void CFGBuilder::getDeclsWithEndedScope(LocalScope::const_iterator B,
+                                        LocalScope::const_iterator E,
+                                        Stmt *S) {
+  if (!BuildOpts.AddScopes)
+    return;
+
+  if (B == E)
+    return;
+
+  // To go from B to E, one first goes up the scopes from B to P
+  // then sideways in one scope from P to P' and then down
+  // the scopes from P' to E.
+  // The lifetime of all objects between B and P ends.
+  LocalScope::const_iterator P = B.shared_parent(E);
+  int Dist = B.distance(P);
+  if (Dist <= 0)
+    return;
+
+  for (LocalScope::const_iterator I = B; I != P; ++I)
+    if (I.pointsToFirstDeclaredVar())
+      DeclsWithEndedScope.insert(*I);
+}
+
+void CFGBuilder::addAutomaticObjHandling(LocalScope::const_iterator B,
+                                         LocalScope::const_iterator E,
+                                         Stmt *S) {
+  getDeclsWithEndedScope(B, E, S);
+  if (BuildOpts.AddScopes)
+    addScopesEnd(B, E, S);
+  if (BuildOpts.AddImplicitDtors)
+    addAutomaticObjDtors(B, E, S);
+  if (BuildOpts.AddLifetime)
+    addLifetimeEnds(B, E, S);
+}
+
+/// Add to current block automatic objects that leave the scope.
+void CFGBuilder::addLifetimeEnds(LocalScope::const_iterator B,
+                                 LocalScope::const_iterator E, Stmt *S) {
+  if (!BuildOpts.AddLifetime)
+    return;
+
+  if (B == E)
+    return;
+
+  // To go from B to E, one first goes up the scopes from B to P
+  // then sideways in one scope from P to P' and then down
+  // the scopes from P' to E.
+  // The lifetime of all objects between B and P ends.
+  LocalScope::const_iterator P = B.shared_parent(E);
+  int dist = B.distance(P);
+  if (dist <= 0)
+    return;
+
+  // We need to perform the scope leaving in reverse order.
+  SmallVector<VarDecl *, 10> DeclsTrivial;
+  SmallVector<VarDecl *, 10> DeclsNonTrivial;
+  DeclsTrivial.reserve(dist);
+  DeclsNonTrivial.reserve(dist);
+
+  for (LocalScope::const_iterator I = B; I != P; ++I)
+    if (hasTrivialDestructor(*I))
+      DeclsTrivial.push_back(*I);
+    else
+      DeclsNonTrivial.push_back(*I);
+
+  autoCreateBlock();
+  // Objects with trivial destructors end their lifetime last (when storage
+  // duration ends).
+  for (VarDecl *VD : llvm::reverse(DeclsTrivial))
+    appendLifetimeEnds(Block, VD, S);
+
+  for (VarDecl *VD : llvm::reverse(DeclsNonTrivial))
+    appendLifetimeEnds(Block, VD, S);
+}
+
+/// Add to current block markers for ending scopes.
+void CFGBuilder::addScopesEnd(LocalScope::const_iterator B,
+                              LocalScope::const_iterator E, Stmt *S) {
+  // If implicit destructors are enabled, we'll add scope ends in
+  // addAutomaticObjDtors.
+  if (BuildOpts.AddImplicitDtors)
+    return;
+
+  autoCreateBlock();
+
+  for (VarDecl *VD : llvm::reverse(DeclsWithEndedScope))
+    appendScopeEnd(Block, VD, S);
+}
+
+/// addAutomaticObjDtors - Add to current block automatic object destructors
+/// for objects in range of local scope positions.
Use S as trigger statement +/// for destructors. +void CFGBuilder::addAutomaticObjDtors(LocalScope::const_iterator B, + LocalScope::const_iterator E, Stmt *S) { + if (!BuildOpts.AddImplicitDtors) + return; + + if (B == E) + return; + + // We need to append the destructors in reverse order, but any one of them + // may be a no-return destructor which changes the CFG. As a result, buffer + // this sequence up and replay them in reverse order when appending onto the + // CFGBlock(s). + SmallVector<VarDecl*, 10> Decls; + Decls.reserve(B.distance(E)); + for (LocalScope::const_iterator I = B; I != E; ++I) + Decls.push_back(*I); + + for (VarDecl *VD : llvm::reverse(Decls)) { + if (hasTrivialDestructor(VD)) { + // If AddScopes is enabled and *I is a first variable in a scope, add a + // ScopeEnd marker in a Block. + if (BuildOpts.AddScopes && DeclsWithEndedScope.count(VD)) { + autoCreateBlock(); + appendScopeEnd(Block, VD, S); + } + continue; + } + // If this destructor is marked as a no-return destructor, we need to + // create a new block for the destructor which does not have as a successor + // anything built thus far: control won't flow out of this block. + QualType Ty = VD->getType(); + if (Ty->isReferenceType()) { + Ty = getReferenceInitTemporaryType(VD->getInit()); + } + Ty = Context->getBaseElementType(Ty); + + if (Ty->getAsCXXRecordDecl()->isAnyDestructorNoReturn()) + Block = createNoReturnBlock(); + else + autoCreateBlock(); + + // Add ScopeEnd just after automatic obj destructor. + if (BuildOpts.AddScopes && DeclsWithEndedScope.count(VD)) + appendScopeEnd(Block, VD, S); + appendAutomaticObjDtor(Block, VD, S); + } +} + +/// addImplicitDtorsForDestructor - Add implicit destructors generated for +/// base and member objects in destructor. +void CFGBuilder::addImplicitDtorsForDestructor(const CXXDestructorDecl *DD) { + assert(BuildOpts.AddImplicitDtors && + "Can be called only when dtors should be added"); + const CXXRecordDecl *RD = DD->getParent(); + + // At the end destroy virtual base objects. + for (const auto &VI : RD->vbases()) { + // TODO: Add a VirtualBaseBranch to see if the most derived class + // (which is different from the current class) is responsible for + // destroying them. + const CXXRecordDecl *CD = VI.getType()->getAsCXXRecordDecl(); + if (CD && !CD->hasTrivialDestructor()) { + autoCreateBlock(); + appendBaseDtor(Block, &VI); + } + } + + // Before virtual bases destroy direct base objects. + for (const auto &BI : RD->bases()) { + if (!BI.isVirtual()) { + const CXXRecordDecl *CD = BI.getType()->getAsCXXRecordDecl(); + if (CD && !CD->hasTrivialDestructor()) { + autoCreateBlock(); + appendBaseDtor(Block, &BI); + } + } + } + + // First destroy member objects. + for (auto *FI : RD->fields()) { + // Check for constant size array. Set type to array element type. + QualType QT = FI->getType(); + // It may be a multidimensional array. + while (const ConstantArrayType *AT = Context->getAsConstantArrayType(QT)) { + if (AT->getSize() == 0) + break; + QT = AT->getElementType(); + } + + if (const CXXRecordDecl *CD = QT->getAsCXXRecordDecl()) + if (!CD->hasTrivialDestructor()) { + autoCreateBlock(); + appendMemberDtor(Block, FI); + } + } +} + +/// createOrReuseLocalScope - If Scope is NULL create new LocalScope. Either +/// way return valid LocalScope object. 
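+/// For example, addLocalScopeForDeclStmt below threads a single Scope
+/// pointer through all VarDecls of a DeclStmt, so only the first variable
+/// triggers an allocation.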
+LocalScope* CFGBuilder::createOrReuseLocalScope(LocalScope* Scope) { + if (Scope) + return Scope; + llvm::BumpPtrAllocator &alloc = cfg->getAllocator(); + return new (alloc.Allocate<LocalScope>()) + LocalScope(BumpVectorContext(alloc), ScopePos); +} + +/// addLocalScopeForStmt - Add LocalScope to local scopes tree for statement +/// that should create implicit scope (e.g. if/else substatements). +void CFGBuilder::addLocalScopeForStmt(Stmt *S) { + if (!BuildOpts.AddImplicitDtors && !BuildOpts.AddLifetime && + !BuildOpts.AddScopes) + return; + + LocalScope *Scope = nullptr; + + // For compound statement we will be creating explicit scope. + if (CompoundStmt *CS = dyn_cast<CompoundStmt>(S)) { + for (auto *BI : CS->body()) { + Stmt *SI = BI->stripLabelLikeStatements(); + if (DeclStmt *DS = dyn_cast<DeclStmt>(SI)) + Scope = addLocalScopeForDeclStmt(DS, Scope); + } + return; + } + + // For any other statement scope will be implicit and as such will be + // interesting only for DeclStmt. + if (DeclStmt *DS = dyn_cast<DeclStmt>(S->stripLabelLikeStatements())) + addLocalScopeForDeclStmt(DS); +} + +/// addLocalScopeForDeclStmt - Add LocalScope for declaration statement. Will +/// reuse Scope if not NULL. +LocalScope* CFGBuilder::addLocalScopeForDeclStmt(DeclStmt *DS, + LocalScope* Scope) { + if (!BuildOpts.AddImplicitDtors && !BuildOpts.AddLifetime && + !BuildOpts.AddScopes) + return Scope; + + for (auto *DI : DS->decls()) + if (VarDecl *VD = dyn_cast<VarDecl>(DI)) + Scope = addLocalScopeForVarDecl(VD, Scope); + return Scope; +} + +bool CFGBuilder::hasTrivialDestructor(VarDecl *VD) { + // Check for const references bound to temporary. Set type to pointee. + QualType QT = VD->getType(); + if (QT->isReferenceType()) { + // Attempt to determine whether this declaration lifetime-extends a + // temporary. + // + // FIXME: This is incorrect. Non-reference declarations can lifetime-extend + // temporaries, and a single declaration can extend multiple temporaries. + // We should look at the storage duration on each nested + // MaterializeTemporaryExpr instead. + + const Expr *Init = VD->getInit(); + if (!Init) { + // Probably an exception catch-by-reference variable. + // FIXME: It doesn't really mean that the object has a trivial destructor. + // Also are there other cases? + return true; + } + + // Lifetime-extending a temporary? + bool FoundMTE = false; + QT = getReferenceInitTemporaryType(Init, &FoundMTE); + if (!FoundMTE) + return true; + } + + // Check for constant size array. Set type to array element type. + while (const ConstantArrayType *AT = Context->getAsConstantArrayType(QT)) { + if (AT->getSize() == 0) + return true; + QT = AT->getElementType(); + } + + // Check if type is a C++ class with non-trivial destructor. + if (const CXXRecordDecl *CD = QT->getAsCXXRecordDecl()) + return !CD->hasDefinition() || CD->hasTrivialDestructor(); + return true; +} + +/// addLocalScopeForVarDecl - Add LocalScope for variable declaration. It will +/// create add scope for automatic objects and temporary objects bound to +/// const reference. Will reuse Scope if not NULL. +LocalScope* CFGBuilder::addLocalScopeForVarDecl(VarDecl *VD, + LocalScope* Scope) { + assert(!(BuildOpts.AddImplicitDtors && BuildOpts.AddLifetime) && + "AddImplicitDtors and AddLifetime cannot be used at the same time"); + if (!BuildOpts.AddImplicitDtors && !BuildOpts.AddLifetime && + !BuildOpts.AddScopes) + return Scope; + + // Check if variable is local. 
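+  // Illustrative example: a function-local `static Foo f;` sits in local
+  // scope syntactically but has static storage duration, so
+  // hasLocalStorage() is false and it needs no automatic-object
+  // bookkeeping here.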
+ if (!VD->hasLocalStorage()) + return Scope; + + if (BuildOpts.AddImplicitDtors) { + if (!hasTrivialDestructor(VD) || BuildOpts.AddScopes) { + // Add the variable to scope + Scope = createOrReuseLocalScope(Scope); + Scope->addVar(VD); + ScopePos = Scope->begin(); + } + return Scope; + } + + assert(BuildOpts.AddLifetime); + // Add the variable to scope + Scope = createOrReuseLocalScope(Scope); + Scope->addVar(VD); + ScopePos = Scope->begin(); + return Scope; +} + +/// addLocalScopeAndDtors - For given statement add local scope for it and +/// add destructors that will cleanup the scope. Will reuse Scope if not NULL. +void CFGBuilder::addLocalScopeAndDtors(Stmt *S) { + LocalScope::const_iterator scopeBeginPos = ScopePos; + addLocalScopeForStmt(S); + addAutomaticObjHandling(ScopePos, scopeBeginPos, S); +} + +/// prependAutomaticObjDtorsWithTerminator - Prepend destructor CFGElements for +/// variables with automatic storage duration to CFGBlock's elements vector. +/// Elements will be prepended to physical beginning of the vector which +/// happens to be logical end. Use blocks terminator as statement that specifies +/// destructors call site. +/// FIXME: This mechanism for adding automatic destructors doesn't handle +/// no-return destructors properly. +void CFGBuilder::prependAutomaticObjDtorsWithTerminator(CFGBlock *Blk, + LocalScope::const_iterator B, LocalScope::const_iterator E) { + if (!BuildOpts.AddImplicitDtors) + return; + BumpVectorContext &C = cfg->getBumpVectorContext(); + CFGBlock::iterator InsertPos + = Blk->beginAutomaticObjDtorsInsert(Blk->end(), B.distance(E), C); + for (LocalScope::const_iterator I = B; I != E; ++I) + InsertPos = Blk->insertAutomaticObjDtor(InsertPos, *I, + Blk->getTerminatorStmt()); +} + +/// prependAutomaticObjLifetimeWithTerminator - Prepend lifetime CFGElements for +/// variables with automatic storage duration to CFGBlock's elements vector. +/// Elements will be prepended to physical beginning of the vector which +/// happens to be logical end. Use blocks terminator as statement that specifies +/// where lifetime ends. +void CFGBuilder::prependAutomaticObjLifetimeWithTerminator( + CFGBlock *Blk, LocalScope::const_iterator B, LocalScope::const_iterator E) { + if (!BuildOpts.AddLifetime) + return; + BumpVectorContext &C = cfg->getBumpVectorContext(); + CFGBlock::iterator InsertPos = + Blk->beginLifetimeEndsInsert(Blk->end(), B.distance(E), C); + for (LocalScope::const_iterator I = B; I != E; ++I) { + InsertPos = + Blk->insertLifetimeEnds(InsertPos, *I, Blk->getTerminatorStmt()); + } +} + +/// prependAutomaticObjScopeEndWithTerminator - Prepend scope end CFGElements for +/// variables with automatic storage duration to CFGBlock's elements vector. +/// Elements will be prepended to physical beginning of the vector which +/// happens to be logical end. Use blocks terminator as statement that specifies +/// where scope ends. 
+const VarDecl * +CFGBuilder::prependAutomaticObjScopeEndWithTerminator( + CFGBlock *Blk, LocalScope::const_iterator B, LocalScope::const_iterator E) { + if (!BuildOpts.AddScopes) + return nullptr; + BumpVectorContext &C = cfg->getBumpVectorContext(); + CFGBlock::iterator InsertPos = + Blk->beginScopeEndInsert(Blk->end(), 1, C); + LocalScope::const_iterator PlaceToInsert = B; + for (LocalScope::const_iterator I = B; I != E; ++I) + PlaceToInsert = I; + Blk->insertScopeEnd(InsertPos, *PlaceToInsert, Blk->getTerminatorStmt()); + return *PlaceToInsert; +} + +/// Visit - Walk the subtree of a statement and add extra +/// blocks for ternary operators, &&, and ||. We also process "," and +/// DeclStmts (which may contain nested control-flow). +CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc, + bool ExternallyDestructed) { + if (!S) { + badCFG = true; + return nullptr; + } + + if (Expr *E = dyn_cast<Expr>(S)) + S = E->IgnoreParens(); + + if (Context->getLangOpts().OpenMP) + if (auto *D = dyn_cast<OMPExecutableDirective>(S)) + return VisitOMPExecutableDirective(D, asc); + + switch (S->getStmtClass()) { + default: + return VisitStmt(S, asc); + + case Stmt::ImplicitValueInitExprClass: + if (BuildOpts.OmitImplicitValueInitializers) + return Block; + return VisitStmt(S, asc); + + case Stmt::InitListExprClass: + return VisitInitListExpr(cast<InitListExpr>(S), asc); + + case Stmt::AttributedStmtClass: + return VisitAttributedStmt(cast<AttributedStmt>(S), asc); + + case Stmt::AddrLabelExprClass: + return VisitAddrLabelExpr(cast<AddrLabelExpr>(S), asc); + + case Stmt::BinaryConditionalOperatorClass: + return VisitConditionalOperator(cast<BinaryConditionalOperator>(S), asc); + + case Stmt::BinaryOperatorClass: + return VisitBinaryOperator(cast<BinaryOperator>(S), asc); + + case Stmt::BlockExprClass: + return VisitBlockExpr(cast<BlockExpr>(S), asc); + + case Stmt::BreakStmtClass: + return VisitBreakStmt(cast<BreakStmt>(S)); + + case Stmt::CallExprClass: + case Stmt::CXXOperatorCallExprClass: + case Stmt::CXXMemberCallExprClass: + case Stmt::UserDefinedLiteralClass: + return VisitCallExpr(cast<CallExpr>(S), asc); + + case Stmt::CaseStmtClass: + return VisitCaseStmt(cast<CaseStmt>(S)); + + case Stmt::ChooseExprClass: + return VisitChooseExpr(cast<ChooseExpr>(S), asc); + + case Stmt::CompoundStmtClass: + return VisitCompoundStmt(cast<CompoundStmt>(S), ExternallyDestructed); + + case Stmt::ConditionalOperatorClass: + return VisitConditionalOperator(cast<ConditionalOperator>(S), asc); + + case Stmt::ContinueStmtClass: + return VisitContinueStmt(cast<ContinueStmt>(S)); + + case Stmt::CXXCatchStmtClass: + return VisitCXXCatchStmt(cast<CXXCatchStmt>(S)); + + case Stmt::ExprWithCleanupsClass: + return VisitExprWithCleanups(cast<ExprWithCleanups>(S), + asc, ExternallyDestructed); + + case Stmt::CXXDefaultArgExprClass: + case Stmt::CXXDefaultInitExprClass: + // FIXME: The expression inside a CXXDefaultArgExpr is owned by the + // called function's declaration, not by the caller. If we simply add + // this expression to the CFG, we could end up with the same Expr + // appearing multiple times. + // PR13385 / <rdar://problem/12156507> + // + // It's likewise possible for multiple CXXDefaultInitExprs for the same + // expression to be used in the same function (through aggregate + // initialization). 
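+    // Illustrative example: given `void f(int x = compute());` (compute()
+    // being an arbitrary callee), every call `f()` refers to the single
+    // default-arg expression owned by f's declaration, so naively adding it
+    // here could insert the same Expr node into the CFG more than once.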
+ return VisitStmt(S, asc); + + case Stmt::CXXBindTemporaryExprClass: + return VisitCXXBindTemporaryExpr(cast<CXXBindTemporaryExpr>(S), asc); + + case Stmt::CXXConstructExprClass: + return VisitCXXConstructExpr(cast<CXXConstructExpr>(S), asc); + + case Stmt::CXXNewExprClass: + return VisitCXXNewExpr(cast<CXXNewExpr>(S), asc); + + case Stmt::CXXDeleteExprClass: + return VisitCXXDeleteExpr(cast<CXXDeleteExpr>(S), asc); + + case Stmt::CXXFunctionalCastExprClass: + return VisitCXXFunctionalCastExpr(cast<CXXFunctionalCastExpr>(S), asc); + + case Stmt::CXXTemporaryObjectExprClass: + return VisitCXXTemporaryObjectExpr(cast<CXXTemporaryObjectExpr>(S), asc); + + case Stmt::CXXThrowExprClass: + return VisitCXXThrowExpr(cast<CXXThrowExpr>(S)); + + case Stmt::CXXTryStmtClass: + return VisitCXXTryStmt(cast<CXXTryStmt>(S)); + + case Stmt::CXXTypeidExprClass: + return VisitCXXTypeidExpr(cast<CXXTypeidExpr>(S), asc); + + case Stmt::CXXForRangeStmtClass: + return VisitCXXForRangeStmt(cast<CXXForRangeStmt>(S)); + + case Stmt::DeclStmtClass: + return VisitDeclStmt(cast<DeclStmt>(S)); + + case Stmt::DefaultStmtClass: + return VisitDefaultStmt(cast<DefaultStmt>(S)); + + case Stmt::DoStmtClass: + return VisitDoStmt(cast<DoStmt>(S)); + + case Stmt::ForStmtClass: + return VisitForStmt(cast<ForStmt>(S)); + + case Stmt::GotoStmtClass: + return VisitGotoStmt(cast<GotoStmt>(S)); + + case Stmt::GCCAsmStmtClass: + return VisitGCCAsmStmt(cast<GCCAsmStmt>(S), asc); + + case Stmt::IfStmtClass: + return VisitIfStmt(cast<IfStmt>(S)); + + case Stmt::ImplicitCastExprClass: + return VisitImplicitCastExpr(cast<ImplicitCastExpr>(S), asc); + + case Stmt::ConstantExprClass: + return VisitConstantExpr(cast<ConstantExpr>(S), asc); + + case Stmt::IndirectGotoStmtClass: + return VisitIndirectGotoStmt(cast<IndirectGotoStmt>(S)); + + case Stmt::LabelStmtClass: + return VisitLabelStmt(cast<LabelStmt>(S)); + + case Stmt::LambdaExprClass: + return VisitLambdaExpr(cast<LambdaExpr>(S), asc); + + case Stmt::MaterializeTemporaryExprClass: + return VisitMaterializeTemporaryExpr(cast<MaterializeTemporaryExpr>(S), + asc); + + case Stmt::MemberExprClass: + return VisitMemberExpr(cast<MemberExpr>(S), asc); + + case Stmt::NullStmtClass: + return Block; + + case Stmt::ObjCAtCatchStmtClass: + return VisitObjCAtCatchStmt(cast<ObjCAtCatchStmt>(S)); + + case Stmt::ObjCAutoreleasePoolStmtClass: + return VisitObjCAutoreleasePoolStmt(cast<ObjCAutoreleasePoolStmt>(S)); + + case Stmt::ObjCAtSynchronizedStmtClass: + return VisitObjCAtSynchronizedStmt(cast<ObjCAtSynchronizedStmt>(S)); + + case Stmt::ObjCAtThrowStmtClass: + return VisitObjCAtThrowStmt(cast<ObjCAtThrowStmt>(S)); + + case Stmt::ObjCAtTryStmtClass: + return VisitObjCAtTryStmt(cast<ObjCAtTryStmt>(S)); + + case Stmt::ObjCForCollectionStmtClass: + return VisitObjCForCollectionStmt(cast<ObjCForCollectionStmt>(S)); + + case Stmt::ObjCMessageExprClass: + return VisitObjCMessageExpr(cast<ObjCMessageExpr>(S), asc); + + case Stmt::OpaqueValueExprClass: + return Block; + + case Stmt::PseudoObjectExprClass: + return VisitPseudoObjectExpr(cast<PseudoObjectExpr>(S)); + + case Stmt::ReturnStmtClass: + case Stmt::CoreturnStmtClass: + return VisitReturnStmt(S); + + case Stmt::CoyieldExprClass: + case Stmt::CoawaitExprClass: + return VisitCoroutineSuspendExpr(cast<CoroutineSuspendExpr>(S), asc); + + case Stmt::SEHExceptStmtClass: + return VisitSEHExceptStmt(cast<SEHExceptStmt>(S)); + + case Stmt::SEHFinallyStmtClass: + return VisitSEHFinallyStmt(cast<SEHFinallyStmt>(S)); + + case Stmt::SEHLeaveStmtClass: + return 
VisitSEHLeaveStmt(cast<SEHLeaveStmt>(S)); + + case Stmt::SEHTryStmtClass: + return VisitSEHTryStmt(cast<SEHTryStmt>(S)); + + case Stmt::UnaryExprOrTypeTraitExprClass: + return VisitUnaryExprOrTypeTraitExpr(cast<UnaryExprOrTypeTraitExpr>(S), + asc); + + case Stmt::StmtExprClass: + return VisitStmtExpr(cast<StmtExpr>(S), asc); + + case Stmt::SwitchStmtClass: + return VisitSwitchStmt(cast<SwitchStmt>(S)); + + case Stmt::UnaryOperatorClass: + return VisitUnaryOperator(cast<UnaryOperator>(S), asc); + + case Stmt::WhileStmtClass: + return VisitWhileStmt(cast<WhileStmt>(S)); + + case Stmt::ArrayInitLoopExprClass: + return VisitArrayInitLoopExpr(cast<ArrayInitLoopExpr>(S), asc); + } +} + +CFGBlock *CFGBuilder::VisitStmt(Stmt *S, AddStmtChoice asc) { + if (asc.alwaysAdd(*this, S)) { + autoCreateBlock(); + appendStmt(Block, S); + } + + return VisitChildren(S); +} + +/// VisitChildren - Visit the children of a Stmt. +CFGBlock *CFGBuilder::VisitChildren(Stmt *S) { + CFGBlock *B = Block; + + // Visit the children in their reverse order so that they appear in + // left-to-right (natural) order in the CFG. + reverse_children RChildren(S); + for (Stmt *Child : RChildren) { + if (Child) + if (CFGBlock *R = Visit(Child)) + B = R; + } + return B; +} + +CFGBlock *CFGBuilder::VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc) { + if (asc.alwaysAdd(*this, ILE)) { + autoCreateBlock(); + appendStmt(Block, ILE); + } + CFGBlock *B = Block; + + reverse_children RChildren(ILE); + for (Stmt *Child : RChildren) { + if (!Child) + continue; + if (CFGBlock *R = Visit(Child)) + B = R; + if (BuildOpts.AddCXXDefaultInitExprInAggregates) { + if (auto *DIE = dyn_cast<CXXDefaultInitExpr>(Child)) + if (Stmt *Child = DIE->getExpr()) + if (CFGBlock *R = Visit(Child)) + B = R; + } + } + return B; +} + +CFGBlock *CFGBuilder::VisitAddrLabelExpr(AddrLabelExpr *A, + AddStmtChoice asc) { + AddressTakenLabels.insert(A->getLabel()); + + if (asc.alwaysAdd(*this, A)) { + autoCreateBlock(); + appendStmt(Block, A); + } + + return Block; +} + +static bool isFallthroughStatement(const AttributedStmt *A) { + bool isFallthrough = hasSpecificAttr<FallThroughAttr>(A->getAttrs()); + assert((!isFallthrough || isa<NullStmt>(A->getSubStmt())) && + "expected fallthrough not to have children"); + return isFallthrough; +} + +CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A, + AddStmtChoice asc) { + // AttributedStmts for [[likely]] can have arbitrary statements as children, + // and the current visitation order here would add the AttributedStmts + // for [[likely]] after the child nodes, which is undesirable: For example, + // if the child contains an unconditional return, the [[likely]] would be + // considered unreachable. + // So only add the AttributedStmt for FallThrough, which has CFG effects and + // also no children, and omit the others. None of the other current StmtAttrs + // have semantic meaning for the CFG. + if (isFallthroughStatement(A) && asc.alwaysAdd(*this, A)) { + autoCreateBlock(); + appendStmt(Block, A); + } + + return VisitChildren(A); +} + +CFGBlock *CFGBuilder::VisitUnaryOperator(UnaryOperator *U, AddStmtChoice asc) { + if (asc.alwaysAdd(*this, U)) { + autoCreateBlock(); + appendStmt(Block, U); + } + + if (U->getOpcode() == UO_LNot) + tryEvaluateBool(U->getSubExpr()->IgnoreParens()); + + return Visit(U->getSubExpr(), AddStmtChoice()); +} + +CFGBlock *CFGBuilder::VisitLogicalOperator(BinaryOperator *B) { + CFGBlock *ConfluenceBlock = Block ? 
Block : createBlock(); + appendStmt(ConfluenceBlock, B); + + if (badCFG) + return nullptr; + + return VisitLogicalOperator(B, nullptr, ConfluenceBlock, + ConfluenceBlock).first; +} + +std::pair<CFGBlock*, CFGBlock*> +CFGBuilder::VisitLogicalOperator(BinaryOperator *B, + Stmt *Term, + CFGBlock *TrueBlock, + CFGBlock *FalseBlock) { + // Introspect the RHS. If it is a nested logical operation, we recursively + // build the CFG using this function. Otherwise, resort to default + // CFG construction behavior. + Expr *RHS = B->getRHS()->IgnoreParens(); + CFGBlock *RHSBlock, *ExitBlock; + + do { + if (BinaryOperator *B_RHS = dyn_cast<BinaryOperator>(RHS)) + if (B_RHS->isLogicalOp()) { + std::tie(RHSBlock, ExitBlock) = + VisitLogicalOperator(B_RHS, Term, TrueBlock, FalseBlock); + break; + } + + // The RHS is not a nested logical operation. Don't push the terminator + // down further, but instead visit RHS and construct the respective + // pieces of the CFG, and link up the RHSBlock with the terminator + // we have been provided. + ExitBlock = RHSBlock = createBlock(false); + + // Even though KnownVal is only used in the else branch of the next + // conditional, tryEvaluateBool performs additional checking on the + // Expr, so it should be called unconditionally. + TryResult KnownVal = tryEvaluateBool(RHS); + if (!KnownVal.isKnown()) + KnownVal = tryEvaluateBool(B); + + if (!Term) { + assert(TrueBlock == FalseBlock); + addSuccessor(RHSBlock, TrueBlock); + } + else { + RHSBlock->setTerminator(Term); + addSuccessor(RHSBlock, TrueBlock, !KnownVal.isFalse()); + addSuccessor(RHSBlock, FalseBlock, !KnownVal.isTrue()); + } + + Block = RHSBlock; + RHSBlock = addStmt(RHS); + } + while (false); + + if (badCFG) + return std::make_pair(nullptr, nullptr); + + // Generate the blocks for evaluating the LHS. + Expr *LHS = B->getLHS()->IgnoreParens(); + + if (BinaryOperator *B_LHS = dyn_cast<BinaryOperator>(LHS)) + if (B_LHS->isLogicalOp()) { + if (B->getOpcode() == BO_LOr) + FalseBlock = RHSBlock; + else + TrueBlock = RHSBlock; + + // For the LHS, treat 'B' as the terminator that we want to sink + // into the nested branch. The RHS always gets the top-most + // terminator. + return VisitLogicalOperator(B_LHS, B, TrueBlock, FalseBlock); + } + + // Create the block evaluating the LHS. + // This contains the '&&' or '||' as the terminator. + CFGBlock *LHSBlock = createBlock(false); + LHSBlock->setTerminator(B); + + Block = LHSBlock; + CFGBlock *EntryLHSBlock = addStmt(LHS); + + if (badCFG) + return std::make_pair(nullptr, nullptr); + + // See if this is a known constant. + TryResult KnownVal = tryEvaluateBool(LHS); + + // Now link the LHSBlock with RHSBlock. 
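+  // Rough sketch of the wiring below for `a || b` branching to blocks T/F:
+  //   [a] --true--> T           (LHS short-circuits straight to T)
+  //   [a] --false-> [b] --true--> T, --false--> F
+  // For `a && b` the roles are mirrored: a false LHS jumps straight to F.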
+ if (B->getOpcode() == BO_LOr) { + addSuccessor(LHSBlock, TrueBlock, !KnownVal.isFalse()); + addSuccessor(LHSBlock, RHSBlock, !KnownVal.isTrue()); + } else { + assert(B->getOpcode() == BO_LAnd); + addSuccessor(LHSBlock, RHSBlock, !KnownVal.isFalse()); + addSuccessor(LHSBlock, FalseBlock, !KnownVal.isTrue()); + } + + return std::make_pair(EntryLHSBlock, ExitBlock); +} + +CFGBlock *CFGBuilder::VisitBinaryOperator(BinaryOperator *B, + AddStmtChoice asc) { + // && or || + if (B->isLogicalOp()) + return VisitLogicalOperator(B); + + if (B->getOpcode() == BO_Comma) { // , + autoCreateBlock(); + appendStmt(Block, B); + addStmt(B->getRHS()); + return addStmt(B->getLHS()); + } + + if (B->isAssignmentOp()) { + if (asc.alwaysAdd(*this, B)) { + autoCreateBlock(); + appendStmt(Block, B); + } + Visit(B->getLHS()); + return Visit(B->getRHS()); + } + + if (asc.alwaysAdd(*this, B)) { + autoCreateBlock(); + appendStmt(Block, B); + } + + if (B->isEqualityOp() || B->isRelationalOp()) + tryEvaluateBool(B); + + CFGBlock *RBlock = Visit(B->getRHS()); + CFGBlock *LBlock = Visit(B->getLHS()); + // If visiting RHS causes us to finish 'Block', e.g. the RHS is a StmtExpr + // containing a DoStmt, and the LHS doesn't create a new block, then we should + // return RBlock. Otherwise we'll incorrectly return NULL. + return (LBlock ? LBlock : RBlock); +} + +CFGBlock *CFGBuilder::VisitNoRecurse(Expr *E, AddStmtChoice asc) { + if (asc.alwaysAdd(*this, E)) { + autoCreateBlock(); + appendStmt(Block, E); + } + return Block; +} + +CFGBlock *CFGBuilder::VisitBreakStmt(BreakStmt *B) { + // "break" is a control-flow statement. Thus we stop processing the current + // block. + if (badCFG) + return nullptr; + + // Now create a new block that ends with the break statement. + Block = createBlock(false); + Block->setTerminator(B); + + // If there is no target for the break, then we are looking at an incomplete + // AST. This means that the CFG cannot be constructed. + if (BreakJumpTarget.block) { + addAutomaticObjHandling(ScopePos, BreakJumpTarget.scopePosition, B); + addSuccessor(Block, BreakJumpTarget.block); + } else + badCFG = true; + + return Block; +} + +static bool CanThrow(Expr *E, ASTContext &Ctx) { + QualType Ty = E->getType(); + if (Ty->isFunctionPointerType() || Ty->isBlockPointerType()) + Ty = Ty->getPointeeType(); + + const FunctionType *FT = Ty->getAs<FunctionType>(); + if (FT) { + if (const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FT)) + if (!isUnresolvedExceptionSpec(Proto->getExceptionSpecType()) && + Proto->isNothrow()) + return false; + } + return true; +} + +CFGBlock *CFGBuilder::VisitCallExpr(CallExpr *C, AddStmtChoice asc) { + // Compute the callee type. + QualType calleeType = C->getCallee()->getType(); + if (calleeType == Context->BoundMemberTy) { + QualType boundType = Expr::findBoundMemberType(C->getCallee()); + + // We should only get a null bound type if processing a dependent + // CFG. Recover by assuming nothing. + if (!boundType.isNull()) calleeType = boundType; + } + + // If this is a call to a no-return function, this stops the block here. + bool NoReturn = getFunctionExtInfo(*calleeType).getNoReturn(); + + bool AddEHEdge = false; + + // Languages without exceptions are assumed to not throw. + if (Context->getLangOpts().Exceptions) { + if (BuildOpts.AddEHEdges) + AddEHEdge = true; + } + + // If this is a call to a builtin function, it might not actually evaluate + // its arguments. Don't add them to the CFG if this is the case. 
+ bool OmitArguments = false; + + if (FunctionDecl *FD = C->getDirectCallee()) { + // TODO: Support construction contexts for variadic function arguments. + // These are a bit problematic and not very useful because passing + // C++ objects as C-style variadic arguments doesn't work in general + // (see [expr.call]). + if (!FD->isVariadic()) + findConstructionContextsForArguments(C); + + if (FD->isNoReturn() || C->isBuiltinAssumeFalse(*Context)) + NoReturn = true; + if (FD->hasAttr<NoThrowAttr>()) + AddEHEdge = false; + if (FD->getBuiltinID() == Builtin::BI__builtin_object_size || + FD->getBuiltinID() == Builtin::BI__builtin_dynamic_object_size) + OmitArguments = true; + } + + if (!CanThrow(C->getCallee(), *Context)) + AddEHEdge = false; + + if (OmitArguments) { + assert(!NoReturn && "noreturn calls with unevaluated args not implemented"); + assert(!AddEHEdge && "EH calls with unevaluated args not implemented"); + autoCreateBlock(); + appendStmt(Block, C); + return Visit(C->getCallee()); + } + + if (!NoReturn && !AddEHEdge) { + autoCreateBlock(); + appendCall(Block, C); + + return VisitChildren(C); + } + + if (Block) { + Succ = Block; + if (badCFG) + return nullptr; + } + + if (NoReturn) + Block = createNoReturnBlock(); + else + Block = createBlock(); + + appendCall(Block, C); + + if (AddEHEdge) { + // Add exceptional edges. + if (TryTerminatedBlock) + addSuccessor(Block, TryTerminatedBlock); + else + addSuccessor(Block, &cfg->getExit()); + } + + return VisitChildren(C); +} + +CFGBlock *CFGBuilder::VisitChooseExpr(ChooseExpr *C, + AddStmtChoice asc) { + CFGBlock *ConfluenceBlock = Block ? Block : createBlock(); + appendStmt(ConfluenceBlock, C); + if (badCFG) + return nullptr; + + AddStmtChoice alwaysAdd = asc.withAlwaysAdd(true); + Succ = ConfluenceBlock; + Block = nullptr; + CFGBlock *LHSBlock = Visit(C->getLHS(), alwaysAdd); + if (badCFG) + return nullptr; + + Succ = ConfluenceBlock; + Block = nullptr; + CFGBlock *RHSBlock = Visit(C->getRHS(), alwaysAdd); + if (badCFG) + return nullptr; + + Block = createBlock(false); + // See if this is a known constant. + const TryResult& KnownVal = tryEvaluateBool(C->getCond()); + addSuccessor(Block, KnownVal.isFalse() ? nullptr : LHSBlock); + addSuccessor(Block, KnownVal.isTrue() ? nullptr : RHSBlock); + Block->setTerminator(C); + return addStmt(C->getCond()); +} + +CFGBlock *CFGBuilder::VisitCompoundStmt(CompoundStmt *C, + bool ExternallyDestructed) { + LocalScope::const_iterator scopeBeginPos = ScopePos; + addLocalScopeForStmt(C); + + if (!C->body_empty() && !isa<ReturnStmt>(*C->body_rbegin())) { + // If the body ends with a ReturnStmt, the dtors will be added in + // VisitReturnStmt. + addAutomaticObjHandling(ScopePos, scopeBeginPos, C); + } + + CFGBlock *LastBlock = Block; + + for (Stmt *S : llvm::reverse(C->body())) { + // If we hit a segment of code just containing ';' (NullStmts), we can + // get a null block back. In such cases, just use the LastBlock + CFGBlock *newBlock = Visit(S, AddStmtChoice::AlwaysAdd, + ExternallyDestructed); + + if (newBlock) + LastBlock = newBlock; + + if (badCFG) + return nullptr; + + ExternallyDestructed = false; + } + + return LastBlock; +} + +CFGBlock *CFGBuilder::VisitConditionalOperator(AbstractConditionalOperator *C, + AddStmtChoice asc) { + const BinaryConditionalOperator *BCO = dyn_cast<BinaryConditionalOperator>(C); + const OpaqueValueExpr *opaqueValue = (BCO ? BCO->getOpaqueValue() : nullptr); + + // Create the confluence block that will "merge" the results of the ternary + // expression. 
+ CFGBlock *ConfluenceBlock = Block ? Block : createBlock(); + appendStmt(ConfluenceBlock, C); + if (badCFG) + return nullptr; + + AddStmtChoice alwaysAdd = asc.withAlwaysAdd(true); + + // Create a block for the LHS expression if there is an LHS expression. A + // GCC extension allows LHS to be NULL, causing the condition to be the + // value that is returned instead. + // e.g: x ?: y is shorthand for: x ? x : y; + Succ = ConfluenceBlock; + Block = nullptr; + CFGBlock *LHSBlock = nullptr; + const Expr *trueExpr = C->getTrueExpr(); + if (trueExpr != opaqueValue) { + LHSBlock = Visit(C->getTrueExpr(), alwaysAdd); + if (badCFG) + return nullptr; + Block = nullptr; + } + else + LHSBlock = ConfluenceBlock; + + // Create the block for the RHS expression. + Succ = ConfluenceBlock; + CFGBlock *RHSBlock = Visit(C->getFalseExpr(), alwaysAdd); + if (badCFG) + return nullptr; + + // If the condition is a logical '&&' or '||', build a more accurate CFG. + if (BinaryOperator *Cond = + dyn_cast<BinaryOperator>(C->getCond()->IgnoreParens())) + if (Cond->isLogicalOp()) + return VisitLogicalOperator(Cond, C, LHSBlock, RHSBlock).first; + + // Create the block that will contain the condition. + Block = createBlock(false); + + // See if this is a known constant. + const TryResult& KnownVal = tryEvaluateBool(C->getCond()); + addSuccessor(Block, LHSBlock, !KnownVal.isFalse()); + addSuccessor(Block, RHSBlock, !KnownVal.isTrue()); + Block->setTerminator(C); + Expr *condExpr = C->getCond(); + + if (opaqueValue) { + // Run the condition expression if it's not trivially expressed in + // terms of the opaque value (or if there is no opaque value). + if (condExpr != opaqueValue) + addStmt(condExpr); + + // Before that, run the common subexpression if there was one. + // At least one of this or the above will be run. + return addStmt(BCO->getCommon()); + } + + return addStmt(condExpr); +} + +CFGBlock *CFGBuilder::VisitDeclStmt(DeclStmt *DS) { + // Check if the Decl is for an __label__. If so, elide it from the + // CFG entirely. + if (isa<LabelDecl>(*DS->decl_begin())) + return Block; + + // This case also handles static_asserts. + if (DS->isSingleDecl()) + return VisitDeclSubExpr(DS); + + CFGBlock *B = nullptr; + + // Build an individual DeclStmt for each decl. + for (DeclStmt::reverse_decl_iterator I = DS->decl_rbegin(), + E = DS->decl_rend(); + I != E; ++I) { + + // Allocate the DeclStmt using the BumpPtrAllocator. It will get + // automatically freed with the CFG. + DeclGroupRef DG(*I); + Decl *D = *I; + DeclStmt *DSNew = new (Context) DeclStmt(DG, D->getLocation(), GetEndLoc(D)); + cfg->addSyntheticDeclStmt(DSNew, DS); + + // Append the fake DeclStmt to block. + B = VisitDeclSubExpr(DSNew); + } + + return B; +} + +/// VisitDeclSubExpr - Utility method to add block-level expressions for +/// DeclStmts and initializers in them. +CFGBlock *CFGBuilder::VisitDeclSubExpr(DeclStmt *DS) { + assert(DS->isSingleDecl() && "Can handle single declarations only."); + + if (const auto *TND = dyn_cast<TypedefNameDecl>(DS->getSingleDecl())) { + // If we encounter a VLA, process its size expressions. 
+ const Type *T = TND->getUnderlyingType().getTypePtr(); + if (!T->isVariablyModifiedType()) + return Block; + + autoCreateBlock(); + appendStmt(Block, DS); + + CFGBlock *LastBlock = Block; + for (const VariableArrayType *VA = FindVA(T); VA != nullptr; + VA = FindVA(VA->getElementType().getTypePtr())) { + if (CFGBlock *NewBlock = addStmt(VA->getSizeExpr())) + LastBlock = NewBlock; + } + return LastBlock; + } + + VarDecl *VD = dyn_cast<VarDecl>(DS->getSingleDecl()); + + if (!VD) { + // Of everything that can be declared in a DeclStmt, only VarDecls and the + // exceptions above impact runtime semantics. + return Block; + } + + bool HasTemporaries = false; + + // Guard static initializers under a branch. + CFGBlock *blockAfterStaticInit = nullptr; + + if (BuildOpts.AddStaticInitBranches && VD->isStaticLocal()) { + // For static variables, we need to create a branch to track + // whether or not they are initialized. + if (Block) { + Succ = Block; + Block = nullptr; + if (badCFG) + return nullptr; + } + blockAfterStaticInit = Succ; + } + + // Destructors of temporaries in initialization expression should be called + // after initialization finishes. + Expr *Init = VD->getInit(); + if (Init) { + HasTemporaries = isa<ExprWithCleanups>(Init); + + if (BuildOpts.AddTemporaryDtors && HasTemporaries) { + // Generate destructors for temporaries in initialization expression. + TempDtorContext Context; + VisitForTemporaryDtors(cast<ExprWithCleanups>(Init)->getSubExpr(), + /*ExternallyDestructed=*/true, Context); + } + } + + // If we bind to a tuple-like type, we iterate over the HoldingVars, and + // create a DeclStmt for each of them. + if (const auto *DD = dyn_cast<DecompositionDecl>(VD)) { + for (auto *BD : llvm::reverse(DD->bindings())) { + if (auto *VD = BD->getHoldingVar()) { + DeclGroupRef DG(VD); + DeclStmt *DSNew = + new (Context) DeclStmt(DG, VD->getLocation(), GetEndLoc(VD)); + cfg->addSyntheticDeclStmt(DSNew, DS); + Block = VisitDeclSubExpr(DSNew); + } + } + } + + autoCreateBlock(); + appendStmt(Block, DS); + + // If the initializer is an ArrayInitLoopExpr, we want to extract the + // initializer, that's used for each element. + const auto *AILE = dyn_cast_or_null<ArrayInitLoopExpr>(Init); + + findConstructionContexts( + ConstructionContextLayer::create(cfg->getBumpVectorContext(), DS), + AILE ? AILE->getSubExpr() : Init); + + // Keep track of the last non-null block, as 'Block' can be nulled out + // if the initializer expression is something like a 'while' in a + // statement-expression. + CFGBlock *LastBlock = Block; + + if (Init) { + if (HasTemporaries) { + // For expression with temporaries go directly to subexpression to omit + // generating destructors for the second time. + ExprWithCleanups *EC = cast<ExprWithCleanups>(Init); + if (CFGBlock *newBlock = Visit(EC->getSubExpr())) + LastBlock = newBlock; + } + else { + if (CFGBlock *newBlock = Visit(Init)) + LastBlock = newBlock; + } + } + + // If the type of VD is a VLA, then we must process its size expressions. + // FIXME: This does not find the VLA if it is embedded in other types, + // like here: `int (*p_vla)[x];` + for (const VariableArrayType* VA = FindVA(VD->getType().getTypePtr()); + VA != nullptr; VA = FindVA(VA->getElementType().getTypePtr())) { + if (CFGBlock *newBlock = addStmt(VA->getSizeExpr())) + LastBlock = newBlock; + } + + maybeAddScopeBeginForVarDecl(Block, VD, DS); + + // Remove variable from local scope. 
+ if (ScopePos && VD == *ScopePos) + ++ScopePos; + + CFGBlock *B = LastBlock; + if (blockAfterStaticInit) { + Succ = B; + Block = createBlock(false); + Block->setTerminator(DS); + addSuccessor(Block, blockAfterStaticInit); + addSuccessor(Block, B); + B = Block; + } + + return B; +} + +CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) { + // We may see an if statement in the middle of a basic block, or it may be the + // first statement we are processing. In either case, we create a new basic + // block. First, we create the blocks for the then...else statements, and + // then we create the block containing the if statement. If we were in the + // middle of a block, we stop processing that block. That block is then the + // implicit successor for the "then" and "else" clauses. + + // Save local scope position because in case of condition variable ScopePos + // won't be restored when traversing AST. + SaveAndRestore save_scope_pos(ScopePos); + + // Create local scope for C++17 if init-stmt if one exists. + if (Stmt *Init = I->getInit()) + addLocalScopeForStmt(Init); + + // Create local scope for possible condition variable. + // Store scope position. Add implicit destructor. + if (VarDecl *VD = I->getConditionVariable()) + addLocalScopeForVarDecl(VD); + + addAutomaticObjHandling(ScopePos, save_scope_pos.get(), I); + + // The block we were processing is now finished. Make it the successor + // block. + if (Block) { + Succ = Block; + if (badCFG) + return nullptr; + } + + // Process the false branch. + CFGBlock *ElseBlock = Succ; + + if (Stmt *Else = I->getElse()) { + SaveAndRestore sv(Succ); + + // NULL out Block so that the recursive call to Visit will + // create a new basic block. + Block = nullptr; + + // If branch is not a compound statement create implicit scope + // and add destructors. + if (!isa<CompoundStmt>(Else)) + addLocalScopeAndDtors(Else); + + ElseBlock = addStmt(Else); + + if (!ElseBlock) // Can occur when the Else body has all NullStmts. + ElseBlock = sv.get(); + else if (Block) { + if (badCFG) + return nullptr; + } + } + + // Process the true branch. + CFGBlock *ThenBlock; + { + Stmt *Then = I->getThen(); + assert(Then); + SaveAndRestore sv(Succ); + Block = nullptr; + + // If branch is not a compound statement create implicit scope + // and add destructors. + if (!isa<CompoundStmt>(Then)) + addLocalScopeAndDtors(Then); + + ThenBlock = addStmt(Then); + + if (!ThenBlock) { + // We can reach here if the "then" body has all NullStmts. + // Create an empty block so we can distinguish between true and false + // branches in path-sensitive analyses. + ThenBlock = createBlock(false); + addSuccessor(ThenBlock, sv.get()); + } else if (Block) { + if (badCFG) + return nullptr; + } + } + + // Specially handle "if (expr1 || ...)" and "if (expr1 && ...)" by + // having these handle the actual control-flow jump. Note that + // if we introduce a condition variable, e.g. "if (int x = exp1 || exp2)" + // we resort to the old control-flow behavior. This special handling + // removes infeasible paths from the control-flow graph by having the + // control-flow transfer of '&&' or '||' go directly into the then/else + // blocks directly. + BinaryOperator *Cond = + (I->isConsteval() || I->getConditionVariable()) + ? nullptr + : dyn_cast<BinaryOperator>(I->getCond()->IgnoreParens()); + CFGBlock *LastBlock; + if (Cond && Cond->isLogicalOp()) + LastBlock = VisitLogicalOperator(Cond, I, ThenBlock, ElseBlock).first; + else { + // Now create a new block containing the if statement. 
+    Block = createBlock(false);
+
+    // Set the terminator of the new block to the If statement.
+    Block->setTerminator(I);
+
+    // See if this is a known constant.
+    TryResult KnownVal;
+    if (!I->isConsteval())
+      KnownVal = tryEvaluateBool(I->getCond());
+
+    // Add the successors. If we know that specific branches are
+    // unreachable, inform addSuccessor() of that knowledge.
+    addSuccessor(Block, ThenBlock, /* IsReachable = */ !KnownVal.isFalse());
+    addSuccessor(Block, ElseBlock, /* IsReachable = */ !KnownVal.isTrue());
+
+    // Add the condition as the last statement in the new block. This may
+    // create new blocks as the condition may contain control-flow. Any newly
+    // created blocks will be pointed to by "Block".
+    LastBlock = addStmt(I->getCond());
+
+    // If the IfStmt contains a condition variable, add it and its
+    // initializer to the CFG.
+    if (const DeclStmt *DS = I->getConditionVariableDeclStmt()) {
+      autoCreateBlock();
+      LastBlock = addStmt(const_cast<DeclStmt *>(DS));
+    }
+  }
+
+  // Finally, if the IfStmt contains a C++17 init-stmt, add it to the CFG.
+  if (Stmt *Init = I->getInit()) {
+    autoCreateBlock();
+    LastBlock = addStmt(Init);
+  }
+
+  return LastBlock;
+}
+
+CFGBlock *CFGBuilder::VisitReturnStmt(Stmt *S) {
+  // If we were in the middle of a block we stop processing that block.
+  //
+  // NOTE: If a "return" or "co_return" appears in the middle of a block, this
+  //       means that the code afterwards is DEAD (unreachable). We still keep
+  //       a basic block for that code; a simple "mark-and-sweep" from the
+  //       entry block will be able to report such dead blocks.
+  assert(isa<ReturnStmt>(S) || isa<CoreturnStmt>(S));
+
+  // Create the new block.
+  Block = createBlock(false);
+
+  addAutomaticObjHandling(ScopePos, LocalScope::const_iterator(), S);
+
+  if (auto *R = dyn_cast<ReturnStmt>(S))
+    findConstructionContexts(
+        ConstructionContextLayer::create(cfg->getBumpVectorContext(), R),
+        R->getRetValue());
+
+  // If one of the destructors does not return, we already have the Exit
+  // block as a successor.
+  if (!Block->hasNoReturnElement())
+    addSuccessor(Block, &cfg->getExit());
+
+  // Add the return statement to the block.
+  appendStmt(Block, S);
+
+  // Visit children.
+  if (ReturnStmt *RS = dyn_cast<ReturnStmt>(S)) {
+    if (Expr *O = RS->getRetValue())
+      return Visit(O, AddStmtChoice::AlwaysAdd, /*ExternallyDestructed=*/true);
+    return Block;
+  }
+
+  CoreturnStmt *CRS = cast<CoreturnStmt>(S);
+  auto *B = Block;
+  if (CFGBlock *R = Visit(CRS->getPromiseCall()))
+    B = R;
+
+  if (Expr *RV = CRS->getOperand())
+    if (RV->getType()->isVoidType() && !isa<InitListExpr>(RV))
+      // A non-initlist void expression.
+      if (CFGBlock *R = Visit(RV))
+        B = R;
+
+  return B;
+}
+
+CFGBlock *CFGBuilder::VisitCoroutineSuspendExpr(CoroutineSuspendExpr *E,
+                                                AddStmtChoice asc) {
+  // We're modelling the pre-coro-xform CFG. Thus just evaluate the various
+  // active components of the co_await or co_yield. Note we do not model the
+  // edge from the builtin_suspend to the exit node.
+  if (asc.alwaysAdd(*this, E)) {
+    autoCreateBlock();
+    appendStmt(Block, E);
+  }
+  CFGBlock *B = Block;
+  if (auto *R = Visit(E->getResumeExpr()))
+    B = R;
+  if (auto *R = Visit(E->getSuspendExpr()))
+    B = R;
+  if (auto *R = Visit(E->getReadyExpr()))
+    B = R;
+  if (auto *R = Visit(E->getCommonExpr()))
+    B = R;
+  return B;
+}
+
+CFGBlock *CFGBuilder::VisitSEHExceptStmt(SEHExceptStmt *ES) {
+  // SEHExceptStmts are treated like labels, so they are the first statement
+  // in a block.
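+  // Illustrative source shape: in `__try { ... } __except (filter) { H }`
+  // the handler H may be entered from anywhere in the guarded block, much
+  // like a labeled jump target, so it must start its own CFG block.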
+ + // Save local scope position because in case of exception variable ScopePos + // won't be restored when traversing AST. + SaveAndRestore save_scope_pos(ScopePos); + + addStmt(ES->getBlock()); + CFGBlock *SEHExceptBlock = Block; + if (!SEHExceptBlock) + SEHExceptBlock = createBlock(); + + appendStmt(SEHExceptBlock, ES); + + // Also add the SEHExceptBlock as a label, like with regular labels. + SEHExceptBlock->setLabel(ES); + + // Bail out if the CFG is bad. + if (badCFG) + return nullptr; + + // We set Block to NULL to allow lazy creation of a new block (if necessary). + Block = nullptr; + + return SEHExceptBlock; +} + +CFGBlock *CFGBuilder::VisitSEHFinallyStmt(SEHFinallyStmt *FS) { + return VisitCompoundStmt(FS->getBlock(), /*ExternallyDestructed=*/false); +} + +CFGBlock *CFGBuilder::VisitSEHLeaveStmt(SEHLeaveStmt *LS) { + // "__leave" is a control-flow statement. Thus we stop processing the current + // block. + if (badCFG) + return nullptr; + + // Now create a new block that ends with the __leave statement. + Block = createBlock(false); + Block->setTerminator(LS); + + // If there is no target for the __leave, then we are looking at an incomplete + // AST. This means that the CFG cannot be constructed. + if (SEHLeaveJumpTarget.block) { + addAutomaticObjHandling(ScopePos, SEHLeaveJumpTarget.scopePosition, LS); + addSuccessor(Block, SEHLeaveJumpTarget.block); + } else + badCFG = true; + + return Block; +} + +CFGBlock *CFGBuilder::VisitSEHTryStmt(SEHTryStmt *Terminator) { + // "__try"/"__except"/"__finally" is a control-flow statement. Thus we stop + // processing the current block. + CFGBlock *SEHTrySuccessor = nullptr; + + if (Block) { + if (badCFG) + return nullptr; + SEHTrySuccessor = Block; + } else SEHTrySuccessor = Succ; + + // FIXME: Implement __finally support. + if (Terminator->getFinallyHandler()) + return NYS(); + + CFGBlock *PrevSEHTryTerminatedBlock = TryTerminatedBlock; + + // Create a new block that will contain the __try statement. + CFGBlock *NewTryTerminatedBlock = createBlock(false); + + // Add the terminator in the __try block. + NewTryTerminatedBlock->setTerminator(Terminator); + + if (SEHExceptStmt *Except = Terminator->getExceptHandler()) { + // The code after the try is the implicit successor if there's an __except. + Succ = SEHTrySuccessor; + Block = nullptr; + CFGBlock *ExceptBlock = VisitSEHExceptStmt(Except); + if (!ExceptBlock) + return nullptr; + // Add this block to the list of successors for the block with the try + // statement. + addSuccessor(NewTryTerminatedBlock, ExceptBlock); + } + if (PrevSEHTryTerminatedBlock) + addSuccessor(NewTryTerminatedBlock, PrevSEHTryTerminatedBlock); + else + addSuccessor(NewTryTerminatedBlock, &cfg->getExit()); + + // The code after the try is the implicit successor. + Succ = SEHTrySuccessor; + + // Save the current "__try" context. + SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock); + cfg->addTryDispatchBlock(TryTerminatedBlock); + + // Save the current value for the __leave target. + // All __leaves should go to the code following the __try + // (FIXME: or if the __try has a __finally, to the __finally.) + SaveAndRestore save_break(SEHLeaveJumpTarget); + SEHLeaveJumpTarget = JumpTarget(SEHTrySuccessor, ScopePos); + + assert(Terminator->getTryBlock() && "__try must contain a non-NULL body"); + Block = nullptr; + return addStmt(Terminator->getTryBlock()); +} + +CFGBlock *CFGBuilder::VisitLabelStmt(LabelStmt *L) { + // Get the block of the labeled statement. Add it to our map. 
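+  // Illustrative example:
+  //   goto done;        // may arrive from arbitrary predecessors
+  //   ...
+  //   done: cleanup();  // hence `done:` must begin a fresh block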
+ addStmt(L->getSubStmt()); + CFGBlock *LabelBlock = Block; + + if (!LabelBlock) // This can happen when the body is empty, i.e. + LabelBlock = createBlock(); // scopes that only contains NullStmts. + + assert(LabelMap.find(L->getDecl()) == LabelMap.end() && + "label already in map"); + LabelMap[L->getDecl()] = JumpTarget(LabelBlock, ScopePos); + + // Labels partition blocks, so this is the end of the basic block we were + // processing (L is the block's label). Because this is label (and we have + // already processed the substatement) there is no extra control-flow to worry + // about. + LabelBlock->setLabel(L); + if (badCFG) + return nullptr; + + // We set Block to NULL to allow lazy creation of a new block (if necessary). + Block = nullptr; + + // This block is now the implicit successor of other blocks. + Succ = LabelBlock; + + return LabelBlock; +} + +CFGBlock *CFGBuilder::VisitBlockExpr(BlockExpr *E, AddStmtChoice asc) { + CFGBlock *LastBlock = VisitNoRecurse(E, asc); + for (const BlockDecl::Capture &CI : E->getBlockDecl()->captures()) { + if (Expr *CopyExpr = CI.getCopyExpr()) { + CFGBlock *Tmp = Visit(CopyExpr); + if (Tmp) + LastBlock = Tmp; + } + } + return LastBlock; +} + +CFGBlock *CFGBuilder::VisitLambdaExpr(LambdaExpr *E, AddStmtChoice asc) { + CFGBlock *LastBlock = VisitNoRecurse(E, asc); + + unsigned Idx = 0; + for (LambdaExpr::capture_init_iterator it = E->capture_init_begin(), + et = E->capture_init_end(); + it != et; ++it, ++Idx) { + if (Expr *Init = *it) { + // If the initializer is an ArrayInitLoopExpr, we want to extract the + // initializer, that's used for each element. + auto *AILEInit = extractElementInitializerFromNestedAILE( + dyn_cast<ArrayInitLoopExpr>(Init)); + + findConstructionContexts(ConstructionContextLayer::create( + cfg->getBumpVectorContext(), {E, Idx}), + AILEInit ? AILEInit : Init); + + CFGBlock *Tmp = Visit(Init); + if (Tmp) + LastBlock = Tmp; + } + } + return LastBlock; +} + +CFGBlock *CFGBuilder::VisitGotoStmt(GotoStmt *G) { + // Goto is a control-flow statement. Thus we stop processing the current + // block and create a new one. + + Block = createBlock(false); + Block->setTerminator(G); + + // If we already know the mapping to the label block add the successor now. + LabelMapTy::iterator I = LabelMap.find(G->getLabel()); + + if (I == LabelMap.end()) + // We will need to backpatch this block later. + BackpatchBlocks.push_back(JumpSource(Block, ScopePos)); + else { + JumpTarget JT = I->second; + addAutomaticObjHandling(ScopePos, JT.scopePosition, G); + addSuccessor(Block, JT.block); + } + + return Block; +} + +CFGBlock *CFGBuilder::VisitGCCAsmStmt(GCCAsmStmt *G, AddStmtChoice asc) { + // Goto is a control-flow statement. Thus we stop processing the current + // block and create a new one. + + if (!G->isAsmGoto()) + return VisitStmt(G, asc); + + if (Block) { + Succ = Block; + if (badCFG) + return nullptr; + } + Block = createBlock(); + Block->setTerminator(G); + // We will backpatch this block later for all the labels. + BackpatchBlocks.push_back(JumpSource(Block, ScopePos)); + // Save "Succ" in BackpatchBlocks. In the backpatch processing, "Succ" is + // used to avoid adding "Succ" again. + BackpatchBlocks.push_back(JumpSource(Succ, ScopePos)); + return VisitChildren(G); +} + +CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { + CFGBlock *LoopSuccessor = nullptr; + + // Save local scope position because in case of condition variable ScopePos + // won't be restored when traversing AST. 
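+  // Illustrative example: in `for (int i = 0; int n = step(i); ++i) ...`
+  // (step() being an arbitrary callee), the condition variable `n` is
+  // destroyed and recreated on every iteration, which is what the scope
+  // bookkeeping below models.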
+ SaveAndRestore save_scope_pos(ScopePos); + + // Create local scope for init statement and possible condition variable. + // Add destructor for init statement and condition variable. + // Store scope position for continue statement. + if (Stmt *Init = F->getInit()) + addLocalScopeForStmt(Init); + LocalScope::const_iterator LoopBeginScopePos = ScopePos; + + if (VarDecl *VD = F->getConditionVariable()) + addLocalScopeForVarDecl(VD); + LocalScope::const_iterator ContinueScopePos = ScopePos; + + addAutomaticObjHandling(ScopePos, save_scope_pos.get(), F); + + addLoopExit(F); + + // "for" is a control-flow statement. Thus we stop processing the current + // block. + if (Block) { + if (badCFG) + return nullptr; + LoopSuccessor = Block; + } else + LoopSuccessor = Succ; + + // Save the current value for the break targets. + // All breaks should go to the code following the loop. + SaveAndRestore save_break(BreakJumpTarget); + BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos); + + CFGBlock *BodyBlock = nullptr, *TransitionBlock = nullptr; + + // Now create the loop body. + { + assert(F->getBody()); + + // Save the current values for Block, Succ, continue and break targets. + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget); + + // Create an empty block to represent the transition block for looping back + // to the head of the loop. If we have increment code, it will + // go in this block as well. + Block = Succ = TransitionBlock = createBlock(false); + TransitionBlock->setLoopTarget(F); + + if (Stmt *I = F->getInc()) { + // Generate increment code in its own basic block. This is the target of + // continue statements. + Succ = addStmt(I); + } + + // Finish up the increment (or empty) block if it hasn't been already. + if (Block) { + assert(Block == Succ); + if (badCFG) + return nullptr; + Block = nullptr; + } + + // The starting block for the loop increment is the block that should + // represent the 'loop target' for looping back to the start of the loop. + ContinueJumpTarget = JumpTarget(Succ, ContinueScopePos); + ContinueJumpTarget.block->setLoopTarget(F); + + // Loop body should end with destructor of Condition variable (if any). + addAutomaticObjHandling(ScopePos, LoopBeginScopePos, F); + + // If body is not a compound statement create implicit scope + // and add destructors. + if (!isa<CompoundStmt>(F->getBody())) + addLocalScopeAndDtors(F->getBody()); + + // Now populate the body block, and in the process create new blocks as we + // walk the body of the loop. + BodyBlock = addStmt(F->getBody()); + + if (!BodyBlock) { + // In the case of "for (...;...;...);" we can have a null BodyBlock. + // Use the continue jump target as the proxy for the body. + BodyBlock = ContinueJumpTarget.block; + } + else if (badCFG) + return nullptr; + } + + // Because of short-circuit evaluation, the condition of the loop can span + // multiple basic blocks. Thus we need the "Entry" and "Exit" blocks that + // evaluate the condition. + CFGBlock *EntryConditionBlock = nullptr, *ExitConditionBlock = nullptr; + + do { + Expr *C = F->getCond(); + SaveAndRestore save_scope_pos(ScopePos); + + // Specially handle logical operators, which have a slightly + // more optimal CFG representation. + if (BinaryOperator *Cond = + dyn_cast_or_null<BinaryOperator>(C ? 
C->IgnoreParens() : nullptr)) + if (Cond->isLogicalOp()) { + std::tie(EntryConditionBlock, ExitConditionBlock) = + VisitLogicalOperator(Cond, F, BodyBlock, LoopSuccessor); + break; + } + + // The default case when not handling logical operators. + EntryConditionBlock = ExitConditionBlock = createBlock(false); + ExitConditionBlock->setTerminator(F); + + // See if this is a known constant. + TryResult KnownVal(true); + + if (C) { + // Now add the actual condition to the condition block. + // Because the condition itself may contain control-flow, new blocks may + // be created. Thus we update "Succ" after adding the condition. + Block = ExitConditionBlock; + EntryConditionBlock = addStmt(C); + + // If this block contains a condition variable, add both the condition + // variable and initializer to the CFG. + if (VarDecl *VD = F->getConditionVariable()) { + if (Expr *Init = VD->getInit()) { + autoCreateBlock(); + const DeclStmt *DS = F->getConditionVariableDeclStmt(); + assert(DS->isSingleDecl()); + findConstructionContexts( + ConstructionContextLayer::create(cfg->getBumpVectorContext(), DS), + Init); + appendStmt(Block, DS); + EntryConditionBlock = addStmt(Init); + assert(Block == EntryConditionBlock); + maybeAddScopeBeginForVarDecl(EntryConditionBlock, VD, C); + } + } + + if (Block && badCFG) + return nullptr; + + KnownVal = tryEvaluateBool(C); + } + + // Add the loop body entry as a successor to the condition. + addSuccessor(ExitConditionBlock, KnownVal.isFalse() ? nullptr : BodyBlock); + // Link up the condition block with the code that follows the loop. (the + // false branch). + addSuccessor(ExitConditionBlock, + KnownVal.isTrue() ? nullptr : LoopSuccessor); + } while (false); + + // Link up the loop-back block to the entry condition block. + addSuccessor(TransitionBlock, EntryConditionBlock); + + // The condition block is the implicit successor for any code above the loop. + Succ = EntryConditionBlock; + + // If the loop contains initialization, create a new block for those + // statements. This block can also contain statements that precede the loop. + if (Stmt *I = F->getInit()) { + SaveAndRestore save_scope_pos(ScopePos); + ScopePos = LoopBeginScopePos; + Block = createBlock(); + return addStmt(I); + } + + // There is no loop initialization. We are thus basically a while loop. + // NULL out Block to force lazy block construction. + Block = nullptr; + Succ = EntryConditionBlock; + return EntryConditionBlock; +} + +CFGBlock * +CFGBuilder::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE, + AddStmtChoice asc) { + findConstructionContexts( + ConstructionContextLayer::create(cfg->getBumpVectorContext(), MTE), + MTE->getSubExpr()); + + return VisitStmt(MTE, asc); +} + +CFGBlock *CFGBuilder::VisitMemberExpr(MemberExpr *M, AddStmtChoice asc) { + if (asc.alwaysAdd(*this, M)) { + autoCreateBlock(); + appendStmt(Block, M); + } + return Visit(M->getBase()); +} + +CFGBlock *CFGBuilder::VisitObjCForCollectionStmt(ObjCForCollectionStmt *S) { + // Objective-C fast enumeration 'for' statements: + // http://developer.apple.com/documentation/Cocoa/Conceptual/ObjectiveC + // + // for ( Type newVariable in collection_expression ) { statements } + // + // becomes: + // + // prologue: + // 1. collection_expression + // T. jump to loop_entry + // loop_entry: + // 1. side-effects of element expression + // 1. ObjCForCollectionStmt [performs binding to newVariable] + // T. ObjCForCollectionStmt TB, FB [jumps to TB if newVariable != nil] + // TB: + // statements + // T. 
jump to loop_entry + // FB: + // what comes after + // + // and + // + // Type existingItem; + // for ( existingItem in expression ) { statements } + // + // becomes: + // + // the same with newVariable replaced with existingItem; the binding works + // the same except that for one ObjCForCollectionStmt::getElement() returns + // a DeclStmt and the other returns a DeclRefExpr. + + CFGBlock *LoopSuccessor = nullptr; + + if (Block) { + if (badCFG) + return nullptr; + LoopSuccessor = Block; + Block = nullptr; + } else + LoopSuccessor = Succ; + + // Build the condition blocks. + CFGBlock *ExitConditionBlock = createBlock(false); + + // Set the terminator for the "exit" condition block. + ExitConditionBlock->setTerminator(S); + + // The last statement in the block should be the ObjCForCollectionStmt, which + // performs the actual binding to 'element' and determines if there are any + // more items in the collection. + appendStmt(ExitConditionBlock, S); + Block = ExitConditionBlock; + + // Walk the 'element' expression to see if there are any side-effects. We + // generate new blocks as necessary. We DON'T add the statement by default to + // the CFG unless it contains control-flow. + CFGBlock *EntryConditionBlock = Visit(S->getElement(), + AddStmtChoice::NotAlwaysAdd); + if (Block) { + if (badCFG) + return nullptr; + Block = nullptr; + } + + // The condition block is the implicit successor for the loop body as well as + // any code above the loop. + Succ = EntryConditionBlock; + + // Now create the true branch. + { + // Save the current values for Succ, continue and break targets. + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget), + save_break(BreakJumpTarget); + + // Add an intermediate block between the BodyBlock and the + // EntryConditionBlock to represent the "loop back" transition, for looping + // back to the head of the loop. + CFGBlock *LoopBackBlock = nullptr; + Succ = LoopBackBlock = createBlock(); + LoopBackBlock->setLoopTarget(S); + + BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos); + ContinueJumpTarget = JumpTarget(Succ, ScopePos); + + CFGBlock *BodyBlock = addStmt(S->getBody()); + + if (!BodyBlock) + BodyBlock = ContinueJumpTarget.block; // can happen for "for (X in Y) ;" + else if (Block) { + if (badCFG) + return nullptr; + } + + // This new body block is a successor to our "exit" condition block. + addSuccessor(ExitConditionBlock, BodyBlock); + } + + // Link up the condition block with the code that follows the loop. + // (the false branch). + addSuccessor(ExitConditionBlock, LoopSuccessor); + + // Now create a prologue block to contain the collection expression. + Block = createBlock(); + return addStmt(S->getCollection()); +} + +CFGBlock *CFGBuilder::VisitObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S) { + // Inline the body. + return addStmt(S->getSubStmt()); + // TODO: consider adding cleanups for the end of @autoreleasepool scope. +} + +CFGBlock *CFGBuilder::VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *S) { + // FIXME: Add locking 'primitives' to CFG for @synchronized. + + // Inline the body. + CFGBlock *SyncBlock = addStmt(S->getSynchBody()); + + // The sync body starts its own basic block. This makes it a little easier + // for diagnostic clients. + if (SyncBlock) { + if (badCFG) + return nullptr; + + Block = nullptr; + Succ = SyncBlock; + } + + // Add the @synchronized to the CFG. + autoCreateBlock(); + appendStmt(Block, S); + + // Inline the sync expression. 
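+  // Since construction runs backwards, the emitted order for
+  // `@synchronized (lockObj) { body }` is: evaluate `lockObj`, then the
+  // @synchronized element itself, then `body`.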
+ return addStmt(S->getSynchExpr()); +} + +CFGBlock *CFGBuilder::VisitPseudoObjectExpr(PseudoObjectExpr *E) { + autoCreateBlock(); + + // Add the PseudoObject as the last thing. + appendStmt(Block, E); + + CFGBlock *lastBlock = Block; + + // Before that, evaluate all of the semantics in order. In + // CFG-land, that means appending them in reverse order. + for (unsigned i = E->getNumSemanticExprs(); i != 0; ) { + Expr *Semantic = E->getSemanticExpr(--i); + + // If the semantic is an opaque value, we're being asked to bind + // it to its source expression. + if (OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(Semantic)) + Semantic = OVE->getSourceExpr(); + + if (CFGBlock *B = Visit(Semantic)) + lastBlock = B; + } + + return lastBlock; +} + +CFGBlock *CFGBuilder::VisitWhileStmt(WhileStmt *W) { + CFGBlock *LoopSuccessor = nullptr; + + // Save local scope position because in case of condition variable ScopePos + // won't be restored when traversing AST. + SaveAndRestore save_scope_pos(ScopePos); + + // Create local scope for possible condition variable. + // Store scope position for continue statement. + LocalScope::const_iterator LoopBeginScopePos = ScopePos; + if (VarDecl *VD = W->getConditionVariable()) { + addLocalScopeForVarDecl(VD); + addAutomaticObjHandling(ScopePos, LoopBeginScopePos, W); + } + addLoopExit(W); + + // "while" is a control-flow statement. Thus we stop processing the current + // block. + if (Block) { + if (badCFG) + return nullptr; + LoopSuccessor = Block; + Block = nullptr; + } else { + LoopSuccessor = Succ; + } + + CFGBlock *BodyBlock = nullptr, *TransitionBlock = nullptr; + + // Process the loop body. + { + assert(W->getBody()); + + // Save the current values for Block, Succ, continue and break targets. + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget), + save_break(BreakJumpTarget); + + // Create an empty block to represent the transition block for looping back + // to the head of the loop. + Succ = TransitionBlock = createBlock(false); + TransitionBlock->setLoopTarget(W); + ContinueJumpTarget = JumpTarget(Succ, LoopBeginScopePos); + + // All breaks should go to the code following the loop. + BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos); + + // Loop body should end with destructor of Condition variable (if any). + addAutomaticObjHandling(ScopePos, LoopBeginScopePos, W); + + // If body is not a compound statement create implicit scope + // and add destructors. + if (!isa<CompoundStmt>(W->getBody())) + addLocalScopeAndDtors(W->getBody()); + + // Create the body. The returned block is the entry to the loop body. + BodyBlock = addStmt(W->getBody()); + + if (!BodyBlock) + BodyBlock = ContinueJumpTarget.block; // can happen for "while(...) ;" + else if (Block && badCFG) + return nullptr; + } + + // Because of short-circuit evaluation, the condition of the loop can span + // multiple basic blocks. Thus we need the "Entry" and "Exit" blocks that + // evaluate the condition. + CFGBlock *EntryConditionBlock = nullptr, *ExitConditionBlock = nullptr; + + do { + Expr *C = W->getCond(); + + // Specially handle logical operators, which have a slightly + // more optimal CFG representation. + if (BinaryOperator *Cond = dyn_cast<BinaryOperator>(C->IgnoreParens())) + if (Cond->isLogicalOp()) { + std::tie(EntryConditionBlock, ExitConditionBlock) = + VisitLogicalOperator(Cond, W, BodyBlock, LoopSuccessor); + break; + } + + // The default case when not handling logical operators. 
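+    // Editorial sketch (illustrative, not from any particular testcase):
+    // a plain condition such as
+    //
+    //   while (x) { f(); }
+    //
+    // takes this default path and yields roughly
+    //
+    //   [transition (loop target)] -> [condition: x]
+    //   [condition] -> [body: f()] on true, -> [code after the loop] on false
+    //   [body] -> [transition]
+    //
+    // whereas `while (a && b)` is routed through VisitLogicalOperator above
+    // so that each operand gets its own condition block.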
+    ExitConditionBlock = createBlock(false);
+    ExitConditionBlock->setTerminator(W);
+
+    // Now add the actual condition to the condition block.
+    // Because the condition itself may contain control-flow, new blocks may
+    // be created. Thus we update "Succ" after adding the condition.
+    Block = ExitConditionBlock;
+    Block = EntryConditionBlock = addStmt(C);
+
+    // If this block contains a condition variable, add both the condition
+    // variable and initializer to the CFG.
+    if (VarDecl *VD = W->getConditionVariable()) {
+      if (Expr *Init = VD->getInit()) {
+        autoCreateBlock();
+        const DeclStmt *DS = W->getConditionVariableDeclStmt();
+        assert(DS->isSingleDecl());
+        findConstructionContexts(
+            ConstructionContextLayer::create(cfg->getBumpVectorContext(),
+                                             const_cast<DeclStmt *>(DS)),
+            Init);
+        appendStmt(Block, DS);
+        EntryConditionBlock = addStmt(Init);
+        assert(Block == EntryConditionBlock);
+        maybeAddScopeBeginForVarDecl(EntryConditionBlock, VD, C);
+      }
+    }
+
+    if (Block && badCFG)
+      return nullptr;
+
+    // See if this is a known constant.
+    const TryResult &KnownVal = tryEvaluateBool(C);
+
+    // Add the loop body entry as a successor to the condition.
+    addSuccessor(ExitConditionBlock, KnownVal.isFalse() ? nullptr : BodyBlock);
+    // Link up the condition block with the code that follows the loop. (the
+    // false branch).
+    addSuccessor(ExitConditionBlock,
+                 KnownVal.isTrue() ? nullptr : LoopSuccessor);
+  } while (false);
+
+  // Link up the loop-back block to the entry condition block.
+  addSuccessor(TransitionBlock, EntryConditionBlock);
+
+  // There can be no more statements in the condition block since we loop back
+  // to this block. NULL out Block to force lazy creation of another block.
+  Block = nullptr;
+
+  // Return the condition block, which is the dominating block for the loop.
+  Succ = EntryConditionBlock;
+  return EntryConditionBlock;
+}
+
+CFGBlock *CFGBuilder::VisitArrayInitLoopExpr(ArrayInitLoopExpr *A,
+                                             AddStmtChoice asc) {
+  if (asc.alwaysAdd(*this, A)) {
+    autoCreateBlock();
+    appendStmt(Block, A);
+  }
+
+  CFGBlock *B = Block;
+
+  if (CFGBlock *R = Visit(A->getSubExpr()))
+    B = R;
+
+  auto *OVE = dyn_cast<OpaqueValueExpr>(A->getCommonExpr());
+  assert(OVE && "ArrayInitLoopExpr->getCommonExpr() should be wrapped in an "
+                "OpaqueValueExpr!");
+  if (CFGBlock *R = Visit(OVE->getSourceExpr()))
+    B = R;
+
+  return B;
+}
+
+CFGBlock *CFGBuilder::VisitObjCAtCatchStmt(ObjCAtCatchStmt *CS) {
+  // ObjCAtCatchStmts are treated like labels, so they are the first statement
+  // in a block.
+
+  // Save local scope position because in case of exception variable ScopePos
+  // won't be restored when traversing AST.
+  SaveAndRestore save_scope_pos(ScopePos);
+
+  if (CS->getCatchBody())
+    addStmt(CS->getCatchBody());
+
+  CFGBlock *CatchBlock = Block;
+  if (!CatchBlock)
+    CatchBlock = createBlock();
+
+  appendStmt(CatchBlock, CS);
+
+  // Also add the ObjCAtCatchStmt as a label, like with regular labels.
+  CatchBlock->setLabel(CS);
+
+  // Bail out if the CFG is bad.
+  if (badCFG)
+    return nullptr;
+
+  // We set Block to NULL to allow lazy creation of a new block (if necessary).
+  Block = nullptr;
+
+  return CatchBlock;
+}
+
+CFGBlock *CFGBuilder::VisitObjCAtThrowStmt(ObjCAtThrowStmt *S) {
+  // If we were in the middle of a block we stop processing that block.
+  if (badCFG)
+    return nullptr;
+
+  // Create the new block.
+  Block = createBlock(false);
+
+  if (TryTerminatedBlock)
+    // The current try statement is the only successor.
+    addSuccessor(Block, TryTerminatedBlock);
+  else
+    // otherwise the Exit block is the only successor.
+    addSuccessor(Block, &cfg->getExit());
+
+  // Add the statement to the block. This may create new blocks if S contains
+  // control-flow (short-circuit operations).
+  return VisitStmt(S, AddStmtChoice::AlwaysAdd);
+}
+
+CFGBlock *CFGBuilder::VisitObjCAtTryStmt(ObjCAtTryStmt *Terminator) {
+  // "@try"/"@catch" is a control-flow statement. Thus we stop processing the
+  // current block.
+  CFGBlock *TrySuccessor = nullptr;
+
+  if (Block) {
+    if (badCFG)
+      return nullptr;
+    TrySuccessor = Block;
+  } else
+    TrySuccessor = Succ;
+
+  // FIXME: Implement @finally support.
+  if (Terminator->getFinallyStmt())
+    return NYS();
+
+  CFGBlock *PrevTryTerminatedBlock = TryTerminatedBlock;
+
+  // Create a new block that will contain the try statement.
+  CFGBlock *NewTryTerminatedBlock = createBlock(false);
+  // Add the terminator in the try block.
+  NewTryTerminatedBlock->setTerminator(Terminator);
+
+  bool HasCatchAll = false;
+  for (ObjCAtCatchStmt *CS : Terminator->catch_stmts()) {
+    // The code after the try is the implicit successor.
+    Succ = TrySuccessor;
+    if (CS->hasEllipsis()) {
+      HasCatchAll = true;
+    }
+    Block = nullptr;
+    CFGBlock *CatchBlock = VisitObjCAtCatchStmt(CS);
+    if (!CatchBlock)
+      return nullptr;
+    // Add this block to the list of successors for the block with the try
+    // statement.
+    addSuccessor(NewTryTerminatedBlock, CatchBlock);
+  }
+
+  // FIXME: This needs updating when @finally support is added.
+  if (!HasCatchAll) {
+    if (PrevTryTerminatedBlock)
+      addSuccessor(NewTryTerminatedBlock, PrevTryTerminatedBlock);
+    else
+      addSuccessor(NewTryTerminatedBlock, &cfg->getExit());
+  }
+
+  // The code after the try is the implicit successor.
+  Succ = TrySuccessor;
+
+  // Save the current "try" context.
+  SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock);
+  cfg->addTryDispatchBlock(TryTerminatedBlock);
+
+  assert(Terminator->getTryBody() && "try must contain a non-NULL body");
+  Block = nullptr;
+  return addStmt(Terminator->getTryBody());
+}
+
+CFGBlock *CFGBuilder::VisitObjCMessageExpr(ObjCMessageExpr *ME,
+                                           AddStmtChoice asc) {
+  findConstructionContextsForArguments(ME);
+
+  autoCreateBlock();
+  appendObjCMessage(Block, ME);
+
+  return VisitChildren(ME);
+}
+
+CFGBlock *CFGBuilder::VisitCXXThrowExpr(CXXThrowExpr *T) {
+  // If we were in the middle of a block we stop processing that block.
+  if (badCFG)
+    return nullptr;
+
+  // Create the new block.
+  Block = createBlock(false);
+
+  if (TryTerminatedBlock)
+    // The current try statement is the only successor.
+    addSuccessor(Block, TryTerminatedBlock);
+  else
+    // otherwise the Exit block is the only successor.
+    addSuccessor(Block, &cfg->getExit());
+
+  // Add the statement to the block. This may create new blocks if T contains
+  // control-flow (short-circuit operations).
+  return VisitStmt(T, AddStmtChoice::AlwaysAdd);
+}
+
+CFGBlock *CFGBuilder::VisitCXXTypeidExpr(CXXTypeidExpr *S, AddStmtChoice asc) {
+  if (asc.alwaysAdd(*this, S)) {
+    autoCreateBlock();
+    appendStmt(Block, S);
+  }
+
+  // C++ [expr.typeid]p3:
+  //   When typeid is applied to an expression other than a glvalue of a
+  //   polymorphic class type [...] [the] expression is an unevaluated
+  //   operand. [...]
+  // We add only potentially evaluated statements to the block to avoid
+  // CFG generation for unevaluated operands.
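+  // For example (illustrative): in `typeid(int)` or `typeid(e)` for a
+  // non-polymorphic `e`, the operand is unevaluated and gets no CFG; in
+  // `typeid(*p)` where `*p` is a glvalue of polymorphic class type, the
+  // operand is evaluated (it may throw std::bad_typeid when `p` is null),
+  // so its children are visited below.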
+ if (S && !S->isTypeDependent() && S->isPotentiallyEvaluated()) + return VisitChildren(S); + + // Return block without CFG for unevaluated operands. + return Block; +} + +CFGBlock *CFGBuilder::VisitDoStmt(DoStmt *D) { + CFGBlock *LoopSuccessor = nullptr; + + addLoopExit(D); + + // "do...while" is a control-flow statement. Thus we stop processing the + // current block. + if (Block) { + if (badCFG) + return nullptr; + LoopSuccessor = Block; + } else + LoopSuccessor = Succ; + + // Because of short-circuit evaluation, the condition of the loop can span + // multiple basic blocks. Thus we need the "Entry" and "Exit" blocks that + // evaluate the condition. + CFGBlock *ExitConditionBlock = createBlock(false); + CFGBlock *EntryConditionBlock = ExitConditionBlock; + + // Set the terminator for the "exit" condition block. + ExitConditionBlock->setTerminator(D); + + // Now add the actual condition to the condition block. Because the condition + // itself may contain control-flow, new blocks may be created. + if (Stmt *C = D->getCond()) { + Block = ExitConditionBlock; + EntryConditionBlock = addStmt(C); + if (Block) { + if (badCFG) + return nullptr; + } + } + + // The condition block is the implicit successor for the loop body. + Succ = EntryConditionBlock; + + // See if this is a known constant. + const TryResult &KnownVal = tryEvaluateBool(D->getCond()); + + // Process the loop body. + CFGBlock *BodyBlock = nullptr; + { + assert(D->getBody()); + + // Save the current values for Block, Succ, and continue and break targets + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget), + save_break(BreakJumpTarget); + + // All continues within this loop should go to the condition block + ContinueJumpTarget = JumpTarget(EntryConditionBlock, ScopePos); + + // All breaks should go to the code following the loop. + BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos); + + // NULL out Block to force lazy instantiation of blocks for the body. + Block = nullptr; + + // If body is not a compound statement create implicit scope + // and add destructors. + if (!isa<CompoundStmt>(D->getBody())) + addLocalScopeAndDtors(D->getBody()); + + // Create the body. The returned block is the entry to the loop body. + BodyBlock = addStmt(D->getBody()); + + if (!BodyBlock) + BodyBlock = EntryConditionBlock; // can happen for "do ; while(...)" + else if (Block) { + if (badCFG) + return nullptr; + } + + // Add an intermediate block between the BodyBlock and the + // ExitConditionBlock to represent the "loop back" transition. Create an + // empty block to represent the transition block for looping back to the + // head of the loop. + // FIXME: Can we do this more efficiently without adding another block? + Block = nullptr; + Succ = BodyBlock; + CFGBlock *LoopBackBlock = createBlock(); + LoopBackBlock->setLoopTarget(D); + + if (!KnownVal.isFalse()) + // Add the loop body entry as a successor to the condition. + addSuccessor(ExitConditionBlock, LoopBackBlock); + else + addSuccessor(ExitConditionBlock, nullptr); + } + + // Link up the condition block with the code that follows the loop. + // (the false branch). + addSuccessor(ExitConditionBlock, KnownVal.isTrue() ? nullptr : LoopSuccessor); + + // There can be no more statements in the body block(s) since we loop back to + // the body. NULL out Block to force lazy creation of another block. + Block = nullptr; + + // Return the loop body, which is the dominating block for the loop. 
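+  // Editorial sketch (illustrative): for `do { f(); } while (c);` this
+  // produces roughly
+  //
+  //   [body: f()] -> [condition: c]
+  //   [condition] -> [loop-back (loop target)] on true, and the loop-back
+  //   block jumps straight back to [body]; on false, control goes to the
+  //   code after the loop.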
+  Succ = BodyBlock;
+  return BodyBlock;
+}
+
+CFGBlock *CFGBuilder::VisitContinueStmt(ContinueStmt *C) {
+  // "continue" is a control-flow statement. Thus we stop processing the
+  // current block.
+  if (badCFG)
+    return nullptr;
+
+  // Now create a new block that ends with the continue statement.
+  Block = createBlock(false);
+  Block->setTerminator(C);
+
+  // If there is no target for the continue, then we are looking at an
+  // incomplete AST. This means the CFG cannot be constructed.
+  if (ContinueJumpTarget.block) {
+    addAutomaticObjHandling(ScopePos, ContinueJumpTarget.scopePosition, C);
+    addSuccessor(Block, ContinueJumpTarget.block);
+  } else
+    badCFG = true;
+
+  return Block;
+}
+
+CFGBlock *CFGBuilder::VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E,
+                                                    AddStmtChoice asc) {
+  if (asc.alwaysAdd(*this, E)) {
+    autoCreateBlock();
+    appendStmt(Block, E);
+  }
+
+  // VLA types have expressions that must be evaluated.
+  // Evaluation is done only for `sizeof`.
+
+  if (E->getKind() != UETT_SizeOf)
+    return Block;
+
+  CFGBlock *lastBlock = Block;
+
+  if (E->isArgumentType()) {
+    for (const VariableArrayType *VA =
+             FindVA(E->getArgumentType().getTypePtr());
+         VA != nullptr; VA = FindVA(VA->getElementType().getTypePtr()))
+      lastBlock = addStmt(VA->getSizeExpr());
+  }
+  return lastBlock;
+}
+
+/// VisitStmtExpr - Utility method to handle (nested) statement
+/// expressions (a GCC extension).
+CFGBlock *CFGBuilder::VisitStmtExpr(StmtExpr *SE, AddStmtChoice asc) {
+  if (asc.alwaysAdd(*this, SE)) {
+    autoCreateBlock();
+    appendStmt(Block, SE);
+  }
+  return VisitCompoundStmt(SE->getSubStmt(), /*ExternallyDestructed=*/true);
+}
+
+CFGBlock *CFGBuilder::VisitSwitchStmt(SwitchStmt *Terminator) {
+  // "switch" is a control-flow statement. Thus we stop processing the current
+  // block.
+  CFGBlock *SwitchSuccessor = nullptr;
+
+  // Save local scope position because in case of condition variable ScopePos
+  // won't be restored when traversing AST.
+  SaveAndRestore save_scope_pos(ScopePos);
+
+  // Create local scope for C++17 switch init-stmt if one exists.
+  if (Stmt *Init = Terminator->getInit())
+    addLocalScopeForStmt(Init);
+
+  // Create local scope for possible condition variable.
+  // Store scope position. Add implicit destructor.
+  if (VarDecl *VD = Terminator->getConditionVariable())
+    addLocalScopeForVarDecl(VD);
+
+  addAutomaticObjHandling(ScopePos, save_scope_pos.get(), Terminator);
+
+  if (Block) {
+    if (badCFG)
+      return nullptr;
+    SwitchSuccessor = Block;
+  } else
+    SwitchSuccessor = Succ;
+
+  // Save the current "switch" context.
+  SaveAndRestore save_switch(SwitchTerminatedBlock),
+      save_default(DefaultCaseBlock);
+  SaveAndRestore save_break(BreakJumpTarget);
+
+  // Set the "default" case to be the block after the switch statement. If the
+  // switch statement contains a "default:", this value will be overwritten
+  // with the block for that code.
+  DefaultCaseBlock = SwitchSuccessor;
+
+  // Create a new block that will contain the switch statement.
+  SwitchTerminatedBlock = createBlock(false);
+
+  // Now process the switch body. The code after the switch is the implicit
+  // successor.
+  Succ = SwitchSuccessor;
+  BreakJumpTarget = JumpTarget(SwitchSuccessor, ScopePos);
+
+  // When visiting the body, the case statements should automatically get
+  // linked up to the switch. We also don't keep a pointer to the body, since
+  // all control-flow from the switch goes to case/default statements.
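+  // Editorial example (illustrative): for
+  //
+  //   switch (x) { case 1: f(); break; default: g(); }
+  //
+  // the switch-terminated block gets [case 1] and [default] as successors,
+  // each `break` jumps to the code after the switch, and the default
+  // successor is deliberately added last (see VisitDefaultStmt below).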
+ assert(Terminator->getBody() && "switch must contain a non-NULL body"); + Block = nullptr; + + // For pruning unreachable case statements, save the current state + // for tracking the condition value. + SaveAndRestore save_switchExclusivelyCovered(switchExclusivelyCovered, false); + + // Determine if the switch condition can be explicitly evaluated. + assert(Terminator->getCond() && "switch condition must be non-NULL"); + Expr::EvalResult result; + bool b = tryEvaluate(Terminator->getCond(), result); + SaveAndRestore save_switchCond(switchCond, b ? &result : nullptr); + + // If body is not a compound statement create implicit scope + // and add destructors. + if (!isa<CompoundStmt>(Terminator->getBody())) + addLocalScopeAndDtors(Terminator->getBody()); + + addStmt(Terminator->getBody()); + if (Block) { + if (badCFG) + return nullptr; + } + + // If we have no "default:" case, the default transition is to the code + // following the switch body. Moreover, take into account if all the + // cases of a switch are covered (e.g., switching on an enum value). + // + // Note: We add a successor to a switch that is considered covered yet has no + // case statements if the enumeration has no enumerators. + bool SwitchAlwaysHasSuccessor = false; + SwitchAlwaysHasSuccessor |= switchExclusivelyCovered; + SwitchAlwaysHasSuccessor |= Terminator->isAllEnumCasesCovered() && + Terminator->getSwitchCaseList(); + addSuccessor(SwitchTerminatedBlock, DefaultCaseBlock, + !SwitchAlwaysHasSuccessor); + + // Add the terminator and condition in the switch block. + SwitchTerminatedBlock->setTerminator(Terminator); + Block = SwitchTerminatedBlock; + CFGBlock *LastBlock = addStmt(Terminator->getCond()); + + // If the SwitchStmt contains a condition variable, add both the + // SwitchStmt and the condition variable initialization to the CFG. + if (VarDecl *VD = Terminator->getConditionVariable()) { + if (Expr *Init = VD->getInit()) { + autoCreateBlock(); + appendStmt(Block, Terminator->getConditionVariableDeclStmt()); + LastBlock = addStmt(Init); + maybeAddScopeBeginForVarDecl(LastBlock, VD, Init); + } + } + + // Finally, if the SwitchStmt contains a C++17 init-stmt, add it to the CFG. + if (Stmt *Init = Terminator->getInit()) { + autoCreateBlock(); + LastBlock = addStmt(Init); + } + + return LastBlock; +} + +static bool shouldAddCase(bool &switchExclusivelyCovered, + const Expr::EvalResult *switchCond, + const CaseStmt *CS, + ASTContext &Ctx) { + if (!switchCond) + return true; + + bool addCase = false; + + if (!switchExclusivelyCovered) { + if (switchCond->Val.isInt()) { + // Evaluate the LHS of the case value. + const llvm::APSInt &lhsInt = CS->getLHS()->EvaluateKnownConstInt(Ctx); + const llvm::APSInt &condInt = switchCond->Val.getInt(); + + if (condInt == lhsInt) { + addCase = true; + switchExclusivelyCovered = true; + } + else if (condInt > lhsInt) { + if (const Expr *RHS = CS->getRHS()) { + // Evaluate the RHS of the case value. + const llvm::APSInt &V2 = RHS->EvaluateKnownConstInt(Ctx); + if (V2 >= condInt) { + addCase = true; + switchExclusivelyCovered = true; + } + } + } + } + else + addCase = true; + } + return addCase; +} + +CFGBlock *CFGBuilder::VisitCaseStmt(CaseStmt *CS) { + // CaseStmts are essentially labels, so they are the first statement in a + // block. 
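+  // Editorial example (illustrative): in
+  //
+  //   case 1:
+  //   case 2:
+  //   case 3: f();
+  //
+  // each CaseStmt is the sub-statement of the one before it, so the loop
+  // below unrolls the chain iteratively (rather than recursing) and gives
+  // every label its own block hooked up to the switch.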
+  CFGBlock *TopBlock = nullptr, *LastBlock = nullptr;
+
+  if (Stmt *Sub = CS->getSubStmt()) {
+    // For deeply nested chains of CaseStmts, instead of doing a recursion
+    // (which can blow out the stack), manually unroll and create blocks
+    // along the way.
+    while (isa<CaseStmt>(Sub)) {
+      CFGBlock *currentBlock = createBlock(false);
+      currentBlock->setLabel(CS);
+
+      if (TopBlock)
+        addSuccessor(LastBlock, currentBlock);
+      else
+        TopBlock = currentBlock;
+
+      addSuccessor(SwitchTerminatedBlock,
+                   shouldAddCase(switchExclusivelyCovered, switchCond,
+                                 CS, *Context)
+                   ? currentBlock : nullptr);
+
+      LastBlock = currentBlock;
+      CS = cast<CaseStmt>(Sub);
+      Sub = CS->getSubStmt();
+    }
+
+    addStmt(Sub);
+  }
+
+  CFGBlock *CaseBlock = Block;
+  if (!CaseBlock)
+    CaseBlock = createBlock();
+
+  // Case statements partition blocks, so this is the top of the basic block
+  // we were processing (the "case XXX:" is the label).
+  CaseBlock->setLabel(CS);
+
+  if (badCFG)
+    return nullptr;
+
+  // Add this block to the list of successors for the block with the switch
+  // statement.
+  assert(SwitchTerminatedBlock);
+  addSuccessor(SwitchTerminatedBlock, CaseBlock,
+               shouldAddCase(switchExclusivelyCovered, switchCond,
+                             CS, *Context));
+
+  // We set Block to NULL to allow lazy creation of a new block (if necessary).
+  Block = nullptr;
+
+  if (TopBlock) {
+    addSuccessor(LastBlock, CaseBlock);
+    Succ = TopBlock;
+  } else {
+    // This block is now the implicit successor of other blocks.
+    Succ = CaseBlock;
+  }
+
+  return Succ;
+}
+
+CFGBlock *CFGBuilder::VisitDefaultStmt(DefaultStmt *Terminator) {
+  if (Terminator->getSubStmt())
+    addStmt(Terminator->getSubStmt());
+
+  DefaultCaseBlock = Block;
+
+  if (!DefaultCaseBlock)
+    DefaultCaseBlock = createBlock();
+
+  // Default statements partition blocks, so this is the top of the basic block
+  // we were processing (the "default:" is the label).
+  DefaultCaseBlock->setLabel(Terminator);
+
+  if (badCFG)
+    return nullptr;
+
+  // Unlike case statements, we don't add the default block to the successors
+  // for the switch statement immediately. This is done when we finish
+  // processing the switch statement. This allows for the default case
+  // (including a fall-through to the code after the switch statement) to
+  // always be the last successor of a switch-terminated block.
+
+  // We set Block to NULL to allow lazy creation of a new block (if necessary).
+  Block = nullptr;
+
+  // This block is now the implicit successor of other blocks.
+  Succ = DefaultCaseBlock;
+
+  return DefaultCaseBlock;
+}
+
+CFGBlock *CFGBuilder::VisitCXXTryStmt(CXXTryStmt *Terminator) {
+  // "try"/"catch" is a control-flow statement. Thus we stop processing the
+  // current block.
+  CFGBlock *TrySuccessor = nullptr;
+
+  if (Block) {
+    if (badCFG)
+      return nullptr;
+    TrySuccessor = Block;
+  } else
+    TrySuccessor = Succ;
+
+  CFGBlock *PrevTryTerminatedBlock = TryTerminatedBlock;
+
+  // Create a new block that will contain the try statement.
+  CFGBlock *NewTryTerminatedBlock = createBlock(false);
+  // Add the terminator in the try block.
+  NewTryTerminatedBlock->setTerminator(Terminator);
+
+  bool HasCatchAll = false;
+  for (unsigned I = 0, E = Terminator->getNumHandlers(); I != E; ++I) {
+    // The code after the try is the implicit successor.
+    Succ = TrySuccessor;
+    CXXCatchStmt *CS = Terminator->getHandler(I);
+    if (CS->getExceptionDecl() == nullptr) {
+      HasCatchAll = true;
+    }
+    Block = nullptr;
+    CFGBlock *CatchBlock = VisitCXXCatchStmt(CS);
+    if (!CatchBlock)
+      return nullptr;
+    // Add this block to the list of successors for the block with the try
+    // statement.
+    addSuccessor(NewTryTerminatedBlock, CatchBlock);
+  }
+  if (!HasCatchAll) {
+    if (PrevTryTerminatedBlock)
+      addSuccessor(NewTryTerminatedBlock, PrevTryTerminatedBlock);
+    else
+      addSuccessor(NewTryTerminatedBlock, &cfg->getExit());
+  }
+
+  // The code after the try is the implicit successor.
+  Succ = TrySuccessor;
+
+  // Save the current "try" context.
+  SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock);
+  cfg->addTryDispatchBlock(TryTerminatedBlock);
+
+  assert(Terminator->getTryBlock() && "try must contain a non-NULL body");
+  Block = nullptr;
+  return addStmt(Terminator->getTryBlock());
+}
+
+CFGBlock *CFGBuilder::VisitCXXCatchStmt(CXXCatchStmt *CS) {
+  // CXXCatchStmts are treated like labels, so they are the first statement
+  // in a block.
+
+  // Save local scope position because in case of exception variable ScopePos
+  // won't be restored when traversing AST.
+  SaveAndRestore save_scope_pos(ScopePos);
+
+  // Create local scope for possible exception variable.
+  // Store scope position. Add implicit destructor.
+  if (VarDecl *VD = CS->getExceptionDecl()) {
+    LocalScope::const_iterator BeginScopePos = ScopePos;
+    addLocalScopeForVarDecl(VD);
+    addAutomaticObjHandling(ScopePos, BeginScopePos, CS);
+  }
+
+  if (CS->getHandlerBlock())
+    addStmt(CS->getHandlerBlock());
+
+  CFGBlock *CatchBlock = Block;
+  if (!CatchBlock)
+    CatchBlock = createBlock();
+
+  // A CXXCatchStmt is more than just a label. It has semantic meaning
+  // as well, as it implicitly "initializes" the catch variable. Add
+  // it to the CFG as a CFGElement so that the control-flow of these
+  // semantics gets captured.
+  appendStmt(CatchBlock, CS);
+
+  // Also add the CXXCatchStmt as a label, to mirror handling of regular
+  // labels.
+  CatchBlock->setLabel(CS);
+
+  // Bail out if the CFG is bad.
+  if (badCFG)
+    return nullptr;
+
+  // We set Block to NULL to allow lazy creation of a new block (if necessary).
+  Block = nullptr;
+
+  return CatchBlock;
+}
+
+CFGBlock *CFGBuilder::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
+  // C++11 range-based for statements are specified in [stmt.ranged]:
+  //
+  //   {
+  //     auto && __range = range-init;
+  //     for ( auto __begin = begin-expr,
+  //                __end = end-expr;
+  //           __begin != __end;
+  //           ++__begin ) {
+  //       for-range-declaration = *__begin;
+  //       statement
+  //     }
+  //   }
+
+  // Save local scope position before the addition of the implicit variables.
+  SaveAndRestore save_scope_pos(ScopePos);
+
+  // Create local scopes and destructors for range, begin and end variables.
+  if (Stmt *Range = S->getRangeStmt())
+    addLocalScopeForStmt(Range);
+  if (Stmt *Begin = S->getBeginStmt())
+    addLocalScopeForStmt(Begin);
+  if (Stmt *End = S->getEndStmt())
+    addLocalScopeForStmt(End);
+  addAutomaticObjHandling(ScopePos, save_scope_pos.get(), S);
+
+  LocalScope::const_iterator ContinueScopePos = ScopePos;
+
+  // "for" is a control-flow statement. Thus we stop processing the current
+  // block.
+  CFGBlock *LoopSuccessor = nullptr;
+  if (Block) {
+    if (badCFG)
+      return nullptr;
+    LoopSuccessor = Block;
+  } else
+    LoopSuccessor = Succ;
+
+  // Save the current value for the break targets.
+  // All breaks should go to the code following the loop.
+  SaveAndRestore save_break(BreakJumpTarget);
+  BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos);
+
+  // The block for the __begin != __end expression.
+  CFGBlock *ConditionBlock = createBlock(false);
+  ConditionBlock->setTerminator(S);
+
+  // Now add the actual condition to the condition block.
+  if (Expr *C = S->getCond()) {
+    Block = ConditionBlock;
+    CFGBlock *BeginConditionBlock = addStmt(C);
+    if (badCFG)
+      return nullptr;
+    assert(BeginConditionBlock == ConditionBlock &&
+           "condition block in for-range was unexpectedly complex");
+    (void)BeginConditionBlock;
+  }
+
+  // The condition block is the implicit successor for the loop body as well as
+  // any code above the loop.
+  Succ = ConditionBlock;
+
+  // See if this is a known constant.
+  TryResult KnownVal(true);
+
+  if (S->getCond())
+    KnownVal = tryEvaluateBool(S->getCond());
+
+  // Now create the loop body.
+  {
+    assert(S->getBody());
+
+    // Save the current values for Block, Succ, and continue targets.
+    SaveAndRestore save_Block(Block), save_Succ(Succ);
+    SaveAndRestore save_continue(ContinueJumpTarget);
+
+    // Generate increment code in its own basic block. This is the target of
+    // continue statements.
+    Block = nullptr;
+    Succ = addStmt(S->getInc());
+    if (badCFG)
+      return nullptr;
+    ContinueJumpTarget = JumpTarget(Succ, ContinueScopePos);
+
+    // The starting block for the loop increment is the block that should
+    // represent the 'loop target' for looping back to the start of the loop.
+    ContinueJumpTarget.block->setLoopTarget(S);
+
+    // Finish up the increment block and prepare to start the loop body.
+    assert(Block);
+    if (badCFG)
+      return nullptr;
+    Block = nullptr;
+
+    // Add implicit scope and dtors for loop variable.
+    addLocalScopeAndDtors(S->getLoopVarStmt());
+
+    // If body is not a compound statement create implicit scope
+    // and add destructors.
+    if (!isa<CompoundStmt>(S->getBody()))
+      addLocalScopeAndDtors(S->getBody());
+
+    // Populate a new block to contain the loop body and loop variable.
+    addStmt(S->getBody());
+
+    if (badCFG)
+      return nullptr;
+    CFGBlock *LoopVarStmtBlock = addStmt(S->getLoopVarStmt());
+    if (badCFG)
+      return nullptr;
+
+    // This new body block is a successor to our condition block.
+    addSuccessor(ConditionBlock,
+                 KnownVal.isFalse() ? nullptr : LoopVarStmtBlock);
+  }
+
+  // Link up the condition block with the code that follows the loop (the
+  // false branch).
+  addSuccessor(ConditionBlock, KnownVal.isTrue() ? nullptr : LoopSuccessor);
+
+  // Add the initialization statements.
+  Block = createBlock();
+  addStmt(S->getBeginStmt());
+  addStmt(S->getEndStmt());
+  CFGBlock *Head = addStmt(S->getRangeStmt());
+  if (S->getInit())
+    Head = addStmt(S->getInit());
+  return Head;
+}
+
+CFGBlock *CFGBuilder::VisitExprWithCleanups(ExprWithCleanups *E,
+    AddStmtChoice asc, bool ExternallyDestructed) {
+  if (BuildOpts.AddTemporaryDtors) {
+    // If adding implicit destructors, visit the full expression for adding
+    // destructors of temporaries.
+    TempDtorContext Context;
+    VisitForTemporaryDtors(E->getSubExpr(), ExternallyDestructed, Context);
+
+    // The full expression has to be added as a CFGStmt so it will be
+    // sequenced before the destructors of its temporaries.
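+    // For instance (illustrative): for `consume(std::string("tmp"));` the
+    // call appears in the CFG before the CFGTemporaryDtor element for
+    // ~basic_string(), mirroring the order in which they run.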
+ asc = asc.withAlwaysAdd(true); + } + return Visit(E->getSubExpr(), asc); +} + +CFGBlock *CFGBuilder::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E, + AddStmtChoice asc) { + if (asc.alwaysAdd(*this, E)) { + autoCreateBlock(); + appendStmt(Block, E); + + findConstructionContexts( + ConstructionContextLayer::create(cfg->getBumpVectorContext(), E), + E->getSubExpr()); + + // We do not want to propagate the AlwaysAdd property. + asc = asc.withAlwaysAdd(false); + } + return Visit(E->getSubExpr(), asc); +} + +CFGBlock *CFGBuilder::VisitCXXConstructExpr(CXXConstructExpr *C, + AddStmtChoice asc) { + // If the constructor takes objects as arguments by value, we need to properly + // construct these objects. Construction contexts we find here aren't for the + // constructor C, they're for its arguments only. + findConstructionContextsForArguments(C); + + autoCreateBlock(); + appendConstructor(Block, C); + + return VisitChildren(C); +} + +CFGBlock *CFGBuilder::VisitCXXNewExpr(CXXNewExpr *NE, + AddStmtChoice asc) { + autoCreateBlock(); + appendStmt(Block, NE); + + findConstructionContexts( + ConstructionContextLayer::create(cfg->getBumpVectorContext(), NE), + const_cast<CXXConstructExpr *>(NE->getConstructExpr())); + + if (NE->getInitializer()) + Block = Visit(NE->getInitializer()); + + if (BuildOpts.AddCXXNewAllocator) + appendNewAllocator(Block, NE); + + if (NE->isArray() && *NE->getArraySize()) + Block = Visit(*NE->getArraySize()); + + for (CXXNewExpr::arg_iterator I = NE->placement_arg_begin(), + E = NE->placement_arg_end(); I != E; ++I) + Block = Visit(*I); + + return Block; +} + +CFGBlock *CFGBuilder::VisitCXXDeleteExpr(CXXDeleteExpr *DE, + AddStmtChoice asc) { + autoCreateBlock(); + appendStmt(Block, DE); + QualType DTy = DE->getDestroyedType(); + if (!DTy.isNull()) { + DTy = DTy.getNonReferenceType(); + CXXRecordDecl *RD = Context->getBaseElementType(DTy)->getAsCXXRecordDecl(); + if (RD) { + if (RD->isCompleteDefinition() && !RD->hasTrivialDestructor()) + appendDeleteDtor(Block, RD, DE); + } + } + + return VisitChildren(DE); +} + +CFGBlock *CFGBuilder::VisitCXXFunctionalCastExpr(CXXFunctionalCastExpr *E, + AddStmtChoice asc) { + if (asc.alwaysAdd(*this, E)) { + autoCreateBlock(); + appendStmt(Block, E); + // We do not want to propagate the AlwaysAdd property. + asc = asc.withAlwaysAdd(false); + } + return Visit(E->getSubExpr(), asc); +} + +CFGBlock *CFGBuilder::VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *C, + AddStmtChoice asc) { + // If the constructor takes objects as arguments by value, we need to properly + // construct these objects. Construction contexts we find here aren't for the + // constructor C, they're for its arguments only. + findConstructionContextsForArguments(C); + + autoCreateBlock(); + appendConstructor(Block, C); + return VisitChildren(C); +} + +CFGBlock *CFGBuilder::VisitImplicitCastExpr(ImplicitCastExpr *E, + AddStmtChoice asc) { + if (asc.alwaysAdd(*this, E)) { + autoCreateBlock(); + appendStmt(Block, E); + } + + if (E->getCastKind() == CK_IntegralToBoolean) + tryEvaluateBool(E->getSubExpr()->IgnoreParens()); + + return Visit(E->getSubExpr(), AddStmtChoice()); +} + +CFGBlock *CFGBuilder::VisitConstantExpr(ConstantExpr *E, AddStmtChoice asc) { + return Visit(E->getSubExpr(), AddStmtChoice()); +} + +CFGBlock *CFGBuilder::VisitIndirectGotoStmt(IndirectGotoStmt *I) { + // Lazily create the indirect-goto dispatch block if there isn't one already. 
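+  // Illustrative example (editorial), using the GCC labels-as-values
+  // extension:
+  //
+  //   void *target = cond ? &&a : &&b;
+  //   goto *target;
+  //
+  // Every `goto *` shares this single dispatch block, whose successors are
+  // the address-taken labels.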
+ CFGBlock *IBlock = cfg->getIndirectGotoBlock(); + + if (!IBlock) { + IBlock = createBlock(false); + cfg->setIndirectGotoBlock(IBlock); + } + + // IndirectGoto is a control-flow statement. Thus we stop processing the + // current block and create a new one. + if (badCFG) + return nullptr; + + Block = createBlock(false); + Block->setTerminator(I); + addSuccessor(Block, IBlock); + return addStmt(I->getTarget()); +} + +CFGBlock *CFGBuilder::VisitForTemporaryDtors(Stmt *E, bool ExternallyDestructed, + TempDtorContext &Context) { + assert(BuildOpts.AddImplicitDtors && BuildOpts.AddTemporaryDtors); + +tryAgain: + if (!E) { + badCFG = true; + return nullptr; + } + switch (E->getStmtClass()) { + default: + return VisitChildrenForTemporaryDtors(E, false, Context); + + case Stmt::InitListExprClass: + return VisitChildrenForTemporaryDtors(E, ExternallyDestructed, Context); + + case Stmt::BinaryOperatorClass: + return VisitBinaryOperatorForTemporaryDtors(cast<BinaryOperator>(E), + ExternallyDestructed, + Context); + + case Stmt::CXXBindTemporaryExprClass: + return VisitCXXBindTemporaryExprForTemporaryDtors( + cast<CXXBindTemporaryExpr>(E), ExternallyDestructed, Context); + + case Stmt::BinaryConditionalOperatorClass: + case Stmt::ConditionalOperatorClass: + return VisitConditionalOperatorForTemporaryDtors( + cast<AbstractConditionalOperator>(E), ExternallyDestructed, Context); + + case Stmt::ImplicitCastExprClass: + // For implicit cast we want ExternallyDestructed to be passed further. + E = cast<CastExpr>(E)->getSubExpr(); + goto tryAgain; + + case Stmt::CXXFunctionalCastExprClass: + // For functional cast we want ExternallyDestructed to be passed further. + E = cast<CXXFunctionalCastExpr>(E)->getSubExpr(); + goto tryAgain; + + case Stmt::ConstantExprClass: + E = cast<ConstantExpr>(E)->getSubExpr(); + goto tryAgain; + + case Stmt::ParenExprClass: + E = cast<ParenExpr>(E)->getSubExpr(); + goto tryAgain; + + case Stmt::MaterializeTemporaryExprClass: { + const MaterializeTemporaryExpr* MTE = cast<MaterializeTemporaryExpr>(E); + ExternallyDestructed = (MTE->getStorageDuration() != SD_FullExpression); + SmallVector<const Expr *, 2> CommaLHSs; + SmallVector<SubobjectAdjustment, 2> Adjustments; + // Find the expression whose lifetime needs to be extended. + E = const_cast<Expr *>( + cast<MaterializeTemporaryExpr>(E) + ->getSubExpr() + ->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments)); + // Visit the skipped comma operator left-hand sides for other temporaries. + for (const Expr *CommaLHS : CommaLHSs) { + VisitForTemporaryDtors(const_cast<Expr *>(CommaLHS), + /*ExternallyDestructed=*/false, Context); + } + goto tryAgain; + } + + case Stmt::BlockExprClass: + // Don't recurse into blocks; their subexpressions don't get evaluated + // here. + return Block; + + case Stmt::LambdaExprClass: { + // For lambda expressions, only recurse into the capture initializers, + // and not the body. + auto *LE = cast<LambdaExpr>(E); + CFGBlock *B = Block; + for (Expr *Init : LE->capture_inits()) { + if (Init) { + if (CFGBlock *R = VisitForTemporaryDtors( + Init, /*ExternallyDestructed=*/true, Context)) + B = R; + } + } + return B; + } + + case Stmt::StmtExprClass: + // Don't recurse into statement expressions; any cleanups inside them + // will be wrapped in their own ExprWithCleanups. 
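+    // E.g. (illustrative): in `int x = ({ make(); 0; });` a temporary
+    // returned by make() is destroyed by the ExprWithCleanups inside the
+    // statement expression, not by the enclosing full-expression.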
+ return Block; + + case Stmt::CXXDefaultArgExprClass: + E = cast<CXXDefaultArgExpr>(E)->getExpr(); + goto tryAgain; + + case Stmt::CXXDefaultInitExprClass: + E = cast<CXXDefaultInitExpr>(E)->getExpr(); + goto tryAgain; + } +} + +CFGBlock *CFGBuilder::VisitChildrenForTemporaryDtors(Stmt *E, + bool ExternallyDestructed, + TempDtorContext &Context) { + if (isa<LambdaExpr>(E)) { + // Do not visit the children of lambdas; they have their own CFGs. + return Block; + } + + // When visiting children for destructors we want to visit them in reverse + // order that they will appear in the CFG. Because the CFG is built + // bottom-up, this means we visit them in their natural order, which + // reverses them in the CFG. + CFGBlock *B = Block; + for (Stmt *Child : E->children()) + if (Child) + if (CFGBlock *R = VisitForTemporaryDtors(Child, ExternallyDestructed, Context)) + B = R; + + return B; +} + +CFGBlock *CFGBuilder::VisitBinaryOperatorForTemporaryDtors( + BinaryOperator *E, bool ExternallyDestructed, TempDtorContext &Context) { + if (E->isCommaOp()) { + // For the comma operator, the LHS expression is evaluated before the RHS + // expression, so prepend temporary destructors for the LHS first. + CFGBlock *LHSBlock = VisitForTemporaryDtors(E->getLHS(), false, Context); + CFGBlock *RHSBlock = VisitForTemporaryDtors(E->getRHS(), ExternallyDestructed, Context); + return RHSBlock ? RHSBlock : LHSBlock; + } + + if (E->isLogicalOp()) { + VisitForTemporaryDtors(E->getLHS(), false, Context); + TryResult RHSExecuted = tryEvaluateBool(E->getLHS()); + if (RHSExecuted.isKnown() && E->getOpcode() == BO_LOr) + RHSExecuted.negate(); + + // We do not know at CFG-construction time whether the right-hand-side was + // executed, thus we add a branch node that depends on the temporary + // constructor call. + TempDtorContext RHSContext( + bothKnownTrue(Context.KnownExecuted, RHSExecuted)); + VisitForTemporaryDtors(E->getRHS(), false, RHSContext); + InsertTempDtorDecisionBlock(RHSContext); + + return Block; + } + + if (E->isAssignmentOp()) { + // For assignment operators, the RHS expression is evaluated before the LHS + // expression, so prepend temporary destructors for the RHS first. + CFGBlock *RHSBlock = VisitForTemporaryDtors(E->getRHS(), false, Context); + CFGBlock *LHSBlock = VisitForTemporaryDtors(E->getLHS(), false, Context); + return LHSBlock ? LHSBlock : RHSBlock; + } + + // Any other operator is visited normally. + return VisitChildrenForTemporaryDtors(E, ExternallyDestructed, Context); +} + +CFGBlock *CFGBuilder::VisitCXXBindTemporaryExprForTemporaryDtors( + CXXBindTemporaryExpr *E, bool ExternallyDestructed, TempDtorContext &Context) { + // First add destructors for temporaries in subexpression. + // Because VisitCXXBindTemporaryExpr calls setDestructed: + CFGBlock *B = VisitForTemporaryDtors(E->getSubExpr(), true, Context); + if (!ExternallyDestructed) { + // If lifetime of temporary is not prolonged (by assigning to constant + // reference) add destructor for it. + + const CXXDestructorDecl *Dtor = E->getTemporary()->getDestructor(); + + if (Dtor->getParent()->isAnyDestructorNoReturn()) { + // If the destructor is marked as a no-return destructor, we need to + // create a new block for the destructor which does not have as a + // successor anything built thus far. Control won't flow out of this + // block. 
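+      // Editorial example (illustrative):
+      //
+      //   struct Fatal { [[noreturn]] ~Fatal(); };
+      //   f(Fatal{});
+      //   g();   // not a successor: ~Fatal() never returns
+      //
+      // The temporary's destructor ends a no-return block here.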
+ if (B) Succ = B; + Block = createNoReturnBlock(); + } else if (Context.needsTempDtorBranch()) { + // If we need to introduce a branch, we add a new block that we will hook + // up to a decision block later. + if (B) Succ = B; + Block = createBlock(); + } else { + autoCreateBlock(); + } + if (Context.needsTempDtorBranch()) { + Context.setDecisionPoint(Succ, E); + } + appendTemporaryDtor(Block, E); + + B = Block; + } + return B; +} + +void CFGBuilder::InsertTempDtorDecisionBlock(const TempDtorContext &Context, + CFGBlock *FalseSucc) { + if (!Context.TerminatorExpr) { + // If no temporary was found, we do not need to insert a decision point. + return; + } + assert(Context.TerminatorExpr); + CFGBlock *Decision = createBlock(false); + Decision->setTerminator(CFGTerminator(Context.TerminatorExpr, + CFGTerminator::TemporaryDtorsBranch)); + addSuccessor(Decision, Block, !Context.KnownExecuted.isFalse()); + addSuccessor(Decision, FalseSucc ? FalseSucc : Context.Succ, + !Context.KnownExecuted.isTrue()); + Block = Decision; +} + +CFGBlock *CFGBuilder::VisitConditionalOperatorForTemporaryDtors( + AbstractConditionalOperator *E, bool ExternallyDestructed, + TempDtorContext &Context) { + VisitForTemporaryDtors(E->getCond(), false, Context); + CFGBlock *ConditionBlock = Block; + CFGBlock *ConditionSucc = Succ; + TryResult ConditionVal = tryEvaluateBool(E->getCond()); + TryResult NegatedVal = ConditionVal; + if (NegatedVal.isKnown()) NegatedVal.negate(); + + TempDtorContext TrueContext( + bothKnownTrue(Context.KnownExecuted, ConditionVal)); + VisitForTemporaryDtors(E->getTrueExpr(), ExternallyDestructed, TrueContext); + CFGBlock *TrueBlock = Block; + + Block = ConditionBlock; + Succ = ConditionSucc; + TempDtorContext FalseContext( + bothKnownTrue(Context.KnownExecuted, NegatedVal)); + VisitForTemporaryDtors(E->getFalseExpr(), ExternallyDestructed, FalseContext); + + if (TrueContext.TerminatorExpr && FalseContext.TerminatorExpr) { + InsertTempDtorDecisionBlock(FalseContext, TrueBlock); + } else if (TrueContext.TerminatorExpr) { + Block = TrueBlock; + InsertTempDtorDecisionBlock(TrueContext); + } else { + InsertTempDtorDecisionBlock(FalseContext); + } + return Block; +} + +CFGBlock *CFGBuilder::VisitOMPExecutableDirective(OMPExecutableDirective *D, + AddStmtChoice asc) { + if (asc.alwaysAdd(*this, D)) { + autoCreateBlock(); + appendStmt(Block, D); + } + + // Iterate over all used expression in clauses. + CFGBlock *B = Block; + + // Reverse the elements to process them in natural order. Iterators are not + // bidirectional, so we need to create temp vector. + SmallVector<Stmt *, 8> Used( + OMPExecutableDirective::used_clauses_children(D->clauses())); + for (Stmt *S : llvm::reverse(Used)) { + assert(S && "Expected non-null used-in-clause child."); + if (CFGBlock *R = Visit(S)) + B = R; + } + // Visit associated structured block if any. + if (!D->isStandaloneDirective()) { + Stmt *S = D->getRawStmt(); + if (!isa<CompoundStmt>(S)) + addLocalScopeAndDtors(S); + if (CFGBlock *R = addStmt(S)) + B = R; + } + + return B; +} + +/// createBlock - Constructs and adds a new CFGBlock to the CFG. The block has +/// no successors or predecessors. If this is the first block created in the +/// CFG, it is automatically set to be the Entry and Exit of the CFG. +CFGBlock *CFG::createBlock() { + bool first_block = begin() == end(); + + // Create the block. 
+ CFGBlock *Mem = getAllocator().Allocate<CFGBlock>(); + new (Mem) CFGBlock(NumBlockIDs++, BlkBVC, this); + Blocks.push_back(Mem, BlkBVC); + + // If this is the first block, set it as the Entry and Exit. + if (first_block) + Entry = Exit = &back(); + + // Return the block. + return &back(); +} + +/// buildCFG - Constructs a CFG from an AST. +std::unique_ptr<CFG> CFG::buildCFG(const Decl *D, Stmt *Statement, + ASTContext *C, const BuildOptions &BO) { + CFGBuilder Builder(C, BO); + return Builder.buildCFG(D, Statement); +} + +bool CFG::isLinear() const { + // Quick path: if we only have the ENTRY block, the EXIT block, and some code + // in between, then we have no room for control flow. + if (size() <= 3) + return true; + + // Traverse the CFG until we find a branch. + // TODO: While this should still be very fast, + // maybe we should cache the answer. + llvm::SmallPtrSet<const CFGBlock *, 4> Visited; + const CFGBlock *B = Entry; + while (B != Exit) { + auto IteratorAndFlag = Visited.insert(B); + if (!IteratorAndFlag.second) { + // We looped back to a block that we've already visited. Not linear. + return false; + } + + // Iterate over reachable successors. + const CFGBlock *FirstReachableB = nullptr; + for (const CFGBlock::AdjacentBlock &AB : B->succs()) { + if (!AB.isReachable()) + continue; + + if (FirstReachableB == nullptr) { + FirstReachableB = &*AB; + } else { + // We've encountered a branch. It's not a linear CFG. + return false; + } + } + + if (!FirstReachableB) { + // We reached a dead end. EXIT is unreachable. This is linear enough. + return true; + } + + // There's only one way to move forward. Proceed. + B = FirstReachableB; + } + + // We reached EXIT and found no branches. + return true; +} + +const CXXDestructorDecl * +CFGImplicitDtor::getDestructorDecl(ASTContext &astContext) const { + switch (getKind()) { + case CFGElement::Initializer: + case CFGElement::NewAllocator: + case CFGElement::LoopExit: + case CFGElement::LifetimeEnds: + case CFGElement::Statement: + case CFGElement::Constructor: + case CFGElement::CXXRecordTypedCall: + case CFGElement::ScopeBegin: + case CFGElement::ScopeEnd: + llvm_unreachable("getDestructorDecl should only be used with " + "ImplicitDtors"); + case CFGElement::AutomaticObjectDtor: { + const VarDecl *var = castAs<CFGAutomaticObjDtor>().getVarDecl(); + QualType ty = var->getType(); + + // FIXME: See CFGBuilder::addLocalScopeForVarDecl. + // + // Lifetime-extending constructs are handled here. This works for a single + // temporary in an initializer expression. + if (ty->isReferenceType()) { + if (const Expr *Init = var->getInit()) { + ty = getReferenceInitTemporaryType(Init); + } + } + + while (const ArrayType *arrayType = astContext.getAsArrayType(ty)) { + ty = arrayType->getElementType(); + } + + // The situation when the type of the lifetime-extending reference + // does not correspond to the type of the object is supposed + // to be handled by now. In particular, 'ty' is now the unwrapped + // record type. 
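+    // For example (illustrative): for `const S &r = S();` the variable's
+    // declared type is `const S &`, but the destructor that must run at the
+    // end of scope is S::~S() for the lifetime-extended temporary, hence
+    // the unwrapping above.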
+ const CXXRecordDecl *classDecl = ty->getAsCXXRecordDecl(); + assert(classDecl); + return classDecl->getDestructor(); + } + case CFGElement::DeleteDtor: { + const CXXDeleteExpr *DE = castAs<CFGDeleteDtor>().getDeleteExpr(); + QualType DTy = DE->getDestroyedType(); + DTy = DTy.getNonReferenceType(); + const CXXRecordDecl *classDecl = + astContext.getBaseElementType(DTy)->getAsCXXRecordDecl(); + return classDecl->getDestructor(); + } + case CFGElement::TemporaryDtor: { + const CXXBindTemporaryExpr *bindExpr = + castAs<CFGTemporaryDtor>().getBindTemporaryExpr(); + const CXXTemporary *temp = bindExpr->getTemporary(); + return temp->getDestructor(); + } + case CFGElement::MemberDtor: { + const FieldDecl *field = castAs<CFGMemberDtor>().getFieldDecl(); + QualType ty = field->getType(); + + while (const ArrayType *arrayType = astContext.getAsArrayType(ty)) { + ty = arrayType->getElementType(); + } + + const CXXRecordDecl *classDecl = ty->getAsCXXRecordDecl(); + assert(classDecl); + return classDecl->getDestructor(); + } + case CFGElement::BaseDtor: + // Not yet supported. + return nullptr; + } + llvm_unreachable("getKind() returned bogus value"); +} + +//===----------------------------------------------------------------------===// +// CFGBlock operations. +//===----------------------------------------------------------------------===// + +CFGBlock::AdjacentBlock::AdjacentBlock(CFGBlock *B, bool IsReachable) + : ReachableBlock(IsReachable ? B : nullptr), + UnreachableBlock(!IsReachable ? B : nullptr, + B && IsReachable ? AB_Normal : AB_Unreachable) {} + +CFGBlock::AdjacentBlock::AdjacentBlock(CFGBlock *B, CFGBlock *AlternateBlock) + : ReachableBlock(B), + UnreachableBlock(B == AlternateBlock ? nullptr : AlternateBlock, + B == AlternateBlock ? AB_Alternate : AB_Normal) {} + +void CFGBlock::addSuccessor(AdjacentBlock Succ, + BumpVectorContext &C) { + if (CFGBlock *B = Succ.getReachableBlock()) + B->Preds.push_back(AdjacentBlock(this, Succ.isReachable()), C); + + if (CFGBlock *UnreachableB = Succ.getPossiblyUnreachableBlock()) + UnreachableB->Preds.push_back(AdjacentBlock(this, false), C); + + Succs.push_back(Succ, C); +} + +bool CFGBlock::FilterEdge(const CFGBlock::FilterOptions &F, + const CFGBlock *From, const CFGBlock *To) { + if (F.IgnoreNullPredecessors && !From) + return true; + + if (To && From && F.IgnoreDefaultsWithCoveredEnums) { + // If the 'To' has no label or is labeled but the label isn't a + // CaseStmt then filter this edge. 
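+    // Editorial example (illustrative): given `enum E { A, B };` and
+    //
+    //   switch (e) { case A: ...; case B: ...; }
+    //
+    // the builder still records an implicit edge to the code after the
+    // switch, but because every enumerator is covered, this filter lets
+    // clients ignore that default edge.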
+ if (const SwitchStmt *S = + dyn_cast_or_null<SwitchStmt>(From->getTerminatorStmt())) { + if (S->isAllEnumCasesCovered()) { + const Stmt *L = To->getLabel(); + if (!L || !isa<CaseStmt>(L)) + return true; + } + } + } + + return false; +} + +//===----------------------------------------------------------------------===// +// CFG pretty printing +//===----------------------------------------------------------------------===// + +namespace { + +class StmtPrinterHelper : public PrinterHelper { + using StmtMapTy = llvm::DenseMap<const Stmt *, std::pair<unsigned, unsigned>>; + using DeclMapTy = llvm::DenseMap<const Decl *, std::pair<unsigned, unsigned>>; + + StmtMapTy StmtMap; + DeclMapTy DeclMap; + signed currentBlock = 0; + unsigned currStmt = 0; + const LangOptions &LangOpts; + +public: + StmtPrinterHelper(const CFG* cfg, const LangOptions &LO) + : LangOpts(LO) { + if (!cfg) + return; + for (CFG::const_iterator I = cfg->begin(), E = cfg->end(); I != E; ++I ) { + unsigned j = 1; + for (CFGBlock::const_iterator BI = (*I)->begin(), BEnd = (*I)->end() ; + BI != BEnd; ++BI, ++j ) { + if (std::optional<CFGStmt> SE = BI->getAs<CFGStmt>()) { + const Stmt *stmt= SE->getStmt(); + std::pair<unsigned, unsigned> P((*I)->getBlockID(), j); + StmtMap[stmt] = P; + + switch (stmt->getStmtClass()) { + case Stmt::DeclStmtClass: + DeclMap[cast<DeclStmt>(stmt)->getSingleDecl()] = P; + break; + case Stmt::IfStmtClass: { + const VarDecl *var = cast<IfStmt>(stmt)->getConditionVariable(); + if (var) + DeclMap[var] = P; + break; + } + case Stmt::ForStmtClass: { + const VarDecl *var = cast<ForStmt>(stmt)->getConditionVariable(); + if (var) + DeclMap[var] = P; + break; + } + case Stmt::WhileStmtClass: { + const VarDecl *var = + cast<WhileStmt>(stmt)->getConditionVariable(); + if (var) + DeclMap[var] = P; + break; + } + case Stmt::SwitchStmtClass: { + const VarDecl *var = + cast<SwitchStmt>(stmt)->getConditionVariable(); + if (var) + DeclMap[var] = P; + break; + } + case Stmt::CXXCatchStmtClass: { + const VarDecl *var = + cast<CXXCatchStmt>(stmt)->getExceptionDecl(); + if (var) + DeclMap[var] = P; + break; + } + default: + break; + } + } + } + } + } + + ~StmtPrinterHelper() override = default; + + const LangOptions &getLangOpts() const { return LangOpts; } + void setBlockID(signed i) { currentBlock = i; } + void setStmtID(unsigned i) { currStmt = i; } + + bool handledStmt(Stmt *S, raw_ostream &OS) override { + StmtMapTy::iterator I = StmtMap.find(S); + + if (I == StmtMap.end()) + return false; + + if (currentBlock >= 0 && I->second.first == (unsigned) currentBlock + && I->second.second == currStmt) { + return false; + } + + OS << "[B" << I->second.first << "." << I->second.second << "]"; + return true; + } + + bool handleDecl(const Decl *D, raw_ostream &OS) { + DeclMapTy::iterator I = DeclMap.find(D); + + if (I == DeclMap.end()) + return false; + + if (currentBlock >= 0 && I->second.first == (unsigned) currentBlock + && I->second.second == currStmt) { + return false; + } + + OS << "[B" << I->second.first << "." 
<< I->second.second << "]"; + return true; + } +}; + +class CFGBlockTerminatorPrint + : public StmtVisitor<CFGBlockTerminatorPrint,void> { + raw_ostream &OS; + StmtPrinterHelper* Helper; + PrintingPolicy Policy; + +public: + CFGBlockTerminatorPrint(raw_ostream &os, StmtPrinterHelper* helper, + const PrintingPolicy &Policy) + : OS(os), Helper(helper), Policy(Policy) { + this->Policy.IncludeNewlines = false; + } + + void VisitIfStmt(IfStmt *I) { + OS << "if "; + if (Stmt *C = I->getCond()) + C->printPretty(OS, Helper, Policy); + } + + // Default case. + void VisitStmt(Stmt *Terminator) { + Terminator->printPretty(OS, Helper, Policy); + } + + void VisitDeclStmt(DeclStmt *DS) { + VarDecl *VD = cast<VarDecl>(DS->getSingleDecl()); + OS << "static init " << VD->getName(); + } + + void VisitForStmt(ForStmt *F) { + OS << "for (" ; + if (F->getInit()) + OS << "..."; + OS << "; "; + if (Stmt *C = F->getCond()) + C->printPretty(OS, Helper, Policy); + OS << "; "; + if (F->getInc()) + OS << "..."; + OS << ")"; + } + + void VisitWhileStmt(WhileStmt *W) { + OS << "while " ; + if (Stmt *C = W->getCond()) + C->printPretty(OS, Helper, Policy); + } + + void VisitDoStmt(DoStmt *D) { + OS << "do ... while "; + if (Stmt *C = D->getCond()) + C->printPretty(OS, Helper, Policy); + } + + void VisitSwitchStmt(SwitchStmt *Terminator) { + OS << "switch "; + Terminator->getCond()->printPretty(OS, Helper, Policy); + } + + void VisitCXXTryStmt(CXXTryStmt *) { OS << "try ..."; } + + void VisitObjCAtTryStmt(ObjCAtTryStmt *) { OS << "@try ..."; } + + void VisitSEHTryStmt(SEHTryStmt *CS) { OS << "__try ..."; } + + void VisitAbstractConditionalOperator(AbstractConditionalOperator* C) { + if (Stmt *Cond = C->getCond()) + Cond->printPretty(OS, Helper, Policy); + OS << " ? ... : ..."; + } + + void VisitChooseExpr(ChooseExpr *C) { + OS << "__builtin_choose_expr( "; + if (Stmt *Cond = C->getCond()) + Cond->printPretty(OS, Helper, Policy); + OS << " )"; + } + + void VisitIndirectGotoStmt(IndirectGotoStmt *I) { + OS << "goto *"; + if (Stmt *T = I->getTarget()) + T->printPretty(OS, Helper, Policy); + } + + void VisitBinaryOperator(BinaryOperator* B) { + if (!B->isLogicalOp()) { + VisitExpr(B); + return; + } + + if (B->getLHS()) + B->getLHS()->printPretty(OS, Helper, Policy); + + switch (B->getOpcode()) { + case BO_LOr: + OS << " || ..."; + return; + case BO_LAnd: + OS << " && ..."; + return; + default: + llvm_unreachable("Invalid logical operator."); + } + } + + void VisitExpr(Expr *E) { + E->printPretty(OS, Helper, Policy); + } + +public: + void print(CFGTerminator T) { + switch (T.getKind()) { + case CFGTerminator::StmtBranch: + Visit(T.getStmt()); + break; + case CFGTerminator::TemporaryDtorsBranch: + OS << "(Temp Dtor) "; + Visit(T.getStmt()); + break; + case CFGTerminator::VirtualBaseBranch: + OS << "(See if most derived ctor has already initialized vbases)"; + break; + } + } +}; + +} // namespace + +static void print_initializer(raw_ostream &OS, StmtPrinterHelper &Helper, + const CXXCtorInitializer *I) { + if (I->isBaseInitializer()) + OS << I->getBaseClass()->getAsCXXRecordDecl()->getName(); + else if (I->isDelegatingInitializer()) + OS << I->getTypeSourceInfo()->getType()->getAsCXXRecordDecl()->getName(); + else + OS << I->getAnyMember()->getName(); + OS << "("; + if (Expr *IE = I->getInit()) + IE->printPretty(OS, &Helper, PrintingPolicy(Helper.getLangOpts())); + OS << ")"; + + if (I->isBaseInitializer()) + OS << " (Base initializer)"; + else if (I->isDelegatingInitializer()) + OS << " (Delegating initializer)"; + else + OS << 
" (Member initializer)"; +} + +static void print_construction_context(raw_ostream &OS, + StmtPrinterHelper &Helper, + const ConstructionContext *CC) { + SmallVector<const Stmt *, 3> Stmts; + switch (CC->getKind()) { + case ConstructionContext::SimpleConstructorInitializerKind: { + OS << ", "; + const auto *SICC = cast<SimpleConstructorInitializerConstructionContext>(CC); + print_initializer(OS, Helper, SICC->getCXXCtorInitializer()); + return; + } + case ConstructionContext::CXX17ElidedCopyConstructorInitializerKind: { + OS << ", "; + const auto *CICC = + cast<CXX17ElidedCopyConstructorInitializerConstructionContext>(CC); + print_initializer(OS, Helper, CICC->getCXXCtorInitializer()); + Stmts.push_back(CICC->getCXXBindTemporaryExpr()); + break; + } + case ConstructionContext::SimpleVariableKind: { + const auto *SDSCC = cast<SimpleVariableConstructionContext>(CC); + Stmts.push_back(SDSCC->getDeclStmt()); + break; + } + case ConstructionContext::CXX17ElidedCopyVariableKind: { + const auto *CDSCC = cast<CXX17ElidedCopyVariableConstructionContext>(CC); + Stmts.push_back(CDSCC->getDeclStmt()); + Stmts.push_back(CDSCC->getCXXBindTemporaryExpr()); + break; + } + case ConstructionContext::NewAllocatedObjectKind: { + const auto *NECC = cast<NewAllocatedObjectConstructionContext>(CC); + Stmts.push_back(NECC->getCXXNewExpr()); + break; + } + case ConstructionContext::SimpleReturnedValueKind: { + const auto *RSCC = cast<SimpleReturnedValueConstructionContext>(CC); + Stmts.push_back(RSCC->getReturnStmt()); + break; + } + case ConstructionContext::CXX17ElidedCopyReturnedValueKind: { + const auto *RSCC = + cast<CXX17ElidedCopyReturnedValueConstructionContext>(CC); + Stmts.push_back(RSCC->getReturnStmt()); + Stmts.push_back(RSCC->getCXXBindTemporaryExpr()); + break; + } + case ConstructionContext::SimpleTemporaryObjectKind: { + const auto *TOCC = cast<SimpleTemporaryObjectConstructionContext>(CC); + Stmts.push_back(TOCC->getCXXBindTemporaryExpr()); + Stmts.push_back(TOCC->getMaterializedTemporaryExpr()); + break; + } + case ConstructionContext::ElidedTemporaryObjectKind: { + const auto *TOCC = cast<ElidedTemporaryObjectConstructionContext>(CC); + Stmts.push_back(TOCC->getCXXBindTemporaryExpr()); + Stmts.push_back(TOCC->getMaterializedTemporaryExpr()); + Stmts.push_back(TOCC->getConstructorAfterElision()); + break; + } + case ConstructionContext::LambdaCaptureKind: { + const auto *LCC = cast<LambdaCaptureConstructionContext>(CC); + Helper.handledStmt(const_cast<LambdaExpr *>(LCC->getLambdaExpr()), OS); + OS << "+" << LCC->getIndex(); + return; + } + case ConstructionContext::ArgumentKind: { + const auto *ACC = cast<ArgumentConstructionContext>(CC); + if (const Stmt *BTE = ACC->getCXXBindTemporaryExpr()) { + OS << ", "; + Helper.handledStmt(const_cast<Stmt *>(BTE), OS); + } + OS << ", "; + Helper.handledStmt(const_cast<Expr *>(ACC->getCallLikeExpr()), OS); + OS << "+" << ACC->getIndex(); + return; + } + } + for (auto I: Stmts) + if (I) { + OS << ", "; + Helper.handledStmt(const_cast<Stmt *>(I), OS); + } +} + +static void print_elem(raw_ostream &OS, StmtPrinterHelper &Helper, + const CFGElement &E); + +void CFGElement::dumpToStream(llvm::raw_ostream &OS) const { + StmtPrinterHelper Helper(nullptr, {}); + print_elem(OS, Helper, *this); +} + +static void print_elem(raw_ostream &OS, StmtPrinterHelper &Helper, + const CFGElement &E) { + switch (E.getKind()) { + case CFGElement::Kind::Statement: + case CFGElement::Kind::CXXRecordTypedCall: + case CFGElement::Kind::Constructor: { + CFGStmt CS = 
E.castAs<CFGStmt>(); + const Stmt *S = CS.getStmt(); + assert(S != nullptr && "Expecting non-null Stmt"); + + // special printing for statement-expressions. + if (const StmtExpr *SE = dyn_cast<StmtExpr>(S)) { + const CompoundStmt *Sub = SE->getSubStmt(); + + auto Children = Sub->children(); + if (Children.begin() != Children.end()) { + OS << "({ ... ; "; + Helper.handledStmt(*SE->getSubStmt()->body_rbegin(),OS); + OS << " })\n"; + return; + } + } + // special printing for comma expressions. + if (const BinaryOperator* B = dyn_cast<BinaryOperator>(S)) { + if (B->getOpcode() == BO_Comma) { + OS << "... , "; + Helper.handledStmt(B->getRHS(),OS); + OS << '\n'; + return; + } + } + S->printPretty(OS, &Helper, PrintingPolicy(Helper.getLangOpts())); + + if (auto VTC = E.getAs<CFGCXXRecordTypedCall>()) { + if (isa<CXXOperatorCallExpr>(S)) + OS << " (OperatorCall)"; + OS << " (CXXRecordTypedCall"; + print_construction_context(OS, Helper, VTC->getConstructionContext()); + OS << ")"; + } else if (isa<CXXOperatorCallExpr>(S)) { + OS << " (OperatorCall)"; + } else if (isa<CXXBindTemporaryExpr>(S)) { + OS << " (BindTemporary)"; + } else if (const CXXConstructExpr *CCE = dyn_cast<CXXConstructExpr>(S)) { + OS << " (CXXConstructExpr"; + if (std::optional<CFGConstructor> CE = E.getAs<CFGConstructor>()) { + print_construction_context(OS, Helper, CE->getConstructionContext()); + } + OS << ", " << CCE->getType() << ")"; + } else if (const CastExpr *CE = dyn_cast<CastExpr>(S)) { + OS << " (" << CE->getStmtClassName() << ", " << CE->getCastKindName() + << ", " << CE->getType() << ")"; + } + + // Expressions need a newline. + if (isa<Expr>(S)) + OS << '\n'; + + break; + } + + case CFGElement::Kind::Initializer: + print_initializer(OS, Helper, E.castAs<CFGInitializer>().getInitializer()); + OS << '\n'; + break; + + case CFGElement::Kind::AutomaticObjectDtor: { + CFGAutomaticObjDtor DE = E.castAs<CFGAutomaticObjDtor>(); + const VarDecl *VD = DE.getVarDecl(); + Helper.handleDecl(VD, OS); + + QualType T = VD->getType(); + if (T->isReferenceType()) + T = getReferenceInitTemporaryType(VD->getInit(), nullptr); + + OS << ".~"; + T.getUnqualifiedType().print(OS, PrintingPolicy(Helper.getLangOpts())); + OS << "() (Implicit destructor)\n"; + break; + } + + case CFGElement::Kind::LifetimeEnds: + Helper.handleDecl(E.castAs<CFGLifetimeEnds>().getVarDecl(), OS); + OS << " (Lifetime ends)\n"; + break; + + case CFGElement::Kind::LoopExit: + OS << E.castAs<CFGLoopExit>().getLoopStmt()->getStmtClassName() << " (LoopExit)\n"; + break; + + case CFGElement::Kind::ScopeBegin: + OS << "CFGScopeBegin("; + if (const VarDecl *VD = E.castAs<CFGScopeBegin>().getVarDecl()) + OS << VD->getQualifiedNameAsString(); + OS << ")\n"; + break; + + case CFGElement::Kind::ScopeEnd: + OS << "CFGScopeEnd("; + if (const VarDecl *VD = E.castAs<CFGScopeEnd>().getVarDecl()) + OS << VD->getQualifiedNameAsString(); + OS << ")\n"; + break; + + case CFGElement::Kind::NewAllocator: + OS << "CFGNewAllocator("; + if (const CXXNewExpr *AllocExpr = E.castAs<CFGNewAllocator>().getAllocatorExpr()) + AllocExpr->getType().print(OS, PrintingPolicy(Helper.getLangOpts())); + OS << ")\n"; + break; + + case CFGElement::Kind::DeleteDtor: { + CFGDeleteDtor DE = E.castAs<CFGDeleteDtor>(); + const CXXRecordDecl *RD = DE.getCXXRecordDecl(); + if (!RD) + return; + CXXDeleteExpr *DelExpr = + const_cast<CXXDeleteExpr*>(DE.getDeleteExpr()); + Helper.handledStmt(cast<Stmt>(DelExpr->getArgument()), OS); + OS << "->~" << RD->getName().str() << "()"; + OS << " (Implicit destructor)\n"; + 
break; + } + + case CFGElement::Kind::BaseDtor: { + const CXXBaseSpecifier *BS = E.castAs<CFGBaseDtor>().getBaseSpecifier(); + OS << "~" << BS->getType()->getAsCXXRecordDecl()->getName() << "()"; + OS << " (Base object destructor)\n"; + break; + } + + case CFGElement::Kind::MemberDtor: { + const FieldDecl *FD = E.castAs<CFGMemberDtor>().getFieldDecl(); + const Type *T = FD->getType()->getBaseElementTypeUnsafe(); + OS << "this->" << FD->getName(); + OS << ".~" << T->getAsCXXRecordDecl()->getName() << "()"; + OS << " (Member object destructor)\n"; + break; + } + + case CFGElement::Kind::TemporaryDtor: { + const CXXBindTemporaryExpr *BT = + E.castAs<CFGTemporaryDtor>().getBindTemporaryExpr(); + OS << "~"; + BT->getType().print(OS, PrintingPolicy(Helper.getLangOpts())); + OS << "() (Temporary object destructor)\n"; + break; + } + } +} + +static void print_block(raw_ostream &OS, const CFG* cfg, + const CFGBlock &B, + StmtPrinterHelper &Helper, bool print_edges, + bool ShowColors) { + Helper.setBlockID(B.getBlockID()); + + // Print the header. + if (ShowColors) + OS.changeColor(raw_ostream::YELLOW, true); + + OS << "\n [B" << B.getBlockID(); + + if (&B == &cfg->getEntry()) + OS << " (ENTRY)]\n"; + else if (&B == &cfg->getExit()) + OS << " (EXIT)]\n"; + else if (&B == cfg->getIndirectGotoBlock()) + OS << " (INDIRECT GOTO DISPATCH)]\n"; + else if (B.hasNoReturnElement()) + OS << " (NORETURN)]\n"; + else + OS << "]\n"; + + if (ShowColors) + OS.resetColor(); + + // Print the label of this block. + if (Stmt *Label = const_cast<Stmt*>(B.getLabel())) { + if (print_edges) + OS << " "; + + if (LabelStmt *L = dyn_cast<LabelStmt>(Label)) + OS << L->getName(); + else if (CaseStmt *C = dyn_cast<CaseStmt>(Label)) { + OS << "case "; + if (const Expr *LHS = C->getLHS()) + LHS->printPretty(OS, &Helper, PrintingPolicy(Helper.getLangOpts())); + if (const Expr *RHS = C->getRHS()) { + OS << " ... "; + RHS->printPretty(OS, &Helper, PrintingPolicy(Helper.getLangOpts())); + } + } else if (isa<DefaultStmt>(Label)) + OS << "default"; + else if (CXXCatchStmt *CS = dyn_cast<CXXCatchStmt>(Label)) { + OS << "catch ("; + if (const VarDecl *ED = CS->getExceptionDecl()) + ED->print(OS, PrintingPolicy(Helper.getLangOpts()), 0); + else + OS << "..."; + OS << ")"; + } else if (ObjCAtCatchStmt *CS = dyn_cast<ObjCAtCatchStmt>(Label)) { + OS << "@catch ("; + if (const VarDecl *PD = CS->getCatchParamDecl()) + PD->print(OS, PrintingPolicy(Helper.getLangOpts()), 0); + else + OS << "..."; + OS << ")"; + } else if (SEHExceptStmt *ES = dyn_cast<SEHExceptStmt>(Label)) { + OS << "__except ("; + ES->getFilterExpr()->printPretty(OS, &Helper, + PrintingPolicy(Helper.getLangOpts()), 0); + OS << ")"; + } else + llvm_unreachable("Invalid label statement in CFGBlock."); + + OS << ":\n"; + } + + // Iterate through the statements in the block and print them. + unsigned j = 1; + + for (CFGBlock::const_iterator I = B.begin(), E = B.end() ; + I != E ; ++I, ++j ) { + // Print the statement # in the basic block and the statement itself. + if (print_edges) + OS << " "; + + OS << llvm::format("%3d", j) << ": "; + + Helper.setStmtID(j); + + print_elem(OS, Helper, *I); + } + + // Print the terminator of this block. 
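// --- Editorial aside (illustrative sketch, not part of the upstream diff) ---
// The printers in this file are normally reached through CFG::dump() and
// CFG::print(), defined further below. A hypothetical caller, assuming an
// already-built AnalysisDeclContext `AC` for some function:
//
//   if (clang::CFG *G = AC.getCFG())
//     G->print(llvm::errs(), AC.getASTContext().getLangOpts(),
//              /*ShowColors=*/false);
//
// --- End of editorial aside; terminator printing continues below. ----------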
+  if (B.getTerminator().isValid()) {
+    if (ShowColors)
+      OS.changeColor(raw_ostream::GREEN);
+
+    OS << "   T: ";
+
+    Helper.setBlockID(-1);
+
+    PrintingPolicy PP(Helper.getLangOpts());
+    CFGBlockTerminatorPrint TPrinter(OS, &Helper, PP);
+    TPrinter.print(B.getTerminator());
+    OS << '\n';
+
+    if (ShowColors)
+      OS.resetColor();
+  }
+
+  if (print_edges) {
+    // Print the predecessors of this block.
+    if (!B.pred_empty()) {
+      const raw_ostream::Colors Color = raw_ostream::BLUE;
+      if (ShowColors)
+        OS.changeColor(Color);
+      OS << "   Preds ";
+      if (ShowColors)
+        OS.resetColor();
+      OS << '(' << B.pred_size() << "):";
+      unsigned i = 0;
+
+      if (ShowColors)
+        OS.changeColor(Color);
+
+      for (CFGBlock::const_pred_iterator I = B.pred_begin(), E = B.pred_end();
+           I != E; ++I, ++i) {
+        if (i % 10 == 8)
+          OS << "\n     ";
+
+        CFGBlock *B = *I;
+        bool Reachable = true;
+        if (!B) {
+          Reachable = false;
+          B = I->getPossiblyUnreachableBlock();
+        }
+
+        OS << " B" << B->getBlockID();
+        if (!Reachable)
+          OS << "(Unreachable)";
+      }
+
+      if (ShowColors)
+        OS.resetColor();
+
+      OS << '\n';
+    }
+
+    // Print the successors of this block.
+    if (!B.succ_empty()) {
+      const raw_ostream::Colors Color = raw_ostream::MAGENTA;
+      if (ShowColors)
+        OS.changeColor(Color);
+      OS << "   Succs ";
+      if (ShowColors)
+        OS.resetColor();
+      OS << '(' << B.succ_size() << "):";
+      unsigned i = 0;
+
+      if (ShowColors)
+        OS.changeColor(Color);
+
+      for (CFGBlock::const_succ_iterator I = B.succ_begin(), E = B.succ_end();
+           I != E; ++I, ++i) {
+        if (i % 10 == 8)
+          OS << "\n    ";
+
+        CFGBlock *B = *I;
+
+        bool Reachable = true;
+        if (!B) {
+          Reachable = false;
+          B = I->getPossiblyUnreachableBlock();
+        }
+
+        if (B) {
+          OS << " B" << B->getBlockID();
+          if (!Reachable)
+            OS << "(Unreachable)";
+        }
+        else {
+          OS << " NULL";
+        }
+      }
+
+      if (ShowColors)
+        OS.resetColor();
+      OS << '\n';
+    }
+  }
+}
+
+/// dump - A simple pretty printer of a CFG that outputs to stderr.
+void CFG::dump(const LangOptions &LO, bool ShowColors) const {
+  print(llvm::errs(), LO, ShowColors);
+}
+
+/// print - A simple pretty printer of a CFG that outputs to an ostream.
+void CFG::print(raw_ostream &OS, const LangOptions &LO, bool ShowColors) const {
+  StmtPrinterHelper Helper(this, LO);
+
+  // Print the entry block.
+  print_block(OS, this, getEntry(), Helper, true, ShowColors);
+
+  // Iterate through the CFGBlocks and print them one by one.
+  for (const_iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) {
+    // Skip the entry block, because we already printed it.
+    if (&(**I) == &getEntry() || &(**I) == &getExit())
+      continue;
+
+    print_block(OS, this, **I, Helper, true, ShowColors);
+  }
+
+  // Print the exit block.
+  print_block(OS, this, getExit(), Helper, true, ShowColors);
+  OS << '\n';
+  OS.flush();
+}
+
+size_t CFGBlock::getIndexInCFG() const {
+  return llvm::find(*getParent(), this) - getParent()->begin();
+}
+
+/// dump - A simple pretty printer of a CFGBlock that outputs to stderr.
+void CFGBlock::dump(const CFG* cfg, const LangOptions &LO,
+                    bool ShowColors) const {
+  print(llvm::errs(), cfg, LO, ShowColors);
+}
+
+LLVM_DUMP_METHOD void CFGBlock::dump() const {
+  dump(getParent(), LangOptions(), false);
+}
+
+/// print - A simple pretty printer of a CFGBlock that outputs to an ostream.
+/// Generally this will only be called from CFG::print.
+void CFGBlock::print(raw_ostream &OS, const CFG* cfg, + const LangOptions &LO, bool ShowColors) const { + StmtPrinterHelper Helper(cfg, LO); + print_block(OS, cfg, *this, Helper, true, ShowColors); + OS << '\n'; +} + +/// printTerminator - A simple pretty printer of the terminator of a CFGBlock. +void CFGBlock::printTerminator(raw_ostream &OS, + const LangOptions &LO) const { + CFGBlockTerminatorPrint TPrinter(OS, nullptr, PrintingPolicy(LO)); + TPrinter.print(getTerminator()); +} + +/// printTerminatorJson - Pretty-prints the terminator in JSON format. +void CFGBlock::printTerminatorJson(raw_ostream &Out, const LangOptions &LO, + bool AddQuotes) const { + std::string Buf; + llvm::raw_string_ostream TempOut(Buf); + + printTerminator(TempOut, LO); + + Out << JsonFormat(TempOut.str(), AddQuotes); +} + +// Returns true if by simply looking at the block, we can be sure that it +// results in a sink during analysis. This is useful to know when the analysis +// was interrupted, and we try to figure out if it would sink eventually. +// There may be many more reasons why a sink would appear during analysis +// (eg. checkers may generate sinks arbitrarily), but here we only consider +// sinks that would be obvious by looking at the CFG. +static bool isImmediateSinkBlock(const CFGBlock *Blk) { + if (Blk->hasNoReturnElement()) + return true; + + // FIXME: Throw-expressions are currently generating sinks during analysis: + // they're not supported yet, and also often used for actually terminating + // the program. So we should treat them as sinks in this analysis as well, + // at least for now, but once we have better support for exceptions, + // we'd need to carefully handle the case when the throw is being + // immediately caught. + if (llvm::any_of(*Blk, [](const CFGElement &Elm) { + if (std::optional<CFGStmt> StmtElm = Elm.getAs<CFGStmt>()) + if (isa<CXXThrowExpr>(StmtElm->getStmt())) + return true; + return false; + })) + return true; + + return false; +} + +bool CFGBlock::isInevitablySinking() const { + const CFG &Cfg = *getParent(); + + const CFGBlock *StartBlk = this; + if (isImmediateSinkBlock(StartBlk)) + return true; + + llvm::SmallVector<const CFGBlock *, 32> DFSWorkList; + llvm::SmallPtrSet<const CFGBlock *, 32> Visited; + + DFSWorkList.push_back(StartBlk); + while (!DFSWorkList.empty()) { + const CFGBlock *Blk = DFSWorkList.back(); + DFSWorkList.pop_back(); + Visited.insert(Blk); + + // If at least one path reaches the CFG exit, it means that control is + // returned to the caller. For now, say that we are not sure what + // happens next. If necessary, this can be improved to analyze + // the parent StackFrameContext's call site in a similar manner. + if (Blk == &Cfg.getExit()) + return false; + + for (const auto &Succ : Blk->succs()) { + if (const CFGBlock *SuccBlk = Succ.getReachableBlock()) { + if (!isImmediateSinkBlock(SuccBlk) && !Visited.count(SuccBlk)) { + // If the block has reachable child blocks that aren't no-return, + // add them to the worklist. + DFSWorkList.push_back(SuccBlk); + } + } + } + } + + // Nothing reached the exit. It can only mean one thing: there's no return. + return true; +} + +const Expr *CFGBlock::getLastCondition() const { + // If the terminator is a temporary dtor or a virtual base, etc, we can't + // retrieve a meaningful condition, bail out. + if (Terminator.getKind() != CFGTerminator::StmtBranch) + return nullptr; + + // Also, if this method was called on a block that doesn't have 2 successors, + // this block doesn't have retrievable condition. 
+ if (succ_size() < 2) + return nullptr; + + // FIXME: Is there a better condition expression we can return in this case? + if (size() == 0) + return nullptr; + + auto StmtElem = rbegin()->getAs<CFGStmt>(); + if (!StmtElem) + return nullptr; + + const Stmt *Cond = StmtElem->getStmt(); + if (isa<ObjCForCollectionStmt>(Cond) || isa<DeclStmt>(Cond)) + return nullptr; + + // Only ObjCForCollectionStmt is known not to be a non-Expr terminator, hence + // the cast<>. + return cast<Expr>(Cond)->IgnoreParens(); +} + +Stmt *CFGBlock::getTerminatorCondition(bool StripParens) { + Stmt *Terminator = getTerminatorStmt(); + if (!Terminator) + return nullptr; + + Expr *E = nullptr; + + switch (Terminator->getStmtClass()) { + default: + break; + + case Stmt::CXXForRangeStmtClass: + E = cast<CXXForRangeStmt>(Terminator)->getCond(); + break; + + case Stmt::ForStmtClass: + E = cast<ForStmt>(Terminator)->getCond(); + break; + + case Stmt::WhileStmtClass: + E = cast<WhileStmt>(Terminator)->getCond(); + break; + + case Stmt::DoStmtClass: + E = cast<DoStmt>(Terminator)->getCond(); + break; + + case Stmt::IfStmtClass: + E = cast<IfStmt>(Terminator)->getCond(); + break; + + case Stmt::ChooseExprClass: + E = cast<ChooseExpr>(Terminator)->getCond(); + break; + + case Stmt::IndirectGotoStmtClass: + E = cast<IndirectGotoStmt>(Terminator)->getTarget(); + break; + + case Stmt::SwitchStmtClass: + E = cast<SwitchStmt>(Terminator)->getCond(); + break; + + case Stmt::BinaryConditionalOperatorClass: + E = cast<BinaryConditionalOperator>(Terminator)->getCond(); + break; + + case Stmt::ConditionalOperatorClass: + E = cast<ConditionalOperator>(Terminator)->getCond(); + break; + + case Stmt::BinaryOperatorClass: // '&&' and '||' + E = cast<BinaryOperator>(Terminator)->getLHS(); + break; + + case Stmt::ObjCForCollectionStmtClass: + return Terminator; + } + + if (!StripParens) + return E; + + return E ? E->IgnoreParens() : nullptr; +} + +//===----------------------------------------------------------------------===// +// CFG Graphviz Visualization +//===----------------------------------------------------------------------===// + +static StmtPrinterHelper *GraphHelper; + +void CFG::viewCFG(const LangOptions &LO) const { + StmtPrinterHelper H(this, LO); + GraphHelper = &H; + llvm::ViewGraph(this,"CFG"); + GraphHelper = nullptr; +} + +namespace llvm { + +template<> +struct DOTGraphTraits<const CFG*> : public DefaultDOTGraphTraits { + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getNodeLabel(const CFGBlock *Node, const CFG *Graph) { + std::string OutSStr; + llvm::raw_string_ostream Out(OutSStr); + print_block(Out,Graph, *Node, *GraphHelper, false, false); + std::string& OutStr = Out.str(); + + if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); + + // Process string output to make it nicer... + for (unsigned i = 0; i != OutStr.length(); ++i) + if (OutStr[i] == '\n') { // Left justify + OutStr[i] = '\\'; + OutStr.insert(OutStr.begin()+i+1, 'l'); + } + + return OutStr; + } +}; + +} // namespace llvm diff --git a/contrib/llvm-project/clang/lib/Analysis/CFGReachabilityAnalysis.cpp b/contrib/llvm-project/clang/lib/Analysis/CFGReachabilityAnalysis.cpp new file mode 100644 index 000000000000..2b5d6c466cde --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/CFGReachabilityAnalysis.cpp @@ -0,0 +1,76 @@ +//===- CFGReachabilityAnalysis.cpp - Basic reachability analysis ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a flow-sensitive, (mostly) path-insensitive reachability
+// analysis based on Clang's CFGs. Clients can query if a given basic block
+// is reachable within the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h"
+#include "clang/Analysis/CFG.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+
+using namespace clang;
+
+CFGReverseBlockReachabilityAnalysis::CFGReverseBlockReachabilityAnalysis(
+    const CFG &cfg)
+    : analyzed(cfg.getNumBlockIDs(), false) {}
+
+bool CFGReverseBlockReachabilityAnalysis::isReachable(const CFGBlock *Src,
+                                                      const CFGBlock *Dst) {
+  const unsigned DstBlockID = Dst->getBlockID();
+
+  // If we haven't analyzed the destination node, run the analysis now.
+  if (!analyzed[DstBlockID]) {
+    mapReachability(Dst);
+    analyzed[DstBlockID] = true;
+  }
+
+  // Return the cached result.
+  return reachable[DstBlockID][Src->getBlockID()];
+}
+
+// Maps reachability to a common node by walking the predecessors of the
+// destination node.
+void CFGReverseBlockReachabilityAnalysis::mapReachability(const CFGBlock *Dst) {
+  SmallVector<const CFGBlock *, 11> worklist;
+  llvm::BitVector visited(analyzed.size());
+
+  ReachableSet &DstReachability = reachable[Dst->getBlockID()];
+  DstReachability.resize(analyzed.size(), false);
+
+  // Start searching from the destination node, since we commonly will perform
+  // multiple queries relating to a destination node.
+  worklist.push_back(Dst);
+  bool firstRun = true;
+
+  while (!worklist.empty()) {
+    const CFGBlock *block = worklist.pop_back_val();
+
+    if (visited[block->getBlockID()])
+      continue;
+    visited[block->getBlockID()] = true;
+
+    // Update reachability information for this node -> Dst.
+    if (!firstRun) {
+      // Don't insert Dst -> Dst unless it was a predecessor of itself.
+      DstReachability[block->getBlockID()] = true;
+    }
+    else
+      firstRun = false;
+
+    // Add the predecessors to the worklist.
+    for (CFGBlock::const_pred_iterator i = block->pred_begin(),
+         e = block->pred_end(); i != e; ++i) {
+      if (*i)
+        worklist.push_back(*i);
+    }
+  }
+}
diff --git a/contrib/llvm-project/clang/lib/Analysis/CFGStmtMap.cpp b/contrib/llvm-project/clang/lib/Analysis/CFGStmtMap.cpp
new file mode 100644
index 000000000000..c3a4581e1fb1
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/CFGStmtMap.cpp
@@ -0,0 +1,91 @@
+//===--- CFGStmtMap.cpp - Map from Stmt* to CFGBlock* ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the CFGStmtMap class, which defines a mapping from +// Stmt* to CFGBlock* +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "clang/AST/ParentMap.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/CFGStmtMap.h" +#include <optional> + +using namespace clang; + +typedef llvm::DenseMap<const Stmt*, CFGBlock*> SMap; +static SMap *AsMap(void *m) { return (SMap*) m; } + +CFGStmtMap::~CFGStmtMap() { delete AsMap(M); } + +CFGBlock *CFGStmtMap::getBlock(Stmt *S) { + SMap *SM = AsMap(M); + Stmt *X = S; + + // If 'S' isn't in the map, walk the ParentMap to see if one of its ancestors + // is in the map. + while (X) { + SMap::iterator I = SM->find(X); + if (I != SM->end()) { + CFGBlock *B = I->second; + // Memoize this lookup. + if (X != S) + (*SM)[X] = B; + return B; + } + + X = PM->getParentIgnoreParens(X); + } + + return nullptr; +} + +static void Accumulate(SMap &SM, CFGBlock *B) { + // First walk the block-level expressions. + for (CFGBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { + const CFGElement &CE = *I; + std::optional<CFGStmt> CS = CE.getAs<CFGStmt>(); + if (!CS) + continue; + + CFGBlock *&Entry = SM[CS->getStmt()]; + // If 'Entry' is already initialized (e.g., a terminator was already), + // skip. + if (Entry) + continue; + + Entry = B; + + } + + // Look at the label of the block. + if (Stmt *Label = B->getLabel()) + SM[Label] = B; + + // Finally, look at the terminator. If the terminator was already added + // because it is a block-level expression in another block, overwrite + // that mapping. + if (Stmt *Term = B->getTerminatorStmt()) + SM[Term] = B; +} + +CFGStmtMap *CFGStmtMap::Build(CFG *C, ParentMap *PM) { + if (!C || !PM) + return nullptr; + + SMap *SM = new SMap(); + + // Walk all blocks, accumulating the block-level expressions, labels, + // and terminators. + for (CFG::iterator I = C->begin(), E = C->end(); I != E; ++I) + Accumulate(*SM, *I); + + return new CFGStmtMap(PM, SM); +} + diff --git a/contrib/llvm-project/clang/lib/Analysis/CallGraph.cpp b/contrib/llvm-project/clang/lib/Analysis/CallGraph.cpp new file mode 100644 index 000000000000..59cc939b6fd1 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/CallGraph.cpp @@ -0,0 +1,282 @@ +//===- CallGraph.cpp - AST-based Call graph -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the AST-based CallGraph. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/CallGraph.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprObjC.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DOTGraphTraits.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <memory> +#include <string> + +using namespace clang; + +#define DEBUG_TYPE "CallGraph" + +STATISTIC(NumObjCCallEdges, "Number of Objective-C method call edges"); +STATISTIC(NumBlockCallEdges, "Number of block call edges"); + +namespace { + +/// A helper class, which walks the AST and locates all the call sites in the +/// given function body. +class CGBuilder : public StmtVisitor<CGBuilder> { + CallGraph *G; + CallGraphNode *CallerNode; + +public: + CGBuilder(CallGraph *g, CallGraphNode *N) : G(g), CallerNode(N) {} + + void VisitStmt(Stmt *S) { VisitChildren(S); } + + Decl *getDeclFromCall(CallExpr *CE) { + if (FunctionDecl *CalleeDecl = CE->getDirectCallee()) + return CalleeDecl; + + // Simple detection of a call through a block. + Expr *CEE = CE->getCallee()->IgnoreParenImpCasts(); + if (BlockExpr *Block = dyn_cast<BlockExpr>(CEE)) { + NumBlockCallEdges++; + return Block->getBlockDecl(); + } + + return nullptr; + } + + void addCalledDecl(Decl *D, Expr *CallExpr) { + if (G->includeCalleeInGraph(D)) { + CallGraphNode *CalleeNode = G->getOrInsertNode(D); + CallerNode->addCallee({CalleeNode, CallExpr}); + } + } + + void VisitCallExpr(CallExpr *CE) { + if (Decl *D = getDeclFromCall(CE)) + addCalledDecl(D, CE); + VisitChildren(CE); + } + + void VisitLambdaExpr(LambdaExpr *LE) { + if (FunctionTemplateDecl *FTD = LE->getDependentCallOperator()) + for (FunctionDecl *FD : FTD->specializations()) + G->VisitFunctionDecl(FD); + else if (CXXMethodDecl *MD = LE->getCallOperator()) + G->VisitFunctionDecl(MD); + } + + void VisitCXXNewExpr(CXXNewExpr *E) { + if (FunctionDecl *FD = E->getOperatorNew()) + addCalledDecl(FD, E); + VisitChildren(E); + } + + void VisitCXXConstructExpr(CXXConstructExpr *E) { + CXXConstructorDecl *Ctor = E->getConstructor(); + if (FunctionDecl *Def = Ctor->getDefinition()) + addCalledDecl(Def, E); + VisitChildren(E); + } + + // Include the evaluation of the default argument. + void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { + Visit(E->getExpr()); + } + + // Include the evaluation of the default initializers in a class. + void VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { + Visit(E->getExpr()); + } + + // Adds may-call edges for the ObjC message sends. + void VisitObjCMessageExpr(ObjCMessageExpr *ME) { + if (ObjCInterfaceDecl *IDecl = ME->getReceiverInterface()) { + Selector Sel = ME->getSelector(); + + // Find the callee definition within the same translation unit. 
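// --- Editorial aside (illustrative sketch, not part of the upstream diff) ---
// For a message send such as the hypothetical
//
//   [self doWorkWithCompletion:^{ /* ... */ }];
//
// the selector lookup below only yields a callee when the method body is
// visible in this translation unit; otherwise no edge is added.
// --- End of editorial aside. ------------------------------------------------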
+ Decl *D = nullptr; + if (ME->isInstanceMessage()) + D = IDecl->lookupPrivateMethod(Sel); + else + D = IDecl->lookupPrivateClassMethod(Sel); + if (D) { + addCalledDecl(D, ME); + NumObjCCallEdges++; + } + } + } + + void VisitChildren(Stmt *S) { + for (Stmt *SubStmt : S->children()) + if (SubStmt) + this->Visit(SubStmt); + } +}; + +} // namespace + +void CallGraph::addNodesForBlocks(DeclContext *D) { + if (BlockDecl *BD = dyn_cast<BlockDecl>(D)) + addNodeForDecl(BD, true); + + for (auto *I : D->decls()) + if (auto *DC = dyn_cast<DeclContext>(I)) + addNodesForBlocks(DC); +} + +CallGraph::CallGraph() { + Root = getOrInsertNode(nullptr); +} + +CallGraph::~CallGraph() = default; + +bool CallGraph::includeInGraph(const Decl *D) { + assert(D); + if (!D->hasBody()) + return false; + + return includeCalleeInGraph(D); +} + +bool CallGraph::includeCalleeInGraph(const Decl *D) { + if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + // We skip function template definitions, as their semantics is + // only determined when they are instantiated. + if (FD->isDependentContext()) + return false; + + IdentifierInfo *II = FD->getIdentifier(); + if (II && II->getName().startswith("__inline")) + return false; + } + + return true; +} + +void CallGraph::addNodeForDecl(Decl* D, bool IsGlobal) { + assert(D); + + // Allocate a new node, mark it as root, and process its calls. + CallGraphNode *Node = getOrInsertNode(D); + + // Process all the calls by this function as well. + CGBuilder builder(this, Node); + if (Stmt *Body = D->getBody()) + builder.Visit(Body); + + // Include C++ constructor member initializers. + if (auto constructor = dyn_cast<CXXConstructorDecl>(D)) { + for (CXXCtorInitializer *init : constructor->inits()) { + builder.Visit(init->getInit()); + } + } +} + +CallGraphNode *CallGraph::getNode(const Decl *F) const { + FunctionMapTy::const_iterator I = FunctionMap.find(F); + if (I == FunctionMap.end()) return nullptr; + return I->second.get(); +} + +CallGraphNode *CallGraph::getOrInsertNode(Decl *F) { + if (F && !isa<ObjCMethodDecl>(F)) + F = F->getCanonicalDecl(); + + std::unique_ptr<CallGraphNode> &Node = FunctionMap[F]; + if (Node) + return Node.get(); + + Node = std::make_unique<CallGraphNode>(F); + // Make Root node a parent of all functions to make sure all are reachable. + if (F) + Root->addCallee({Node.get(), /*Call=*/nullptr}); + return Node.get(); +} + +void CallGraph::print(raw_ostream &OS) const { + OS << " --- Call graph Dump --- \n"; + + // We are going to print the graph in reverse post order, partially, to make + // sure the output is deterministic. 
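// --- Editorial aside (illustrative sketch, not part of the upstream diff) ---
// Typical client usage, assuming a hypothetical TranslationUnitDecl `TU`:
//
//   clang::CallGraph CG;
//   CG.addToCallGraph(TU);   // walk the TU, adding nodes and call edges
//   CG.dump();               // prints via the RPO traversal below
//
// addToCallGraph() comes from the RecursiveASTVisitor base in the header.
// --- End of editorial aside. ------------------------------------------------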
+ llvm::ReversePostOrderTraversal<const CallGraph *> RPOT(this); + for (llvm::ReversePostOrderTraversal<const CallGraph *>::rpo_iterator + I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { + const CallGraphNode *N = *I; + + OS << " Function: "; + if (N == Root) + OS << "< root >"; + else + N->print(OS); + + OS << " calls: "; + for (CallGraphNode::const_iterator CI = N->begin(), + CE = N->end(); CI != CE; ++CI) { + assert(CI->Callee != Root && "No one can call the root node."); + CI->Callee->print(OS); + OS << " "; + } + OS << '\n'; + } + OS.flush(); +} + +LLVM_DUMP_METHOD void CallGraph::dump() const { + print(llvm::errs()); +} + +void CallGraph::viewGraph() const { + llvm::ViewGraph(this, "CallGraph"); +} + +void CallGraphNode::print(raw_ostream &os) const { + if (const NamedDecl *ND = dyn_cast_or_null<NamedDecl>(FD)) + return ND->printQualifiedName(os); + os << "< >"; +} + +LLVM_DUMP_METHOD void CallGraphNode::dump() const { + print(llvm::errs()); +} + +namespace llvm { + +template <> +struct DOTGraphTraits<const CallGraph*> : public DefaultDOTGraphTraits { + DOTGraphTraits (bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getNodeLabel(const CallGraphNode *Node, + const CallGraph *CG) { + if (CG->getRoot() == Node) { + return "< root >"; + } + if (const NamedDecl *ND = dyn_cast_or_null<NamedDecl>(Node->getDecl())) + return ND->getNameAsString(); + else + return "< >"; + } +}; + +} // namespace llvm diff --git a/contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp b/contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp new file mode 100644 index 000000000000..5b4fc24b6f0e --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp @@ -0,0 +1,1705 @@ +//===- CalledOnceCheck.cpp - Check 'called once' parameters ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/CalledOnceCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprObjC.h"
+#include "clang/AST/OperationKinds.h"
+#include "clang/AST/ParentMap.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/StmtObjC.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <memory>
+#include <optional>
+
+using namespace clang;
+
+namespace {
+static constexpr unsigned EXPECTED_MAX_NUMBER_OF_PARAMS = 2;
+template <class T>
+using ParamSizedVector = llvm::SmallVector<T, EXPECTED_MAX_NUMBER_OF_PARAMS>;
+static constexpr unsigned EXPECTED_NUMBER_OF_BASIC_BLOCKS = 8;
+template <class T>
+using CFGSizedVector = llvm::SmallVector<T, EXPECTED_NUMBER_OF_BASIC_BLOCKS>;
+constexpr llvm::StringLiteral CONVENTIONAL_NAMES[] = {
+    "completionHandler", "completion",      "withCompletionHandler",
+    "withCompletion",    "completionBlock", "withCompletionBlock",
+    "replyTo",           "reply",           "withReplyTo"};
+constexpr llvm::StringLiteral CONVENTIONAL_SUFFIXES[] = {
+    "WithCompletionHandler", "WithCompletion", "WithCompletionBlock",
+    "WithReplyTo", "WithReply"};
+constexpr llvm::StringLiteral CONVENTIONAL_CONDITIONS[] = {
+    "error", "cancel", "shouldCall", "done", "OK", "success"};
+
+struct KnownCalledOnceParameter {
+  llvm::StringLiteral FunctionName;
+  unsigned ParamIndex;
+};
+constexpr KnownCalledOnceParameter KNOWN_CALLED_ONCE_PARAMETERS[] = {
+    {llvm::StringLiteral{"dispatch_async"}, 1},
+    {llvm::StringLiteral{"dispatch_async_and_wait"}, 1},
+    {llvm::StringLiteral{"dispatch_after"}, 2},
+    {llvm::StringLiteral{"dispatch_sync"}, 1},
+    {llvm::StringLiteral{"dispatch_once"}, 1},
+    {llvm::StringLiteral{"dispatch_barrier_async"}, 1},
+    {llvm::StringLiteral{"dispatch_barrier_async_and_wait"}, 1},
+    {llvm::StringLiteral{"dispatch_barrier_sync"}, 1}};
+
+class ParameterStatus {
+public:
+  // Status kind is basically the main part of parameter's status.
+  // The kind represents our knowledge (so far) about a tracked parameter
+  // in the context of this analysis.
+  //
+  // Since we want to report on missing and extraneous calls, we need to
+  // track whether the parameter was called or not. This automatically
+  // decides two kinds: `NotCalled` and `Called`.
+  //
+  // One of the erroneous situations is the case when the parameter is called
+  // only on some of the paths. We could've considered it `NotCalled`, but we
+  // want to report double call warnings even if these two calls are not
+  // guaranteed to happen in every execution. We also don't want to have it
+  // as `Called` because not calling the tracked parameter on all of the
+  // paths is an error on its own.
+  // For these reasons, we need to have a separate kind, `MaybeCalled`, and
+  // change `Called` to `DefinitelyCalled` to avoid confusion.
+  //
+  // The two violations of calling a parameter more than once and of not
+  // calling it on every path are not, however, mutually exclusive. In
+  // situations where both violations take place, we prefer to report ONLY
+  // the double call. It's always harder to pinpoint a bug that has arisen
+  // when a user neglects to take the right action (and therefore, no action
+  // is taken), than when a user takes the wrong action. And, in order to
+  // remember that we already reported a double call, we need another kind:
+  // `Reported`.
+  //
+  // Our analysis is intra-procedural and, while in the perfect world,
+  // developers only use tracked parameters to call them, in the real world,
+  // the picture might be different. Parameters can be stored in global
+  // variables or leaked into other functions that we know nothing about.
+  // We try to be lenient and trust users. Another kind, `Escaped`, reflects
+  // such situations. We don't know if it gets called there or not, but we
+  // should always think of `Escaped` as the best possible option.
+  //
+  // Some of the paths in the analyzed functions might end with a call
+  // to noreturn functions. Such paths are not required to have parameter
+  // calls and we want to track that. For the purposes of better diagnostics,
+  // we don't want to reuse `Escaped` and, thus, have another kind,
+  // `NoReturn`.
+  //
+  // Additionally, we have a `NotVisited` kind that tells us nothing about
+  // a tracked parameter, but is used for tracking analyzed (aka visited)
+  // basic blocks.
+  //
+  // If we consider `|` to be a JOIN operation of two kinds coming from
+  // two different paths, the following properties must hold:
+  //
+  //   1. for any Kind K: K | K == K
+  //      Joining two identical kinds should result in the same kind.
+  //
+  //   2. for any Kind K: Reported | K == Reported
+  //      Doesn't matter on which path it was reported, it still is.
+  //
+  //   3. for any Kind K: NoReturn | K == K
+  //      We can totally ignore noreturn paths during merges.
+  //
+  //   4. DefinitelyCalled | NotCalled == MaybeCalled
+  //      Called on one path, not called on another - that's simply
+  //      a definition for MaybeCalled.
+  //
+  //   5. for any Kind K in [DefinitelyCalled, NotCalled, MaybeCalled]:
+  //      Escaped | K == K
+  //      Escaped mirrors other statuses after joins.
+  //      Every situation, when we join any of the listed kinds K,
+  //      is a violation. For this reason, in order to assume the
+  //      best outcome for this escape, we consider it to be the
+  //      same as the other path.
+  //
+  //   6. for any Kind K in [DefinitelyCalled, NotCalled]:
+  //      MaybeCalled | K == MaybeCalled
+  //      MaybeCalled should basically stay after almost every join.
+  enum Kind {
+    // No-return paths should be absolutely transparent for the analysis.
+    // 0x0 is the identity element for the selected join operation (binary or).
+    NoReturn = 0x0, /* 0000 */
+    // Escaped marks situations when the marked parameter escaped into
+    // another function (so we can assume that it was possibly called there).
+    Escaped = 0x1, /* 0001 */
+    // Parameter was definitely called once at this point.
+    DefinitelyCalled = 0x3, /* 0011 */
+    // Kinds less or equal to NON_ERROR_STATUS are not considered errors.
+    NON_ERROR_STATUS = DefinitelyCalled,
+    // Parameter was not yet called.
+    NotCalled = 0x5, /* 0101 */
+    // Parameter was not called on at least one path leading to this point,
+    // while there is also at least one path where it does get called.
+    MaybeCalled = 0x7, /* 0111 */
+    // Parameter was not yet analyzed.
+    NotVisited = 0x8, /* 1000 */
+    // We already reported a violation and stopped tracking calls for this
+    // parameter.
+    Reported = 0xF, /* 1111 */
+    LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported)
+  };
+
+  constexpr ParameterStatus() = default;
+  /* implicit */ ParameterStatus(Kind K) : StatusKind(K) {
+    assert(!seenAnyCalls(K) && "Can't initialize status without a call");
+  }
+  ParameterStatus(Kind K, const Expr *Call) : StatusKind(K), Call(Call) {
+    assert(seenAnyCalls(K) && "This kind is not supposed to have a call");
+  }
+
+  const Expr &getCall() const {
+    assert(seenAnyCalls(getKind()) && "ParameterStatus doesn't have a call");
+    return *Call;
+  }
+  static bool seenAnyCalls(Kind K) {
+    return (K & DefinitelyCalled) == DefinitelyCalled && K != Reported;
+  }
+  bool seenAnyCalls() const { return seenAnyCalls(getKind()); }
+
+  static bool isErrorStatus(Kind K) { return K > NON_ERROR_STATUS; }
+  bool isErrorStatus() const { return isErrorStatus(getKind()); }
+
+  Kind getKind() const { return StatusKind; }
+
+  void join(const ParameterStatus &Other) {
+    // If we have a pointer already, let's keep it.
+    // For the purposes of the analysis, it doesn't really matter
+    // which call we report.
+    //
+    // If we don't have a pointer, let's take whatever gets joined.
+    if (!Call) {
+      Call = Other.Call;
+    }
+    // Join kinds.
+    StatusKind |= Other.getKind();
+  }
+
+  bool operator==(const ParameterStatus &Other) const {
+    // We compare only the kinds; the pointers on their own are only
+    // additional information.
+    return getKind() == Other.getKind();
+  }
+
+private:
+  // It would've been a perfect place to use llvm::PointerIntPair, but
+  // unfortunately NumLowBitsAvailable for clang::Expr had been reduced to 2.
+  Kind StatusKind = NotVisited;
+  const Expr *Call = nullptr;
+};
+
+/// State aggregates statuses of all tracked parameters.
+class State {
+public:
+  State(unsigned Size, ParameterStatus::Kind K = ParameterStatus::NotVisited)
+      : ParamData(Size, K) {}
+
+  /// Return status of a parameter with the given index.
+  /// \{
+  ParameterStatus &getStatusFor(unsigned Index) { return ParamData[Index]; }
+  const ParameterStatus &getStatusFor(unsigned Index) const {
+    return ParamData[Index];
+  }
+  /// \}
+
+  /// Return true if parameter with the given index can be called.
+  bool seenAnyCalls(unsigned Index) const {
+    return getStatusFor(Index).seenAnyCalls();
+  }
+  /// Return a reference that we consider a call.
+  ///
+  /// Should only be used for parameters that can be called.
+  const Expr &getCallFor(unsigned Index) const {
+    return getStatusFor(Index).getCall();
+  }
+  /// Return status kind of parameter with the given index.
+  ParameterStatus::Kind getKindFor(unsigned Index) const {
+    return getStatusFor(Index).getKind();
+  }
+
+  bool isVisited() const {
+    return llvm::all_of(ParamData, [](const ParameterStatus &S) {
+      return S.getKind() != ParameterStatus::NotVisited;
+    });
+  }
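  // --- Editorial aside (compile-time sanity checks, not part of the upstream
  // diff). These restate two of the join laws documented above
  // ParameterStatus::Kind in terms of the chosen bit encodings; the casts
  // force the built-in integer '|' so the checks stay constant expressions.
  static_assert((unsigned(ParameterStatus::DefinitelyCalled) |
                 unsigned(ParameterStatus::NotCalled)) ==
                    unsigned(ParameterStatus::MaybeCalled),
                "property 4: DefinitelyCalled | NotCalled == MaybeCalled");
  static_assert((unsigned(ParameterStatus::NoReturn) |
                 unsigned(ParameterStatus::NotCalled)) ==
                    unsigned(ParameterStatus::NotCalled),
                "property 3: NoReturn is the identity of the join");
  // --- End of editorial aside. ----------------------------------------------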
+  // Join other state into the current state.
+  void join(const State &Other) {
+    assert(ParamData.size() == Other.ParamData.size() &&
+           "Couldn't join statuses with different sizes");
+    for (auto Pair : llvm::zip(ParamData, Other.ParamData)) {
+      std::get<0>(Pair).join(std::get<1>(Pair));
+    }
+  }
+
+  using iterator = ParamSizedVector<ParameterStatus>::iterator;
+  using const_iterator = ParamSizedVector<ParameterStatus>::const_iterator;
+
+  iterator begin() { return ParamData.begin(); }
+  iterator end() { return ParamData.end(); }
+
+  const_iterator begin() const { return ParamData.begin(); }
+  const_iterator end() const { return ParamData.end(); }
+
+  bool operator==(const State &Other) const {
+    return ParamData == Other.ParamData;
+  }
+
+private:
+  ParamSizedVector<ParameterStatus> ParamData;
+};
+
+/// A simple class that finds DeclRefExpr in the given expression.
+///
+/// However, we don't want to find ANY nested DeclRefExpr skipping whatever
+/// expressions on our way. Only certain expressions considered "no-op"
+/// for our task are indeed skipped.
+class DeclRefFinder
+    : public ConstStmtVisitor<DeclRefFinder, const DeclRefExpr *> {
+public:
+  /// Find a DeclRefExpr in the given expression.
+  ///
+  /// In its most basic form (ShouldRetrieveFromComparisons == false),
+  /// this function can be simply reduced to the following question:
+  ///
+  ///   - If expression E is used as a function argument, could we say
+  ///     that the DeclRefExpr nested in E is used as an argument?
+  ///
+  /// According to this rule, we can say that parens, casts and dereferencing
+  /// (dereferencing applies only to function pointers, but that is our case)
+  /// can be skipped.
+  ///
+  /// When we should look into comparisons, the question changes to:
+  ///
+  ///   - If expression E is used as a condition, could we say that
+  ///     the DeclRefExpr is being checked?
+  ///
+  /// And even though these are two different questions, they have quite a lot
+  /// in common. Actually, we can say that whatever expression answers the
+  /// first question positively also fits the second question as well.
+  ///
+  /// In addition, we skip the binary operators == and !=, and the unary
+  /// operator !.
+  static const DeclRefExpr *find(const Expr *E,
+                                 bool ShouldRetrieveFromComparisons = false) {
+    return DeclRefFinder(ShouldRetrieveFromComparisons).Visit(E);
+  }
+
+  const DeclRefExpr *VisitDeclRefExpr(const DeclRefExpr *DR) { return DR; }
+
+  const DeclRefExpr *VisitUnaryOperator(const UnaryOperator *UO) {
+    switch (UO->getOpcode()) {
+    case UO_LNot:
+      // We care about logical not only if we care about comparisons.
+      if (!ShouldRetrieveFromComparisons)
+        return nullptr;
+      [[fallthrough]];
+    // Function pointers/references can be dereferenced before a call.
+    // That doesn't make it, however, any different from a regular call.
+    // For this reason, the dereference operation is a "no-op".
+    case UO_Deref:
+      return Visit(UO->getSubExpr());
+    default:
+      return nullptr;
+    }
+  }
+
+  const DeclRefExpr *VisitBinaryOperator(const BinaryOperator *BO) {
+    if (!ShouldRetrieveFromComparisons)
+      return nullptr;
+
+    switch (BO->getOpcode()) {
+    case BO_EQ:
+    case BO_NE: {
+      const DeclRefExpr *LHS = Visit(BO->getLHS());
+      return LHS ? LHS : Visit(BO->getRHS());
+    }
+    default:
+      return nullptr;
+    }
+  }
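  // --- Editorial aside (illustrative sketch, not part of the upstream diff):
  // with ShouldRetrieveFromComparisons == true, a condition such as the
  // hypothetical
  //
  //   if (completionHandler != nil) { /* ... */ }
  //
  // is looked through via the '!=' case above, so find() yields the
  // DeclRefExpr for 'completionHandler'; with the flag off it yields null.
  // --- End of editorial aside. ----------------------------------------------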
+  const DeclRefExpr *VisitOpaqueValueExpr(const OpaqueValueExpr *OVE) {
+    return Visit(OVE->getSourceExpr());
+  }
+
+  const DeclRefExpr *VisitCallExpr(const CallExpr *CE) {
+    if (!ShouldRetrieveFromComparisons)
+      return nullptr;
+
+    // We want to see through some of the boolean builtin functions
+    // that we are likely to see in conditions.
+    switch (CE->getBuiltinCallee()) {
+    case Builtin::BI__builtin_expect:
+    case Builtin::BI__builtin_expect_with_probability: {
+      assert(CE->getNumArgs() >= 2);
+
+      const DeclRefExpr *Candidate = Visit(CE->getArg(0));
+      return Candidate != nullptr ? Candidate : Visit(CE->getArg(1));
+    }
+
+    case Builtin::BI__builtin_unpredictable:
+      return Visit(CE->getArg(0));
+
+    default:
+      return nullptr;
+    }
+  }
+
+  const DeclRefExpr *VisitExpr(const Expr *E) {
+    // This is a fallback method that gets called whenever the actual type
+    // of the given expression is not covered.
+    //
+    // We first check if we have anything to skip. And then repeat the whole
+    // procedure for a nested expression instead.
+    const Expr *DeclutteredExpr = E->IgnoreParenCasts();
+    return E != DeclutteredExpr ? Visit(DeclutteredExpr) : nullptr;
+  }
+
+private:
+  DeclRefFinder(bool ShouldRetrieveFromComparisons)
+      : ShouldRetrieveFromComparisons(ShouldRetrieveFromComparisons) {}
+
+  bool ShouldRetrieveFromComparisons;
+};
+
+const DeclRefExpr *findDeclRefExpr(const Expr *In,
+                                   bool ShouldRetrieveFromComparisons = false) {
+  return DeclRefFinder::find(In, ShouldRetrieveFromComparisons);
+}
+
+const ParmVarDecl *
+findReferencedParmVarDecl(const Expr *In,
+                          bool ShouldRetrieveFromComparisons = false) {
+  if (const DeclRefExpr *DR =
+          findDeclRefExpr(In, ShouldRetrieveFromComparisons)) {
+    return dyn_cast<ParmVarDecl>(DR->getDecl());
+  }
+
+  return nullptr;
+}
+
+/// Return the condition expression of a statement if it has one.
+const Expr *getCondition(const Stmt *S) {
+  if (!S) {
+    return nullptr;
+  }
+
+  if (const auto *If = dyn_cast<IfStmt>(S)) {
+    return If->getCond();
+  }
+  if (const auto *Ternary = dyn_cast<AbstractConditionalOperator>(S)) {
+    return Ternary->getCond();
+  }
+
+  return nullptr;
+}
+
+/// A small helper class that collects all named identifiers in the given
+/// expression. It traverses it recursively, so names from deeper levels
+/// of the AST will end up in the results.
+/// Results might have duplicate names; if this is a problem, convert to
+/// string sets afterwards.
+class NamesCollector : public RecursiveASTVisitor<NamesCollector> { +public: + static constexpr unsigned EXPECTED_NUMBER_OF_NAMES = 5; + using NameCollection = + llvm::SmallVector<llvm::StringRef, EXPECTED_NUMBER_OF_NAMES>; + + static NameCollection collect(const Expr *From) { + NamesCollector Impl; + Impl.TraverseStmt(const_cast<Expr *>(From)); + return Impl.Result; + } + + bool VisitDeclRefExpr(const DeclRefExpr *E) { + Result.push_back(E->getDecl()->getName()); + return true; + } + + bool VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr *E) { + llvm::StringRef Name; + + if (E->isImplicitProperty()) { + ObjCMethodDecl *PropertyMethodDecl = nullptr; + if (E->isMessagingGetter()) { + PropertyMethodDecl = E->getImplicitPropertyGetter(); + } else { + PropertyMethodDecl = E->getImplicitPropertySetter(); + } + assert(PropertyMethodDecl && + "Implicit property must have associated declaration"); + Name = PropertyMethodDecl->getSelector().getNameForSlot(0); + } else { + assert(E->isExplicitProperty()); + Name = E->getExplicitProperty()->getName(); + } + + Result.push_back(Name); + return true; + } + +private: + NamesCollector() = default; + NameCollection Result; +}; + +/// Check whether the given expression mentions any of conventional names. +bool mentionsAnyOfConventionalNames(const Expr *E) { + NamesCollector::NameCollection MentionedNames = NamesCollector::collect(E); + + return llvm::any_of(MentionedNames, [](llvm::StringRef ConditionName) { + return llvm::any_of( + CONVENTIONAL_CONDITIONS, + [ConditionName](const llvm::StringLiteral &Conventional) { + return ConditionName.contains_insensitive(Conventional); + }); + }); +} + +/// Clarification is a simple pair of a reason why parameter is not called +/// on every path and a statement to blame. +struct Clarification { + NeverCalledReason Reason; + const Stmt *Location; +}; + +/// A helper class that can produce a clarification based on the given pair +/// of basic blocks. +class NotCalledClarifier + : public ConstStmtVisitor<NotCalledClarifier, + std::optional<Clarification>> { +public: + /// The main entrypoint for the class, the function that tries to find the + /// clarification of how to explain which sub-path starts with a CFG edge + /// from Conditional to SuccWithoutCall. + /// + /// This means that this function has one precondition: + /// SuccWithoutCall should be a successor block for Conditional. + /// + /// Because clarification is not needed for non-trivial pairs of blocks + /// (i.e. SuccWithoutCall is not the only successor), it returns meaningful + /// results only for such cases. For this very reason, the parent basic + /// block, Conditional, is named that way, so it is clear what kind of + /// block is expected. 
+  static std::optional<Clarification> clarify(const CFGBlock *Conditional,
+                                              const CFGBlock *SuccWithoutCall) {
+    if (const Stmt *Terminator = Conditional->getTerminatorStmt()) {
+      return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator);
+    }
+    return std::nullopt;
+  }
+
+  std::optional<Clarification> VisitIfStmt(const IfStmt *If) {
+    return VisitBranchingBlock(If, NeverCalledReason::IfThen);
+  }
+
+  std::optional<Clarification>
+  VisitAbstractConditionalOperator(const AbstractConditionalOperator *Ternary) {
+    return VisitBranchingBlock(Ternary, NeverCalledReason::IfThen);
+  }
+
+  std::optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) {
+    const Stmt *CaseToBlame = SuccInQuestion->getLabel();
+    if (!CaseToBlame) {
+      // If the interesting basic block is not labeled, it means that this
+      // basic block does not represent any of the cases.
+      return Clarification{NeverCalledReason::SwitchSkipped, Switch};
+    }
+
+    for (const SwitchCase *Case = Switch->getSwitchCaseList(); Case;
+         Case = Case->getNextSwitchCase()) {
+      if (Case == CaseToBlame) {
+        return Clarification{NeverCalledReason::Switch, Case};
+      }
+    }
+
+    llvm_unreachable("Found unexpected switch structure");
+  }
+
+  std::optional<Clarification> VisitForStmt(const ForStmt *For) {
+    return VisitBranchingBlock(For, NeverCalledReason::LoopEntered);
+  }
+
+  std::optional<Clarification> VisitWhileStmt(const WhileStmt *While) {
+    return VisitBranchingBlock(While, NeverCalledReason::LoopEntered);
+  }
+
+  std::optional<Clarification>
+  VisitBranchingBlock(const Stmt *Terminator, NeverCalledReason DefaultReason) {
+    assert(Parent->succ_size() == 2 &&
+           "Branching block should have exactly two successors");
+    unsigned SuccessorIndex = getSuccessorIndex(Parent, SuccInQuestion);
+    NeverCalledReason ActualReason =
+        updateForSuccessor(DefaultReason, SuccessorIndex);
+    return Clarification{ActualReason, Terminator};
+  }
+
+  std::optional<Clarification> VisitBinaryOperator(const BinaryOperator *) {
+    // We don't want to report on short-circuit logical operations.
+    return std::nullopt;
+  }
+
+  std::optional<Clarification> VisitStmt(const Stmt *Terminator) {
+    // If we got here, we didn't have a visit function for a more derived
+    // class of statement that this terminator actually belongs to.
+    //
+    // This is not a good scenario and should not happen in practice, but
+    // at least we'll warn the user.
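    // --- Editorial aside (worked example, not part of the upstream diff):
    // in the CFG of 'if (c) { ... } else { ... }' the then-edge is successor
    // 0 and the else-edge is successor 1, so a call missing on the else path
    // reaches VisitBranchingBlock() above as updateForSuccessor(IfThen, 1),
    // producing the next enumerator after IfThen (IfElse, assuming the
    // NeverCalledReason order declared in the corresponding header).
    // --- End of editorial aside. --------------------------------------------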
+ return Clarification{NeverCalledReason::FallbackReason, Terminator}; + } + + static unsigned getSuccessorIndex(const CFGBlock *Parent, + const CFGBlock *Child) { + CFGBlock::const_succ_iterator It = llvm::find(Parent->succs(), Child); + assert(It != Parent->succ_end() && + "Given blocks should be in parent-child relationship"); + return It - Parent->succ_begin(); + } + + static NeverCalledReason + updateForSuccessor(NeverCalledReason ReasonForTrueBranch, + unsigned SuccessorIndex) { + assert(SuccessorIndex <= 1); + unsigned RawReason = + static_cast<unsigned>(ReasonForTrueBranch) + SuccessorIndex; + assert(RawReason <= + static_cast<unsigned>(NeverCalledReason::LARGEST_VALUE)); + return static_cast<NeverCalledReason>(RawReason); + } + +private: + NotCalledClarifier(const CFGBlock *Parent, const CFGBlock *SuccInQuestion) + : Parent(Parent), SuccInQuestion(SuccInQuestion) {} + + const CFGBlock *Parent, *SuccInQuestion; +}; + +class CalledOnceChecker : public ConstStmtVisitor<CalledOnceChecker> { +public: + static void check(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, + bool CheckConventionalParameters) { + CalledOnceChecker(AC, Handler, CheckConventionalParameters).check(); + } + +private: + CalledOnceChecker(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, + bool CheckConventionalParameters) + : FunctionCFG(*AC.getCFG()), AC(AC), Handler(Handler), + CheckConventionalParameters(CheckConventionalParameters), + CurrentState(0) { + initDataStructures(); + assert((size() == 0 || !States.empty()) && + "Data structures are inconsistent"); + } + + //===----------------------------------------------------------------------===// + // Initializing functions + //===----------------------------------------------------------------------===// + + void initDataStructures() { + const Decl *AnalyzedDecl = AC.getDecl(); + + if (const auto *Function = dyn_cast<FunctionDecl>(AnalyzedDecl)) { + findParamsToTrack(Function); + } else if (const auto *Method = dyn_cast<ObjCMethodDecl>(AnalyzedDecl)) { + findParamsToTrack(Method); + } else if (const auto *Block = dyn_cast<BlockDecl>(AnalyzedDecl)) { + findCapturesToTrack(Block); + findParamsToTrack(Block); + } + + // Have something to track, let's init states for every block from the CFG. + if (size() != 0) { + States = + CFGSizedVector<State>(FunctionCFG.getNumBlockIDs(), State(size())); + } + } + + void findCapturesToTrack(const BlockDecl *Block) { + for (const auto &Capture : Block->captures()) { + if (const auto *P = dyn_cast<ParmVarDecl>(Capture.getVariable())) { + // Parameter DeclContext is its owning function or method. + const DeclContext *ParamContext = P->getDeclContext(); + if (shouldBeCalledOnce(ParamContext, P)) { + TrackedParams.push_back(P); + } + } + } + } + + template <class FunctionLikeDecl> + void findParamsToTrack(const FunctionLikeDecl *Function) { + for (unsigned Index : llvm::seq<unsigned>(0u, Function->param_size())) { + if (shouldBeCalledOnce(Function, Index)) { + TrackedParams.push_back(Function->getParamDecl(Index)); + } + } + } + + //===----------------------------------------------------------------------===// + // Main logic 'check' functions + //===----------------------------------------------------------------------===// + + void check() { + // Nothing to check here: we don't have marked parameters. 
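    // --- Editorial aside (illustrative sketch, not part of the upstream
    // diff): an example of what ends up tracked. In the hypothetical method
    //
    //   - (void)fetchWithCompletionHandler:(void (^)(void))handler;
    //
    // 'handler' is tracked either because it is explicitly marked (via an
    // attribute) or, when CheckConventionalParameters is on, because its name
    // and the selector match the conventional names/suffixes listed above.
    // The early return below fires when nothing qualifies.
    // --- End of editorial aside. --------------------------------------------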
+    if (size() == 0 || isPossiblyEmptyImpl())
+      return;
+
+    assert(
+        llvm::none_of(States, [](const State &S) { return S.isVisited(); }) &&
+        "None of the blocks should be 'visited' before the analysis");
+
+    // For our task, both backward and forward approaches suit well.
+    // However, in order to report better diagnostics, we decided to go with
+    // backward analysis.
+    //
+    // Let's consider the following CFG and how forward and backward analyses
+    // will work for it.
+    //
+    //                 FORWARD:              |                BACKWARD:
+    //                    #1                 |                   #1
+    //               +---------+             |              +-----------+
+    //               |   if    |             |              |MaybeCalled|
+    //               +---------+             |              +-----------+
+    //               |NotCalled|             |              |    if     |
+    //               +---------+             |              +-----------+
+    //                /       \              |               /         \
+    //      #2       /         \    #3       |     #2       /           \    #3
+    // +----------------+  +---------+       | +----------------+  +---------+
+    // |     foo()      |  |   ...   |       | |DefinitelyCalled|  |NotCalled|
+    // +----------------+  +---------+       | +----------------+  +---------+
+    // |DefinitelyCalled|  |NotCalled|       | |     foo()      |  |   ...   |
+    // +----------------+  +---------+       | +----------------+  +---------+
+    //             \          /              |             \          /
+    //              \   #4   /               |              \   #4   /
+    //            +-----------+              |              +---------+
+    //            |    ...    |              |              |NotCalled|
+    //            +-----------+              |              +---------+
+    //            |MaybeCalled|              |              |   ...   |
+    //            +-----------+              |              +---------+
+    //
+    // The most natural way to report the lacking call in block #3 would be
+    // to say that the false branch of the if statement in block #1 doesn't
+    // have a call. And while with the forward approach we'll need to find a
+    // least common ancestor or something like that to find the 'if' to
+    // blame, the backward analysis gives it to us out of the box.
+    BackwardDataflowWorklist Worklist(FunctionCFG, AC);
+
+    // Let's visit EXIT.
+    const CFGBlock *Exit = &FunctionCFG.getExit();
+    assignState(Exit, State(size(), ParameterStatus::NotCalled));
+    Worklist.enqueuePredecessors(Exit);
+
+    while (const CFGBlock *BB = Worklist.dequeue()) {
+      assert(BB && "Worklist should filter out null blocks");
+      check(BB);
+      assert(CurrentState.isVisited() &&
+             "After the check, basic block should be visited");
+
+      // Traverse the next blocks of the backward walk (the CFG predecessors)
+      // if the status of this block has changed.
+      if (assignState(BB, CurrentState)) {
+        Worklist.enqueuePredecessors(BB);
+      }
+    }
+
+    // Check that we have all tracked parameters at the last block.
+    // As we are performing a backward version of the analysis,
+    // it should be the ENTRY block.
+    checkEntry(&FunctionCFG.getEntry());
+  }
+
+  void check(const CFGBlock *BB) {
+    // We start with a state 'inherited' from all the successors.
+    CurrentState = joinSuccessors(BB);
+    assert(CurrentState.isVisited() &&
+           "Shouldn't start with a 'not visited' state");
+
+    // This is the 'exit' situation, broken promises are probably OK
+    // in such scenarios.
+    if (BB->hasNoReturnElement()) {
+      markNoReturn();
+      // This block still can have calls (even multiple calls) and
+      // for this reason there is no early return here.
+    }
+
+    // We use a backward dataflow propagation and for this reason we
+    // should traverse basic blocks bottom-up.
+    for (const CFGElement &Element : llvm::reverse(*BB)) {
+      if (std::optional<CFGStmt> S = Element.getAs<CFGStmt>()) {
+        check(S->getStmt());
+      }
+    }
+  }
+  void check(const Stmt *S) { Visit(S); }
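  // --- Editorial aside (worked trace, not part of the upstream diff): using
  // the bit encodings from ParameterStatus::Kind on the backward diagram
  // above: starting from EXIT, block #4 becomes NotCalled (0x5); #2 contains
  // the call, so it turns into DefinitelyCalled (0x3); #3 stays NotCalled
  // (0x5); finally #1 joins its successors, 0x3 | 0x5 == 0x7, i.e.
  // MaybeCalled, which checkEntry() below reports as a call missing on some
  // path.
  // --- End of editorial aside. ----------------------------------------------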
+
+    const State &EntryStatus = getState(Entry);
+    llvm::BitVector NotCalledOnEveryPath(size(), false);
+    llvm::BitVector NotUsedOnEveryPath(size(), false);
+
+    // Check if there are no calls of the marked parameter at all.
+    for (const auto &IndexedStatus : llvm::enumerate(EntryStatus)) {
+      const ParmVarDecl *Parameter = getParameter(IndexedStatus.index());
+
+      switch (IndexedStatus.value().getKind()) {
+      case ParameterStatus::NotCalled:
+        // If there were places where this parameter escapes (aka is used),
+        // we can provide a more useful diagnostic by pointing at the exact
+        // branches where it is not even mentioned.
+        if (!hasEverEscaped(IndexedStatus.index())) {
+          // This parameter was not used at all, so we should report the
+          // most generic version of the warning.
+          if (isCaptured(Parameter)) {
+            // We want to specify that it was captured by the block.
+            Handler.handleCapturedNeverCalled(Parameter, AC.getDecl(),
+                                              !isExplicitlyMarked(Parameter));
+          } else {
+            Handler.handleNeverCalled(Parameter,
+                                      !isExplicitlyMarked(Parameter));
+          }
+        } else {
+          // Mark it as 'interesting' to figure out which paths don't even
+          // have escapes.
+          NotUsedOnEveryPath[IndexedStatus.index()] = true;
+        }
+
+        break;
+      case ParameterStatus::MaybeCalled:
+        // If we have 'maybe called' at this point, we have an error
+        // that there is at least one path where this parameter
+        // is not called.
+        //
+        // However, reporting the warning with only that information can be
+        // too vague for the users. For this reason, we mark such parameters
+        // as "interesting" for further analysis.
+        NotCalledOnEveryPath[IndexedStatus.index()] = true;
+        break;
+      default:
+        break;
+      }
+    }
+
+    // Early exit if we don't have parameters for extra analysis...
+    if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none() &&
+        // ... or if we've seen variables with cleanup functions.
+        // We can't reason that we've seen every path in this case,
+        // and thus abandon reporting any warnings that imply that.
+        !FunctionHasCleanupVars)
+      return;
+
+    // We are looking for a pair of blocks A, B so that the following is true:
+    //   * A is a predecessor of B
+    //   * B is marked as NotCalled
+    //   * A has at least one successor marked as either
+    //     Escaped or DefinitelyCalled
+    //
+    // In that situation, it is guaranteed that B is the first block of the
+    // path where the user doesn't call or use the parameter in question.
+    //
+    // For this reason, branch A -> B can be used for reporting.
+    //
+    // This part of the algorithm is guarded by a condition that the function
+    // does indeed have a violation of the contract. For this reason, we can
+    // spend more time finding a good spot to place the warning.
+    //
+    // The following algorithm has the worst case complexity of O(V + E),
+    // where V is the number of basic blocks in FunctionCFG,
+    //       E is the number of edges between blocks in FunctionCFG.
+    for (const CFGBlock *BB : FunctionCFG) {
+      if (!BB)
+        continue;
+
+      const State &BlockState = getState(BB);
+
+      for (unsigned Index : llvm::seq(0u, size())) {
+        // We don't want to use 'isLosingCall' here because we want to report
+        // the following situation as well:
+        //
+        //           MaybeCalled
+        //            |  ...  |
+        //    MaybeCalled   NotCalled
+        //
+        // Even though the successor is not 'DefinitelyCalled', it is still
+        // useful to report it: it is still a path without a call.
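+        //
+        // In source terms, that could correspond to (hypothetical sketch):
+        //
+        //   if (errorOccurred)
+        //     return;    // this branch never calls 'callback'
+        //   callback();  // the join is therefore only 'MaybeCalled'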
+ if (NotCalledOnEveryPath[Index] && + BlockState.getKindFor(Index) == ParameterStatus::MaybeCalled) { + + findAndReportNotCalledBranches(BB, Index); + } else if (NotUsedOnEveryPath[Index] && + isLosingEscape(BlockState, BB, Index)) { + + findAndReportNotCalledBranches(BB, Index, /* IsEscape = */ true); + } + } + } + } + + /// Check potential call of a tracked parameter. + void checkDirectCall(const CallExpr *Call) { + if (auto Index = getIndexOfCallee(Call)) { + processCallFor(*Index, Call); + } + } + + /// Check the call expression for being an indirect call of one of the tracked + /// parameters. It is indirect in the sense that this particular call is not + /// calling the parameter itself, but rather uses it as the argument. + template <class CallLikeExpr> + void checkIndirectCall(const CallLikeExpr *CallOrMessage) { + // CallExpr::arguments does not interact nicely with llvm::enumerate. + llvm::ArrayRef<const Expr *> Arguments = + llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); + + // Let's check if any of the call arguments is a point of interest. + for (const auto &Argument : llvm::enumerate(Arguments)) { + if (auto Index = getIndexOfExpression(Argument.value())) { + if (shouldBeCalledOnce(CallOrMessage, Argument.index())) { + // If the corresponding parameter is marked as 'called_once' we should + // consider it as a call. + processCallFor(*Index, CallOrMessage); + } else { + // Otherwise, we mark this parameter as escaped, which can be + // interpreted both as called or not called depending on the context. + processEscapeFor(*Index); + } + // Otherwise, let's keep the state as it is. + } + } + } + + /// Process call of the parameter with the given index + void processCallFor(unsigned Index, const Expr *Call) { + ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); + + if (CurrentParamStatus.seenAnyCalls()) { + + // At this point, this parameter was called, so this is a second call. + const ParmVarDecl *Parameter = getParameter(Index); + Handler.handleDoubleCall( + Parameter, &CurrentState.getCallFor(Index), Call, + !isExplicitlyMarked(Parameter), + // We are sure that the second call is definitely + // going to happen if the status is 'DefinitelyCalled'. + CurrentParamStatus.getKind() == ParameterStatus::DefinitelyCalled); + + // Mark this parameter as already reported on, so we don't repeat + // warnings. + CurrentParamStatus = ParameterStatus::Reported; + + } else if (CurrentParamStatus.getKind() != ParameterStatus::Reported) { + // If we didn't report anything yet, let's mark this parameter + // as called. + ParameterStatus Called(ParameterStatus::DefinitelyCalled, Call); + CurrentParamStatus = Called; + } + } + + /// Process escape of the parameter with the given index + void processEscapeFor(unsigned Index) { + ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); + + // Escape overrides whatever error we think happened. 
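+    // For illustration, a typical escape looks like this (hypothetical
+    // snippet, not from this checker):
+    //
+    //   [self.pendingHandlers addObject:completionHandler];
+    //
+    // After such a use we can no longer reason about whether the parameter
+    // gets called, so an escape downgrades error statuses to 'Escaped'.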
+    if (CurrentParamStatus.isErrorStatus()) {
+      CurrentParamStatus = ParameterStatus::Escaped;
+    }
+  }
+
+  void findAndReportNotCalledBranches(const CFGBlock *Parent, unsigned Index,
+                                      bool IsEscape = false) {
+    for (const CFGBlock *Succ : Parent->succs()) {
+      if (!Succ)
+        continue;
+
+      if (getState(Succ).getKindFor(Index) == ParameterStatus::NotCalled) {
+        assert(Parent->succ_size() >= 2 &&
+               "Block should have at least two successors at this point");
+        if (auto Clarification = NotCalledClarifier::clarify(Parent, Succ)) {
+          const ParmVarDecl *Parameter = getParameter(Index);
+          Handler.handleNeverCalled(
+              Parameter, AC.getDecl(), Clarification->Location,
+              Clarification->Reason, !IsEscape, !isExplicitlyMarked(Parameter));
+        }
+      }
+    }
+  }
+
+  //===----------------------------------------------------------------------===//
+  //                   Predicate functions to check parameters
+  //===----------------------------------------------------------------------===//
+
+  /// Return true if the parameter is explicitly marked as 'called_once'.
+  static bool isExplicitlyMarked(const ParmVarDecl *Parameter) {
+    return Parameter->hasAttr<CalledOnceAttr>();
+  }
+
+  /// Return true if the given name matches conventional patterns.
+  static bool isConventional(llvm::StringRef Name) {
+    return llvm::count(CONVENTIONAL_NAMES, Name) != 0;
+  }
+
+  /// Return true if the given name has one of the conventional suffixes.
+  static bool hasConventionalSuffix(llvm::StringRef Name) {
+    return llvm::any_of(CONVENTIONAL_SUFFIXES, [Name](llvm::StringRef Suffix) {
+      return Name.endswith(Suffix);
+    });
+  }
+
+  /// Return true if the given type can be used for conventional parameters.
+  static bool isConventional(QualType Ty) {
+    if (!Ty->isBlockPointerType()) {
+      return false;
+    }
+
+    QualType BlockType = Ty->castAs<BlockPointerType>()->getPointeeType();
+    // Completion handlers should have a block type with void return type.
+    return BlockType->castAs<FunctionType>()->getReturnType()->isVoidType();
+  }
+
+  /// Return true if the only parameter of the function is conventional.
+  static bool isOnlyParameterConventional(const FunctionDecl *Function) {
+    IdentifierInfo *II = Function->getIdentifier();
+    return Function->getNumParams() == 1 && II &&
+           hasConventionalSuffix(II->getName());
+  }
+
+  /// Return true/false depending on whether the 'swift_async' attribute
+  /// states that the given parameter is conventionally called once.
+  /// Return std::nullopt if the given declaration doesn't have the
+  /// 'swift_async' attribute.
+  static std::optional<bool> isConventionalSwiftAsync(const Decl *D,
+                                                      unsigned ParamIndex) {
+    if (const SwiftAsyncAttr *A = D->getAttr<SwiftAsyncAttr>()) {
+      if (A->getKind() == SwiftAsyncAttr::None) {
+        return false;
+      }
+
+      return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex;
+    }
+    return std::nullopt;
+  }
+
+  /// Return true if the specified selector represents an init method.
+  static bool isInitMethod(Selector MethodSelector) {
+    return MethodSelector.getMethodFamily() == OMF_init;
+  }
+
+  /// Return true if the specified selector piece matches conventions.
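+  /// For instance (illustrative): in 'dataTaskWithURL:completionHandler:',
+  /// the second piece 'completionHandler' would match, provided its type is
+  /// a void-returning block.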
+ static bool isConventionalSelectorPiece(Selector MethodSelector, + unsigned PieceIndex, + QualType PieceType) { + if (!isConventional(PieceType) || isInitMethod(MethodSelector)) { + return false; + } + + if (MethodSelector.getNumArgs() == 1) { + assert(PieceIndex == 0); + return hasConventionalSuffix(MethodSelector.getNameForSlot(0)); + } + + llvm::StringRef PieceName = MethodSelector.getNameForSlot(PieceIndex); + return isConventional(PieceName) || hasConventionalSuffix(PieceName); + } + + bool shouldBeCalledOnce(const ParmVarDecl *Parameter) const { + return isExplicitlyMarked(Parameter) || + (CheckConventionalParameters && + (isConventional(Parameter->getName()) || + hasConventionalSuffix(Parameter->getName())) && + isConventional(Parameter->getType())); + } + + bool shouldBeCalledOnce(const DeclContext *ParamContext, + const ParmVarDecl *Param) { + unsigned ParamIndex = Param->getFunctionScopeIndex(); + if (const auto *Function = dyn_cast<FunctionDecl>(ParamContext)) { + return shouldBeCalledOnce(Function, ParamIndex); + } + if (const auto *Method = dyn_cast<ObjCMethodDecl>(ParamContext)) { + return shouldBeCalledOnce(Method, ParamIndex); + } + return shouldBeCalledOnce(Param); + } + + bool shouldBeCalledOnce(const BlockDecl *Block, unsigned ParamIndex) const { + return shouldBeCalledOnce(Block->getParamDecl(ParamIndex)); + } + + bool shouldBeCalledOnce(const FunctionDecl *Function, + unsigned ParamIndex) const { + if (ParamIndex >= Function->getNumParams()) { + return false; + } + // 'swift_async' goes first and overrides anything else. + if (auto ConventionalAsync = + isConventionalSwiftAsync(Function, ParamIndex)) { + return *ConventionalAsync; + } + + return shouldBeCalledOnce(Function->getParamDecl(ParamIndex)) || + (CheckConventionalParameters && + isOnlyParameterConventional(Function)); + } + + bool shouldBeCalledOnce(const ObjCMethodDecl *Method, + unsigned ParamIndex) const { + Selector MethodSelector = Method->getSelector(); + if (ParamIndex >= MethodSelector.getNumArgs()) { + return false; + } + + // 'swift_async' goes first and overrides anything else. 
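+    // For reference, a hedged sketch of a declaration carrying the attribute
+    // (the exact spelling below is illustrative):
+    //
+    //   - (void)runWithCompletion:(void (^)(void))handler
+    //       __attribute__((swift_async(not_swift_private, 1)));
+    //
+    // Here the 1-based index selects 'handler' as the completion handler.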
+    if (auto ConventionalAsync = isConventionalSwiftAsync(Method, ParamIndex)) {
+      return *ConventionalAsync;
+    }
+
+    const ParmVarDecl *Parameter = Method->getParamDecl(ParamIndex);
+    return shouldBeCalledOnce(Parameter) ||
+           (CheckConventionalParameters &&
+            isConventionalSelectorPiece(MethodSelector, ParamIndex,
+                                        Parameter->getType()));
+  }
+
+  bool shouldBeCalledOnce(const CallExpr *Call, unsigned ParamIndex) const {
+    const FunctionDecl *Function = Call->getDirectCallee();
+    return Function && shouldBeCalledOnce(Function, ParamIndex);
+  }
+
+  bool shouldBeCalledOnce(const ObjCMessageExpr *Message,
+                          unsigned ParamIndex) const {
+    const ObjCMethodDecl *Method = Message->getMethodDecl();
+    return Method && ParamIndex < Method->param_size() &&
+           shouldBeCalledOnce(Method, ParamIndex);
+  }
+
+  //===----------------------------------------------------------------------===//
+  //                              Utility methods
+  //===----------------------------------------------------------------------===//
+
+  bool isCaptured(const ParmVarDecl *Parameter) const {
+    if (const BlockDecl *Block = dyn_cast<BlockDecl>(AC.getDecl())) {
+      return Block->capturesVariable(Parameter);
+    }
+    return false;
+  }
+
+  // Return a call site where the block is called exactly once, or null
+  // otherwise.
+  const Expr *getBlockGuaranteedCallSite(const BlockExpr *Block) const {
+    ParentMap &PM = AC.getParentMap();
+
+    // We don't want to track the block through assignments and so on; instead
+    // we simply look at how the block is used and, if it's used directly in a
+    // call, we decide based on what that call is.
+    //
+    // In order to do this, we go up the parents of the block looking for
+    // a call or a message expression. These might not be immediate parents
+    // of the actual block expression due to casts and parens, so we skip them.
+    for (const Stmt *Prev = Block, *Current = PM.getParent(Block);
+         Current != nullptr; Prev = Current, Current = PM.getParent(Current)) {
+      // Skip no-op (for our case) operations.
+      if (isa<CastExpr>(Current) || isa<ParenExpr>(Current))
+        continue;
+
+      // At this point, Prev represents our block as an immediate child of the
+      // call.
+      if (const auto *Call = dyn_cast<CallExpr>(Current)) {
+        // It might be the call of the Block itself...
+        if (Call->getCallee() == Prev)
+          return Call;
+
+        // ...or it can be an indirect call of the block.
+        return shouldBlockArgumentBeCalledOnce(Call, Prev) ? Call : nullptr;
+      }
+      if (const auto *Message = dyn_cast<ObjCMessageExpr>(Current)) {
+        return shouldBlockArgumentBeCalledOnce(Message, Prev) ? Message
+                                                              : nullptr;
+      }
+
+      break;
+    }
+
+    return nullptr;
+  }
+
+  template <class CallLikeExpr>
+  bool shouldBlockArgumentBeCalledOnce(const CallLikeExpr *CallOrMessage,
+                                       const Stmt *BlockArgument) const {
+    // CallExpr::arguments does not interact nicely with llvm::enumerate.
+    llvm::ArrayRef<const Expr *> Arguments =
+        llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs());
+
+    for (const auto &Argument : llvm::enumerate(Arguments)) {
+      if (Argument.value() == BlockArgument) {
+        return shouldBlockArgumentBeCalledOnce(CallOrMessage, Argument.index());
+      }
+    }
+
+    return false;
+  }
+
+  bool shouldBlockArgumentBeCalledOnce(const CallExpr *Call,
+                                       unsigned ParamIndex) const {
+    const FunctionDecl *Function = Call->getDirectCallee();
+    return shouldBlockArgumentBeCalledOnce(Function, ParamIndex) ||
+           shouldBeCalledOnce(Call, ParamIndex);
+  }
+
+  bool shouldBlockArgumentBeCalledOnce(const ObjCMessageExpr *Message,
+                                       unsigned ParamIndex) const {
+    // At the moment, we don't have any Obj-C methods we want to specifically
+    // check in here.
+    return shouldBeCalledOnce(Message, ParamIndex);
+  }
+
+  static bool shouldBlockArgumentBeCalledOnce(const FunctionDecl *Function,
+                                              unsigned ParamIndex) {
+    // There is a list of important API functions that, while not following
+    // conventions nor being directly annotated, still guarantee that the
+    // callback parameter will be called exactly once.
+    //
+    // Here we check if this is the case.
+    return Function &&
+           llvm::any_of(KNOWN_CALLED_ONCE_PARAMETERS,
+                        [Function, ParamIndex](
+                            const KnownCalledOnceParameter &Reference) {
+                          return Reference.FunctionName ==
+                                     Function->getName() &&
+                                 Reference.ParamIndex == ParamIndex;
+                        });
+  }
+
+  /// Return true if the analyzed function is actually a default implementation
+  /// of the method that has to be overridden.
+  ///
+  /// These functions can have tracked parameters, but wouldn't call them
+  /// because they are not designed to perform any meaningful actions.
+  ///
+  /// There are a couple of flavors of such default implementations:
+  ///   1. Empty methods or methods with a single return statement
+  ///   2. Methods that have one block with a call to a no-return function
+  ///   3. Methods with only assertion-like operations
+  bool isPossiblyEmptyImpl() const {
+    if (!isa<ObjCMethodDecl>(AC.getDecl())) {
+      // We care only about functions that are not supposed to be called:
+      // only methods can be overridden.
+      return false;
+    }
+
+    // Case #1 (without return statements)
+    if (FunctionCFG.size() == 2) {
+      // Method has only two blocks: ENTRY and EXIT.
+      // This is equivalent to an empty function.
+      return true;
+    }
+
+    // Case #2
+    if (FunctionCFG.size() == 3) {
+      const CFGBlock &Entry = FunctionCFG.getEntry();
+      if (Entry.succ_empty()) {
+        return false;
+      }
+
+      const CFGBlock *OnlyBlock = *Entry.succ_begin();
+      // Method has only one block, let's see if it has a no-return
+      // element.
+      if (OnlyBlock && OnlyBlock->hasNoReturnElement()) {
+        return true;
+      }
+      // Fallthrough, CFGs with only one block can fall into #1 and #3 as well.
+    }
+
+    // Cases #1 (return statements) and #3.
+    //
+    // It is hard to detect that something is an assertion or came
+    // from an assertion. Here we use a simple heuristic:
+    //
+    //   - If it came from a macro, it can be an assertion.
+    //
+    // Additionally, we can't assume a number of basic blocks or the CFG's
+    // structure because assertions might include loops and conditions.
+    return llvm::all_of(FunctionCFG, [](const CFGBlock *BB) {
+      if (!BB) {
+        // Unreachable blocks are totally fine.
+        return true;
+      }
+
+      // Return statements can have sub-expressions that are represented as
+      // separate statements of a basic block. We should allow this.
+ // This parent map will be initialized with a parent tree for all + // subexpressions of the block's return statement (if it has one). + std::unique_ptr<ParentMap> ReturnChildren; + + return llvm::all_of( + llvm::reverse(*BB), // we should start with return statements, if we + // have any, i.e. from the bottom of the block + [&ReturnChildren](const CFGElement &Element) { + if (std::optional<CFGStmt> S = Element.getAs<CFGStmt>()) { + const Stmt *SuspiciousStmt = S->getStmt(); + + if (isa<ReturnStmt>(SuspiciousStmt)) { + // Let's initialize this structure to test whether + // some further statement is a part of this return. + ReturnChildren = std::make_unique<ParentMap>( + const_cast<Stmt *>(SuspiciousStmt)); + // Return statements are allowed as part of #1. + return true; + } + + return SuspiciousStmt->getBeginLoc().isMacroID() || + (ReturnChildren && + ReturnChildren->hasParent(SuspiciousStmt)); + } + return true; + }); + }); + } + + /// Check if parameter with the given index has ever escaped. + bool hasEverEscaped(unsigned Index) const { + return llvm::any_of(States, [Index](const State &StateForOneBB) { + return StateForOneBB.getKindFor(Index) == ParameterStatus::Escaped; + }); + } + + /// Return status stored for the given basic block. + /// \{ + State &getState(const CFGBlock *BB) { + assert(BB); + return States[BB->getBlockID()]; + } + const State &getState(const CFGBlock *BB) const { + assert(BB); + return States[BB->getBlockID()]; + } + /// \} + + /// Assign status to the given basic block. + /// + /// Returns true when the stored status changed. + bool assignState(const CFGBlock *BB, const State &ToAssign) { + State &Current = getState(BB); + if (Current == ToAssign) { + return false; + } + + Current = ToAssign; + return true; + } + + /// Join all incoming statuses for the given basic block. + State joinSuccessors(const CFGBlock *BB) const { + auto Succs = + llvm::make_filter_range(BB->succs(), [this](const CFGBlock *Succ) { + return Succ && this->getState(Succ).isVisited(); + }); + // We came to this block from somewhere after all. + assert(!Succs.empty() && + "Basic block should have at least one visited successor"); + + State Result = getState(*Succs.begin()); + + for (const CFGBlock *Succ : llvm::drop_begin(Succs, 1)) { + Result.join(getState(Succ)); + } + + if (const Expr *Condition = getCondition(BB->getTerminatorStmt())) { + handleConditional(BB, Condition, Result); + } + + return Result; + } + + void handleConditional(const CFGBlock *BB, const Expr *Condition, + State &ToAlter) const { + handleParameterCheck(BB, Condition, ToAlter); + if (SuppressOnConventionalErrorPaths) { + handleConventionalCheck(BB, Condition, ToAlter); + } + } + + void handleParameterCheck(const CFGBlock *BB, const Expr *Condition, + State &ToAlter) const { + // In this function, we try to deal with the following pattern: + // + // if (parameter) + // parameter(...); + // + // It's not good to show a warning here because clearly 'parameter' + // couldn't and shouldn't be called on the 'else' path. + // + // Let's check if this if statement has a check involving one of + // the tracked parameters. + if (const ParmVarDecl *Parameter = findReferencedParmVarDecl( + Condition, + /* ShouldRetrieveFromComparisons = */ true)) { + if (const auto Index = getIndex(*Parameter)) { + ParameterStatus &CurrentStatus = ToAlter.getStatusFor(*Index); + + // We don't want to deep dive into semantics of the check and + // figure out if that check was for null or something else. 
+        // We simply trust the user that they know what they are doing.
+        //
+        // For this reason, in the following loop we look for the
+        // best-looking option.
+        for (const CFGBlock *Succ : BB->succs()) {
+          if (!Succ)
+            continue;
+
+          const ParameterStatus &StatusInSucc =
+              getState(Succ).getStatusFor(*Index);
+
+          if (StatusInSucc.isErrorStatus()) {
+            continue;
+          }
+
+          // Let's use this status instead.
+          CurrentStatus = StatusInSucc;
+
+          if (StatusInSucc.getKind() == ParameterStatus::DefinitelyCalled) {
+            // This is the best option to have and we already found it.
+            break;
+          }
+
+          // If we found 'Escaped' first, we still might find 'DefinitelyCalled'
+          // on the other branch. And we prefer the latter.
+        }
+      }
+    }
+  }
+
+  void handleConventionalCheck(const CFGBlock *BB, const Expr *Condition,
+                               State &ToAlter) const {
+    // Even when the analysis is technically correct, it is a widespread
+    // pattern not to call completion handlers in some scenarios. These
+    // usually have typical conditional names, such as 'error' or 'cancel'.
+    if (!mentionsAnyOfConventionalNames(Condition)) {
+      return;
+    }
+
+    for (const auto &IndexedStatus : llvm::enumerate(ToAlter)) {
+      const ParmVarDecl *Parameter = getParameter(IndexedStatus.index());
+      // Conventions do not apply to explicitly marked parameters.
+      if (isExplicitlyMarked(Parameter)) {
+        continue;
+      }
+
+      ParameterStatus &CurrentStatus = IndexedStatus.value();
+      // If we find that the user uses the callback on one of the branches
+      // but doesn't on the other, we believe that they know what they are
+      // doing and trust them.
+      //
+      // There are two possible scenarios for that:
+      //   1. Current status is 'MaybeCalled' and one of the branches is
+      //      'DefinitelyCalled'
+      //   2. Current status is 'NotCalled' and one of the branches is
+      //      'Escaped'
+      if (isLosingCall(ToAlter, BB, IndexedStatus.index()) ||
+          isLosingEscape(ToAlter, BB, IndexedStatus.index())) {
+        CurrentStatus = ParameterStatus::Escaped;
+      }
+    }
+  }
+
+  bool isLosingCall(const State &StateAfterJoin, const CFGBlock *JoinBlock,
+                    unsigned ParameterIndex) const {
+    // Let's check if the block represents the DefinitelyCalled -> MaybeCalled
+    // transition.
+    return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex,
+                        ParameterStatus::MaybeCalled,
+                        ParameterStatus::DefinitelyCalled);
+  }
+
+  bool isLosingEscape(const State &StateAfterJoin, const CFGBlock *JoinBlock,
+                      unsigned ParameterIndex) const {
+    // Let's check if the block represents the Escaped -> NotCalled transition.
+    return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex,
+                        ParameterStatus::NotCalled, ParameterStatus::Escaped);
+  }
+
+  bool isLosingJoin(const State &StateAfterJoin, const CFGBlock *JoinBlock,
+                    unsigned ParameterIndex, ParameterStatus::Kind AfterJoin,
+                    ParameterStatus::Kind BeforeJoin) const {
+    assert(!ParameterStatus::isErrorStatus(BeforeJoin) &&
+           ParameterStatus::isErrorStatus(AfterJoin) &&
+           "It's not a losing join if statuses do not represent "
+           "correct-to-error transition");
+
+    const ParameterStatus &CurrentStatus =
+        StateAfterJoin.getStatusFor(ParameterIndex);
+
+    return CurrentStatus.getKind() == AfterJoin &&
+           anySuccessorHasStatus(JoinBlock, ParameterIndex, BeforeJoin);
+  }
+
+  /// Return true if any of the successors of the given basic block has
+  /// a specified status for the given parameter.
+  bool anySuccessorHasStatus(const CFGBlock *Parent, unsigned ParameterIndex,
+                             ParameterStatus::Kind ToFind) const {
+    return llvm::any_of(
+        Parent->succs(), [this, ParameterIndex, ToFind](const CFGBlock *Succ) {
+          return Succ && getState(Succ).getKindFor(ParameterIndex) == ToFind;
+        });
+  }
+
+  /// Check the given expression that was discovered to escape.
+  void checkEscapee(const Expr *E) {
+    if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) {
+      checkEscapee(*Parameter);
+    }
+  }
+
+  /// Check the given parameter that was discovered to escape.
+  void checkEscapee(const ParmVarDecl &Parameter) {
+    if (auto Index = getIndex(Parameter)) {
+      processEscapeFor(*Index);
+    }
+  }
+
+  /// Mark all parameters in the current state as 'no-return'.
+  void markNoReturn() {
+    for (ParameterStatus &PS : CurrentState) {
+      PS = ParameterStatus::NoReturn;
+    }
+  }
+
+  /// Check if the given assignment represents suppression and act on it.
+  void checkSuppression(const BinaryOperator *Assignment) {
+    // Suppression has the following form:
+    //   parameter = 0;
+    // 0 can be of any form (NULL, nil, etc.)
+    if (auto Index = getIndexOfExpression(Assignment->getLHS())) {
+
+      // We don't care what is written in the RHS, it could be whatever
+      // we can interpret as 0.
+      if (auto Constant =
+              Assignment->getRHS()->IgnoreParenCasts()->getIntegerConstantExpr(
+                  AC.getASTContext())) {
+
+        ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index);
+
+        if (0 == *Constant && CurrentParamStatus.seenAnyCalls()) {
+          // Even though this suppression mechanism is introduced to tackle
+          // false positives for multiple calls, the fact that the user has
+          // to use suppression can also tell us that we couldn't figure out
+          // how different paths cancel each other out. And if that is true,
+          // we will most certainly have false positives about parameters not
+          // being called on certain paths.
+          //
+          // For this reason, we abandon tracking this parameter altogether.
+          CurrentParamStatus = ParameterStatus::Reported;
+        }
+      }
+    }
+  }
+
+public:
+  //===----------------------------------------------------------------------===//
+  //                           Tree traversal methods
+  //===----------------------------------------------------------------------===//
+
+  void VisitCallExpr(const CallExpr *Call) {
+    // This call might be a direct call, i.e. a parameter call...
+    checkDirectCall(Call);
+    // ... or an indirect call, i.e. when the parameter is an argument.
+    checkIndirectCall(Call);
+  }
+
+  void VisitObjCMessageExpr(const ObjCMessageExpr *Message) {
+    // The most common situation that we are defending against here is
+    // copying a tracked parameter.
+    if (const Expr *Receiver = Message->getInstanceReceiver()) {
+      checkEscapee(Receiver);
+    }
+    // Message expressions, unlike calls, cannot be direct.
+    checkIndirectCall(Message);
+  }
+
+  void VisitBlockExpr(const BlockExpr *Block) {
+    // Block expressions are tricky. It is a very common practice to capture
+    // completion handlers in blocks and use them there.
+    // For this reason, it is important to analyze blocks and report warnings
+    // for completion handler misuse in blocks.
+    //
+    // However, it can be quite difficult to track how the block itself is
+    // being used. A fully precise analysis of that would be similar to alias
+    // analysis for completion handlers and can be too heavyweight for a
+    // compile-time diagnostic. Instead, we judge based on the immediate use
+    // of the block.
+    //
+    // Here, we try to find a call expression where we know due to
+    // conventions, annotations, or other reasons that the block is called
+    // once and only once.
+    const Expr *CalledOnceCallSite = getBlockGuaranteedCallSite(Block);
+
+    // We need to report this information to the handler because in the
+    // situation when we know that the block is called exactly once, we can be
+    // stricter in terms of reported diagnostics.
+    if (CalledOnceCallSite) {
+      Handler.handleBlockThatIsGuaranteedToBeCalledOnce(Block->getBlockDecl());
+    } else {
+      Handler.handleBlockWithNoGuarantees(Block->getBlockDecl());
+    }
+
+    for (const auto &Capture : Block->getBlockDecl()->captures()) {
+      if (const auto *Param = dyn_cast<ParmVarDecl>(Capture.getVariable())) {
+        if (auto Index = getIndex(*Param)) {
+          if (CalledOnceCallSite) {
+            // The call site of a block can be considered a call site of the
+            // captured parameter we track.
+            processCallFor(*Index, CalledOnceCallSite);
+          } else {
+            // We should still consider this block as an escape for the
+            // parameter if we don't know its call site or the number of
+            // times it can be invoked.
+            processEscapeFor(*Index);
+          }
+        }
+      }
+    }
+  }
+
+  void VisitBinaryOperator(const BinaryOperator *Op) {
+    if (Op->getOpcode() == clang::BO_Assign) {
+      // Let's check if one of the tracked parameters is assigned into
+      // something, and if it is, we don't want to track extra variables, so
+      // we consider it an escapee.
+      checkEscapee(Op->getRHS());
+
+      // Let's check whether this assignment is a suppression.
+      checkSuppression(Op);
+    }
+  }
+
+  void VisitDeclStmt(const DeclStmt *DS) {
+    // Variable initialization is not assignment and should be handled
+    // separately.
+    //
+    // Multiple declarations can be a part of a declaration statement.
+    for (const auto *Declaration : DS->getDeclGroup()) {
+      if (const auto *Var = dyn_cast<VarDecl>(Declaration)) {
+        if (Var->getInit()) {
+          checkEscapee(Var->getInit());
+        }
+
+        if (Var->hasAttr<CleanupAttr>()) {
+          FunctionHasCleanupVars = true;
+        }
+      }
+    }
+  }
+
+  void VisitCStyleCastExpr(const CStyleCastExpr *Cast) {
+    // We consider '(void)parameter' as a manual no-op escape.
+    // It should be used to explicitly tell the analysis that this parameter
+    // is intentionally not called on this path.
+    if (Cast->getType().getCanonicalType()->isVoidType()) {
+      checkEscapee(Cast->getSubExpr());
+    }
+  }
+
+  void VisitObjCAtThrowStmt(const ObjCAtThrowStmt *) {
+    // It is OK not to call marked parameters on exceptional paths.
+    markNoReturn();
+  }
+
+private:
+  unsigned size() const { return TrackedParams.size(); }
+
+  std::optional<unsigned> getIndexOfCallee(const CallExpr *Call) const {
+    return getIndexOfExpression(Call->getCallee());
+  }
+
+  std::optional<unsigned> getIndexOfExpression(const Expr *E) const {
+    if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) {
+      return getIndex(*Parameter);
+    }
+
+    return std::nullopt;
+  }
+
+  std::optional<unsigned> getIndex(const ParmVarDecl &Parameter) const {
+    // The expected number of parameters that we actually track is 1.
+    //
+    // Also, the maximum number of declared parameters is never on the scale
+    // of hundreds of thousands.
+    //
+    // In this setting, linear search seems reasonable and even performs
+    // better than bisection.
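+    //
+    // In effect, the lookup below is equivalent to this sketch:
+    //
+    //   for (unsigned I = 0, E = TrackedParams.size(); I < E; ++I)
+    //     if (TrackedParams[I] == &Parameter)
+    //       return I;
+    //   return std::nullopt;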
+    ParamSizedVector<const ParmVarDecl *>::const_iterator It =
+        llvm::find(TrackedParams, &Parameter);
+
+    if (It != TrackedParams.end()) {
+      return It - TrackedParams.begin();
+    }
+
+    return std::nullopt;
+  }
+
+  const ParmVarDecl *getParameter(unsigned Index) const {
+    assert(Index < TrackedParams.size());
+    return TrackedParams[Index];
+  }
+
+  const CFG &FunctionCFG;
+  AnalysisDeclContext &AC;
+  CalledOnceCheckHandler &Handler;
+  bool CheckConventionalParameters;
+  // As of now, we turn this behavior off. So, we are still going to report
+  // missing calls on paths that look intentional.
+  // Technically such reports are true positives, but they can make some users
+  // grumpy because of the sheer number of warnings.
+  // It can be turned back on if we decide that we want the opposite behavior.
+  bool SuppressOnConventionalErrorPaths = false;
+
+  // The user can annotate variable declarations with cleanup functions, which
+  // essentially impose custom destructor logic on the variable.
+  // It is possible to use them, however, to call tracked parameters on all
+  // exits from the function. For this reason, we track the fact that the
+  // function actually has these.
+  bool FunctionHasCleanupVars = false;
+
+  State CurrentState;
+  ParamSizedVector<const ParmVarDecl *> TrackedParams;
+  CFGSizedVector<State> States;
+};
+
+} // end anonymous namespace
+
+namespace clang {
+void checkCalledOnceParameters(AnalysisDeclContext &AC,
+                               CalledOnceCheckHandler &Handler,
+                               bool CheckConventionalParameters) {
+  CalledOnceChecker::check(AC, Handler, CheckConventionalParameters);
+}
+} // end namespace clang
diff --git a/contrib/llvm-project/clang/lib/Analysis/CloneDetection.cpp b/contrib/llvm-project/clang/lib/Analysis/CloneDetection.cpp
new file mode 100644
index 000000000000..65ac4ad6a5e5
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/CloneDetection.cpp
@@ -0,0 +1,624 @@
+//===--- CloneDetection.cpp - Finds code clones in an AST -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file implements classes for searching and analyzing source code clones.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/CloneDetection.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/DataCollection.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/Path.h"
+
+using namespace clang;
+
+StmtSequence::StmtSequence(const CompoundStmt *Stmt, const Decl *D,
+                           unsigned StartIndex, unsigned EndIndex)
+    : S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) {
+  assert(Stmt && "Stmt must not be a nullptr");
+  assert(StartIndex < EndIndex && "Given array should not be empty");
+  assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt");
+}
+
+StmtSequence::StmtSequence(const Stmt *Stmt, const Decl *D)
+    : S(Stmt), D(D), StartIndex(0), EndIndex(0) {}
+
+StmtSequence::StmtSequence()
+    : S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {}
+
+bool StmtSequence::contains(const StmtSequence &Other) const {
+  // If both sequences reside in different declarations, they can never contain
+  // each other.
+  if (D != Other.D)
+    return false;
+
+  const SourceManager &SM = getASTContext().getSourceManager();
+
+  // Otherwise check if the start and end locations of the current sequence
+  // surround the other sequence.
+  bool StartIsInBounds =
+      SM.isBeforeInTranslationUnit(getBeginLoc(), Other.getBeginLoc()) ||
+      getBeginLoc() == Other.getBeginLoc();
+  if (!StartIsInBounds)
+    return false;
+
+  bool EndIsInBounds =
+      SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) ||
+      Other.getEndLoc() == getEndLoc();
+  return EndIsInBounds;
+}
+
+StmtSequence::iterator StmtSequence::begin() const {
+  if (!holdsSequence()) {
+    return &S;
+  }
+  auto CS = cast<CompoundStmt>(S);
+  return CS->body_begin() + StartIndex;
+}
+
+StmtSequence::iterator StmtSequence::end() const {
+  if (!holdsSequence()) {
+    return reinterpret_cast<StmtSequence::iterator>(&S) + 1;
+  }
+  auto CS = cast<CompoundStmt>(S);
+  return CS->body_begin() + EndIndex;
+}
+
+ASTContext &StmtSequence::getASTContext() const {
+  assert(D);
+  return D->getASTContext();
+}
+
+SourceLocation StmtSequence::getBeginLoc() const {
+  return front()->getBeginLoc();
+}
+
+SourceLocation StmtSequence::getEndLoc() const { return back()->getEndLoc(); }
+
+SourceRange StmtSequence::getSourceRange() const {
+  return SourceRange(getBeginLoc(), getEndLoc());
+}
+
+void CloneDetector::analyzeCodeBody(const Decl *D) {
+  assert(D);
+  assert(D->hasBody());
+
+  Sequences.push_back(StmtSequence(D->getBody(), D));
+}
+
+/// Returns true if and only if \p Seq contains at least one other
+/// sequence in the \p Group.
+static bool containsAnyInGroup(StmtSequence &Seq,
+                               CloneDetector::CloneGroup &Group) {
+  for (StmtSequence &GroupSeq : Group) {
+    if (Seq.contains(GroupSeq))
+      return true;
+  }
+  return false;
+}
+
+/// Returns true if and only if all sequences in \p OtherGroup are
+/// contained by a sequence in \p Group.
+static bool containsGroup(CloneDetector::CloneGroup &Group,
+                          CloneDetector::CloneGroup &OtherGroup) {
+  // If we have fewer sequences in the current group than in the other, we
+  // will never fulfill the requirement for returning true. This is only
+  // possible because we know that a sequence in Group can contain at most
+  // one sequence in OtherGroup.
+  if (Group.size() < OtherGroup.size())
+    return false;
+
+  for (StmtSequence &Stmt : Group) {
+    if (!containsAnyInGroup(Stmt, OtherGroup))
+      return false;
+  }
+  return true;
+}
+
+void OnlyLargestCloneConstraint::constrain(
+    std::vector<CloneDetector::CloneGroup> &Result) {
+  std::vector<unsigned> IndexesToRemove;
+
+  // Compare every group in the result with the rest. If one group contains
+  // another group, we only need to return the bigger group.
+  // Note: This doesn't scale well, so if possible avoid calling any heavy
+  // function from this loop to minimize the performance impact.
+  for (unsigned i = 0; i < Result.size(); ++i) {
+    for (unsigned j = 0; j < Result.size(); ++j) {
+      // Don't compare a group with itself.
+      if (i == j)
+        continue;
+
+      if (containsGroup(Result[j], Result[i])) {
+        IndexesToRemove.push_back(i);
+        break;
+      }
+    }
+  }
+
+  // Erasing a list of indexes from the vector should be done with decreasing
+  // indexes. As IndexesToRemove is constructed with increasing values, we just
+  // reverse iterate over it to get the desired order.
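+  //
+  // For example, removing indexes {1, 3} from [a, b, c, d]: erasing index 3
+  // first drops 'd', then erasing index 1 drops 'b'. Erasing index 1 first
+  // would shift the elements, and index 3 would no longer point at 'd'.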
+  for (unsigned I : llvm::reverse(IndexesToRemove))
+    Result.erase(Result.begin() + I);
+}
+
+bool FilenamePatternConstraint::isAutoGenerated(
+    const CloneDetector::CloneGroup &Group) {
+  if (IgnoredFilesPattern.empty() || Group.empty() ||
+      !IgnoredFilesRegex->isValid())
+    return false;
+
+  for (const StmtSequence &S : Group) {
+    const SourceManager &SM = S.getASTContext().getSourceManager();
+    StringRef Filename = llvm::sys::path::filename(
+        SM.getFilename(S.getContainingDecl()->getLocation()));
+    if (IgnoredFilesRegex->match(Filename))
+      return true;
+  }
+
+  return false;
+}
+
+/// This class defines what a type II code clone is: If it collects the same
+/// data for two statements, then those two statements are considered to be
+/// clones of each other.
+///
+/// All collected data is forwarded to the given data consumer of the type T.
+/// The data consumer class needs to provide a member method with the
+/// signature: update(StringRef Str)
+namespace {
+template <class T>
+class CloneTypeIIStmtDataCollector
+    : public ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>> {
+  ASTContext &Context;
+  /// The data sink to which all data is forwarded.
+  T &DataConsumer;
+
+  template <class Ty> void addData(const Ty &Data) {
+    data_collection::addDataToConsumer(DataConsumer, Data);
+  }
+
+public:
+  CloneTypeIIStmtDataCollector(const Stmt *S, ASTContext &Context,
+                               T &DataConsumer)
+      : Context(Context), DataConsumer(DataConsumer) {
+    this->Visit(S);
+  }
+
+// Define a visit method for each class to collect data and subsequently visit
+// all parent classes. This uses a template so that custom visit methods by us
+// take precedence.
+#define DEF_ADD_DATA(CLASS, CODE)                                             \
+  template <class = void> void Visit##CLASS(const CLASS *S) {                 \
+    CODE;                                                                     \
+    ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S);       \
+  }
+
+#include "clang/AST/StmtDataCollectors.inc"
+
+// Type II clones ignore variable names and literals, so let's skip them.
+#define SKIP(CLASS)                                                           \
+  void Visit##CLASS(const CLASS *S) {                                         \
+    ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S);       \
+  }
+  SKIP(DeclRefExpr)
+  SKIP(MemberExpr)
+  SKIP(IntegerLiteral)
+  SKIP(FloatingLiteral)
+  SKIP(StringLiteral)
+  SKIP(CXXBoolLiteralExpr)
+  SKIP(CharacterLiteral)
+#undef SKIP
+};
+} // end anonymous namespace
+
+static size_t createHash(llvm::MD5 &Hash) {
+  size_t HashCode;
+
+  // Create the final hash code for the current Stmt.
+  llvm::MD5::MD5Result HashResult;
+  Hash.final(HashResult);
+
+  // Copy as much as possible of the generated hash code to the Stmt's hash
+  // code.
+  std::memcpy(&HashCode, &HashResult,
+              std::min(sizeof(HashCode), sizeof(HashResult)));
+
+  return HashCode;
+}
+
+/// Generates and saves a hash code for the given Stmt.
+/// \param S The given Stmt.
+/// \param D The Decl containing S.
+/// \param StmtsByHash Output parameter that will contain the hash codes for
+///                    each StmtSequence in the given Stmt.
+/// \return The hash code of the given Stmt.
+///
+/// If the given Stmt is a CompoundStmt, this method will also generate
+/// hashes for all possible StmtSequences in the children of this Stmt.
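+/// For example, hashing '{ a; b; c; }' would, in addition to each child, save
+/// hashes for the subsequences [a b], [a b c] and [b c]; single statements
+/// are covered by their own hashes (an illustrative summary of the code
+/// below).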
+static size_t
+saveHash(const Stmt *S, const Decl *D,
+         std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
+  llvm::MD5 Hash;
+  ASTContext &Context = D->getASTContext();
+
+  CloneTypeIIStmtDataCollector<llvm::MD5>(S, Context, Hash);
+
+  auto CS = dyn_cast<CompoundStmt>(S);
+  SmallVector<size_t, 8> ChildHashes;
+
+  for (const Stmt *Child : S->children()) {
+    if (Child == nullptr) {
+      ChildHashes.push_back(0);
+      continue;
+    }
+    size_t ChildHash = saveHash(Child, D, StmtsByHash);
+    Hash.update(
+        StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
+    ChildHashes.push_back(ChildHash);
+  }
+
+  if (CS) {
+    // If we're in a CompoundStmt, we hash all possible combinations of child
+    // statements to find clones in those subsequences.
+    // We first go through every possible starting position of a subsequence.
+    for (unsigned Pos = 0; Pos < CS->size(); ++Pos) {
+      // Then we try all possible lengths this subsequence could have and
+      // reuse the same hash object to make sure we only hash every child
+      // hash exactly once.
+      llvm::MD5 Hash;
+      for (unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {
+        // Grab the current child hash and put it into our hash. We do
+        // -1 on the index because we start counting the length at 1.
+        size_t ChildHash = ChildHashes[Pos + Length - 1];
+        Hash.update(
+            StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
+        // If we have at least two elements in our subsequence, we can start
+        // saving it.
+        if (Length > 1) {
+          llvm::MD5 SubHash = Hash;
+          StmtsByHash.push_back(std::make_pair(
+              createHash(SubHash), StmtSequence(CS, D, Pos, Pos + Length)));
+        }
+      }
+    }
+  }
+
+  size_t HashCode = createHash(Hash);
+  StmtsByHash.push_back(std::make_pair(HashCode, StmtSequence(S, D)));
+  return HashCode;
+}
+
+namespace {
+/// Wrapper around FoldingSetNodeID so that it can be used as the template
+/// argument of the StmtDataCollector.
+class FoldingSetNodeIDWrapper {
+
+  llvm::FoldingSetNodeID &FS;
+
+public:
+  FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}
+
+  void update(StringRef Str) { FS.AddString(Str); }
+};
+} // end anonymous namespace
+
+/// Writes the relevant data from all statements and child statements
+/// in the given StmtSequence into the given FoldingSetNodeID.
+static void CollectStmtSequenceData(const StmtSequence &Sequence,
+                                    FoldingSetNodeIDWrapper &OutputData) {
+  for (const Stmt *S : Sequence) {
+    CloneTypeIIStmtDataCollector<FoldingSetNodeIDWrapper>(
+        S, Sequence.getASTContext(), OutputData);
+
+    for (const Stmt *Child : S->children()) {
+      if (!Child)
+        continue;
+
+      CollectStmtSequenceData(StmtSequence(Child, Sequence.getContainingDecl()),
+                              OutputData);
+    }
+  }
+}
+
+/// Returns true if both sequences are clones of each other.
+static bool areSequencesClones(const StmtSequence &LHS,
+                               const StmtSequence &RHS) {
+  // We collect the data from all statements in the sequence as we did before
+  // when generating a hash value for each sequence. But this time we don't
+  // hash the collected data; we compare the whole data set instead. This
+  // prevents any false-positives due to hash code collisions.
+  llvm::FoldingSetNodeID DataLHS, DataRHS;
+  FoldingSetNodeIDWrapper LHSWrapper(DataLHS);
+  FoldingSetNodeIDWrapper RHSWrapper(DataRHS);
+
+  CollectStmtSequenceData(LHS, LHSWrapper);
+  CollectStmtSequenceData(RHS, RHSWrapper);
+
+  return DataLHS == DataRHS;
+}
+
+void RecursiveCloneTypeIIHashConstraint::constrain(
+    std::vector<CloneDetector::CloneGroup> &Sequences) {
+  // FIXME: Maybe we can do this in-place and won't need this additional
+  // vector.
+  std::vector<CloneDetector::CloneGroup> Result;
+
+  for (CloneDetector::CloneGroup &Group : Sequences) {
+    // We assume in the following code that the Group is non-empty, so we
+    // skip all empty groups.
+    if (Group.empty())
+      continue;
+
+    std::vector<std::pair<size_t, StmtSequence>> StmtsByHash;
+
+    // Generate hash codes for all children of S and save them in StmtsByHash.
+    for (const StmtSequence &S : Group) {
+      saveHash(S.front(), S.getContainingDecl(), StmtsByHash);
+    }
+
+    // Sort hash codes in StmtsByHash.
+    llvm::stable_sort(StmtsByHash, llvm::less_first());
+
+    // Check for each StmtSequence if its successor has the same hash value.
+    // We don't check the last StmtSequence as it has no successor.
+    // Note: The 'size - 1' in the condition is safe because we check for an
+    // empty Group vector at the beginning of this function.
+    for (unsigned i = 0; i < StmtsByHash.size() - 1; ++i) {
+      const auto Current = StmtsByHash[i];
+
+      // It's likely that we just found a sequence of StmtSequences that
+      // represent a CloneGroup, so we create a new group and start checking
+      // and adding the StmtSequences in this sequence.
+      CloneDetector::CloneGroup NewGroup;
+
+      size_t PrototypeHash = Current.first;
+
+      for (; i < StmtsByHash.size(); ++i) {
+        // A different hash value means we have reached the end of the
+        // sequence.
+        if (PrototypeHash != StmtsByHash[i].first) {
+          // The current sequence could be the start of a new CloneGroup. So we
+          // decrement i so that we visit it again in the outer loop.
+          // Note: i can never be 0 at this point because we are just comparing
+          // the hash of the Current StmtSequence with itself in the 'if' above.
+          assert(i != 0);
+          --i;
+          break;
+        }
+        // Same hash value means we should add the StmtSequence to the current
+        // group.
+        NewGroup.push_back(StmtsByHash[i].second);
+      }
+
+      // We created a new clone group with matching hash codes, so we move it
+      // to the result vector.
+      Result.push_back(NewGroup);
+    }
+  }
+  // Sequences is the output parameter, so we copy our result into it.
+  Sequences = Result;
+}
+
+void RecursiveCloneTypeIIVerifyConstraint::constrain(
+    std::vector<CloneDetector::CloneGroup> &Sequences) {
+  CloneConstraint::splitCloneGroups(
+      Sequences, [](const StmtSequence &A, const StmtSequence &B) {
+        return areSequencesClones(A, B);
+      });
+}
+
+size_t MinComplexityConstraint::calculateStmtComplexity(
+    const StmtSequence &Seq, std::size_t Limit,
+    const std::string &ParentMacroStack) {
+  if (Seq.empty())
+    return 0;
+
+  size_t Complexity = 1;
+
+  ASTContext &Context = Seq.getASTContext();
+
+  // Look up what macros expanded into the current statement.
+  std::string MacroStack =
+      data_collection::getMacroStack(Seq.getBeginLoc(), Context);
+
+  // First, check if ParentMacroStack is not empty, which means we are
+  // currently dealing with a parent statement that was expanded from a macro.
+  // If this parent statement was expanded from the same macros as this
+  // statement, we reduce the initial complexity of this statement to zero.
+  // This ensures that a group of statements that were generated by a single
+  // macro expansion will only increase the total complexity by one.
+  // Note: This is not the final complexity of this statement as we still
+  //       add the complexity of the child statements to the complexity value.
+  if (!ParentMacroStack.empty() && MacroStack == ParentMacroStack) {
+    Complexity = 0;
+  }
+
+  // Iterate over the Stmts in the StmtSequence and add their complexity values
+  // to the current complexity value.
+  if (Seq.holdsSequence()) {
+    for (const Stmt *S : Seq) {
+      Complexity += calculateStmtComplexity(
+          StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);
+      if (Complexity >= Limit)
+        return Limit;
+    }
+  } else {
+    for (const Stmt *S : Seq.front()->children()) {
+      Complexity += calculateStmtComplexity(
+          StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);
+      if (Complexity >= Limit)
+        return Limit;
+    }
+  }
+  return Complexity;
+}
+
+void MatchingVariablePatternConstraint::constrain(
+    std::vector<CloneDetector::CloneGroup> &CloneGroups) {
+  CloneConstraint::splitCloneGroups(
+      CloneGroups, [](const StmtSequence &A, const StmtSequence &B) {
+        VariablePattern PatternA(A);
+        VariablePattern PatternB(B);
+        return PatternA.countPatternDifferences(PatternB) == 0;
+      });
+}
+
+void CloneConstraint::splitCloneGroups(
+    std::vector<CloneDetector::CloneGroup> &CloneGroups,
+    llvm::function_ref<bool(const StmtSequence &, const StmtSequence &)>
+        Compare) {
+  std::vector<CloneDetector::CloneGroup> Result;
+  for (auto &HashGroup : CloneGroups) {
+    // Contains all indexes in HashGroup that were already added to a
+    // CloneGroup.
+    std::vector<char> Indexes;
+    Indexes.resize(HashGroup.size());
+
+    for (unsigned i = 0; i < HashGroup.size(); ++i) {
+      // Skip indexes that are already part of a CloneGroup.
+      if (Indexes[i])
+        continue;
+
+      // Pick the first unhandled StmtSequence and consider it as the beginning
+      // of a new CloneGroup for now.
+      // Mark i as handled right away; we never iterate back to it, and the
+      // bookkeeping keeps the consistency check below simple.
+      StmtSequence Prototype = HashGroup[i];
+      CloneDetector::CloneGroup PotentialGroup = {Prototype};
+      ++Indexes[i];
+
+      // Check all following StmtSequences for clones.
+      for (unsigned j = i + 1; j < HashGroup.size(); ++j) {
+        // Skip indexes that are already part of a CloneGroup.
+        if (Indexes[j])
+          continue;
+
+        // If a following StmtSequence belongs to our CloneGroup, we add it.
+        const StmtSequence &Candidate = HashGroup[j];
+
+        if (!Compare(Prototype, Candidate))
+          continue;
+
+        PotentialGroup.push_back(Candidate);
+        // Make sure we never visit this StmtSequence again.
+        ++Indexes[j];
+      }
+
+      // Add the group we just built to the result and continue searching for
+      // more groups.
+      Result.push_back(PotentialGroup);
+    }
+
+    assert(llvm::all_of(Indexes, [](char c) { return c == 1; }));
+  }
+  CloneGroups = Result;
+}
+
+void VariablePattern::addVariableOccurence(const VarDecl *VarDecl,
+                                           const Stmt *Mention) {
+  // First check if we already reference this variable.
+  for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
+    if (Variables[KindIndex] == VarDecl) {
+      // If yes, add a new occurrence that points to the existing entry in
+      // the Variables vector.
+      Occurences.emplace_back(KindIndex, Mention);
+      return;
+    }
+  }
+  // If this variable wasn't already referenced, add it to the list of
+  // referenced variables and add an occurrence that points to this new entry.
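+  //
+  // For example (an illustrative sketch of the resulting pattern), both
+  // 'x = y + x' and 'a = b + a' produce the occurrence pattern [0, 1, 0]:
+  // the first variable, a second variable, then the first variable again.
+  // Two sequences with equal patterns differ only in variable names.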
+  Occurences.emplace_back(Variables.size(), Mention);
+  Variables.push_back(VarDecl);
+}
+
+void VariablePattern::addVariables(const Stmt *S) {
+  // Sometimes we get a nullptr (such as from IfStmts which often have nullptr
+  // children). We skip such statements as they don't reference any
+  // variables.
+  if (!S)
+    return;
+
+  // Check if S is a reference to a variable. If yes, add it to the pattern.
+  if (auto D = dyn_cast<DeclRefExpr>(S)) {
+    if (auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl()))
+      addVariableOccurence(VD, D);
+  }
+
+  // Recursively check all children of the given statement.
+  for (const Stmt *Child : S->children()) {
+    addVariables(Child);
+  }
+}
+
+unsigned VariablePattern::countPatternDifferences(
+    const VariablePattern &Other,
+    VariablePattern::SuspiciousClonePair *FirstMismatch) {
+  unsigned NumberOfDifferences = 0;
+
+  assert(Other.Occurences.size() == Occurences.size());
+  for (unsigned i = 0; i < Occurences.size(); ++i) {
+    auto ThisOccurence = Occurences[i];
+    auto OtherOccurence = Other.Occurences[i];
+    if (ThisOccurence.KindID == OtherOccurence.KindID)
+      continue;
+
+    ++NumberOfDifferences;
+
+    // If FirstMismatch is not a nullptr, we need to store information about
+    // the first difference between the two patterns.
+    if (FirstMismatch == nullptr)
+      continue;
+
+    // Only proceed if we just found the first difference as we only store
+    // information about the first difference.
+    if (NumberOfDifferences != 1)
+      continue;
+
+    const VarDecl *FirstSuggestion = nullptr;
+    // If there is a variable available in the list of referenced variables
+    // which wouldn't break the pattern if it is used in place of the
+    // current variable, we provide this variable as the suggested fix.
+    if (OtherOccurence.KindID < Variables.size())
+      FirstSuggestion = Variables[OtherOccurence.KindID];
+
+    // Store information about the first clone.
+    FirstMismatch->FirstCloneInfo =
+        VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo(
+            Variables[ThisOccurence.KindID], ThisOccurence.Mention,
+            FirstSuggestion);
+
+    // Same as above but with the other clone. We do this for both clones as
+    // we don't know which clone is the one containing the unintended
+    // pattern error.
+    const VarDecl *SecondSuggestion = nullptr;
+    if (ThisOccurence.KindID < Other.Variables.size())
+      SecondSuggestion = Other.Variables[ThisOccurence.KindID];
+
+    // Store information about the second clone.
+    FirstMismatch->SecondCloneInfo =
+        VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo(
+            Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,
+            SecondSuggestion);
+
+    // SuspiciousClonePair guarantees that the first clone always has a
+    // suggested variable associated with it. As we know that one of the two
+    // clones in the pair always has a suggestion, we swap the two clones
+    // in case the first clone has no suggested variable, which means that
+    // the second clone has a suggested variable and should be first.
+    if (!FirstMismatch->FirstCloneInfo.Suggestion)
+      std::swap(FirstMismatch->FirstCloneInfo, FirstMismatch->SecondCloneInfo);
+
+    // This ensures that we always have at least one suggestion in a pair.
+    assert(FirstMismatch->FirstCloneInfo.Suggestion);
+  }
+
+  return NumberOfDifferences;
+}
diff --git a/contrib/llvm-project/clang/lib/Analysis/CocoaConventions.cpp b/contrib/llvm-project/clang/lib/Analysis/CocoaConventions.cpp
new file mode 100644
index 000000000000..571d72e1a841
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/CocoaConventions.cpp
@@ -0,0 +1,146 @@
+//===- CocoaConventions.cpp - Special handling of Cocoa conventions -*- C++ -*//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Cocoa naming convention analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/DomainSpecific/CocoaConventions.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/CharInfo.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace clang;
+using namespace ento;
+
+bool cocoa::isRefType(QualType RetTy, StringRef Prefix,
+                      StringRef Name) {
+  // Recursively walk the typedef stack, allowing typedefs of reference types.
+  while (const TypedefType *TD = RetTy->getAs<TypedefType>()) {
+    StringRef TDName = TD->getDecl()->getIdentifier()->getName();
+    if (TDName.startswith(Prefix) && TDName.endswith("Ref"))
+      return true;
+    // XPC unfortunately uses CF-style function names, but XPC types aren't
+    // CF types.
+    if (TDName.startswith("xpc_"))
+      return false;
+    RetTy = TD->getDecl()->getUnderlyingType();
+  }
+
+  if (Name.empty())
+    return false;
+
+  // Is the type void*?
+  const PointerType* PT = RetTy->getAs<PointerType>();
+  if (!PT || !PT->getPointeeType().getUnqualifiedType()->isVoidType())
+    return false;
+
+  // Does the name start with the prefix?
+  return Name.startswith(Prefix);
+}
+
+/// Returns true when the passed-in type is a CF-style reference-counted
+/// type from the DiskArbitration framework.
+static bool isDiskArbitrationAPIRefType(QualType T) {
+  return cocoa::isRefType(T, "DADisk") ||
+         cocoa::isRefType(T, "DADissenter") ||
+         cocoa::isRefType(T, "DASession");
+}
+
+bool coreFoundation::isCFObjectRef(QualType T) {
+  return cocoa::isRefType(T, "CF") || // Core Foundation.
+         cocoa::isRefType(T, "CG") || // Core Graphics.
+         cocoa::isRefType(T, "CM") || // Core Media.
+         isDiskArbitrationAPIRefType(T);
+}
+
+
+bool cocoa::isCocoaObjectRef(QualType Ty) {
+  if (!Ty->isObjCObjectPointerType())
+    return false;
+
+  const ObjCObjectPointerType *PT = Ty->getAs<ObjCObjectPointerType>();
+
+  // Can be true for objects with the 'NSObject' attribute.
+  if (!PT)
+    return true;
+
+  // We assume that id<..>, id, Class, and Class<..> all represent tracked
+  // objects.
+  if (PT->isObjCIdType() || PT->isObjCQualifiedIdType() ||
+      PT->isObjCClassType() || PT->isObjCQualifiedClassType())
+    return true;
+
+  // Does the interface subclass NSObject?
+  // FIXME: We can memoize here if this gets too expensive.
+  const ObjCInterfaceDecl *ID = PT->getInterfaceDecl();
+
+  // Assume that anything declared with a forward declaration and no
+  // @interface subclasses NSObject.
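+  // (For example, a '@class MyController;' used through a pointer, with no
+  // @interface visible in this translation unit.)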
+  if (!ID->hasDefinition())
+    return true;
+
+  for ( ; ID ; ID = ID->getSuperClass())
+    if (ID->getIdentifier()->getName() == "NSObject")
+      return true;
+
+  return false;
+}
+
+bool coreFoundation::followsCreateRule(const FunctionDecl *fn) {
+  // For now, *just* base this on the function name, not on anything else.
+
+  const IdentifierInfo *ident = fn->getIdentifier();
+  if (!ident) return false;
+  StringRef functionName = ident->getName();
+
+  StringRef::iterator it = functionName.begin();
+  StringRef::iterator start = it;
+  StringRef::iterator endI = functionName.end();
+
+  while (true) {
+    // Scan for the start of 'create' or 'copy'.
+    for ( ; it != endI ; ++it) {
+      // Search for the first character. It can either be 'C' or 'c'.
+      char ch = *it;
+      if (ch == 'C' || ch == 'c') {
+        // Make sure this isn't something like 'recreate' or 'Scopy'.
+        if (ch == 'c' && it != start && isLetter(*(it - 1)))
+          continue;
+
+        ++it;
+        break;
+      }
+    }
+
+    // Did we hit the end of the string? If so, we didn't find a match.
+    if (it == endI)
+      return false;
+
+    // Scan for *lowercase* 'reate' or 'opy', followed by no lowercase
+    // character.
+    StringRef suffix = functionName.substr(it - start);
+    if (suffix.startswith("reate")) {
+      it += 5;
+    } else if (suffix.startswith("opy")) {
+      it += 3;
+    } else {
+      // Keep scanning.
+      continue;
+    }
+
+    if (it == endI || !isLowercase(*it))
+      return true;
+
+    // If we matched a lowercase character, it isn't the end of the
+    // word. Keep scanning.
+  }
+}
diff --git a/contrib/llvm-project/clang/lib/Analysis/CodeInjector.cpp b/contrib/llvm-project/clang/lib/Analysis/CodeInjector.cpp
new file mode 100644
index 000000000000..412de96a13b9
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/CodeInjector.cpp
@@ -0,0 +1,14 @@
+//===-- CodeInjector.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/CodeInjector.h"
+
+using namespace clang;
+
+CodeInjector::CodeInjector() {}
+CodeInjector::~CodeInjector() {}
diff --git a/contrib/llvm-project/clang/lib/Analysis/ConstructionContext.cpp b/contrib/llvm-project/clang/lib/Analysis/ConstructionContext.cpp
new file mode 100644
index 000000000000..8a862c06f13a
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/ConstructionContext.cpp
@@ -0,0 +1,227 @@
+//===- ConstructionContext.cpp - CFG constructor information --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ConstructionContext class and its sub-classes, which
+// represent the various ways of constructing C++ objects together with the
+// additional information the users may want to know about the constructor.
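+//
+// For illustration (examples added here; 'S' is a hypothetical class with a
+// non-trivial constructor and destructor):
+//
+//   S s;                  // SimpleVariableConstructionContext
+//   S *p = new S;         // NewAllocatedObjectConstructionContext
+//   S f() { return S(); } // SimpleReturnedValueConstructionContext
+//   const S &r = S();     // a temporary-object construction context
+//
+// Each such syntactic position is modeled by its own sub-class.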
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/ConstructionContext.h" +#include "clang/AST/ExprObjC.h" + +using namespace clang; + +const ConstructionContextLayer * +ConstructionContextLayer::create(BumpVectorContext &C, + const ConstructionContextItem &Item, + const ConstructionContextLayer *Parent) { + ConstructionContextLayer *CC = + C.getAllocator().Allocate<ConstructionContextLayer>(); + return new (CC) ConstructionContextLayer(Item, Parent); +} + +bool ConstructionContextLayer::isStrictlyMoreSpecificThan( + const ConstructionContextLayer *Other) const { + const ConstructionContextLayer *Self = this; + while (true) { + if (!Other) + return Self; + if (!Self || !(Self->Item == Other->Item)) + return false; + Self = Self->getParent(); + Other = Other->getParent(); + } + llvm_unreachable("The above loop can only be terminated via return!"); +} + +const ConstructionContext * +ConstructionContext::createMaterializedTemporaryFromLayers( + BumpVectorContext &C, const MaterializeTemporaryExpr *MTE, + const CXXBindTemporaryExpr *BTE, + const ConstructionContextLayer *ParentLayer) { + assert(MTE); + + // If the object requires destruction and is not lifetime-extended, + // then it must have a BTE within its MTE, otherwise it shouldn't. + // FIXME: This should be an assertion. + if (!BTE && !(MTE->getType().getCanonicalType()->getAsCXXRecordDecl() + ->hasTrivialDestructor() || + MTE->getStorageDuration() != SD_FullExpression)) { + return nullptr; + } + + // If the temporary is lifetime-extended, don't save the BTE, + // because we don't need a temporary destructor, but an automatic + // destructor. + if (MTE->getStorageDuration() != SD_FullExpression) { + BTE = nullptr; + } + + // Handle pre-C++17 copy and move elision. + const CXXConstructExpr *ElidedCE = nullptr; + const ConstructionContext *ElidedCC = nullptr; + if (ParentLayer) { + const ConstructionContextItem &ElidedItem = ParentLayer->getItem(); + assert(ElidedItem.getKind() == + ConstructionContextItem::ElidableConstructorKind); + ElidedCE = cast<CXXConstructExpr>(ElidedItem.getStmt()); + assert(ElidedCE->isElidable()); + // We're creating a construction context that might have already + // been created elsewhere. Maybe we should unique our construction + // contexts. That's what we often do, but in this case it's unlikely + // to bring any benefits. + ElidedCC = createFromLayers(C, ParentLayer->getParent()); + if (!ElidedCC) { + // We may fail to create the elided construction context. + // In this case, skip copy elision entirely. + return create<SimpleTemporaryObjectConstructionContext>(C, BTE, MTE); + } + return create<ElidedTemporaryObjectConstructionContext>( + C, BTE, MTE, ElidedCE, ElidedCC); + } + + // This is a normal temporary. + assert(!ParentLayer); + return create<SimpleTemporaryObjectConstructionContext>(C, BTE, MTE); +} + +const ConstructionContext *ConstructionContext::createBoundTemporaryFromLayers( + BumpVectorContext &C, const CXXBindTemporaryExpr *BTE, + const ConstructionContextLayer *ParentLayer) { + if (!ParentLayer) { + // A temporary object that doesn't require materialization. + // In particular, it shouldn't require copy elision, because + // copy/move constructors take a reference, which requires + // materialization to obtain the glvalue. 
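+    // (Illustrative addition: a discarded-value temporary with a non-trivial
+    // destructor, e.g. the full-expression statement 'S();', is a typical
+    // case here: it is bound for destruction but never materialized.)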
+    return create<SimpleTemporaryObjectConstructionContext>(C, BTE,
+                                                            /*MTE=*/nullptr);
+  }
+
+  const ConstructionContextItem &ParentItem = ParentLayer->getItem();
+  switch (ParentItem.getKind()) {
+  case ConstructionContextItem::VariableKind: {
+    const auto *DS = cast<DeclStmt>(ParentItem.getStmt());
+    assert(!cast<VarDecl>(DS->getSingleDecl())->getType().getCanonicalType()
+               ->getAsCXXRecordDecl()->hasTrivialDestructor());
+    return create<CXX17ElidedCopyVariableConstructionContext>(C, DS, BTE);
+  }
+  case ConstructionContextItem::NewAllocatorKind: {
+    llvm_unreachable("This context does not accept a bound temporary!");
+  }
+  case ConstructionContextItem::ReturnKind: {
+    assert(ParentLayer->isLast());
+    const auto *RS = cast<ReturnStmt>(ParentItem.getStmt());
+    assert(!RS->getRetValue()->getType().getCanonicalType()
+               ->getAsCXXRecordDecl()->hasTrivialDestructor());
+    return create<CXX17ElidedCopyReturnedValueConstructionContext>(C, RS,
+                                                                   BTE);
+  }
+
+  case ConstructionContextItem::MaterializationKind: {
+    // No assert. We may have an elidable copy on the grandparent layer.
+    const auto *MTE = cast<MaterializeTemporaryExpr>(ParentItem.getStmt());
+    return createMaterializedTemporaryFromLayers(C, MTE, BTE,
+                                                 ParentLayer->getParent());
+  }
+  case ConstructionContextItem::TemporaryDestructorKind: {
+    llvm_unreachable("Duplicate CXXBindTemporaryExpr in the AST!");
+  }
+  case ConstructionContextItem::ElidedDestructorKind: {
+    llvm_unreachable("Elided destructor items are not produced by the CFG!");
+  }
+  case ConstructionContextItem::ElidableConstructorKind: {
+    llvm_unreachable("Materialization is necessary to put temporary into a "
+                     "copy or move constructor!");
+  }
+  case ConstructionContextItem::ArgumentKind: {
+    assert(ParentLayer->isLast());
+    const auto *E = cast<Expr>(ParentItem.getStmt());
+    assert(isa<CallExpr>(E) || isa<CXXConstructExpr>(E) ||
+           isa<ObjCMessageExpr>(E));
+    return create<ArgumentConstructionContext>(C, E, ParentItem.getIndex(),
+                                               BTE);
+  }
+  case ConstructionContextItem::InitializerKind: {
+    assert(ParentLayer->isLast());
+    const auto *I = ParentItem.getCXXCtorInitializer();
+    assert(!I->getAnyMember()->getType().getCanonicalType()
+               ->getAsCXXRecordDecl()->hasTrivialDestructor());
+    return create<CXX17ElidedCopyConstructorInitializerConstructionContext>(
+        C, I, BTE);
+  }
+  case ConstructionContextItem::LambdaCaptureKind: {
+    assert(ParentLayer->isLast());
+    const auto *E = cast<LambdaExpr>(ParentItem.getStmt());
+    return create<LambdaCaptureConstructionContext>(C, E,
+                                                    ParentItem.getIndex());
+  }
+  } // switch (ParentItem.getKind())
+
+  llvm_unreachable("Unexpected construction context with destructor!");
+}
+
+const ConstructionContext *ConstructionContext::createFromLayers(
+    BumpVectorContext &C, const ConstructionContextLayer *TopLayer) {
+  // Before this point all we had was a stockpile of arbitrary layers.
+  // Now validate that it is shaped as one of the finite number of expected
+  // patterns.
+  const ConstructionContextItem &TopItem = TopLayer->getItem();
+  switch (TopItem.getKind()) {
+  case ConstructionContextItem::VariableKind: {
+    assert(TopLayer->isLast());
+    const auto *DS = cast<DeclStmt>(TopItem.getStmt());
+    return create<SimpleVariableConstructionContext>(C, DS);
+  }
+  case ConstructionContextItem::NewAllocatorKind: {
+    assert(TopLayer->isLast());
+    const auto *NE = cast<CXXNewExpr>(TopItem.getStmt());
+    return create<NewAllocatedObjectConstructionContext>(C, NE);
+  }
+  case ConstructionContextItem::ReturnKind: {
+    assert(TopLayer->isLast());
+    const auto *RS = cast<ReturnStmt>(TopItem.getStmt());
+    return create<SimpleReturnedValueConstructionContext>(C, RS);
+  }
+  case ConstructionContextItem::MaterializationKind: {
+    const auto *MTE = cast<MaterializeTemporaryExpr>(TopItem.getStmt());
+    return createMaterializedTemporaryFromLayers(C, MTE, /*BTE=*/nullptr,
+                                                 TopLayer->getParent());
+  }
+  case ConstructionContextItem::TemporaryDestructorKind: {
+    const auto *BTE = cast<CXXBindTemporaryExpr>(TopItem.getStmt());
+    assert(BTE->getType().getCanonicalType()->getAsCXXRecordDecl()
+               ->hasNonTrivialDestructor());
+    return createBoundTemporaryFromLayers(C, BTE, TopLayer->getParent());
+  }
+  case ConstructionContextItem::ElidedDestructorKind: {
+    llvm_unreachable("Elided destructor items are not produced by the CFG!");
+  }
+  case ConstructionContextItem::ElidableConstructorKind: {
+    llvm_unreachable("The argument needs to be materialized first!");
+  }
+  case ConstructionContextItem::LambdaCaptureKind: {
+    assert(TopLayer->isLast());
+    const auto *E = cast<LambdaExpr>(TopItem.getStmt());
+    return create<LambdaCaptureConstructionContext>(C, E, TopItem.getIndex());
+  }
+  case ConstructionContextItem::InitializerKind: {
+    assert(TopLayer->isLast());
+    const CXXCtorInitializer *I = TopItem.getCXXCtorInitializer();
+    return create<SimpleConstructorInitializerConstructionContext>(C, I);
+  }
+  case ConstructionContextItem::ArgumentKind: {
+    assert(TopLayer->isLast());
+    const auto *E = cast<Expr>(TopItem.getStmt());
+    return create<ArgumentConstructionContext>(C, E, TopItem.getIndex(),
+                                               /*BTE=*/nullptr);
+  }
+  } // switch (TopItem.getKind())
+  llvm_unreachable("Unexpected construction context!");
}
diff --git a/contrib/llvm-project/clang/lib/Analysis/Consumed.cpp b/contrib/llvm-project/clang/lib/Analysis/Consumed.cpp
new file mode 100644
index 000000000000..8ab6d7a7f3b0
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/Consumed.cpp
@@ -0,0 +1,1415 @@
+//===- Consumed.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// An intra-procedural analysis for checking consumed properties. This is
+// based, in part, on research on linear types.
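+//
+// A brief illustration (added sketch, using the GNU attribute spellings from
+// clang's consumed-annotation documentation; 'Handle' is hypothetical):
+//
+//   class __attribute__((consumable(unconsumed))) Handle {
+//   public:
+//     __attribute__((return_typestate(unconsumed))) Handle();
+//     __attribute__((callable_when("unconsumed"))) void use();
+//     __attribute__((set_typestate(consumed))) void release();
+//     __attribute__((test_typestate(unconsumed))) bool isValid() const;
+//   };
+//
+// The analysis tracks each object's typestate through the CFG and warns,
+// for example, when use() is reachable while the object is 'consumed'.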
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/Consumed.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/Analyses/PostOrderCFGView.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <memory>
+#include <optional>
+#include <utility>
+
+// TODO: Adjust states of args to constructors in the same way that arguments
+//       to function calls are handled.
+// TODO: Use information from tests in for- and while-loop conditionals.
+// TODO: Add notes about the actual and expected state for
+// TODO: Correctly identify unreachable blocks when chaining boolean operators.
+// TODO: Adjust the parser and AttributesList class to support lists of
+//       identifiers.
+// TODO: Warn about unreachable code.
+// TODO: Switch to using a bitmap to track unreachable blocks.
+// TODO: Handle variable definitions, e.g. bool valid = x.isValid();
+//       if (valid) ...; (Deferred)
+// TODO: Take notes on state transitions to provide better warning messages.
+//       (Deferred)
+// TODO: Test nested conditionals: 1) Checking the same value multiple times,
+//       and 2) Checking different values. (Deferred)
+
+using namespace clang;
+using namespace consumed;
+
+// Key method definition
+ConsumedWarningsHandlerBase::~ConsumedWarningsHandlerBase() = default;
+
+static SourceLocation getFirstStmtLoc(const CFGBlock *Block) {
+  // Find the source location of the first statement in the block, if the
+  // block is not empty.
+  for (const auto &B : *Block)
+    if (std::optional<CFGStmt> CS = B.getAs<CFGStmt>())
+      return CS->getStmt()->getBeginLoc();
+
+  // The block is empty. If we have one successor, return the first statement
+  // in that block.
+  if (Block->succ_size() == 1 && *Block->succ_begin())
+    return getFirstStmtLoc(*Block->succ_begin());
+
+  return {};
+}
+
+static SourceLocation getLastStmtLoc(const CFGBlock *Block) {
+  // Find the source location of the last statement in the block, if the
+  // block is not empty.
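+// (Descriptive note: a terminator such as a branch condition counts as the
+// last statement; otherwise the elements are scanned in reverse, and empty
+// blocks fall back to their sole successor or predecessor.)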
+ if (const Stmt *StmtNode = Block->getTerminatorStmt()) { + return StmtNode->getBeginLoc(); + } else { + for (CFGBlock::const_reverse_iterator BI = Block->rbegin(), + BE = Block->rend(); BI != BE; ++BI) { + if (std::optional<CFGStmt> CS = BI->getAs<CFGStmt>()) + return CS->getStmt()->getBeginLoc(); + } + } + + // If we have one successor, return the first statement in that block + SourceLocation Loc; + if (Block->succ_size() == 1 && *Block->succ_begin()) + Loc = getFirstStmtLoc(*Block->succ_begin()); + if (Loc.isValid()) + return Loc; + + // If we have one predecessor, return the last statement in that block + if (Block->pred_size() == 1 && *Block->pred_begin()) + return getLastStmtLoc(*Block->pred_begin()); + + return Loc; +} + +static ConsumedState invertConsumedUnconsumed(ConsumedState State) { + switch (State) { + case CS_Unconsumed: + return CS_Consumed; + case CS_Consumed: + return CS_Unconsumed; + case CS_None: + return CS_None; + case CS_Unknown: + return CS_Unknown; + } + llvm_unreachable("invalid enum"); +} + +static bool isCallableInState(const CallableWhenAttr *CWAttr, + ConsumedState State) { + for (const auto &S : CWAttr->callableStates()) { + ConsumedState MappedAttrState = CS_None; + + switch (S) { + case CallableWhenAttr::Unknown: + MappedAttrState = CS_Unknown; + break; + + case CallableWhenAttr::Unconsumed: + MappedAttrState = CS_Unconsumed; + break; + + case CallableWhenAttr::Consumed: + MappedAttrState = CS_Consumed; + break; + } + + if (MappedAttrState == State) + return true; + } + + return false; +} + +static bool isConsumableType(const QualType &QT) { + if (QT->isPointerType() || QT->isReferenceType()) + return false; + + if (const CXXRecordDecl *RD = QT->getAsCXXRecordDecl()) + return RD->hasAttr<ConsumableAttr>(); + + return false; +} + +static bool isAutoCastType(const QualType &QT) { + if (QT->isPointerType() || QT->isReferenceType()) + return false; + + if (const CXXRecordDecl *RD = QT->getAsCXXRecordDecl()) + return RD->hasAttr<ConsumableAutoCastAttr>(); + + return false; +} + +static bool isSetOnReadPtrType(const QualType &QT) { + if (const CXXRecordDecl *RD = QT->getPointeeCXXRecordDecl()) + return RD->hasAttr<ConsumableSetOnReadAttr>(); + return false; +} + +static bool isKnownState(ConsumedState State) { + switch (State) { + case CS_Unconsumed: + case CS_Consumed: + return true; + case CS_None: + case CS_Unknown: + return false; + } + llvm_unreachable("invalid enum"); +} + +static bool isRValueRef(QualType ParamType) { + return ParamType->isRValueReferenceType(); +} + +static bool isTestingFunction(const FunctionDecl *FunDecl) { + return FunDecl->hasAttr<TestTypestateAttr>(); +} + +static bool isPointerOrRef(QualType ParamType) { + return ParamType->isPointerType() || ParamType->isReferenceType(); +} + +static ConsumedState mapConsumableAttrState(const QualType QT) { + assert(isConsumableType(QT)); + + const ConsumableAttr *CAttr = + QT->getAsCXXRecordDecl()->getAttr<ConsumableAttr>(); + + switch (CAttr->getDefaultState()) { + case ConsumableAttr::Unknown: + return CS_Unknown; + case ConsumableAttr::Unconsumed: + return CS_Unconsumed; + case ConsumableAttr::Consumed: + return CS_Consumed; + } + llvm_unreachable("invalid enum"); +} + +static ConsumedState +mapParamTypestateAttrState(const ParamTypestateAttr *PTAttr) { + switch (PTAttr->getParamState()) { + case ParamTypestateAttr::Unknown: + return CS_Unknown; + case ParamTypestateAttr::Unconsumed: + return CS_Unconsumed; + case ParamTypestateAttr::Consumed: + return CS_Consumed; + } + 
llvm_unreachable("invalid_enum"); +} + +static ConsumedState +mapReturnTypestateAttrState(const ReturnTypestateAttr *RTSAttr) { + switch (RTSAttr->getState()) { + case ReturnTypestateAttr::Unknown: + return CS_Unknown; + case ReturnTypestateAttr::Unconsumed: + return CS_Unconsumed; + case ReturnTypestateAttr::Consumed: + return CS_Consumed; + } + llvm_unreachable("invalid enum"); +} + +static ConsumedState mapSetTypestateAttrState(const SetTypestateAttr *STAttr) { + switch (STAttr->getNewState()) { + case SetTypestateAttr::Unknown: + return CS_Unknown; + case SetTypestateAttr::Unconsumed: + return CS_Unconsumed; + case SetTypestateAttr::Consumed: + return CS_Consumed; + } + llvm_unreachable("invalid_enum"); +} + +static StringRef stateToString(ConsumedState State) { + switch (State) { + case consumed::CS_None: + return "none"; + + case consumed::CS_Unknown: + return "unknown"; + + case consumed::CS_Unconsumed: + return "unconsumed"; + + case consumed::CS_Consumed: + return "consumed"; + } + llvm_unreachable("invalid enum"); +} + +static ConsumedState testsFor(const FunctionDecl *FunDecl) { + assert(isTestingFunction(FunDecl)); + switch (FunDecl->getAttr<TestTypestateAttr>()->getTestState()) { + case TestTypestateAttr::Unconsumed: + return CS_Unconsumed; + case TestTypestateAttr::Consumed: + return CS_Consumed; + } + llvm_unreachable("invalid enum"); +} + +namespace { + +struct VarTestResult { + const VarDecl *Var; + ConsumedState TestsFor; +}; + +} // namespace + +namespace clang { +namespace consumed { + +enum EffectiveOp { + EO_And, + EO_Or +}; + +class PropagationInfo { + enum { + IT_None, + IT_State, + IT_VarTest, + IT_BinTest, + IT_Var, + IT_Tmp + } InfoType = IT_None; + + struct BinTestTy { + const BinaryOperator *Source; + EffectiveOp EOp; + VarTestResult LTest; + VarTestResult RTest; + }; + + union { + ConsumedState State; + VarTestResult VarTest; + const VarDecl *Var; + const CXXBindTemporaryExpr *Tmp; + BinTestTy BinTest; + }; + +public: + PropagationInfo() = default; + PropagationInfo(const VarTestResult &VarTest) + : InfoType(IT_VarTest), VarTest(VarTest) {} + + PropagationInfo(const VarDecl *Var, ConsumedState TestsFor) + : InfoType(IT_VarTest) { + VarTest.Var = Var; + VarTest.TestsFor = TestsFor; + } + + PropagationInfo(const BinaryOperator *Source, EffectiveOp EOp, + const VarTestResult <est, const VarTestResult &RTest) + : InfoType(IT_BinTest) { + BinTest.Source = Source; + BinTest.EOp = EOp; + BinTest.LTest = LTest; + BinTest.RTest = RTest; + } + + PropagationInfo(const BinaryOperator *Source, EffectiveOp EOp, + const VarDecl *LVar, ConsumedState LTestsFor, + const VarDecl *RVar, ConsumedState RTestsFor) + : InfoType(IT_BinTest) { + BinTest.Source = Source; + BinTest.EOp = EOp; + BinTest.LTest.Var = LVar; + BinTest.LTest.TestsFor = LTestsFor; + BinTest.RTest.Var = RVar; + BinTest.RTest.TestsFor = RTestsFor; + } + + PropagationInfo(ConsumedState State) + : InfoType(IT_State), State(State) {} + PropagationInfo(const VarDecl *Var) : InfoType(IT_Var), Var(Var) {} + PropagationInfo(const CXXBindTemporaryExpr *Tmp) + : InfoType(IT_Tmp), Tmp(Tmp) {} + + const ConsumedState &getState() const { + assert(InfoType == IT_State); + return State; + } + + const VarTestResult &getVarTest() const { + assert(InfoType == IT_VarTest); + return VarTest; + } + + const VarTestResult &getLTest() const { + assert(InfoType == IT_BinTest); + return BinTest.LTest; + } + + const VarTestResult &getRTest() const { + assert(InfoType == IT_BinTest); + return BinTest.RTest; + } + + const VarDecl *getVar() 
const { + assert(InfoType == IT_Var); + return Var; + } + + const CXXBindTemporaryExpr *getTmp() const { + assert(InfoType == IT_Tmp); + return Tmp; + } + + ConsumedState getAsState(const ConsumedStateMap *StateMap) const { + assert(isVar() || isTmp() || isState()); + + if (isVar()) + return StateMap->getState(Var); + else if (isTmp()) + return StateMap->getState(Tmp); + else if (isState()) + return State; + else + return CS_None; + } + + EffectiveOp testEffectiveOp() const { + assert(InfoType == IT_BinTest); + return BinTest.EOp; + } + + const BinaryOperator * testSourceNode() const { + assert(InfoType == IT_BinTest); + return BinTest.Source; + } + + bool isValid() const { return InfoType != IT_None; } + bool isState() const { return InfoType == IT_State; } + bool isVarTest() const { return InfoType == IT_VarTest; } + bool isBinTest() const { return InfoType == IT_BinTest; } + bool isVar() const { return InfoType == IT_Var; } + bool isTmp() const { return InfoType == IT_Tmp; } + + bool isTest() const { + return InfoType == IT_VarTest || InfoType == IT_BinTest; + } + + bool isPointerToValue() const { + return InfoType == IT_Var || InfoType == IT_Tmp; + } + + PropagationInfo invertTest() const { + assert(InfoType == IT_VarTest || InfoType == IT_BinTest); + + if (InfoType == IT_VarTest) { + return PropagationInfo(VarTest.Var, + invertConsumedUnconsumed(VarTest.TestsFor)); + + } else if (InfoType == IT_BinTest) { + return PropagationInfo(BinTest.Source, + BinTest.EOp == EO_And ? EO_Or : EO_And, + BinTest.LTest.Var, invertConsumedUnconsumed(BinTest.LTest.TestsFor), + BinTest.RTest.Var, invertConsumedUnconsumed(BinTest.RTest.TestsFor)); + } else { + return {}; + } + } +}; + +} // namespace consumed +} // namespace clang + +static void +setStateForVarOrTmp(ConsumedStateMap *StateMap, const PropagationInfo &PInfo, + ConsumedState State) { + assert(PInfo.isVar() || PInfo.isTmp()); + + if (PInfo.isVar()) + StateMap->setState(PInfo.getVar(), State); + else + StateMap->setState(PInfo.getTmp(), State); +} + +namespace clang { +namespace consumed { + +class ConsumedStmtVisitor : public ConstStmtVisitor<ConsumedStmtVisitor> { + using MapType = llvm::DenseMap<const Stmt *, PropagationInfo>; + using PairType= std::pair<const Stmt *, PropagationInfo>; + using InfoEntry = MapType::iterator; + using ConstInfoEntry = MapType::const_iterator; + + ConsumedAnalyzer &Analyzer; + ConsumedStateMap *StateMap; + MapType PropagationMap; + + InfoEntry findInfo(const Expr *E) { + if (const auto Cleanups = dyn_cast<ExprWithCleanups>(E)) + if (!Cleanups->cleanupsHaveSideEffects()) + E = Cleanups->getSubExpr(); + return PropagationMap.find(E->IgnoreParens()); + } + + ConstInfoEntry findInfo(const Expr *E) const { + if (const auto Cleanups = dyn_cast<ExprWithCleanups>(E)) + if (!Cleanups->cleanupsHaveSideEffects()) + E = Cleanups->getSubExpr(); + return PropagationMap.find(E->IgnoreParens()); + } + + void insertInfo(const Expr *E, const PropagationInfo &PI) { + PropagationMap.insert(PairType(E->IgnoreParens(), PI)); + } + + void forwardInfo(const Expr *From, const Expr *To); + void copyInfo(const Expr *From, const Expr *To, ConsumedState CS); + ConsumedState getInfo(const Expr *From); + void setInfo(const Expr *To, ConsumedState NS); + void propagateReturnType(const Expr *Call, const FunctionDecl *Fun); + +public: + void checkCallability(const PropagationInfo &PInfo, + const FunctionDecl *FunDecl, + SourceLocation BlameLoc); + bool handleCall(const CallExpr *Call, const Expr *ObjArg, + const FunctionDecl *FunD); + + void 
VisitBinaryOperator(const BinaryOperator *BinOp);
+  void VisitCallExpr(const CallExpr *Call);
+  void VisitCastExpr(const CastExpr *Cast);
+  void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *Temp);
+  void VisitCXXConstructExpr(const CXXConstructExpr *Call);
+  void VisitCXXMemberCallExpr(const CXXMemberCallExpr *Call);
+  void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *Call);
+  void VisitDeclRefExpr(const DeclRefExpr *DeclRef);
+  void VisitDeclStmt(const DeclStmt *DeclS);
+  void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *Temp);
+  void VisitMemberExpr(const MemberExpr *MExpr);
+  void VisitParmVarDecl(const ParmVarDecl *Param);
+  void VisitReturnStmt(const ReturnStmt *Ret);
+  void VisitUnaryOperator(const UnaryOperator *UOp);
+  void VisitVarDecl(const VarDecl *Var);
+
+  ConsumedStmtVisitor(ConsumedAnalyzer &Analyzer, ConsumedStateMap *StateMap)
+      : Analyzer(Analyzer), StateMap(StateMap) {}
+
+  PropagationInfo getInfo(const Expr *StmtNode) const {
+    ConstInfoEntry Entry = findInfo(StmtNode);
+
+    if (Entry != PropagationMap.end())
+      return Entry->second;
+    else
+      return {};
+  }
+
+  void reset(ConsumedStateMap *NewStateMap) {
+    StateMap = NewStateMap;
+  }
+};
+
+} // namespace consumed
+} // namespace clang
+
+void ConsumedStmtVisitor::forwardInfo(const Expr *From, const Expr *To) {
+  InfoEntry Entry = findInfo(From);
+  if (Entry != PropagationMap.end())
+    insertInfo(To, Entry->second);
+}
+
+// Create a new state for To, which is initialized to the state of From. If
+// NS is not CS_None, sets the state of From to NS.
+void ConsumedStmtVisitor::copyInfo(const Expr *From, const Expr *To,
+                                   ConsumedState NS) {
+  InfoEntry Entry = findInfo(From);
+  if (Entry != PropagationMap.end()) {
+    PropagationInfo &PInfo = Entry->second;
+    ConsumedState CS = PInfo.getAsState(StateMap);
+    if (CS != CS_None)
+      insertInfo(To, PropagationInfo(CS));
+    if (NS != CS_None && PInfo.isPointerToValue())
+      setStateForVarOrTmp(StateMap, PInfo, NS);
+  }
+}
+
+// Get the ConsumedState for From.
+ConsumedState ConsumedStmtVisitor::getInfo(const Expr *From) {
+  InfoEntry Entry = findInfo(From);
+  if (Entry != PropagationMap.end()) {
+    PropagationInfo &PInfo = Entry->second;
+    return PInfo.getAsState(StateMap);
+  }
+  return CS_None;
+}
+
+// If we already have info for To, update it; otherwise create a new entry.
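+// (For example, for an assignment 'lhs = rhs' handled in
+// VisitCXXOperatorCallExpr, the state computed for 'rhs' is written through
+// to the variable or temporary behind 'lhs'.)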
+void ConsumedStmtVisitor::setInfo(const Expr *To, ConsumedState NS) { + InfoEntry Entry = findInfo(To); + if (Entry != PropagationMap.end()) { + PropagationInfo& PInfo = Entry->second; + if (PInfo.isPointerToValue()) + setStateForVarOrTmp(StateMap, PInfo, NS); + } else if (NS != CS_None) { + insertInfo(To, PropagationInfo(NS)); + } +} + +void ConsumedStmtVisitor::checkCallability(const PropagationInfo &PInfo, + const FunctionDecl *FunDecl, + SourceLocation BlameLoc) { + assert(!PInfo.isTest()); + + const CallableWhenAttr *CWAttr = FunDecl->getAttr<CallableWhenAttr>(); + if (!CWAttr) + return; + + if (PInfo.isVar()) { + ConsumedState VarState = StateMap->getState(PInfo.getVar()); + + if (VarState == CS_None || isCallableInState(CWAttr, VarState)) + return; + + Analyzer.WarningsHandler.warnUseInInvalidState( + FunDecl->getNameAsString(), PInfo.getVar()->getNameAsString(), + stateToString(VarState), BlameLoc); + } else { + ConsumedState TmpState = PInfo.getAsState(StateMap); + + if (TmpState == CS_None || isCallableInState(CWAttr, TmpState)) + return; + + Analyzer.WarningsHandler.warnUseOfTempInInvalidState( + FunDecl->getNameAsString(), stateToString(TmpState), BlameLoc); + } +} + +// Factors out common behavior for function, method, and operator calls. +// Check parameters and set parameter state if necessary. +// Returns true if the state of ObjArg is set, or false otherwise. +bool ConsumedStmtVisitor::handleCall(const CallExpr *Call, const Expr *ObjArg, + const FunctionDecl *FunD) { + unsigned Offset = 0; + if (isa<CXXOperatorCallExpr>(Call) && isa<CXXMethodDecl>(FunD)) + Offset = 1; // first argument is 'this' + + // check explicit parameters + for (unsigned Index = Offset; Index < Call->getNumArgs(); ++Index) { + // Skip variable argument lists. + if (Index - Offset >= FunD->getNumParams()) + break; + + const ParmVarDecl *Param = FunD->getParamDecl(Index - Offset); + QualType ParamType = Param->getType(); + + InfoEntry Entry = findInfo(Call->getArg(Index)); + + if (Entry == PropagationMap.end() || Entry->second.isTest()) + continue; + PropagationInfo PInfo = Entry->second; + + // Check that the parameter is in the correct state. + if (ParamTypestateAttr *PTA = Param->getAttr<ParamTypestateAttr>()) { + ConsumedState ParamState = PInfo.getAsState(StateMap); + ConsumedState ExpectedState = mapParamTypestateAttrState(PTA); + + if (ParamState != ExpectedState) + Analyzer.WarningsHandler.warnParamTypestateMismatch( + Call->getArg(Index)->getExprLoc(), + stateToString(ExpectedState), stateToString(ParamState)); + } + + if (!(Entry->second.isVar() || Entry->second.isTmp())) + continue; + + // Adjust state on the caller side. 
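+    // (Illustrative addition: for 'void sink(Handle &&h)', the call
+    // 'sink(std::move(h))' leaves 'h' consumed; for 'void touch(Handle &h)',
+    // 'h' becomes unknown afterwards, unless a return_typestate annotation
+    // on the parameter says otherwise.)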
+ if (ReturnTypestateAttr *RT = Param->getAttr<ReturnTypestateAttr>()) + setStateForVarOrTmp(StateMap, PInfo, mapReturnTypestateAttrState(RT)); + else if (isRValueRef(ParamType) || isConsumableType(ParamType)) + setStateForVarOrTmp(StateMap, PInfo, consumed::CS_Consumed); + else if (isPointerOrRef(ParamType) && + (!ParamType->getPointeeType().isConstQualified() || + isSetOnReadPtrType(ParamType))) + setStateForVarOrTmp(StateMap, PInfo, consumed::CS_Unknown); + } + + if (!ObjArg) + return false; + + // check implicit 'self' parameter, if present + InfoEntry Entry = findInfo(ObjArg); + if (Entry != PropagationMap.end()) { + PropagationInfo PInfo = Entry->second; + checkCallability(PInfo, FunD, Call->getExprLoc()); + + if (SetTypestateAttr *STA = FunD->getAttr<SetTypestateAttr>()) { + if (PInfo.isVar()) { + StateMap->setState(PInfo.getVar(), mapSetTypestateAttrState(STA)); + return true; + } + else if (PInfo.isTmp()) { + StateMap->setState(PInfo.getTmp(), mapSetTypestateAttrState(STA)); + return true; + } + } + else if (isTestingFunction(FunD) && PInfo.isVar()) { + PropagationMap.insert(PairType(Call, + PropagationInfo(PInfo.getVar(), testsFor(FunD)))); + } + } + return false; +} + +void ConsumedStmtVisitor::propagateReturnType(const Expr *Call, + const FunctionDecl *Fun) { + QualType RetType = Fun->getCallResultType(); + if (RetType->isReferenceType()) + RetType = RetType->getPointeeType(); + + if (isConsumableType(RetType)) { + ConsumedState ReturnState; + if (ReturnTypestateAttr *RTA = Fun->getAttr<ReturnTypestateAttr>()) + ReturnState = mapReturnTypestateAttrState(RTA); + else + ReturnState = mapConsumableAttrState(RetType); + + PropagationMap.insert(PairType(Call, PropagationInfo(ReturnState))); + } +} + +void ConsumedStmtVisitor::VisitBinaryOperator(const BinaryOperator *BinOp) { + switch (BinOp->getOpcode()) { + case BO_LAnd: + case BO_LOr : { + InfoEntry LEntry = findInfo(BinOp->getLHS()), + REntry = findInfo(BinOp->getRHS()); + + VarTestResult LTest, RTest; + + if (LEntry != PropagationMap.end() && LEntry->second.isVarTest()) { + LTest = LEntry->second.getVarTest(); + } else { + LTest.Var = nullptr; + LTest.TestsFor = CS_None; + } + + if (REntry != PropagationMap.end() && REntry->second.isVarTest()) { + RTest = REntry->second.getVarTest(); + } else { + RTest.Var = nullptr; + RTest.TestsFor = CS_None; + } + + if (!(LTest.Var == nullptr && RTest.Var == nullptr)) + PropagationMap.insert(PairType(BinOp, PropagationInfo(BinOp, + static_cast<EffectiveOp>(BinOp->getOpcode() == BO_LOr), LTest, RTest))); + break; + } + + case BO_PtrMemD: + case BO_PtrMemI: + forwardInfo(BinOp->getLHS(), BinOp); + break; + + default: + break; + } +} + +void ConsumedStmtVisitor::VisitCallExpr(const CallExpr *Call) { + const FunctionDecl *FunDecl = Call->getDirectCallee(); + if (!FunDecl) + return; + + // Special case for the std::move function. + // TODO: Make this more specific. 
(Deferred) + if (Call->isCallToStdMove()) { + copyInfo(Call->getArg(0), Call, CS_Consumed); + return; + } + + handleCall(Call, nullptr, FunDecl); + propagateReturnType(Call, FunDecl); +} + +void ConsumedStmtVisitor::VisitCastExpr(const CastExpr *Cast) { + forwardInfo(Cast->getSubExpr(), Cast); +} + +void ConsumedStmtVisitor::VisitCXXBindTemporaryExpr( + const CXXBindTemporaryExpr *Temp) { + + InfoEntry Entry = findInfo(Temp->getSubExpr()); + + if (Entry != PropagationMap.end() && !Entry->second.isTest()) { + StateMap->setState(Temp, Entry->second.getAsState(StateMap)); + PropagationMap.insert(PairType(Temp, PropagationInfo(Temp))); + } +} + +void ConsumedStmtVisitor::VisitCXXConstructExpr(const CXXConstructExpr *Call) { + CXXConstructorDecl *Constructor = Call->getConstructor(); + + QualType ThisType = Constructor->getThisType()->getPointeeType(); + + if (!isConsumableType(ThisType)) + return; + + // FIXME: What should happen if someone annotates the move constructor? + if (ReturnTypestateAttr *RTA = Constructor->getAttr<ReturnTypestateAttr>()) { + // TODO: Adjust state of args appropriately. + ConsumedState RetState = mapReturnTypestateAttrState(RTA); + PropagationMap.insert(PairType(Call, PropagationInfo(RetState))); + } else if (Constructor->isDefaultConstructor()) { + PropagationMap.insert(PairType(Call, + PropagationInfo(consumed::CS_Consumed))); + } else if (Constructor->isMoveConstructor()) { + copyInfo(Call->getArg(0), Call, CS_Consumed); + } else if (Constructor->isCopyConstructor()) { + // Copy state from arg. If setStateOnRead then set arg to CS_Unknown. + ConsumedState NS = + isSetOnReadPtrType(Constructor->getThisType()) ? + CS_Unknown : CS_None; + copyInfo(Call->getArg(0), Call, NS); + } else { + // TODO: Adjust state of args appropriately. 
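+    // (Illustrative addition: any other constructor of a consumable type
+    // starts the object in the default state named by the type's
+    // 'consumable' attribute, e.g. CS_Unconsumed for consumable(unconsumed).)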
+ ConsumedState RetState = mapConsumableAttrState(ThisType); + PropagationMap.insert(PairType(Call, PropagationInfo(RetState))); + } +} + +void ConsumedStmtVisitor::VisitCXXMemberCallExpr( + const CXXMemberCallExpr *Call) { + CXXMethodDecl* MD = Call->getMethodDecl(); + if (!MD) + return; + + handleCall(Call, Call->getImplicitObjectArgument(), MD); + propagateReturnType(Call, MD); +} + +void ConsumedStmtVisitor::VisitCXXOperatorCallExpr( + const CXXOperatorCallExpr *Call) { + const auto *FunDecl = dyn_cast_or_null<FunctionDecl>(Call->getDirectCallee()); + if (!FunDecl) return; + + if (Call->getOperator() == OO_Equal) { + ConsumedState CS = getInfo(Call->getArg(1)); + if (!handleCall(Call, Call->getArg(0), FunDecl)) + setInfo(Call->getArg(0), CS); + return; + } + + if (const auto *MCall = dyn_cast<CXXMemberCallExpr>(Call)) + handleCall(MCall, MCall->getImplicitObjectArgument(), FunDecl); + else + handleCall(Call, Call->getArg(0), FunDecl); + + propagateReturnType(Call, FunDecl); +} + +void ConsumedStmtVisitor::VisitDeclRefExpr(const DeclRefExpr *DeclRef) { + if (const auto *Var = dyn_cast_or_null<VarDecl>(DeclRef->getDecl())) + if (StateMap->getState(Var) != consumed::CS_None) + PropagationMap.insert(PairType(DeclRef, PropagationInfo(Var))); +} + +void ConsumedStmtVisitor::VisitDeclStmt(const DeclStmt *DeclS) { + for (const auto *DI : DeclS->decls()) + if (isa<VarDecl>(DI)) + VisitVarDecl(cast<VarDecl>(DI)); + + if (DeclS->isSingleDecl()) + if (const auto *Var = dyn_cast_or_null<VarDecl>(DeclS->getSingleDecl())) + PropagationMap.insert(PairType(DeclS, PropagationInfo(Var))); +} + +void ConsumedStmtVisitor::VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *Temp) { + forwardInfo(Temp->getSubExpr(), Temp); +} + +void ConsumedStmtVisitor::VisitMemberExpr(const MemberExpr *MExpr) { + forwardInfo(MExpr->getBase(), MExpr); +} + +void ConsumedStmtVisitor::VisitParmVarDecl(const ParmVarDecl *Param) { + QualType ParamType = Param->getType(); + ConsumedState ParamState = consumed::CS_None; + + if (const ParamTypestateAttr *PTA = Param->getAttr<ParamTypestateAttr>()) + ParamState = mapParamTypestateAttrState(PTA); + else if (isConsumableType(ParamType)) + ParamState = mapConsumableAttrState(ParamType); + else if (isRValueRef(ParamType) && + isConsumableType(ParamType->getPointeeType())) + ParamState = mapConsumableAttrState(ParamType->getPointeeType()); + else if (ParamType->isReferenceType() && + isConsumableType(ParamType->getPointeeType())) + ParamState = consumed::CS_Unknown; + + if (ParamState != CS_None) + StateMap->setState(Param, ParamState); +} + +void ConsumedStmtVisitor::VisitReturnStmt(const ReturnStmt *Ret) { + ConsumedState ExpectedState = Analyzer.getExpectedReturnState(); + + if (ExpectedState != CS_None) { + InfoEntry Entry = findInfo(Ret->getRetValue()); + + if (Entry != PropagationMap.end()) { + ConsumedState RetState = Entry->second.getAsState(StateMap); + + if (RetState != ExpectedState) + Analyzer.WarningsHandler.warnReturnTypestateMismatch( + Ret->getReturnLoc(), stateToString(ExpectedState), + stateToString(RetState)); + } + } + + StateMap->checkParamsForReturnTypestate(Ret->getBeginLoc(), + Analyzer.WarningsHandler); +} + +void ConsumedStmtVisitor::VisitUnaryOperator(const UnaryOperator *UOp) { + InfoEntry Entry = findInfo(UOp->getSubExpr()); + if (Entry == PropagationMap.end()) return; + + switch (UOp->getOpcode()) { + case UO_AddrOf: + PropagationMap.insert(PairType(UOp, Entry->second)); + break; + + case UO_LNot: + if (Entry->second.isTest()) + 
PropagationMap.insert(PairType(UOp, Entry->second.invertTest()));
+    break;
+
+  default:
+    break;
+  }
+}
+
+// TODO: See if I need to check for reference types here.
+void ConsumedStmtVisitor::VisitVarDecl(const VarDecl *Var) {
+  if (isConsumableType(Var->getType())) {
+    if (Var->hasInit()) {
+      MapType::iterator VIT = findInfo(Var->getInit()->IgnoreImplicit());
+      if (VIT != PropagationMap.end()) {
+        PropagationInfo PInfo = VIT->second;
+        ConsumedState St = PInfo.getAsState(StateMap);
+
+        if (St != consumed::CS_None) {
+          StateMap->setState(Var, St);
+          return;
+        }
+      }
+    }
+    // Otherwise
+    StateMap->setState(Var, consumed::CS_Unknown);
+  }
+}
+
+static void splitVarStateForIf(const IfStmt *IfNode, const VarTestResult &Test,
+                               ConsumedStateMap *ThenStates,
+                               ConsumedStateMap *ElseStates) {
+  ConsumedState VarState = ThenStates->getState(Test.Var);
+
+  if (VarState == CS_Unknown) {
+    ThenStates->setState(Test.Var, Test.TestsFor);
+    ElseStates->setState(Test.Var, invertConsumedUnconsumed(Test.TestsFor));
+  } else if (VarState == invertConsumedUnconsumed(Test.TestsFor)) {
+    ThenStates->markUnreachable();
+  } else if (VarState == Test.TestsFor) {
+    ElseStates->markUnreachable();
+  }
+}
+
+static void splitVarStateForIfBinOp(const PropagationInfo &PInfo,
+                                    ConsumedStateMap *ThenStates,
+                                    ConsumedStateMap *ElseStates) {
+  const VarTestResult &LTest = PInfo.getLTest(),
+                      &RTest = PInfo.getRTest();
+
+  ConsumedState LState = LTest.Var ? ThenStates->getState(LTest.Var) : CS_None,
+                RState = RTest.Var ? ThenStates->getState(RTest.Var) : CS_None;
+
+  if (LTest.Var) {
+    if (PInfo.testEffectiveOp() == EO_And) {
+      if (LState == CS_Unknown) {
+        ThenStates->setState(LTest.Var, LTest.TestsFor);
+      } else if (LState == invertConsumedUnconsumed(LTest.TestsFor)) {
+        ThenStates->markUnreachable();
+      } else if (LState == LTest.TestsFor && isKnownState(RState)) {
+        if (RState == RTest.TestsFor)
+          ElseStates->markUnreachable();
+        else
+          ThenStates->markUnreachable();
+      }
+    } else {
+      if (LState == CS_Unknown) {
+        ElseStates->setState(LTest.Var,
+                             invertConsumedUnconsumed(LTest.TestsFor));
+      } else if (LState == LTest.TestsFor) {
+        ElseStates->markUnreachable();
+      } else if (LState == invertConsumedUnconsumed(LTest.TestsFor) &&
+                 isKnownState(RState)) {
+        if (RState == RTest.TestsFor)
+          ElseStates->markUnreachable();
+        else
+          ThenStates->markUnreachable();
+      }
+    }
+  }
+
+  if (RTest.Var) {
+    if (PInfo.testEffectiveOp() == EO_And) {
+      if (RState == CS_Unknown)
+        ThenStates->setState(RTest.Var, RTest.TestsFor);
+      else if (RState == invertConsumedUnconsumed(RTest.TestsFor))
+        ThenStates->markUnreachable();
+    } else {
+      if (RState == CS_Unknown)
+        ElseStates->setState(RTest.Var,
+                             invertConsumedUnconsumed(RTest.TestsFor));
+      else if (RState == RTest.TestsFor)
+        ElseStates->markUnreachable();
+    }
+  }
+}
+
+bool ConsumedBlockInfo::allBackEdgesVisited(const CFGBlock *CurrBlock,
+                                            const CFGBlock *TargetBlock) {
+  assert(CurrBlock && "Block pointer must not be NULL");
+  assert(TargetBlock && "TargetBlock pointer must not be NULL");
+
+  unsigned int CurrBlockOrder = VisitOrder[CurrBlock->getBlockID()];
+  for (CFGBlock::const_pred_iterator PI = TargetBlock->pred_begin(),
+       PE = TargetBlock->pred_end(); PI != PE; ++PI) {
+    if (*PI && CurrBlockOrder < VisitOrder[(*PI)->getBlockID()])
+      return false;
+  }
+  return true;
+}
+
+void ConsumedBlockInfo::addInfo(
+    const CFGBlock *Block, ConsumedStateMap *StateMap,
+    std::unique_ptr<ConsumedStateMap> &OwnedStateMap) {
+  assert(Block && "Block pointer must not be NULL");
+
+  auto &Entry =
StateMapsArray[Block->getBlockID()];
+
+  if (Entry) {
+    Entry->intersect(*StateMap);
+  } else if (OwnedStateMap)
+    Entry = std::move(OwnedStateMap);
+  else
+    Entry = std::make_unique<ConsumedStateMap>(*StateMap);
+}
+
+void ConsumedBlockInfo::addInfo(const CFGBlock *Block,
+                                std::unique_ptr<ConsumedStateMap> StateMap) {
+  assert(Block && "Block pointer must not be NULL");
+
+  auto &Entry = StateMapsArray[Block->getBlockID()];
+
+  if (Entry) {
+    Entry->intersect(*StateMap);
+  } else {
+    Entry = std::move(StateMap);
+  }
+}
+
+ConsumedStateMap* ConsumedBlockInfo::borrowInfo(const CFGBlock *Block) {
+  assert(Block && "Block pointer must not be NULL");
+  assert(StateMapsArray[Block->getBlockID()] && "Block has no block info");
+
+  return StateMapsArray[Block->getBlockID()].get();
+}
+
+void ConsumedBlockInfo::discardInfo(const CFGBlock *Block) {
+  StateMapsArray[Block->getBlockID()] = nullptr;
+}
+
+std::unique_ptr<ConsumedStateMap>
+ConsumedBlockInfo::getInfo(const CFGBlock *Block) {
+  assert(Block && "Block pointer must not be NULL");
+
+  auto &Entry = StateMapsArray[Block->getBlockID()];
+  return isBackEdgeTarget(Block) ? std::make_unique<ConsumedStateMap>(*Entry)
+                                 : std::move(Entry);
+}
+
+bool ConsumedBlockInfo::isBackEdge(const CFGBlock *From, const CFGBlock *To) {
+  assert(From && "From block must not be NULL");
+  assert(To && "To block must not be NULL");
+
+  return VisitOrder[From->getBlockID()] > VisitOrder[To->getBlockID()];
+}
+
+bool ConsumedBlockInfo::isBackEdgeTarget(const CFGBlock *Block) {
+  assert(Block && "Block pointer must not be NULL");
+
+  // Anything with fewer than two predecessors can't be the target of a back
+  // edge.
+  if (Block->pred_size() < 2)
+    return false;
+
+  unsigned int BlockVisitOrder = VisitOrder[Block->getBlockID()];
+  for (CFGBlock::const_pred_iterator PI = Block->pred_begin(),
+       PE = Block->pred_end(); PI != PE; ++PI) {
+    if (*PI && BlockVisitOrder < VisitOrder[(*PI)->getBlockID()])
+      return true;
+  }
+  return false;
+}
+
+void ConsumedStateMap::checkParamsForReturnTypestate(SourceLocation BlameLoc,
+    ConsumedWarningsHandlerBase &WarningsHandler) const {
+
+  for (const auto &DM : VarMap) {
+    if (isa<ParmVarDecl>(DM.first)) {
+      const auto *Param = cast<ParmVarDecl>(DM.first);
+      const ReturnTypestateAttr *RTA = Param->getAttr<ReturnTypestateAttr>();
+
+      if (!RTA)
+        continue;
+
+      ConsumedState ExpectedState = mapReturnTypestateAttrState(RTA);
+      if (DM.second != ExpectedState)
+        WarningsHandler.warnParamReturnTypestateMismatch(BlameLoc,
+            Param->getNameAsString(), stateToString(ExpectedState),
+            stateToString(DM.second));
+    }
+  }
+}
+
+void ConsumedStateMap::clearTemporaries() {
+  TmpMap.clear();
+}
+
+ConsumedState ConsumedStateMap::getState(const VarDecl *Var) const {
+  VarMapType::const_iterator Entry = VarMap.find(Var);
+
+  if (Entry != VarMap.end())
+    return Entry->second;
+
+  return CS_None;
+}
+
+ConsumedState
+ConsumedStateMap::getState(const CXXBindTemporaryExpr *Tmp) const {
+  TmpMapType::const_iterator Entry = TmpMap.find(Tmp);
+
+  if (Entry != TmpMap.end())
+    return Entry->second;
+
+  return CS_None;
+}
+
+void ConsumedStateMap::intersect(const ConsumedStateMap &Other) {
+  ConsumedState LocalState;
+
+  if (this->From && this->From == Other.From && !Other.Reachable) {
+    this->markUnreachable();
+    return;
+  }
+
+  for (const auto &DM : Other.VarMap) {
+    LocalState = this->getState(DM.first);
+
+    if (LocalState == CS_None)
+      continue;
+
+    if (LocalState != DM.second)
+      VarMap[DM.first] = CS_Unknown;
+  }
+}
+
+void
ConsumedStateMap::intersectAtLoopHead(const CFGBlock *LoopHead, + const CFGBlock *LoopBack, const ConsumedStateMap *LoopBackStates, + ConsumedWarningsHandlerBase &WarningsHandler) { + + ConsumedState LocalState; + SourceLocation BlameLoc = getLastStmtLoc(LoopBack); + + for (const auto &DM : LoopBackStates->VarMap) { + LocalState = this->getState(DM.first); + + if (LocalState == CS_None) + continue; + + if (LocalState != DM.second) { + VarMap[DM.first] = CS_Unknown; + WarningsHandler.warnLoopStateMismatch(BlameLoc, + DM.first->getNameAsString()); + } + } +} + +void ConsumedStateMap::markUnreachable() { + this->Reachable = false; + VarMap.clear(); + TmpMap.clear(); +} + +void ConsumedStateMap::setState(const VarDecl *Var, ConsumedState State) { + VarMap[Var] = State; +} + +void ConsumedStateMap::setState(const CXXBindTemporaryExpr *Tmp, + ConsumedState State) { + TmpMap[Tmp] = State; +} + +void ConsumedStateMap::remove(const CXXBindTemporaryExpr *Tmp) { + TmpMap.erase(Tmp); +} + +bool ConsumedStateMap::operator!=(const ConsumedStateMap *Other) const { + for (const auto &DM : Other->VarMap) + if (this->getState(DM.first) != DM.second) + return true; + return false; +} + +void ConsumedAnalyzer::determineExpectedReturnState(AnalysisDeclContext &AC, + const FunctionDecl *D) { + QualType ReturnType; + if (const auto *Constructor = dyn_cast<CXXConstructorDecl>(D)) { + ReturnType = Constructor->getThisType()->getPointeeType(); + } else + ReturnType = D->getCallResultType(); + + if (const ReturnTypestateAttr *RTSAttr = D->getAttr<ReturnTypestateAttr>()) { + const CXXRecordDecl *RD = ReturnType->getAsCXXRecordDecl(); + if (!RD || !RD->hasAttr<ConsumableAttr>()) { + // FIXME: This should be removed when template instantiation propagates + // attributes at template specialization definition, not + // declaration. When it is removed the test needs to be enabled + // in SemaDeclAttr.cpp. + WarningsHandler.warnReturnTypestateForUnconsumableType( + RTSAttr->getLocation(), ReturnType.getAsString()); + ExpectedReturnState = CS_None; + } else + ExpectedReturnState = mapReturnTypestateAttrState(RTSAttr); + } else if (isConsumableType(ReturnType)) { + if (isAutoCastType(ReturnType)) // We can auto-cast the state to the + ExpectedReturnState = CS_None; // expected state. 
+ else + ExpectedReturnState = mapConsumableAttrState(ReturnType); + } + else + ExpectedReturnState = CS_None; +} + +bool ConsumedAnalyzer::splitState(const CFGBlock *CurrBlock, + const ConsumedStmtVisitor &Visitor) { + std::unique_ptr<ConsumedStateMap> FalseStates( + new ConsumedStateMap(*CurrStates)); + PropagationInfo PInfo; + + if (const auto *IfNode = + dyn_cast_or_null<IfStmt>(CurrBlock->getTerminator().getStmt())) { + const Expr *Cond = IfNode->getCond(); + + PInfo = Visitor.getInfo(Cond); + if (!PInfo.isValid() && isa<BinaryOperator>(Cond)) + PInfo = Visitor.getInfo(cast<BinaryOperator>(Cond)->getRHS()); + + if (PInfo.isVarTest()) { + CurrStates->setSource(Cond); + FalseStates->setSource(Cond); + splitVarStateForIf(IfNode, PInfo.getVarTest(), CurrStates.get(), + FalseStates.get()); + } else if (PInfo.isBinTest()) { + CurrStates->setSource(PInfo.testSourceNode()); + FalseStates->setSource(PInfo.testSourceNode()); + splitVarStateForIfBinOp(PInfo, CurrStates.get(), FalseStates.get()); + } else { + return false; + } + } else if (const auto *BinOp = + dyn_cast_or_null<BinaryOperator>(CurrBlock->getTerminator().getStmt())) { + PInfo = Visitor.getInfo(BinOp->getLHS()); + if (!PInfo.isVarTest()) { + if ((BinOp = dyn_cast_or_null<BinaryOperator>(BinOp->getLHS()))) { + PInfo = Visitor.getInfo(BinOp->getRHS()); + + if (!PInfo.isVarTest()) + return false; + } else { + return false; + } + } + + CurrStates->setSource(BinOp); + FalseStates->setSource(BinOp); + + const VarTestResult &Test = PInfo.getVarTest(); + ConsumedState VarState = CurrStates->getState(Test.Var); + + if (BinOp->getOpcode() == BO_LAnd) { + if (VarState == CS_Unknown) + CurrStates->setState(Test.Var, Test.TestsFor); + else if (VarState == invertConsumedUnconsumed(Test.TestsFor)) + CurrStates->markUnreachable(); + + } else if (BinOp->getOpcode() == BO_LOr) { + if (VarState == CS_Unknown) + FalseStates->setState(Test.Var, + invertConsumedUnconsumed(Test.TestsFor)); + else if (VarState == Test.TestsFor) + FalseStates->markUnreachable(); + } + } else { + return false; + } + + CFGBlock::const_succ_iterator SI = CurrBlock->succ_begin(); + + if (*SI) + BlockInfo.addInfo(*SI, std::move(CurrStates)); + else + CurrStates = nullptr; + + if (*++SI) + BlockInfo.addInfo(*SI, std::move(FalseStates)); + + return true; +} + +void ConsumedAnalyzer::run(AnalysisDeclContext &AC) { + const auto *D = dyn_cast_or_null<FunctionDecl>(AC.getDecl()); + if (!D) + return; + + CFG *CFGraph = AC.getCFG(); + if (!CFGraph) + return; + + determineExpectedReturnState(AC, D); + + PostOrderCFGView *SortedGraph = AC.getAnalysis<PostOrderCFGView>(); + // AC.getCFG()->viewCFG(LangOptions()); + + BlockInfo = ConsumedBlockInfo(CFGraph->getNumBlockIDs(), SortedGraph); + + CurrStates = std::make_unique<ConsumedStateMap>(); + ConsumedStmtVisitor Visitor(*this, CurrStates.get()); + + // Add all trackable parameters to the state map. + for (const auto *PI : D->parameters()) + Visitor.VisitParmVarDecl(PI); + + // Visit all of the function's basic blocks. + for (const auto *CurrBlock : *SortedGraph) { + if (!CurrStates) + CurrStates = BlockInfo.getInfo(CurrBlock); + + if (!CurrStates) { + continue; + } else if (!CurrStates->isReachable()) { + CurrStates = nullptr; + continue; + } + + Visitor.reset(CurrStates.get()); + + // Visit all of the basic block's statements. 
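+    // (Descriptive note: plain statements are dispatched through the
+    // ConsumedStmtVisitor below, while temporary and automatic-object
+    // destructors are checked against 'callable_when' on the destructor.)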
+ for (const auto &B : *CurrBlock) { + switch (B.getKind()) { + case CFGElement::Statement: + Visitor.Visit(B.castAs<CFGStmt>().getStmt()); + break; + + case CFGElement::TemporaryDtor: { + const CFGTemporaryDtor &DTor = B.castAs<CFGTemporaryDtor>(); + const CXXBindTemporaryExpr *BTE = DTor.getBindTemporaryExpr(); + + Visitor.checkCallability(PropagationInfo(BTE), + DTor.getDestructorDecl(AC.getASTContext()), + BTE->getExprLoc()); + CurrStates->remove(BTE); + break; + } + + case CFGElement::AutomaticObjectDtor: { + const CFGAutomaticObjDtor &DTor = B.castAs<CFGAutomaticObjDtor>(); + SourceLocation Loc = DTor.getTriggerStmt()->getEndLoc(); + const VarDecl *Var = DTor.getVarDecl(); + + Visitor.checkCallability(PropagationInfo(Var), + DTor.getDestructorDecl(AC.getASTContext()), + Loc); + break; + } + + default: + break; + } + } + + // TODO: Handle other forms of branching with precision, including while- + // and for-loops. (Deferred) + if (!splitState(CurrBlock, Visitor)) { + CurrStates->setSource(nullptr); + + if (CurrBlock->succ_size() > 1 || + (CurrBlock->succ_size() == 1 && + (*CurrBlock->succ_begin())->pred_size() > 1)) { + + auto *RawState = CurrStates.get(); + + for (CFGBlock::const_succ_iterator SI = CurrBlock->succ_begin(), + SE = CurrBlock->succ_end(); SI != SE; ++SI) { + if (*SI == nullptr) continue; + + if (BlockInfo.isBackEdge(CurrBlock, *SI)) { + BlockInfo.borrowInfo(*SI)->intersectAtLoopHead( + *SI, CurrBlock, RawState, WarningsHandler); + + if (BlockInfo.allBackEdgesVisited(CurrBlock, *SI)) + BlockInfo.discardInfo(*SI); + } else { + BlockInfo.addInfo(*SI, RawState, CurrStates); + } + } + + CurrStates = nullptr; + } + } + + if (CurrBlock == &AC.getCFG()->getExit() && + D->getCallResultType()->isVoidType()) + CurrStates->checkParamsForReturnTypestate(D->getLocation(), + WarningsHandler); + } // End of block iterator. + + // Delete the last existing state map. + CurrStates = nullptr; + + WarningsHandler.emitDiagnostics(); +} diff --git a/contrib/llvm-project/clang/lib/Analysis/Dominators.cpp b/contrib/llvm-project/clang/lib/Analysis/Dominators.cpp new file mode 100644 index 000000000000..f7ad68673d0f --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/Dominators.cpp @@ -0,0 +1,19 @@ +//===- Dominators.cpp - Implementation of dominators tree for Clang CFG ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/Dominators.h" + +namespace clang { + +template <> +void CFGDominatorTreeImpl</*IsPostDom=*/true>::anchor() {} + +template <> +void CFGDominatorTreeImpl</*IsPostDom=*/false>::anchor() {} + +} // end of namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/contrib/llvm-project/clang/lib/Analysis/ExprMutationAnalyzer.cpp new file mode 100644 index 000000000000..c876eaa6358a --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -0,0 +1,629 @@ +//===---------- ExprMutationAnalyzer.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Analysis/Analyses/ExprMutationAnalyzer.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/OperationKinds.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace clang {
+using namespace ast_matchers;
+
+namespace {
+
+AST_MATCHER_P(LambdaExpr, hasCaptureInit, const Expr *, E) {
+  return llvm::is_contained(Node.capture_inits(), E);
+}
+
+AST_MATCHER_P(CXXForRangeStmt, hasRangeStmt,
+              ast_matchers::internal::Matcher<DeclStmt>, InnerMatcher) {
+  const DeclStmt *const Range = Node.getRangeStmt();
+  return InnerMatcher.matches(*Range, Finder, Builder);
+}
+
+AST_MATCHER_P(Expr, maybeEvalCommaExpr, ast_matchers::internal::Matcher<Expr>,
+              InnerMatcher) {
+  const Expr *Result = &Node;
+  while (const auto *BOComma =
+             dyn_cast_or_null<BinaryOperator>(Result->IgnoreParens())) {
+    if (!BOComma->isCommaOp())
+      break;
+    Result = BOComma->getRHS();
+  }
+  return InnerMatcher.matches(*Result, Finder, Builder);
+}
+
+AST_MATCHER_P(Stmt, canResolveToExpr, ast_matchers::internal::Matcher<Stmt>,
+              InnerMatcher) {
+  auto *Exp = dyn_cast<Expr>(&Node);
+  if (!Exp) {
+    return stmt().matches(Node, Finder, Builder);
+  }
+
+  auto DerivedToBase = [](const ast_matchers::internal::Matcher<Expr> &Inner) {
+    return implicitCastExpr(anyOf(hasCastKind(CK_DerivedToBase),
+                                  hasCastKind(CK_UncheckedDerivedToBase)),
+                            hasSourceExpression(Inner));
+  };
+  auto IgnoreDerivedToBase =
+      [&DerivedToBase](const ast_matchers::internal::Matcher<Expr> &Inner) {
+        return ignoringParens(expr(anyOf(Inner, DerivedToBase(Inner))));
+      };
+
+  // The 'ConditionalOperator' matches on `<anything> ? <expr> : <expr>`.
+  // This matching must be recursive because `<expr>` can be anything resolving
+  // to the `InnerMatcher`, for example another conditional operator.
+  // The edge case `BaseClass &b = <cond> ? DerivedVar1 : DerivedVar2;`
+  // is handled, too. The implicit cast happens outside of the conditional.
+  // This is matched by `IgnoreDerivedToBase(canResolveToExpr(InnerMatcher))`
+  // below.
+  auto const ConditionalOperator = conditionalOperator(anyOf(
+      hasTrueExpression(ignoringParens(canResolveToExpr(InnerMatcher))),
+      hasFalseExpression(ignoringParens(canResolveToExpr(InnerMatcher)))));
+  auto const ElvisOperator = binaryConditionalOperator(anyOf(
+      hasTrueExpression(ignoringParens(canResolveToExpr(InnerMatcher))),
+      hasFalseExpression(ignoringParens(canResolveToExpr(InnerMatcher)))));
+
+  auto const ComplexMatcher = ignoringParens(
+      expr(anyOf(IgnoreDerivedToBase(InnerMatcher),
+                 maybeEvalCommaExpr(IgnoreDerivedToBase(InnerMatcher)),
+                 IgnoreDerivedToBase(ConditionalOperator),
+                 IgnoreDerivedToBase(ElvisOperator))));
+
+  return ComplexMatcher.matches(*Exp, Finder, Builder);
+}
+
+// Similar to 'hasAnyArgument', which cannot be used here because
+// 'InitListExpr' does not have the 'arguments()' method.
+AST_MATCHER_P(InitListExpr, hasAnyInit, ast_matchers::internal::Matcher<Expr>, + InnerMatcher) { + for (const Expr *Arg : Node.inits()) { + ast_matchers::internal::BoundNodesTreeBuilder Result(*Builder); + if (InnerMatcher.matches(*Arg, Finder, &Result)) { + *Builder = std::move(Result); + return true; + } + } + return false; +} + +const ast_matchers::internal::VariadicDynCastAllOfMatcher<Stmt, CXXTypeidExpr> + cxxTypeidExpr; + +AST_MATCHER(CXXTypeidExpr, isPotentiallyEvaluated) { + return Node.isPotentiallyEvaluated(); +} + +AST_MATCHER_P(GenericSelectionExpr, hasControllingExpr, + ast_matchers::internal::Matcher<Expr>, InnerMatcher) { + return InnerMatcher.matches(*Node.getControllingExpr(), Finder, Builder); +} + +const auto nonConstReferenceType = [] { + return hasUnqualifiedDesugaredType( + referenceType(pointee(unless(isConstQualified())))); +}; + +const auto nonConstPointerType = [] { + return hasUnqualifiedDesugaredType( + pointerType(pointee(unless(isConstQualified())))); +}; + +const auto isMoveOnly = [] { + return cxxRecordDecl( + hasMethod(cxxConstructorDecl(isMoveConstructor(), unless(isDeleted()))), + hasMethod(cxxMethodDecl(isMoveAssignmentOperator(), unless(isDeleted()))), + unless(anyOf(hasMethod(cxxConstructorDecl(isCopyConstructor(), + unless(isDeleted()))), + hasMethod(cxxMethodDecl(isCopyAssignmentOperator(), + unless(isDeleted())))))); +}; + +template <class T> struct NodeID; +template <> struct NodeID<Expr> { static constexpr StringRef value = "expr"; }; +template <> struct NodeID<Decl> { static constexpr StringRef value = "decl"; }; +constexpr StringRef NodeID<Expr>::value; +constexpr StringRef NodeID<Decl>::value; + +template <class T, class F = const Stmt *(ExprMutationAnalyzer::*)(const T *)> +const Stmt *tryEachMatch(ArrayRef<ast_matchers::BoundNodes> Matches, + ExprMutationAnalyzer *Analyzer, F Finder) { + const StringRef ID = NodeID<T>::value; + for (const auto &Nodes : Matches) { + if (const Stmt *S = (Analyzer->*Finder)(Nodes.getNodeAs<T>(ID))) + return S; + } + return nullptr; +} + +} // namespace + +const Stmt *ExprMutationAnalyzer::findMutation(const Expr *Exp) { + return findMutationMemoized(Exp, + {&ExprMutationAnalyzer::findDirectMutation, + &ExprMutationAnalyzer::findMemberMutation, + &ExprMutationAnalyzer::findArrayElementMutation, + &ExprMutationAnalyzer::findCastMutation, + &ExprMutationAnalyzer::findRangeLoopMutation, + &ExprMutationAnalyzer::findReferenceMutation, + &ExprMutationAnalyzer::findFunctionArgMutation}, + Results); +} + +const Stmt *ExprMutationAnalyzer::findMutation(const Decl *Dec) { + return tryEachDeclRef(Dec, &ExprMutationAnalyzer::findMutation); +} + +const Stmt *ExprMutationAnalyzer::findPointeeMutation(const Expr *Exp) { + return findMutationMemoized(Exp, {/*TODO*/}, PointeeResults); +} + +const Stmt *ExprMutationAnalyzer::findPointeeMutation(const Decl *Dec) { + return tryEachDeclRef(Dec, &ExprMutationAnalyzer::findPointeeMutation); +} + +const Stmt *ExprMutationAnalyzer::findMutationMemoized( + const Expr *Exp, llvm::ArrayRef<MutationFinder> Finders, + ResultMap &MemoizedResults) { + const auto Memoized = MemoizedResults.find(Exp); + if (Memoized != MemoizedResults.end()) + return Memoized->second; + + if (isUnevaluated(Exp)) + return MemoizedResults[Exp] = nullptr; + + for (const auto &Finder : Finders) { + if (const Stmt *S = (this->*Finder)(Exp)) + return MemoizedResults[Exp] = S; + } + + return MemoizedResults[Exp] = nullptr; +} + +const Stmt *ExprMutationAnalyzer::tryEachDeclRef(const Decl *Dec, + MutationFinder Finder) 
{ + const auto Refs = + match(findAll(declRefExpr(to(equalsNode(Dec))).bind(NodeID<Expr>::value)), + Stm, Context); + for (const auto &RefNodes : Refs) { + const auto *E = RefNodes.getNodeAs<Expr>(NodeID<Expr>::value); + if ((this->*Finder)(E)) + return E; + } + return nullptr; +} + +bool ExprMutationAnalyzer::isUnevaluated(const Stmt *Exp, const Stmt &Stm, + ASTContext &Context) { + return selectFirst<Stmt>( + NodeID<Expr>::value, + match( + findAll( + stmt(canResolveToExpr(equalsNode(Exp)), + anyOf( + // `Exp` is part of the underlying expression of + // decltype/typeof if it has an ancestor of + // typeLoc. + hasAncestor(typeLoc(unless( + hasAncestor(unaryExprOrTypeTraitExpr())))), + hasAncestor(expr(anyOf( + // `UnaryExprOrTypeTraitExpr` is unevaluated + // unless it's sizeof on VLA. + unaryExprOrTypeTraitExpr(unless(sizeOfExpr( + hasArgumentOfType(variableArrayType())))), + // `CXXTypeidExpr` is unevaluated unless it's + // applied to an expression of glvalue of + // polymorphic class type. + cxxTypeidExpr( + unless(isPotentiallyEvaluated())), + // The controlling expression of + // `GenericSelectionExpr` is unevaluated. + genericSelectionExpr(hasControllingExpr( + hasDescendant(equalsNode(Exp)))), + cxxNoexceptExpr()))))) + .bind(NodeID<Expr>::value)), + Stm, Context)) != nullptr; +} + +bool ExprMutationAnalyzer::isUnevaluated(const Expr *Exp) { + return isUnevaluated(Exp, Stm, Context); +} + +const Stmt * +ExprMutationAnalyzer::findExprMutation(ArrayRef<BoundNodes> Matches) { + return tryEachMatch<Expr>(Matches, this, &ExprMutationAnalyzer::findMutation); +} + +const Stmt * +ExprMutationAnalyzer::findDeclMutation(ArrayRef<BoundNodes> Matches) { + return tryEachMatch<Decl>(Matches, this, &ExprMutationAnalyzer::findMutation); +} + +const Stmt *ExprMutationAnalyzer::findExprPointeeMutation( + ArrayRef<ast_matchers::BoundNodes> Matches) { + return tryEachMatch<Expr>(Matches, this, + &ExprMutationAnalyzer::findPointeeMutation); +} + +const Stmt *ExprMutationAnalyzer::findDeclPointeeMutation( + ArrayRef<ast_matchers::BoundNodes> Matches) { + return tryEachMatch<Decl>(Matches, this, + &ExprMutationAnalyzer::findPointeeMutation); +} + +const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { + // LHS of any assignment operators. + const auto AsAssignmentLhs = binaryOperator( + isAssignmentOperator(), hasLHS(canResolveToExpr(equalsNode(Exp)))); + + // Operand of increment/decrement operators. + const auto AsIncDecOperand = + unaryOperator(anyOf(hasOperatorName("++"), hasOperatorName("--")), + hasUnaryOperand(canResolveToExpr(equalsNode(Exp)))); + + // Invoking non-const member function. + // A member function is assumed to be non-const when it is unresolved. + const auto NonConstMethod = cxxMethodDecl(unless(isConst())); + + const auto AsNonConstThis = expr(anyOf( + cxxMemberCallExpr(callee(NonConstMethod), + on(canResolveToExpr(equalsNode(Exp)))), + cxxOperatorCallExpr(callee(NonConstMethod), + hasArgument(0, canResolveToExpr(equalsNode(Exp)))), + // In case of a templated type, calling overloaded operators is not + // resolved and modelled as `binaryOperator` on a dependent type. + // Such instances are considered a modification, because they can modify + // in different instantiations of the template. + binaryOperator(hasEitherOperand( + allOf(ignoringImpCasts(canResolveToExpr(equalsNode(Exp))), + isTypeDependent()))), + // Within class templates and member functions the member expression might + // not be resolved. 
In that case, the `callExpr` is considered to be a + // modification. + callExpr( + callee(expr(anyOf(unresolvedMemberExpr(hasObjectExpression( + canResolveToExpr(equalsNode(Exp)))), + cxxDependentScopeMemberExpr(hasObjectExpression( + canResolveToExpr(equalsNode(Exp)))))))), + // Match on a call to a known method, but the call itself is type + // dependent (e.g. `vector<T> v; v.push(T{});` in a templated function). + callExpr(allOf(isTypeDependent(), + callee(memberExpr(hasDeclaration(NonConstMethod), + hasObjectExpression(canResolveToExpr( + equalsNode(Exp))))))))); + + // Taking address of 'Exp'. + // We're assuming 'Exp' is mutated as soon as its address is taken, though in + // theory we can follow the pointer and see whether it escaped `Stm` or is + // dereferenced and then mutated. This is left for future improvements. + const auto AsAmpersandOperand = + unaryOperator(hasOperatorName("&"), + // A NoOp implicit cast is adding const. + unless(hasParent(implicitCastExpr(hasCastKind(CK_NoOp)))), + hasUnaryOperand(canResolveToExpr(equalsNode(Exp)))); + const auto AsPointerFromArrayDecay = + castExpr(hasCastKind(CK_ArrayToPointerDecay), + unless(hasParent(arraySubscriptExpr())), + has(canResolveToExpr(equalsNode(Exp)))); + // Treat calling `operator->()` of move-only classes as taking address. + // These are typically smart pointers with unique ownership so we treat + // mutation of pointee as mutation of the smart pointer itself. + const auto AsOperatorArrowThis = cxxOperatorCallExpr( + hasOverloadedOperatorName("->"), + callee( + cxxMethodDecl(ofClass(isMoveOnly()), returns(nonConstPointerType()))), + argumentCountIs(1), hasArgument(0, canResolveToExpr(equalsNode(Exp)))); + + // Used as non-const-ref argument when calling a function. + // An argument is assumed to be non-const-ref when the function is unresolved. + // Instantiated template functions are not handled here but in + // findFunctionArgMutation which has additional smarts for handling forwarding + // references. + const auto NonConstRefParam = forEachArgumentWithParamType( + anyOf(canResolveToExpr(equalsNode(Exp)), + memberExpr(hasObjectExpression(canResolveToExpr(equalsNode(Exp))))), + nonConstReferenceType()); + const auto NotInstantiated = unless(hasDeclaration(isInstantiated())); + const auto TypeDependentCallee = + callee(expr(anyOf(unresolvedLookupExpr(), unresolvedMemberExpr(), + cxxDependentScopeMemberExpr(), + hasType(templateTypeParmType()), isTypeDependent()))); + + const auto AsNonConstRefArg = anyOf( + callExpr(NonConstRefParam, NotInstantiated), + cxxConstructExpr(NonConstRefParam, NotInstantiated), + callExpr(TypeDependentCallee, + hasAnyArgument(canResolveToExpr(equalsNode(Exp)))), + cxxUnresolvedConstructExpr( + hasAnyArgument(canResolveToExpr(equalsNode(Exp)))), + // Previous False Positive in the following Code: + // `template <typename T> void f() { int i = 42; new Type<T>(i); }` + // Where the constructor of `Type` takes its argument as reference. + // The AST does not resolve in a `cxxConstructExpr` because it is + // type-dependent. + parenListExpr(hasDescendant(expr(canResolveToExpr(equalsNode(Exp))))), + // If the initializer is for a reference type, there is no cast for + // the variable. Values are cast to RValue first. + initListExpr(hasAnyInit(expr(canResolveToExpr(equalsNode(Exp)))))); + + // Captured by a lambda by reference. + // If we're initializing a capture with 'Exp' directly then we're initializing + // a reference capture. 
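+  // Editor's illustration (not in the original source): in
+  //   auto L = [&x] { x = 1; };
+  // the capture init for `x` is the `DeclRefExpr` itself (no cast), so
+  // `hasCaptureInit(Exp)` fires when `Exp` is exactly that node.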
+  // For value captures there will be an ImplicitCastExpr <LValueToRValue>.
+  const auto AsLambdaRefCaptureInit = lambdaExpr(hasCaptureInit(Exp));
+
+  // Returned as non-const-ref.
+  // If we're returning 'Exp' directly then it's returned as non-const-ref.
+  // For returning by value there will be an ImplicitCastExpr <LValueToRValue>.
+  // For returning by const-ref there will be an ImplicitCastExpr <NoOp> (for
+  // adding const.)
+  const auto AsNonConstRefReturn =
+      returnStmt(hasReturnValue(canResolveToExpr(equalsNode(Exp))));
+
+  // It is used as a non-const reference for initializing a range-for loop.
+  const auto AsNonConstRefRangeInit = cxxForRangeStmt(
+      hasRangeInit(declRefExpr(allOf(canResolveToExpr(equalsNode(Exp)),
+                                     hasType(nonConstReferenceType())))));
+
+  const auto Matches = match(
+      traverse(TK_AsIs,
+               findAll(stmt(anyOf(AsAssignmentLhs, AsIncDecOperand,
+                                  AsNonConstThis, AsAmpersandOperand,
+                                  AsPointerFromArrayDecay, AsOperatorArrowThis,
+                                  AsNonConstRefArg, AsLambdaRefCaptureInit,
+                                  AsNonConstRefReturn, AsNonConstRefRangeInit))
+                           .bind("stmt"))),
+      Stm, Context);
+  return selectFirst<Stmt>("stmt", Matches);
+}
+
+const Stmt *ExprMutationAnalyzer::findMemberMutation(const Expr *Exp) {
+  // Check whether any member of 'Exp' is mutated.
+  const auto MemberExprs =
+      match(findAll(expr(anyOf(memberExpr(hasObjectExpression(
+                                   canResolveToExpr(equalsNode(Exp)))),
+                               cxxDependentScopeMemberExpr(hasObjectExpression(
+                                   canResolveToExpr(equalsNode(Exp))))))
+                        .bind(NodeID<Expr>::value)),
+            Stm, Context);
+  return findExprMutation(MemberExprs);
+}
+
+const Stmt *ExprMutationAnalyzer::findArrayElementMutation(const Expr *Exp) {
+  // Check whether any element of an array is mutated.
+  const auto SubscriptExprs =
+      match(findAll(arraySubscriptExpr(
+                        anyOf(hasBase(canResolveToExpr(equalsNode(Exp))),
+                              hasBase(implicitCastExpr(
+                                  allOf(hasCastKind(CK_ArrayToPointerDecay),
+                                        hasSourceExpression(canResolveToExpr(
+                                            equalsNode(Exp))))))))
+                        .bind(NodeID<Expr>::value)),
+            Stm, Context);
+  return findExprMutation(SubscriptExprs);
+}
+
+const Stmt *ExprMutationAnalyzer::findCastMutation(const Expr *Exp) {
+  // If 'Exp' is explicitly cast to a non-const reference type, 'Exp' is
+  // considered to be modified.
+  const auto ExplicitCast = match(
+      findAll(
+          stmt(castExpr(hasSourceExpression(canResolveToExpr(equalsNode(Exp))),
+                        explicitCastExpr(
+                            hasDestinationType(nonConstReferenceType()))))
+              .bind("stmt")),
+      Stm, Context);
+
+  if (const auto *CastStmt = selectFirst<Stmt>("stmt", ExplicitCast))
+    return CastStmt;
+
+  // If 'Exp' is cast to any non-const reference type, check the castExpr.
+  const auto Casts = match(
+      findAll(
+          expr(castExpr(hasSourceExpression(canResolveToExpr(equalsNode(Exp))),
+                        anyOf(explicitCastExpr(
+                                  hasDestinationType(nonConstReferenceType())),
+                              implicitCastExpr(hasImplicitDestinationType(
+                                  nonConstReferenceType())))))
+              .bind(NodeID<Expr>::value)),
+      Stm, Context);
+
+  if (const Stmt *S = findExprMutation(Casts))
+    return S;
+  // Treat std::{move,forward} as cast.
+  const auto Calls =
+      match(findAll(callExpr(callee(namedDecl(
+                                 hasAnyName("::std::move", "::std::forward"))),
+                             hasArgument(0, canResolveToExpr(equalsNode(Exp))))
+                        .bind("expr")),
+            Stm, Context);
+  return findExprMutation(Calls);
+}
+
+const Stmt *ExprMutationAnalyzer::findRangeLoopMutation(const Expr *Exp) {
+  // Keep the ordering so that the more specific initialization matches run
+  // first; that is cheaper than matching all potential modifications of the
+  // loop variable.
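+  // Editor's illustration (not in the original source): in
+  //   int Arr[3] = {0, 1, 2};
+  //   for (int &E : Arr) E = 0;
+  // the range init `Arr` counts as mutated, because the loop variable is a
+  // non-const reference to its elements.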
+
+  // The range variable is a reference to a builtin array. In that case the
+  // array is considered modified if the loop variable is a non-const
+  // reference.
+  const auto DeclStmtToNonRefToArray = declStmt(hasSingleDecl(varDecl(hasType(
+      hasUnqualifiedDesugaredType(referenceType(pointee(arrayType())))))));
+  const auto RefToArrayRefToElements =
+      match(findAll(stmt(cxxForRangeStmt(
+                             hasLoopVariable(
+                                 varDecl(anyOf(hasType(nonConstReferenceType()),
+                                               hasType(nonConstPointerType())))
+                                     .bind(NodeID<Decl>::value)),
+                             hasRangeStmt(DeclStmtToNonRefToArray),
+                             hasRangeInit(canResolveToExpr(equalsNode(Exp)))))
+                        .bind("stmt")),
+            Stm, Context);
+
+  if (const auto *BadRangeInitFromArray =
+          selectFirst<Stmt>("stmt", RefToArrayRefToElements))
+    return BadRangeInitFromArray;
+
+  // Small helper to match special cases in range-for loops.
+  //
+  // It is possible that containers do not provide a const overload for their
+  // iterator accessors. If this is the case, the variable is used non-const
+  // no matter what happens in the loop. This requires special detection, and
+  // it is then faster to simply find all mutations of the loop variable.
+  // This check also targets a different kind of modification than the array
+  // case above.
+  const auto HasAnyNonConstIterator =
+      anyOf(allOf(hasMethod(allOf(hasName("begin"), unless(isConst()))),
+                  unless(hasMethod(allOf(hasName("begin"), isConst())))),
+            allOf(hasMethod(allOf(hasName("end"), unless(isConst()))),
+                  unless(hasMethod(allOf(hasName("end"), isConst())))));
+
+  const auto DeclStmtToNonConstIteratorContainer = declStmt(
+      hasSingleDecl(varDecl(hasType(hasUnqualifiedDesugaredType(referenceType(
+          pointee(hasDeclaration(cxxRecordDecl(HasAnyNonConstIterator)))))))));
+
+  const auto RefToContainerBadIterators =
+      match(findAll(stmt(cxxForRangeStmt(allOf(
+                             hasRangeStmt(DeclStmtToNonConstIteratorContainer),
+                             hasRangeInit(canResolveToExpr(equalsNode(Exp))))))
+                        .bind("stmt")),
+            Stm, Context);
+
+  if (const auto *BadIteratorsContainer =
+          selectFirst<Stmt>("stmt", RefToContainerBadIterators))
+    return BadIteratorsContainer;
+
+  // If the range-for loops over 'Exp' with a non-const reference loop
+  // variable, check all declRefExpr of the loop variable.
+  const auto LoopVars =
+      match(findAll(cxxForRangeStmt(
+                hasLoopVariable(varDecl(hasType(nonConstReferenceType()))
+                                    .bind(NodeID<Decl>::value)),
+                hasRangeInit(canResolveToExpr(equalsNode(Exp))))),
+            Stm, Context);
+  return findDeclMutation(LoopVars);
+}
+
+const Stmt *ExprMutationAnalyzer::findReferenceMutation(const Expr *Exp) {
+  // Follow a non-const reference returned by `operator*()` of move-only
+  // classes. These are typically smart pointers with unique ownership, so we
+  // treat mutation of the pointee as mutation of the smart pointer itself.
+  const auto Ref =
+      match(findAll(cxxOperatorCallExpr(
+                        hasOverloadedOperatorName("*"),
+                        callee(cxxMethodDecl(ofClass(isMoveOnly()),
+                                             returns(nonConstReferenceType()))),
+                        argumentCountIs(1),
+                        hasArgument(0, canResolveToExpr(equalsNode(Exp))))
+                        .bind(NodeID<Expr>::value)),
+            Stm, Context);
+  if (const Stmt *S = findExprMutation(Ref))
+    return S;
+
+  // If 'Exp' is bound to a non-const reference, check all declRefExpr to that.
+  const auto Refs = match(
+      stmt(forEachDescendant(
+          varDecl(
+              hasType(nonConstReferenceType()),
+              hasInitializer(anyOf(canResolveToExpr(equalsNode(Exp)),
+                                   memberExpr(hasObjectExpression(
+                                       canResolveToExpr(equalsNode(Exp)))))),
+              hasParent(declStmt().bind("stmt")),
+              // Don't follow the reference in a range statement; we've
+              // handled that separately.
+              unless(hasParent(declStmt(hasParent(cxxForRangeStmt(
+                  hasRangeStmt(equalsBoundNode("stmt"))))))))
+              .bind(NodeID<Decl>::value))),
+      Stm, Context);
+  return findDeclMutation(Refs);
+}
+
+const Stmt *ExprMutationAnalyzer::findFunctionArgMutation(const Expr *Exp) {
+  const auto NonConstRefParam = forEachArgumentWithParam(
+      canResolveToExpr(equalsNode(Exp)),
+      parmVarDecl(hasType(nonConstReferenceType())).bind("parm"));
+  const auto IsInstantiated = hasDeclaration(isInstantiated());
+  const auto FuncDecl = hasDeclaration(functionDecl().bind("func"));
+  const auto Matches = match(
+      traverse(
+          TK_AsIs,
+          findAll(
+              expr(anyOf(callExpr(NonConstRefParam, IsInstantiated, FuncDecl,
+                                  unless(callee(namedDecl(hasAnyName(
+                                      "::std::move", "::std::forward"))))),
+                         cxxConstructExpr(NonConstRefParam, IsInstantiated,
+                                          FuncDecl)))
+                  .bind(NodeID<Expr>::value))),
+      Stm, Context);
+  for (const auto &Nodes : Matches) {
+    const auto *Exp = Nodes.getNodeAs<Expr>(NodeID<Expr>::value);
+    const auto *Func = Nodes.getNodeAs<FunctionDecl>("func");
+    if (!Func->getBody() || !Func->getPrimaryTemplate())
+      return Exp;
+
+    const auto *Parm = Nodes.getNodeAs<ParmVarDecl>("parm");
+    const ArrayRef<ParmVarDecl *> AllParams =
+        Func->getPrimaryTemplate()->getTemplatedDecl()->parameters();
+    QualType ParmType =
+        AllParams[std::min<size_t>(Parm->getFunctionScopeIndex(),
+                                   AllParams.size() - 1)]
+            ->getType();
+    if (const auto *T = ParmType->getAs<PackExpansionType>())
+      ParmType = T->getPattern();
+
+    // If the param type is a forwarding reference, follow into the function
+    // definition and see whether the param is mutated inside.
+    if (const auto *RefType = ParmType->getAs<RValueReferenceType>()) {
+      if (!RefType->getPointeeType().getQualifiers() &&
+          RefType->getPointeeType()->getAs<TemplateTypeParmType>()) {
+        std::unique_ptr<FunctionParmMutationAnalyzer> &Analyzer =
+            FuncParmAnalyzer[Func];
+        if (!Analyzer)
+          Analyzer.reset(new FunctionParmMutationAnalyzer(*Func, Context));
+        if (Analyzer->findMutation(Parm))
+          return Exp;
+        continue;
+      }
+    }
+    // Not a forwarding reference.
+    return Exp;
+  }
+  return nullptr;
+}
+
+FunctionParmMutationAnalyzer::FunctionParmMutationAnalyzer(
+    const FunctionDecl &Func, ASTContext &Context)
+    : BodyAnalyzer(*Func.getBody(), Context) {
+  if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(&Func)) {
+    // A CXXCtorInitializer might also mutate a param, but initializers are
+    // not part of the function body, so check them eagerly here; they're
+    // typically trivial.
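+    // Editor's illustration (not in the original source): in
+    //   struct S { S(int P) : M(P += 1) {} int M; };
+    // the initializer `M(P += 1)` mutates the parameter `P` before the
+    // (empty) constructor body runs.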
+ for (const CXXCtorInitializer *Init : Ctor->inits()) { + ExprMutationAnalyzer InitAnalyzer(*Init->getInit(), Context); + for (const ParmVarDecl *Parm : Ctor->parameters()) { + if (Results.find(Parm) != Results.end()) + continue; + if (const Stmt *S = InitAnalyzer.findMutation(Parm)) + Results[Parm] = S; + } + } + } +} + +const Stmt * +FunctionParmMutationAnalyzer::findMutation(const ParmVarDecl *Parm) { + const auto Memoized = Results.find(Parm); + if (Memoized != Results.end()) + return Memoized->second; + + if (const Stmt *S = BodyAnalyzer.findMutation(Parm)) + return Results[Parm] = S; + + return Results[Parm] = nullptr; +} + +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp new file mode 100644 index 000000000000..2492b5203724 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp @@ -0,0 +1,71 @@ +//===- ControlFlowContext.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a ControlFlowContext class that is used by dataflow +// analyses that run over Control-Flow Graphs (CFGs). +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Stmt.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Error.h" +#include <utility> + +namespace clang { +namespace dataflow { + +/// Returns a map from statements to basic blocks that contain them. +static llvm::DenseMap<const Stmt *, const CFGBlock *> +buildStmtToBasicBlockMap(const CFG &Cfg) { + llvm::DenseMap<const Stmt *, const CFGBlock *> StmtToBlock; + for (const CFGBlock *Block : Cfg) { + if (Block == nullptr) + continue; + + for (const CFGElement &Element : *Block) { + auto Stmt = Element.getAs<CFGStmt>(); + if (!Stmt) + continue; + + StmtToBlock[Stmt->getStmt()] = Block; + } + if (const Stmt *TerminatorStmt = Block->getTerminatorStmt()) + StmtToBlock[TerminatorStmt] = Block; + } + return StmtToBlock; +} + +llvm::Expected<ControlFlowContext> +ControlFlowContext::build(const Decl *D, Stmt &S, ASTContext &C) { + CFG::BuildOptions Options; + Options.PruneTriviallyFalseEdges = false; + Options.AddImplicitDtors = true; + Options.AddTemporaryDtors = true; + Options.AddInitializers = true; + Options.AddCXXDefaultInitExprInCtors = true; + + // Ensure that all sub-expressions in basic blocks are evaluated. 
+  Options.setAllAlwaysAdd();
+
+  auto Cfg = CFG::buildCFG(D, &S, &C, Options);
+  if (Cfg == nullptr)
+    return llvm::createStringError(
+        std::make_error_code(std::errc::invalid_argument),
+        "CFG::buildCFG failed");
+
+  llvm::DenseMap<const Stmt *, const CFGBlock *> StmtToBlock =
+      buildStmtToBasicBlockMap(*Cfg);
+  return ControlFlowContext(D, std::move(Cfg), std::move(StmtToBlock));
+}
+
+} // namespace dataflow
+} // namespace clang
diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp new file mode 100644 index 000000000000..480606bdac8d --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -0,0 +1,422 @@
+//===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DataflowAnalysisContext class that owns objects that
+// encompass the state of a program and stores context that is used during
+// dataflow analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/Analysis/FlowSensitive/DebugSupport.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Support/Debug.h"
+#include <cassert>
+#include <memory>
+#include <utility>
+
+namespace clang {
+namespace dataflow {
+
+void DataflowAnalysisContext::addModeledFields(
+    const llvm::DenseSet<const FieldDecl *> &Fields) {
+  llvm::set_union(ModeledFields, Fields);
+}
+
+llvm::DenseSet<const FieldDecl *>
+DataflowAnalysisContext::getReferencedFields(QualType Type) {
+  llvm::DenseSet<const FieldDecl *> Fields = getObjectFields(Type);
+  llvm::set_intersect(Fields, ModeledFields);
+  return Fields;
+}
+
+StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) {
+  if (!Type.isNull() &&
+      (Type->isStructureOrClassType() || Type->isUnionType())) {
+    llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
+    // During context-sensitive analysis, a struct may be allocated in one
+    // function, but its field accessed in a function lower in the stack than
+    // the allocation. Since we only collect fields used in the function where
+    // the allocation occurs, we can't apply that filter when performing
+    // context-sensitive analysis. But this only applies to storage locations,
+    // since field access is not allowed to fail. In contrast, field *values*
+    // don't need this allowance, since the API allows for uninitialized fields.
+    auto Fields = Opts.ContextSensitiveOpts ?
getObjectFields(Type) + : getReferencedFields(Type); + for (const FieldDecl *Field : Fields) + FieldLocs.insert({Field, &createStorageLocation(Field->getType())}); + return takeOwnership( + std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs))); + } + return takeOwnership(std::make_unique<ScalarStorageLocation>(Type)); +} + +StorageLocation & +DataflowAnalysisContext::getStableStorageLocation(const VarDecl &D) { + if (auto *Loc = getStorageLocation(D)) + return *Loc; + auto &Loc = createStorageLocation(D.getType()); + setStorageLocation(D, Loc); + return Loc; +} + +StorageLocation & +DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { + if (auto *Loc = getStorageLocation(E)) + return *Loc; + auto &Loc = createStorageLocation(E.getType()); + setStorageLocation(E, Loc); + return Loc; +} + +PointerValue & +DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { + auto CanonicalPointeeType = + PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); + auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); + if (Res.second) { + auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); + Res.first->second = + &takeOwnership(std::make_unique<PointerValue>(PointeeLoc)); + } + return *Res.first->second; +} + +static std::pair<BoolValue *, BoolValue *> +makeCanonicalBoolValuePair(BoolValue &LHS, BoolValue &RHS) { + auto Res = std::make_pair(&LHS, &RHS); + if (&RHS < &LHS) + std::swap(Res.first, Res.second); + return Res; +} + +BoolValue &DataflowAnalysisContext::getOrCreateConjunction(BoolValue &LHS, + BoolValue &RHS) { + if (&LHS == &RHS) + return LHS; + + auto Res = ConjunctionVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS), + nullptr); + if (Res.second) + Res.first->second = + &takeOwnership(std::make_unique<ConjunctionValue>(LHS, RHS)); + return *Res.first->second; +} + +BoolValue &DataflowAnalysisContext::getOrCreateDisjunction(BoolValue &LHS, + BoolValue &RHS) { + if (&LHS == &RHS) + return LHS; + + auto Res = DisjunctionVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS), + nullptr); + if (Res.second) + Res.first->second = + &takeOwnership(std::make_unique<DisjunctionValue>(LHS, RHS)); + return *Res.first->second; +} + +BoolValue &DataflowAnalysisContext::getOrCreateNegation(BoolValue &Val) { + auto Res = NegationVals.try_emplace(&Val, nullptr); + if (Res.second) + Res.first->second = &takeOwnership(std::make_unique<NegationValue>(Val)); + return *Res.first->second; +} + +BoolValue &DataflowAnalysisContext::getOrCreateImplication(BoolValue &LHS, + BoolValue &RHS) { + if (&LHS == &RHS) + return getBoolLiteralValue(true); + + auto Res = ImplicationVals.try_emplace(std::make_pair(&LHS, &RHS), nullptr); + if (Res.second) + Res.first->second = + &takeOwnership(std::make_unique<ImplicationValue>(LHS, RHS)); + return *Res.first->second; +} + +BoolValue &DataflowAnalysisContext::getOrCreateIff(BoolValue &LHS, + BoolValue &RHS) { + if (&LHS == &RHS) + return getBoolLiteralValue(true); + + auto Res = BiconditionalVals.try_emplace(makeCanonicalBoolValuePair(LHS, RHS), + nullptr); + if (Res.second) + Res.first->second = + &takeOwnership(std::make_unique<BiconditionalValue>(LHS, RHS)); + return *Res.first->second; +} + +AtomicBoolValue &DataflowAnalysisContext::makeFlowConditionToken() { + return createAtomicBoolValue(); +} + +void DataflowAnalysisContext::addFlowConditionConstraint( + AtomicBoolValue &Token, BoolValue &Constraint) { + auto Res = FlowConditionConstraints.try_emplace(&Token, &Constraint); + if 
(!Res.second) { + Res.first->second = &getOrCreateConjunction(*Res.first->second, Constraint); + } +} + +AtomicBoolValue & +DataflowAnalysisContext::forkFlowCondition(AtomicBoolValue &Token) { + auto &ForkToken = makeFlowConditionToken(); + FlowConditionDeps[&ForkToken].insert(&Token); + addFlowConditionConstraint(ForkToken, Token); + return ForkToken; +} + +AtomicBoolValue & +DataflowAnalysisContext::joinFlowConditions(AtomicBoolValue &FirstToken, + AtomicBoolValue &SecondToken) { + auto &Token = makeFlowConditionToken(); + FlowConditionDeps[&Token].insert(&FirstToken); + FlowConditionDeps[&Token].insert(&SecondToken); + addFlowConditionConstraint(Token, + getOrCreateDisjunction(FirstToken, SecondToken)); + return Token; +} + +Solver::Result +DataflowAnalysisContext::querySolver(llvm::DenseSet<BoolValue *> Constraints) { + Constraints.insert(&getBoolLiteralValue(true)); + Constraints.insert(&getOrCreateNegation(getBoolLiteralValue(false))); + return S->solve(std::move(Constraints)); +} + +bool DataflowAnalysisContext::flowConditionImplies(AtomicBoolValue &Token, + BoolValue &Val) { + // Returns true if and only if truth assignment of the flow condition implies + // that `Val` is also true. We prove whether or not this property holds by + // reducing the problem to satisfiability checking. In other words, we attempt + // to show that assuming `Val` is false makes the constraints induced by the + // flow condition unsatisfiable. + llvm::DenseSet<BoolValue *> Constraints = {&Token, &getOrCreateNegation(Val)}; + llvm::DenseSet<AtomicBoolValue *> VisitedTokens; + addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); + return isUnsatisfiable(std::move(Constraints)); +} + +bool DataflowAnalysisContext::flowConditionIsTautology(AtomicBoolValue &Token) { + // Returns true if and only if we cannot prove that the flow condition can + // ever be false. + llvm::DenseSet<BoolValue *> Constraints = {&getOrCreateNegation(Token)}; + llvm::DenseSet<AtomicBoolValue *> VisitedTokens; + addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); + return isUnsatisfiable(std::move(Constraints)); +} + +bool DataflowAnalysisContext::equivalentBoolValues(BoolValue &Val1, + BoolValue &Val2) { + llvm::DenseSet<BoolValue *> Constraints = { + &getOrCreateNegation(getOrCreateIff(Val1, Val2))}; + return isUnsatisfiable(Constraints); +} + +void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( + AtomicBoolValue &Token, llvm::DenseSet<BoolValue *> &Constraints, + llvm::DenseSet<AtomicBoolValue *> &VisitedTokens) { + auto Res = VisitedTokens.insert(&Token); + if (!Res.second) + return; + + auto ConstraintsIt = FlowConditionConstraints.find(&Token); + if (ConstraintsIt == FlowConditionConstraints.end()) { + Constraints.insert(&Token); + } else { + // Bind flow condition token via `iff` to its set of constraints: + // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints + Constraints.insert(&getOrCreateIff(Token, *ConstraintsIt->second)); + } + + auto DepsIt = FlowConditionDeps.find(&Token); + if (DepsIt != FlowConditionDeps.end()) { + for (AtomicBoolValue *DepToken : DepsIt->second) { + addTransitiveFlowConditionConstraints(*DepToken, Constraints, + VisitedTokens); + } + } +} + +BoolValue &DataflowAnalysisContext::substituteBoolValue( + BoolValue &Val, + llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache) { + auto It = SubstitutionsCache.find(&Val); + if (It != SubstitutionsCache.end()) { + // Return memoized result of substituting this boolean value. 
+ return *It->second; + } + + // Handle substitution on the boolean value (and its subvalues), saving the + // result into `SubstitutionsCache`. + BoolValue *Result; + switch (Val.getKind()) { + case Value::Kind::AtomicBool: { + Result = &Val; + break; + } + case Value::Kind::Negation: { + auto &Negation = *cast<NegationValue>(&Val); + auto &Sub = substituteBoolValue(Negation.getSubVal(), SubstitutionsCache); + Result = &getOrCreateNegation(Sub); + break; + } + case Value::Kind::Disjunction: { + auto &Disjunct = *cast<DisjunctionValue>(&Val); + auto &LeftSub = + substituteBoolValue(Disjunct.getLeftSubValue(), SubstitutionsCache); + auto &RightSub = + substituteBoolValue(Disjunct.getRightSubValue(), SubstitutionsCache); + Result = &getOrCreateDisjunction(LeftSub, RightSub); + break; + } + case Value::Kind::Conjunction: { + auto &Conjunct = *cast<ConjunctionValue>(&Val); + auto &LeftSub = + substituteBoolValue(Conjunct.getLeftSubValue(), SubstitutionsCache); + auto &RightSub = + substituteBoolValue(Conjunct.getRightSubValue(), SubstitutionsCache); + Result = &getOrCreateConjunction(LeftSub, RightSub); + break; + } + case Value::Kind::Implication: { + auto &IV = *cast<ImplicationValue>(&Val); + auto &LeftSub = + substituteBoolValue(IV.getLeftSubValue(), SubstitutionsCache); + auto &RightSub = + substituteBoolValue(IV.getRightSubValue(), SubstitutionsCache); + Result = &getOrCreateImplication(LeftSub, RightSub); + break; + } + case Value::Kind::Biconditional: { + auto &BV = *cast<BiconditionalValue>(&Val); + auto &LeftSub = + substituteBoolValue(BV.getLeftSubValue(), SubstitutionsCache); + auto &RightSub = + substituteBoolValue(BV.getRightSubValue(), SubstitutionsCache); + Result = &getOrCreateIff(LeftSub, RightSub); + break; + } + default: + llvm_unreachable("Unhandled Value Kind"); + } + SubstitutionsCache[&Val] = Result; + return *Result; +} + +BoolValue &DataflowAnalysisContext::buildAndSubstituteFlowCondition( + AtomicBoolValue &Token, + llvm::DenseMap<AtomicBoolValue *, BoolValue *> Substitutions) { + assert( + Substitutions.find(&getBoolLiteralValue(true)) == Substitutions.end() && + Substitutions.find(&getBoolLiteralValue(false)) == Substitutions.end() && + "Do not substitute true/false boolean literals"); + llvm::DenseMap<BoolValue *, BoolValue *> SubstitutionsCache( + Substitutions.begin(), Substitutions.end()); + return buildAndSubstituteFlowConditionWithCache(Token, SubstitutionsCache); +} + +BoolValue &DataflowAnalysisContext::buildAndSubstituteFlowConditionWithCache( + AtomicBoolValue &Token, + llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache) { + auto ConstraintsIt = FlowConditionConstraints.find(&Token); + if (ConstraintsIt == FlowConditionConstraints.end()) { + return getBoolLiteralValue(true); + } + auto DepsIt = FlowConditionDeps.find(&Token); + if (DepsIt != FlowConditionDeps.end()) { + for (AtomicBoolValue *DepToken : DepsIt->second) { + auto &NewDep = buildAndSubstituteFlowConditionWithCache( + *DepToken, SubstitutionsCache); + SubstitutionsCache[DepToken] = &NewDep; + } + } + return substituteBoolValue(*ConstraintsIt->second, SubstitutionsCache); +} + +void DataflowAnalysisContext::dumpFlowCondition(AtomicBoolValue &Token) { + llvm::DenseSet<BoolValue *> Constraints = {&Token}; + llvm::DenseSet<AtomicBoolValue *> VisitedTokens; + addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); + + llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames = { + {&getBoolLiteralValue(false), "False"}, + {&getBoolLiteralValue(true), "True"}}; + 
llvm::dbgs() << debugString(Constraints, AtomNames); +} + +const ControlFlowContext * +DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { + // Canonicalize the key: + F = F->getDefinition(); + if (F == nullptr) + return nullptr; + auto It = FunctionContexts.find(F); + if (It != FunctionContexts.end()) + return &It->second; + + if (Stmt *Body = F->getBody()) { + auto CFCtx = ControlFlowContext::build(F, *Body, F->getASTContext()); + // FIXME: Handle errors. + assert(CFCtx); + auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); + return &Result.first->second; + } + + return nullptr; +} + +} // namespace dataflow +} // namespace clang + +using namespace clang; + +const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { + const Expr *Current = &E; + if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { + Current = EWC->getSubExpr(); + assert(Current != nullptr); + } + Current = Current->IgnoreParens(); + assert(Current != nullptr); + return *Current; +} + +const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { + if (auto *E = dyn_cast<Expr>(&S)) + return ignoreCFGOmittedNodes(*E); + return S; +} + +// FIXME: Does not precisely handle non-virtual diamond inheritance. A single +// field decl will be modeled for all instances of the inherited field. +static void +getFieldsFromClassHierarchy(QualType Type, + llvm::DenseSet<const FieldDecl *> &Fields) { + if (Type->isIncompleteType() || Type->isDependentType() || + !Type->isRecordType()) + return; + + for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) + Fields.insert(Field); + if (auto *CXXRecord = Type->getAsCXXRecordDecl()) + for (const CXXBaseSpecifier &Base : CXXRecord->bases()) + getFieldsFromClassHierarchy(Base.getType(), Fields); +} + +/// Gets the set of all fields in the type. +llvm::DenseSet<const FieldDecl *> +clang::dataflow::getObjectFields(QualType Type) { + llvm::DenseSet<const FieldDecl *> Fields; + getFieldsFromClassHierarchy(Type, Fields); + return Fields; +} diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp new file mode 100644 index 000000000000..cc3992805cc7 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -0,0 +1,818 @@ +//===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an Environment class that is used by dataflow analyses +// that run over Control-Flow Graphs (CFGs) to keep track of the state of the +// program at given program points. 
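+//
+// A minimal usage sketch (editor's illustration, not part of the original
+// file): a transfer function might consult the environment like
+//   if (Value *Val = Env.getValue(*VD, SkipPast::None))
+//     Env.addToFlowCondition(SomeBoolValue);
+// where `VD` and `SomeBoolValue` are hypothetical names.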
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <memory>
+#include <utility>
+
+namespace clang {
+namespace dataflow {
+
+// FIXME: convert these to parameters of the analysis or environment. Current
+// settings have been experimentally validated, but only for a particular
+// analysis.
+static constexpr int MaxCompositeValueDepth = 3;
+static constexpr int MaxCompositeValueSize = 1000;
+
+/// Returns a map consisting of key-value entries that are present in both maps.
+template <typename K, typename V>
+llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
+                                        const llvm::DenseMap<K, V> &Map2) {
+  llvm::DenseMap<K, V> Result;
+  for (auto &Entry : Map1) {
+    auto It = Map2.find(Entry.first);
+    if (It != Map2.end() && Entry.second == It->second)
+      Result.insert({Entry.first, Entry.second});
+  }
+  return Result;
+}
+
+static bool compareDistinctValues(QualType Type, Value &Val1,
+                                  const Environment &Env1, Value &Val2,
+                                  const Environment &Env2,
+                                  Environment::ValueModel &Model) {
+  // Note: Potentially costly, but, for booleans, we could check whether both
+  // can be proven equivalent in their respective environments.
+
+  // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
+  // and implement separate, join/widen specific handling for
+  // reference/pointers.
+  switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
+  case ComparisonResult::Same:
+    return true;
+  case ComparisonResult::Different:
+    return false;
+  case ComparisonResult::Unknown:
+    switch (Val1.getKind()) {
+    case Value::Kind::Integer:
+    case Value::Kind::Reference:
+    case Value::Kind::Pointer:
+    case Value::Kind::Struct:
+      // FIXME: this choice intentionally introduces unsoundness to allow
+      // for convergence. Once we have widening support for the
+      // reference/pointer and struct built-in models, this should be
+      // `false`.
+      return true;
+    default:
+      return false;
+    }
+  }
+  llvm_unreachable("All cases covered in switch");
+}
+
+/// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
+/// respectively, of the same type `Type`. Merging generally produces a single
+/// value that (soundly) approximates the two inputs, although the actual
+/// meaning depends on `Model`.
+static Value *mergeDistinctValues(QualType Type, Value &Val1,
+                                  const Environment &Env1, Value &Val2,
+                                  const Environment &Env2,
+                                  Environment &MergedEnv,
+                                  Environment::ValueModel &Model) {
+  // Join distinct boolean values preserving information about the constraints
+  // in the respective path conditions.
+  if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
+    // FIXME: Checking both values should be unnecessary, since they should
+    // have a consistent shape. However, right now we can end up with
+    // `BoolValue`s in integer-typed variables due to our incorrect handling of
+    // boolean-to-integer casts (we just propagate the `BoolValue` to the
+    // result of the cast). So, a join can encounter an integer in one branch
+    // but a bool in the other.
+    // For example:
+    // ```
+    // std::optional<bool> o;
+    // int x;
+    // if (o.has_value())
+    //   x = o.value();
+    // ```
+    auto *Expr1 = cast<BoolValue>(&Val1);
+    auto *Expr2 = cast<BoolValue>(&Val2);
+    auto &MergedVal = MergedEnv.makeAtomicBoolValue();
+    MergedEnv.addToFlowCondition(MergedEnv.makeOr(
+        MergedEnv.makeAnd(Env1.getFlowConditionToken(),
+                          MergedEnv.makeIff(MergedVal, *Expr1)),
+        MergedEnv.makeAnd(Env2.getFlowConditionToken(),
+                          MergedEnv.makeIff(MergedVal, *Expr2))));
+    return &MergedVal;
+  }
+
+  // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
+  // returns false to avoid storing unneeded values in `DACtx`.
+  // FIXME: Creating the value based on the type alone creates misshapen values
+  // for lvalues, since the type does not reflect the need for `ReferenceValue`.
+  if (Value *MergedVal = MergedEnv.createValue(Type))
+    if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
+      return MergedVal;
+
+  return nullptr;
+}
+
+// When widening does not change `Current`, the return value will equal
+// `&Prev`.
+static Value &widenDistinctValues(QualType Type, Value &Prev,
+                                  const Environment &PrevEnv, Value &Current,
+                                  Environment &CurrentEnv,
+                                  Environment::ValueModel &Model) {
+  // Boolean-model widening.
+  if (isa<BoolValue>(&Prev)) {
+    assert(isa<BoolValue>(Current));
+    // Widen to Top, because we know they are different values. If the previous
+    // value was already Top, re-use it to (implicitly) indicate that no change
+    // occurred.
+    if (isa<TopBoolValue>(Prev))
+      return Prev;
+    return CurrentEnv.makeTopBoolValue();
+  }
+
+  // FIXME: Add other built-in model widening.
+
+  // Custom-model widening.
+  if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
+    return *W;
+
+  // The default for widening is a no-op: leave the current value unchanged.
+  return Current;
+}
+
+/// Inserts `D` into `Vars` if it is a variable with global storage.
+static void insertIfGlobal(const Decl &D,
+                           llvm::DenseSet<const FieldDecl *> &Fields,
+                           llvm::DenseSet<const VarDecl *> &Vars) {
+  if (auto *V = dyn_cast<VarDecl>(&D))
+    if (V->hasGlobalStorage())
+      Vars.insert(V);
+}
+
+static void getFieldsAndGlobalVars(const Decl &D,
+                                   llvm::DenseSet<const FieldDecl *> &Fields,
+                                   llvm::DenseSet<const VarDecl *> &Vars) {
+  insertIfGlobal(D, Fields, Vars);
+  if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
+    for (const auto *B : Decomp->bindings())
+      if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
+        // FIXME: should we be using `E->getFoundDecl()`?
+        if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
+          Fields.insert(FD);
+}
+
+/// Traverses `S` and inserts into `Fields` and `Vars` any fields and global
+/// variables that are declared in or referenced from sub-statements.
+static void getFieldsAndGlobalVars(const Stmt &S,
+                                   llvm::DenseSet<const FieldDecl *> &Fields,
+                                   llvm::DenseSet<const VarDecl *> &Vars) {
+  for (auto *Child : S.children())
+    if (Child != nullptr)
+      getFieldsAndGlobalVars(*Child, Fields, Vars);
+
+  if (auto *DS = dyn_cast<DeclStmt>(&S)) {
+    if (DS->isSingleDecl())
+      getFieldsAndGlobalVars(*DS->getSingleDecl(), Fields, Vars);
+    else
+      for (auto *D : DS->getDeclGroup())
+        getFieldsAndGlobalVars(*D, Fields, Vars);
+  } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
+    insertIfGlobal(*E->getDecl(), Fields, Vars);
+  } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
+    // FIXME: should we be using `E->getFoundDecl()`?
+ const ValueDecl *VD = E->getMemberDecl(); + insertIfGlobal(*VD, Fields, Vars); + if (const auto *FD = dyn_cast<FieldDecl>(VD)) + Fields.insert(FD); + } +} + +// FIXME: Add support for resetting globals after function calls to enable +// the implementation of sound analyses. +void Environment::initVars(llvm::DenseSet<const VarDecl *> Vars) { + for (const VarDecl *D : Vars) { + if (getStorageLocation(*D, SkipPast::None) != nullptr) + continue; + auto &Loc = createStorageLocation(*D); + setStorageLocation(*D, Loc); + if (auto *Val = createValue(D->getType())) + setValue(Loc, *Val); + } +} + +Environment::Environment(DataflowAnalysisContext &DACtx) + : DACtx(&DACtx), FlowConditionToken(&DACtx.makeFlowConditionToken()) {} + +Environment::Environment(const Environment &Other) + : DACtx(Other.DACtx), CallStack(Other.CallStack), + ReturnLoc(Other.ReturnLoc), ThisPointeeLoc(Other.ThisPointeeLoc), + DeclToLoc(Other.DeclToLoc), ExprToLoc(Other.ExprToLoc), + LocToVal(Other.LocToVal), MemberLocToStruct(Other.MemberLocToStruct), + FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) { +} + +Environment &Environment::operator=(const Environment &Other) { + Environment Copy(Other); + *this = std::move(Copy); + return *this; +} + +Environment::Environment(DataflowAnalysisContext &DACtx, + const DeclContext &DeclCtx) + : Environment(DACtx) { + CallStack.push_back(&DeclCtx); + + if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) { + assert(FuncDecl->getBody() != nullptr); + + llvm::DenseSet<const FieldDecl *> Fields; + llvm::DenseSet<const VarDecl *> Vars; + + // Look for global variable references in the constructor-initializers. + if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(&DeclCtx)) { + for (const auto *Init : CtorDecl->inits()) { + if (const auto *M = Init->getAnyMember()) + Fields.insert(M); + const Expr *E = Init->getInit(); + assert(E != nullptr); + getFieldsAndGlobalVars(*E, Fields, Vars); + } + } + getFieldsAndGlobalVars(*FuncDecl->getBody(), Fields, Vars); + + // These have to be added before the lines that follow to ensure that + // `create*` work correctly for structs. + DACtx.addModeledFields(Fields); + + initVars(Vars); + + for (const auto *ParamDecl : FuncDecl->parameters()) { + assert(ParamDecl != nullptr); + auto &ParamLoc = createStorageLocation(*ParamDecl); + setStorageLocation(*ParamDecl, ParamLoc); + if (Value *ParamVal = createValue(ParamDecl->getType())) + setValue(ParamLoc, *ParamVal); + } + + QualType ReturnType = FuncDecl->getReturnType(); + ReturnLoc = &createStorageLocation(ReturnType); + } + + if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) { + auto *Parent = MethodDecl->getParent(); + assert(Parent != nullptr); + if (Parent->isLambda()) + MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext()); + + // FIXME: Initialize the ThisPointeeLoc of lambdas too. + if (MethodDecl && !MethodDecl->isStatic()) { + QualType ThisPointeeType = MethodDecl->getThisObjectType(); + ThisPointeeLoc = &createStorageLocation(ThisPointeeType); + if (Value *ThisPointeeVal = createValue(ThisPointeeType)) + setValue(*ThisPointeeLoc, *ThisPointeeVal); + } + } +} + +bool Environment::canDescend(unsigned MaxDepth, + const DeclContext *Callee) const { + return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); +} + +Environment Environment::pushCall(const CallExpr *Call) const { + Environment Env(*this); + + // FIXME: Support references here. 
+ Env.ReturnLoc = getStorageLocation(*Call, SkipPast::Reference); + + if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { + if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { + if (!isa<CXXThisExpr>(Arg)) + Env.ThisPointeeLoc = getStorageLocation(*Arg, SkipPast::Reference); + // Otherwise (when the argument is `this`), retain the current + // environment's `ThisPointeeLoc`. + } + } + + Env.pushCallInternal(Call->getDirectCallee(), + llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); + + return Env; +} + +Environment Environment::pushCall(const CXXConstructExpr *Call) const { + Environment Env(*this); + + // FIXME: Support references here. + Env.ReturnLoc = getStorageLocation(*Call, SkipPast::Reference); + + Env.ThisPointeeLoc = Env.ReturnLoc; + + Env.pushCallInternal(Call->getConstructor(), + llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); + + return Env; +} + +void Environment::pushCallInternal(const FunctionDecl *FuncDecl, + ArrayRef<const Expr *> Args) { + CallStack.push_back(FuncDecl); + + // FIXME: Share this code with the constructor, rather than duplicating it. + llvm::DenseSet<const FieldDecl *> Fields; + llvm::DenseSet<const VarDecl *> Vars; + // Look for global variable references in the constructor-initializers. + if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { + for (const auto *Init : CtorDecl->inits()) { + if (const auto *M = Init->getAnyMember()) + Fields.insert(M); + const Expr *E = Init->getInit(); + assert(E != nullptr); + getFieldsAndGlobalVars(*E, Fields, Vars); + } + } + getFieldsAndGlobalVars(*FuncDecl->getBody(), Fields, Vars); + + // These have to be added before the lines that follow to ensure that + // `create*` work correctly for structs. + DACtx->addModeledFields(Fields); + + initVars(Vars); + + const auto *ParamIt = FuncDecl->param_begin(); + + // FIXME: Parameters don't always map to arguments 1:1; examples include + // overloaded operators implemented as member functions, and parameter packs. + for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { + assert(ParamIt != FuncDecl->param_end()); + + const Expr *Arg = Args[ArgIndex]; + auto *ArgLoc = getStorageLocation(*Arg, SkipPast::Reference); + if (ArgLoc == nullptr) + continue; + + const VarDecl *Param = *ParamIt; + auto &Loc = createStorageLocation(*Param); + setStorageLocation(*Param, Loc); + + QualType ParamType = Param->getType(); + if (ParamType->isReferenceType()) { + auto &Val = takeOwnership(std::make_unique<ReferenceValue>(*ArgLoc)); + setValue(Loc, Val); + } else if (auto *ArgVal = getValue(*ArgLoc)) { + setValue(Loc, *ArgVal); + } else if (Value *Val = createValue(ParamType)) { + setValue(Loc, *Val); + } + } +} + +void Environment::popCall(const Environment &CalleeEnv) { + // We ignore `DACtx` because it's already the same in both. We don't want the + // callee's `DeclCtx`, `ReturnLoc` or `ThisPointeeLoc`. We don't bring back + // `DeclToLoc` and `ExprToLoc` because we want to be able to later analyze the + // same callee in a different context, and `setStorageLocation` requires there + // to not already be a storage location assigned. Conceptually, these maps + // capture information from the local scope, so when popping that scope, we do + // not propagate the maps. 
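+  // Editor's note (illustrative): if the analysis enters a callee from two
+  // different call sites, the second analysis of that callee must not see
+  // storage locations left over from the first; only the maps moved below
+  // carry information back to the caller.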
+  this->LocToVal = std::move(CalleeEnv.LocToVal);
+  this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct);
+  this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
+}
+
+bool Environment::equivalentTo(const Environment &Other,
+                               Environment::ValueModel &Model) const {
+  assert(DACtx == Other.DACtx);
+
+  if (ReturnLoc != Other.ReturnLoc)
+    return false;
+
+  if (ThisPointeeLoc != Other.ThisPointeeLoc)
+    return false;
+
+  if (DeclToLoc != Other.DeclToLoc)
+    return false;
+
+  if (ExprToLoc != Other.ExprToLoc)
+    return false;
+
+  // Compare the contents for the intersection of their domains.
+  for (auto &Entry : LocToVal) {
+    const StorageLocation *Loc = Entry.first;
+    assert(Loc != nullptr);
+
+    Value *Val = Entry.second;
+    assert(Val != nullptr);
+
+    auto It = Other.LocToVal.find(Loc);
+    if (It == Other.LocToVal.end())
+      continue;
+    assert(It->second != nullptr);
+
+    if (!areEquivalentValues(*Val, *It->second) &&
+        !compareDistinctValues(Loc->getType(), *Val, *this, *It->second, Other,
+                               Model))
+      return false;
+  }
+
+  return true;
+}
+
+LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
+                                     Environment::ValueModel &Model) {
+  assert(DACtx == PrevEnv.DACtx);
+  assert(ReturnLoc == PrevEnv.ReturnLoc);
+  assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
+  assert(CallStack == PrevEnv.CallStack);
+
+  auto Effect = LatticeJoinEffect::Unchanged;
+
+  // By the API, `PrevEnv` is a previous version of the environment for the
+  // same block, so we have some guarantees about its shape. In particular, it
+  // will be the result of a join or widen operation on previous values for
+  // this block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these
+  // maps are subsets of the maps in `PrevEnv`. So, as long as we maintain this
+  // property here, we don't need to change their current values to widen.
+  //
+  // FIXME: `MemberLocToStruct` does not share the above property, because
+  // `join` can cause the map size to increase (when we add fresh data in
+  // places of conflict). Once this issue with join is resolved, re-enable the
+  // assertion below or replace it with something that captures the desired
+  // invariant.
+  assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
+  assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
+  // assert(MemberLocToStruct.size() <= PrevEnv.MemberLocToStruct.size());
+
+  llvm::DenseMap<const StorageLocation *, Value *> WidenedLocToVal;
+  for (auto &Entry : LocToVal) {
+    const StorageLocation *Loc = Entry.first;
+    assert(Loc != nullptr);
+
+    Value *Val = Entry.second;
+    assert(Val != nullptr);
+
+    auto PrevIt = PrevEnv.LocToVal.find(Loc);
+    if (PrevIt == PrevEnv.LocToVal.end())
+      continue;
+    assert(PrevIt->second != nullptr);
+
+    if (areEquivalentValues(*Val, *PrevIt->second)) {
+      WidenedLocToVal.insert({Loc, Val});
+      continue;
+    }
+
+    Value &WidenedVal = widenDistinctValues(Loc->getType(), *PrevIt->second,
+                                            PrevEnv, *Val, *this, Model);
+    WidenedLocToVal.insert({Loc, &WidenedVal});
+    if (&WidenedVal != PrevIt->second)
+      Effect = LatticeJoinEffect::Changed;
+  }
+  LocToVal = std::move(WidenedLocToVal);
+  // FIXME: update the equivalence calculation for `MemberLocToStruct`, once we
+  // have a systematic way of soundly comparing this map.
+ if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || + ExprToLoc.size() != PrevEnv.ExprToLoc.size() || + LocToVal.size() != PrevEnv.LocToVal.size() || + MemberLocToStruct.size() != PrevEnv.MemberLocToStruct.size()) + Effect = LatticeJoinEffect::Changed; + + return Effect; +} + +LatticeJoinEffect Environment::join(const Environment &Other, + Environment::ValueModel &Model) { + assert(DACtx == Other.DACtx); + assert(ReturnLoc == Other.ReturnLoc); + assert(ThisPointeeLoc == Other.ThisPointeeLoc); + assert(CallStack == Other.CallStack); + + auto Effect = LatticeJoinEffect::Unchanged; + + Environment JoinedEnv(*DACtx); + + JoinedEnv.CallStack = CallStack; + JoinedEnv.ReturnLoc = ReturnLoc; + JoinedEnv.ThisPointeeLoc = ThisPointeeLoc; + + JoinedEnv.DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc); + if (DeclToLoc.size() != JoinedEnv.DeclToLoc.size()) + Effect = LatticeJoinEffect::Changed; + + JoinedEnv.ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc); + if (ExprToLoc.size() != JoinedEnv.ExprToLoc.size()) + Effect = LatticeJoinEffect::Changed; + + JoinedEnv.MemberLocToStruct = + intersectDenseMaps(MemberLocToStruct, Other.MemberLocToStruct); + if (MemberLocToStruct.size() != JoinedEnv.MemberLocToStruct.size()) + Effect = LatticeJoinEffect::Changed; + + // FIXME: set `Effect` as needed. + // FIXME: update join to detect backedges and simplify the flow condition + // accordingly. + JoinedEnv.FlowConditionToken = &DACtx->joinFlowConditions( + *FlowConditionToken, *Other.FlowConditionToken); + + for (auto &Entry : LocToVal) { + const StorageLocation *Loc = Entry.first; + assert(Loc != nullptr); + + Value *Val = Entry.second; + assert(Val != nullptr); + + auto It = Other.LocToVal.find(Loc); + if (It == Other.LocToVal.end()) + continue; + assert(It->second != nullptr); + + if (areEquivalentValues(*Val, *It->second)) { + JoinedEnv.LocToVal.insert({Loc, Val}); + continue; + } + + if (Value *MergedVal = + mergeDistinctValues(Loc->getType(), *Val, *this, *It->second, Other, + JoinedEnv, Model)) { + JoinedEnv.LocToVal.insert({Loc, MergedVal}); + Effect = LatticeJoinEffect::Changed; + } + } + if (LocToVal.size() != JoinedEnv.LocToVal.size()) + Effect = LatticeJoinEffect::Changed; + + *this = std::move(JoinedEnv); + + return Effect; +} + +StorageLocation &Environment::createStorageLocation(QualType Type) { + return DACtx->createStorageLocation(Type); +} + +StorageLocation &Environment::createStorageLocation(const VarDecl &D) { + // Evaluated declarations are always assigned the same storage locations to + // ensure that the environment stabilizes across loop iterations. Storage + // locations for evaluated declarations are stored in the analysis context. + return DACtx->getStableStorageLocation(D); +} + +StorageLocation &Environment::createStorageLocation(const Expr &E) { + // Evaluated expressions are always assigned the same storage locations to + // ensure that the environment stabilizes across loop iterations. Storage + // locations for evaluated expressions are stored in the analysis context. + return DACtx->getStableStorageLocation(E); +} + +void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { + assert(DeclToLoc.find(&D) == DeclToLoc.end()); + DeclToLoc[&D] = &Loc; +} + +StorageLocation *Environment::getStorageLocation(const ValueDecl &D, + SkipPast SP) const { + auto It = DeclToLoc.find(&D); + return It == DeclToLoc.end() ? 
nullptr : &skip(*It->second, SP); +} + +void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { + const Expr &CanonE = ignoreCFGOmittedNodes(E); + assert(ExprToLoc.find(&CanonE) == ExprToLoc.end()); + ExprToLoc[&CanonE] = &Loc; +} + +StorageLocation *Environment::getStorageLocation(const Expr &E, + SkipPast SP) const { + // FIXME: Add a test with parens. + auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); + return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP); +} + +StorageLocation *Environment::getThisPointeeStorageLocation() const { + return ThisPointeeLoc; +} + +StorageLocation *Environment::getReturnStorageLocation() const { + return ReturnLoc; +} + +PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { + return DACtx->getOrCreateNullPointerValue(PointeeType); +} + +void Environment::setValue(const StorageLocation &Loc, Value &Val) { + LocToVal[&Loc] = &Val; + + if (auto *StructVal = dyn_cast<StructValue>(&Val)) { + auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc); + + const QualType Type = AggregateLoc.getType(); + assert(Type->isStructureOrClassType() || Type->isUnionType()); + + for (const FieldDecl *Field : DACtx->getReferencedFields(Type)) { + assert(Field != nullptr); + StorageLocation &FieldLoc = AggregateLoc.getChild(*Field); + MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field); + if (auto *FieldVal = StructVal->getChild(*Field)) + setValue(FieldLoc, *FieldVal); + } + } + + auto It = MemberLocToStruct.find(&Loc); + if (It != MemberLocToStruct.end()) { + // `Loc` is the location of a struct member so we need to also update the + // value of the member in the corresponding `StructValue`. + + assert(It->second.first != nullptr); + StructValue &StructVal = *It->second.first; + + assert(It->second.second != nullptr); + const ValueDecl &Member = *It->second.second; + + StructVal.setChild(Member, Val); + } +} + +Value *Environment::getValue(const StorageLocation &Loc) const { + auto It = LocToVal.find(&Loc); + return It == LocToVal.end() ? nullptr : It->second; +} + +Value *Environment::getValue(const ValueDecl &D, SkipPast SP) const { + auto *Loc = getStorageLocation(D, SP); + if (Loc == nullptr) + return nullptr; + return getValue(*Loc); +} + +Value *Environment::getValue(const Expr &E, SkipPast SP) const { + auto *Loc = getStorageLocation(E, SP); + if (Loc == nullptr) + return nullptr; + return getValue(*Loc); +} + +Value *Environment::createValue(QualType Type) { + llvm::DenseSet<QualType> Visited; + int CreatedValuesCount = 0; + Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, + CreatedValuesCount); + if (CreatedValuesCount > MaxCompositeValueSize) { + llvm::errs() << "Attempting to initialize a huge value of type: " << Type + << '\n'; + } + return Val; +} + +Value *Environment::createValueUnlessSelfReferential( + QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, + int &CreatedValuesCount) { + assert(!Type.isNull()); + + // Allow unlimited fields at depth 1; only cap at deeper nesting levels. + if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || + Depth > MaxCompositeValueDepth) + return nullptr; + + if (Type->isBooleanType()) { + CreatedValuesCount++; + return &makeAtomicBoolValue(); + } + + if (Type->isIntegerType()) { + // FIXME: consider instead `return nullptr`, given that we do nothing useful + // with integers, and so distinguishing them serves no purpose, but could + // prevent convergence. 
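+ // As is, each integer is modeled by a fresh, uninterpreted `IntegerValue`,
+ // so two separately-created integers are never known to be equal; e.g.
+ // after `int x = 1; int y = 1;` nothing relates the values of `x` and `y`.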
+ CreatedValuesCount++; + return &takeOwnership(std::make_unique<IntegerValue>()); + } + + if (Type->isReferenceType()) { + CreatedValuesCount++; + QualType PointeeType = Type->castAs<ReferenceType>()->getPointeeType(); + auto &PointeeLoc = createStorageLocation(PointeeType); + + if (Visited.insert(PointeeType.getCanonicalType()).second) { + Value *PointeeVal = createValueUnlessSelfReferential( + PointeeType, Visited, Depth, CreatedValuesCount); + Visited.erase(PointeeType.getCanonicalType()); + + if (PointeeVal != nullptr) + setValue(PointeeLoc, *PointeeVal); + } + + return &takeOwnership(std::make_unique<ReferenceValue>(PointeeLoc)); + } + + if (Type->isPointerType()) { + CreatedValuesCount++; + QualType PointeeType = Type->castAs<PointerType>()->getPointeeType(); + auto &PointeeLoc = createStorageLocation(PointeeType); + + if (Visited.insert(PointeeType.getCanonicalType()).second) { + Value *PointeeVal = createValueUnlessSelfReferential( + PointeeType, Visited, Depth, CreatedValuesCount); + Visited.erase(PointeeType.getCanonicalType()); + + if (PointeeVal != nullptr) + setValue(PointeeLoc, *PointeeVal); + } + + return &takeOwnership(std::make_unique<PointerValue>(PointeeLoc)); + } + + if (Type->isStructureOrClassType() || Type->isUnionType()) { + CreatedValuesCount++; + llvm::DenseMap<const ValueDecl *, Value *> FieldValues; + for (const FieldDecl *Field : DACtx->getReferencedFields(Type)) { + assert(Field != nullptr); + + QualType FieldType = Field->getType(); + if (Visited.contains(FieldType.getCanonicalType())) + continue; + + Visited.insert(FieldType.getCanonicalType()); + if (auto *FieldValue = createValueUnlessSelfReferential( + FieldType, Visited, Depth + 1, CreatedValuesCount)) + FieldValues.insert({Field, FieldValue}); + Visited.erase(FieldType.getCanonicalType()); + } + + return &takeOwnership( + std::make_unique<StructValue>(std::move(FieldValues))); + } + + return nullptr; +} + +StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const { + switch (SP) { + case SkipPast::None: + return Loc; + case SkipPast::Reference: + // References cannot be chained so we only need to skip past one level of + // indirection. + if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc))) + return Val->getReferentLoc(); + return Loc; + case SkipPast::ReferenceThenPointer: + StorageLocation &LocPastRef = skip(Loc, SkipPast::Reference); + if (auto *Val = dyn_cast_or_null<PointerValue>(getValue(LocPastRef))) + return Val->getPointeeLoc(); + return LocPastRef; + } + llvm_unreachable("bad SkipPast kind"); +} + +const StorageLocation &Environment::skip(const StorageLocation &Loc, + SkipPast SP) const { + return skip(*const_cast<StorageLocation *>(&Loc), SP); +} + +void Environment::addToFlowCondition(BoolValue &Val) { + DACtx->addFlowConditionConstraint(*FlowConditionToken, Val); +} + +bool Environment::flowConditionImplies(BoolValue &Val) const { + return DACtx->flowConditionImplies(*FlowConditionToken, Val); +} + +void Environment::dump(raw_ostream &OS) const { + // FIXME: add printing for remaining fields and allow caller to decide what + // fields are printed. 
+ OS << "DeclToLoc:\n"; + for (auto [D, L] : DeclToLoc) + OS << " [" << D->getName() << ", " << L << "]\n"; + + OS << "ExprToLoc:\n"; + for (auto [E, L] : ExprToLoc) + OS << " [" << E << ", " << L << "]\n"; + + OS << "LocToVal:\n"; + for (auto [L, V] : LocToVal) { + OS << " [" << L << ", " << V << ": " << *V << "]\n"; + } + + OS << "FlowConditionToken:\n"; + DACtx->dumpFlowCondition(*FlowConditionToken); +} + +void Environment::dump() const { + dump(llvm::dbgs()); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp new file mode 100644 index 000000000000..d4886f154b33 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DebugSupport.cpp @@ -0,0 +1,259 @@ +//===- DebugSupport.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines functions which generate more readable forms of data +// structures used in the dataflow analyses, for debugging purposes. +// +//===----------------------------------------------------------------------===// + +#include <utility> + +#include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Solver.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/FormatCommon.h" +#include "llvm/Support/FormatVariadic.h" + +namespace clang { +namespace dataflow { + +using llvm::AlignStyle; +using llvm::fmt_pad; +using llvm::formatv; + +llvm::StringRef debugString(Value::Kind Kind) { + switch (Kind) { + case Value::Kind::Integer: + return "Integer"; + case Value::Kind::Reference: + return "Reference"; + case Value::Kind::Pointer: + return "Pointer"; + case Value::Kind::Struct: + return "Struct"; + case Value::Kind::AtomicBool: + return "AtomicBool"; + case Value::Kind::TopBool: + return "TopBool"; + case Value::Kind::Conjunction: + return "Conjunction"; + case Value::Kind::Disjunction: + return "Disjunction"; + case Value::Kind::Negation: + return "Negation"; + case Value::Kind::Implication: + return "Implication"; + case Value::Kind::Biconditional: + return "Biconditional"; + } + llvm_unreachable("Unhandled value kind"); +} + +llvm::StringRef debugString(Solver::Result::Assignment Assignment) { + switch (Assignment) { + case Solver::Result::Assignment::AssignedFalse: + return "False"; + case Solver::Result::Assignment::AssignedTrue: + return "True"; + } + llvm_unreachable("Booleans can only be assigned true/false"); +} + +llvm::StringRef debugString(Solver::Result::Status Status) { + switch (Status) { + case Solver::Result::Status::Satisfiable: + return "Satisfiable"; + case Solver::Result::Status::Unsatisfiable: + return "Unsatisfiable"; + case Solver::Result::Status::TimedOut: + return "TimedOut"; + } + llvm_unreachable("Unhandled SAT check result status"); +} + +namespace { + +class DebugStringGenerator { +public: + explicit DebugStringGenerator( + llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNamesArg) 
+ : Counter(0), AtomNames(std::move(AtomNamesArg)) {
+#ifndef NDEBUG
+ llvm::StringSet<> Names;
+ for (auto &N : AtomNames) {
+ assert(Names.insert(N.second).second &&
+ "The same name must not be assigned to different atoms");
+ }
+#endif
+ }
+
+ /// Returns a string representation of a boolean value `B`.
+ std::string debugString(const BoolValue &B, size_t Depth = 0) {
+ std::string S;
+ switch (B.getKind()) {
+ case Value::Kind::AtomicBool: {
+ S = getAtomName(&cast<AtomicBoolValue>(B));
+ break;
+ }
+ case Value::Kind::Conjunction: {
+ auto &C = cast<ConjunctionValue>(B);
+ auto L = debugString(C.getLeftSubValue(), Depth + 1);
+ auto R = debugString(C.getRightSubValue(), Depth + 1);
+ S = formatv("(and\n{0}\n{1})", L, R);
+ break;
+ }
+ case Value::Kind::Disjunction: {
+ auto &D = cast<DisjunctionValue>(B);
+ auto L = debugString(D.getLeftSubValue(), Depth + 1);
+ auto R = debugString(D.getRightSubValue(), Depth + 1);
+ S = formatv("(or\n{0}\n{1})", L, R);
+ break;
+ }
+ case Value::Kind::Negation: {
+ auto &N = cast<NegationValue>(B);
+ S = formatv("(not\n{0})", debugString(N.getSubVal(), Depth + 1));
+ break;
+ }
+ case Value::Kind::Implication: {
+ auto &IV = cast<ImplicationValue>(B);
+ auto L = debugString(IV.getLeftSubValue(), Depth + 1);
+ auto R = debugString(IV.getRightSubValue(), Depth + 1);
+ S = formatv("(=>\n{0}\n{1})", L, R);
+ break;
+ }
+ case Value::Kind::Biconditional: {
+ auto &BV = cast<BiconditionalValue>(B);
+ auto L = debugString(BV.getLeftSubValue(), Depth + 1);
+ auto R = debugString(BV.getRightSubValue(), Depth + 1);
+ S = formatv("(=\n{0}\n{1})", L, R);
+ break;
+ }
+ default:
+ llvm_unreachable("Unhandled value kind");
+ }
+ auto Indent = Depth * 4;
+ return formatv("{0}", fmt_pad(S, Indent, 0));
+ }
+
+ std::string debugString(const llvm::DenseSet<BoolValue *> &Constraints) {
+ std::vector<std::string> ConstraintsStrings;
+ ConstraintsStrings.reserve(Constraints.size());
+ for (BoolValue *Constraint : Constraints) {
+ ConstraintsStrings.push_back(debugString(*Constraint));
+ }
+ llvm::sort(ConstraintsStrings);
+
+ std::string Result;
+ for (const std::string &S : ConstraintsStrings) {
+ Result += S;
+ Result += '\n';
+ }
+ return Result;
+ }
+
+ /// Returns a string representation of a set of boolean `Constraints` and the
+ /// `Result` of satisfiability checking on the `Constraints`.
+ std::string debugString(ArrayRef<BoolValue *> Constraints,
+ const Solver::Result &Result) {
+ auto Template = R"(
+Constraints
+------------
+{0:$[
+
+]}
+------------
+{1}.
+{2}
+)";
+
+ std::vector<std::string> ConstraintsStrings;
+ ConstraintsStrings.reserve(Constraints.size());
+ for (auto &Constraint : Constraints) {
+ ConstraintsStrings.push_back(debugString(*Constraint));
+ }
+
+ auto StatusString = clang::dataflow::debugString(Result.getStatus());
+ auto Solution = Result.getSolution();
+ auto SolutionString = Solution ? "\n" + debugString(*Solution) : "";
+
+ return formatv(
+ Template,
+ llvm::make_range(ConstraintsStrings.begin(), ConstraintsStrings.end()),
+ StatusString, SolutionString);
+ }
+
+private:
+ /// Returns a string representation of a truth assignment to atom booleans.
+ std::string debugString(
+ const llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment>
+ &AtomAssignments) {
+ size_t MaxNameLength = 0;
+ for (auto &AtomName : AtomNames) {
+ MaxNameLength = std::max(MaxNameLength, AtomName.second.size());
+ }
+
+ std::vector<std::string> Lines;
+ for (auto &AtomAssignment : AtomAssignments) {
+ auto Line = formatv("{0} = {1}",
+ fmt_align(getAtomName(AtomAssignment.first),
+ AlignStyle::Left, MaxNameLength),
+ clang::dataflow::debugString(AtomAssignment.second));
+ Lines.push_back(Line);
+ }
+ llvm::sort(Lines);
+
+ return formatv("{0:$[\n]}", llvm::make_range(Lines.begin(), Lines.end()));
+ }
+
+ /// Returns the name assigned to `Atom`, either user-specified or created by
+ /// default rules (B0, B1, ...).
+ std::string getAtomName(const AtomicBoolValue *Atom) {
+ auto Entry = AtomNames.try_emplace(Atom, formatv("B{0}", Counter));
+ if (Entry.second) {
+ Counter++;
+ }
+ return Entry.first->second;
+ }
+
+ // Keep track of the number of atoms without a user-specified name, used to
+ // assign non-repeating default names to such atoms.
+ size_t Counter;
+
+ // Keep track of names assigned to atoms.
+ llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames;
+};
+
+} // namespace
+
+std::string
+debugString(const BoolValue &B,
+ llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) {
+ return DebugStringGenerator(std::move(AtomNames)).debugString(B);
+}
+
+std::string
+debugString(const llvm::DenseSet<BoolValue *> &Constraints,
+ llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) {
+ return DebugStringGenerator(std::move(AtomNames)).debugString(Constraints);
+}
+
+std::string
+debugString(ArrayRef<BoolValue *> Constraints, const Solver::Result &Result,
+ llvm::DenseMap<const AtomicBoolValue *, std::string> AtomNames) {
+ return DebugStringGenerator(std::move(AtomNames))
+ .debugString(Constraints, Result);
+}
+
+} // namespace dataflow
+} // namespace clang
diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp
new file mode 100644
index 000000000000..f457964fb132
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/ChromiumCheckModel.cpp
@@ -0,0 +1,71 @@
+//===-- ChromiumCheckModel.cpp ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/Models/ChromiumCheckModel.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "llvm/ADT/DenseSet.h"
+
+namespace clang {
+namespace dataflow {
+
+/// Determines whether `D` is one of the methods used to implement Chromium's
+/// `CHECK` macros. Populates `CheckDecls`, if empty.
+bool isCheckLikeMethod(llvm::SmallDenseSet<const CXXMethodDecl *> &CheckDecls,
+ const CXXMethodDecl &D) {
+ // All of the methods of interest are static, so avoid any lookup for
+ // non-static methods (the common case).
+ if (!D.isStatic())
+ return false;
+
+ if (CheckDecls.empty()) {
+ // Attempt to initialize `CheckDecls` with the methods in class
+ // `CheckError`.
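+ // The set is filled lazily, the first time a static method of a class
+ // named `CheckError` in the top-level `logging` namespace is seen; after
+ // that, membership is a cheap set lookup.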
+ const CXXRecordDecl *ParentClass = D.getParent();
+ if (ParentClass == nullptr || !ParentClass->getDeclName().isIdentifier() ||
+ ParentClass->getName() != "CheckError")
+ return false;
+
+ // Check whether namespace is "logging".
+ const auto *N =
+ dyn_cast_or_null<NamespaceDecl>(ParentClass->getDeclContext());
+ if (N == nullptr || !N->getDeclName().isIdentifier() ||
+ N->getName() != "logging")
+ return false;
+
+ // Check whether "logging" is a top-level namespace.
+ if (N->getParent() == nullptr || !N->getParent()->isTranslationUnit())
+ return false;
+
+ for (const CXXMethodDecl *M : ParentClass->methods())
+ if (M->getDeclName().isIdentifier() && M->getName().endswith("Check"))
+ CheckDecls.insert(M);
+ }
+
+ return CheckDecls.contains(&D);
+}
+
+bool ChromiumCheckModel::transfer(const CFGElement *Element, Environment &Env) {
+ auto CS = Element->getAs<CFGStmt>();
+ if (!CS)
+ return false;
+ auto Stmt = CS->getStmt();
+ if (const auto *Call = dyn_cast<CallExpr>(Stmt)) {
+ // `getDirectCallee` returns null for indirect calls, so guard the cast.
+ if (const auto *M = dyn_cast_or_null<CXXMethodDecl>(Call->getDirectCallee())) {
+ if (isCheckLikeMethod(CheckDecls, *M)) {
+ // Mark this branch as unreachable.
+ Env.addToFlowCondition(Env.getBoolLiteralValue(false));
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+} // namespace dataflow
+} // namespace clang
diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
new file mode 100644
index 000000000000..308dc25dad1f
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
@@ -0,0 +1,912 @@
+//===-- UncheckedOptionalAccessModel.cpp ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a dataflow analysis that detects unsafe uses of optional
+// values.
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Stmt.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/CFGMatchSwitch.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/NoopLattice.h" +#include "clang/Analysis/FlowSensitive/StorageLocation.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <memory> +#include <optional> +#include <utility> +#include <vector> + +namespace clang { +namespace dataflow { +namespace { + +using namespace ::clang::ast_matchers; +using LatticeTransferState = TransferState<NoopLattice>; + +DeclarationMatcher optionalClass() { + return classTemplateSpecializationDecl( + anyOf(hasName("std::optional"), hasName("std::__optional_storage_base"), + hasName("__optional_destruct_base"), hasName("absl::optional"), + hasName("base::Optional")), + hasTemplateArgument(0, refersToType(type().bind("T")))); +} + +auto optionalOrAliasType() { + return hasUnqualifiedDesugaredType( + recordType(hasDeclaration(optionalClass()))); +} + +/// Matches any of the spellings of the optional types and sugar, aliases, etc. +auto hasOptionalType() { return hasType(optionalOrAliasType()); } + +auto isOptionalMemberCallWithName( + llvm::StringRef MemberName, + const std::optional<StatementMatcher> &Ignorable = std::nullopt) { + auto Exception = unless(Ignorable ? expr(anyOf(*Ignorable, cxxThisExpr())) + : cxxThisExpr()); + return cxxMemberCallExpr( + on(expr(Exception)), + callee(cxxMethodDecl(hasName(MemberName), ofClass(optionalClass())))); +} + +auto isOptionalOperatorCallWithName( + llvm::StringRef operator_name, + const std::optional<StatementMatcher> &Ignorable = std::nullopt) { + return cxxOperatorCallExpr( + hasOverloadedOperatorName(operator_name), + callee(cxxMethodDecl(ofClass(optionalClass()))), + Ignorable ? 
callExpr(unless(hasArgument(0, *Ignorable))) : callExpr());
+}
+
+auto isMakeOptionalCall() {
+ return callExpr(
+ callee(functionDecl(hasAnyName(
+ "std::make_optional", "base::make_optional", "absl::make_optional"))),
+ hasOptionalType());
+}
+
+auto nulloptTypeDecl() {
+ return namedDecl(
+ hasAnyName("std::nullopt_t", "absl::nullopt_t", "base::nullopt_t"));
+}
+
+auto hasNulloptType() { return hasType(nulloptTypeDecl()); }
+
+// `optional` or `nullopt_t`
+auto hasAnyOptionalType() {
+ return hasType(hasUnqualifiedDesugaredType(
+ recordType(hasDeclaration(anyOf(nulloptTypeDecl(), optionalClass())))));
+}
+
+auto inPlaceClass() {
+ return recordDecl(
+ hasAnyName("std::in_place_t", "absl::in_place_t", "base::in_place_t"));
+}
+
+auto isOptionalNulloptConstructor() {
+ return cxxConstructExpr(
+ hasOptionalType(),
+ hasDeclaration(cxxConstructorDecl(parameterCountIs(1),
+ hasParameter(0, hasNulloptType()))));
+}
+
+auto isOptionalInPlaceConstructor() {
+ return cxxConstructExpr(hasOptionalType(),
+ hasArgument(0, hasType(inPlaceClass())));
+}
+
+auto isOptionalValueOrConversionConstructor() {
+ return cxxConstructExpr(
+ hasOptionalType(),
+ unless(hasDeclaration(
+ cxxConstructorDecl(anyOf(isCopyConstructor(), isMoveConstructor())))),
+ argumentCountIs(1), hasArgument(0, unless(hasNulloptType())));
+}
+
+auto isOptionalValueOrConversionAssignment() {
+ return cxxOperatorCallExpr(
+ hasOverloadedOperatorName("="),
+ callee(cxxMethodDecl(ofClass(optionalClass()))),
+ unless(hasDeclaration(cxxMethodDecl(
+ anyOf(isCopyAssignmentOperator(), isMoveAssignmentOperator())))),
+ argumentCountIs(2), hasArgument(1, unless(hasNulloptType())));
+}
+
+auto isNulloptConstructor() {
+ return cxxConstructExpr(hasNulloptType(), argumentCountIs(1),
+ hasArgument(0, hasNulloptType()));
+}
+
+auto isOptionalNulloptAssignment() {
+ return cxxOperatorCallExpr(hasOverloadedOperatorName("="),
+ callee(cxxMethodDecl(ofClass(optionalClass()))),
+ argumentCountIs(2),
+ hasArgument(1, hasNulloptType()));
+}
+
+auto isStdSwapCall() {
+ return callExpr(callee(functionDecl(hasName("std::swap"))),
+ argumentCountIs(2), hasArgument(0, hasOptionalType()),
+ hasArgument(1, hasOptionalType()));
+}
+
+constexpr llvm::StringLiteral ValueOrCallID = "ValueOrCall";
+
+auto isValueOrStringEmptyCall() {
+ // `opt.value_or("").empty()`
+ return cxxMemberCallExpr(
+ callee(cxxMethodDecl(hasName("empty"))),
+ onImplicitObjectArgument(ignoringImplicit(
+ cxxMemberCallExpr(on(expr(unless(cxxThisExpr()))),
+ callee(cxxMethodDecl(hasName("value_or"),
+ ofClass(optionalClass()))),
+ hasArgument(0, stringLiteral(hasSize(0))))
+ .bind(ValueOrCallID))));
+}
+
+auto isValueOrNotEqX() {
+ auto ComparesToSame = [](ast_matchers::internal::Matcher<Stmt> Arg) {
+ return hasOperands(
+ ignoringImplicit(
+ cxxMemberCallExpr(on(expr(unless(cxxThisExpr()))),
+ callee(cxxMethodDecl(hasName("value_or"),
+ ofClass(optionalClass()))),
+ hasArgument(0, Arg))
+ .bind(ValueOrCallID)),
+ ignoringImplicit(Arg));
+ };
+
+ // `opt.value_or(X) != X`, where X is `nullptr`, `""`, or `0`. Ideally, we'd
+ // support this pattern for any expression, but the AST does not have a
+ // generic expression comparison facility, so we specialize to common cases
+ // seen in practice. FIXME: define a matcher that compares values across
+ // nodes, which would let us generalize this to any `X`.
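+ // For example, `opt.value_or(0) != 0` and `opt.value_or(nullptr) != nullptr`
+ // both match, with the inner `value_or` call bound to `ValueOrCallID` for
+ // the transfer function to look up.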
+ return binaryOperation(hasOperatorName("!="), + anyOf(ComparesToSame(cxxNullPtrLiteralExpr()), + ComparesToSame(stringLiteral(hasSize(0))), + ComparesToSame(integerLiteral(equals(0))))); +} + +auto isCallReturningOptional() { + return callExpr(hasType(qualType(anyOf( + optionalOrAliasType(), referenceType(pointee(optionalOrAliasType())))))); +} + +template <typename L, typename R> +auto isComparisonOperatorCall(L lhs_arg_matcher, R rhs_arg_matcher) { + return cxxOperatorCallExpr( + anyOf(hasOverloadedOperatorName("=="), hasOverloadedOperatorName("!=")), + argumentCountIs(2), hasArgument(0, lhs_arg_matcher), + hasArgument(1, rhs_arg_matcher)); +} + +// Ensures that `Expr` is mapped to a `BoolValue` and returns it. +BoolValue &forceBoolValue(Environment &Env, const Expr &Expr) { + auto *Value = cast_or_null<BoolValue>(Env.getValue(Expr, SkipPast::None)); + if (Value != nullptr) + return *Value; + + auto &Loc = Env.createStorageLocation(Expr); + Value = &Env.makeAtomicBoolValue(); + Env.setValue(Loc, *Value); + Env.setStorageLocation(Expr, Loc); + return *Value; +} + +/// Sets `HasValueVal` as the symbolic value that represents the "has_value" +/// property of the optional value `OptionalVal`. +void setHasValue(Value &OptionalVal, BoolValue &HasValueVal) { + OptionalVal.setProperty("has_value", HasValueVal); +} + +/// Creates a symbolic value for an `optional` value using `HasValueVal` as the +/// symbolic value of its "has_value" property. +StructValue &createOptionalValue(Environment &Env, BoolValue &HasValueVal) { + auto OptionalVal = std::make_unique<StructValue>(); + setHasValue(*OptionalVal, HasValueVal); + return Env.takeOwnership(std::move(OptionalVal)); +} + +/// Returns the symbolic value that represents the "has_value" property of the +/// optional value `OptionalVal`. Returns null if `OptionalVal` is null. +BoolValue *getHasValue(Environment &Env, Value *OptionalVal) { + if (OptionalVal != nullptr) { + auto *HasValueVal = + cast_or_null<BoolValue>(OptionalVal->getProperty("has_value")); + if (HasValueVal == nullptr) { + HasValueVal = &Env.makeAtomicBoolValue(); + OptionalVal->setProperty("has_value", *HasValueVal); + } + return HasValueVal; + } + return nullptr; +} + +/// If `Type` is a reference type, returns the type of its pointee. Otherwise, +/// returns `Type` itself. +QualType stripReference(QualType Type) { + return Type->isReferenceType() ? Type->getPointeeType() : Type; +} + +/// Returns true if and only if `Type` is an optional type. +bool isOptionalType(QualType Type) { + if (!Type->isRecordType()) + return false; + // FIXME: Optimize this by avoiding the `getQualifiedNameAsString` call. + auto TypeName = Type->getAsCXXRecordDecl()->getQualifiedNameAsString(); + return TypeName == "std::optional" || TypeName == "absl::optional" || + TypeName == "base::Optional"; +} + +/// Returns the number of optional wrappers in `Type`. +/// +/// For example, if `Type` is `optional<optional<int>>`, the result of this +/// function will be 2. +int countOptionalWrappers(const ASTContext &ASTCtx, QualType Type) { + if (!isOptionalType(Type)) + return 0; + return 1 + countOptionalWrappers( + ASTCtx, + cast<ClassTemplateSpecializationDecl>(Type->getAsRecordDecl()) + ->getTemplateArgs() + .get(0) + .getAsType() + .getDesugaredType(ASTCtx)); +} + +/// Tries to initialize the `optional`'s value (that is, contents), and return +/// its location. Returns nullptr if the value can't be represented. 
+StorageLocation *maybeInitializeOptionalValueMember(QualType Q,
+ Value &OptionalVal,
+ Environment &Env) {
+ // The "value" property represents a synthetic field. As such, it needs a
+ // `StorageLocation`, like normal fields (and other variables). So, we model
+ // it with a `ReferenceValue`, since that includes a storage location. Once
+ // the property is set, it will be shared by all environments that access the
+ // `Value` representing the optional (here, `OptionalVal`).
+ if (auto *ValueProp = OptionalVal.getProperty("value")) {
+ auto *ValueRef = clang::cast<ReferenceValue>(ValueProp);
+ auto &ValueLoc = ValueRef->getReferentLoc();
+ if (Env.getValue(ValueLoc) == nullptr) {
+ // The property was previously set, but the value has been lost. This can
+ // happen, for example, because of an environment merge (where the two
+ // environments mapped the property to different values, which resulted in
+ // them both being discarded), or when two blocks in the CFG, with neither
+ // a dominator of the other, visit the same optional value, or even when a
+ // block is revisited during testing to collect per-statement state.
+ // FIXME: This situation means that the optional contents are not shared
+ // between branches and the like. Practically, this lack of sharing
+ // reduces the precision of the model when the contents are relevant to
+ // the check, like another optional or a boolean that influences control
+ // flow.
+ auto *ValueVal = Env.createValue(ValueLoc.getType());
+ if (ValueVal == nullptr)
+ return nullptr;
+ Env.setValue(ValueLoc, *ValueVal);
+ }
+ return &ValueLoc;
+ }
+
+ auto Ty = stripReference(Q);
+ auto *ValueVal = Env.createValue(Ty);
+ if (ValueVal == nullptr)
+ return nullptr;
+ auto &ValueLoc = Env.createStorageLocation(Ty);
+ Env.setValue(ValueLoc, *ValueVal);
+ auto ValueRef = std::make_unique<ReferenceValue>(ValueLoc);
+ OptionalVal.setProperty("value", Env.takeOwnership(std::move(ValueRef)));
+ return &ValueLoc;
+}
+
+void initializeOptionalReference(const Expr *OptionalExpr,
+ const MatchFinder::MatchResult &,
+ LatticeTransferState &State) {
+ if (auto *OptionalVal =
+ State.Env.getValue(*OptionalExpr, SkipPast::Reference)) {
+ if (OptionalVal->getProperty("has_value") == nullptr) {
+ setHasValue(*OptionalVal, State.Env.makeAtomicBoolValue());
+ }
+ }
+}
+
+/// Returns true if and only if `OptionalVal` is initialized and known to be
+/// empty in `Env`.
+bool isEmptyOptional(const Value &OptionalVal, const Environment &Env) {
+ auto *HasValueVal =
+ cast_or_null<BoolValue>(OptionalVal.getProperty("has_value"));
+ return HasValueVal != nullptr &&
+ Env.flowConditionImplies(Env.makeNot(*HasValueVal));
+}
+
+/// Returns true if and only if `OptionalVal` is initialized and known to be
+/// non-empty in `Env`.
+bool isNonEmptyOptional(const Value &OptionalVal, const Environment &Env) { + auto *HasValueVal = + cast_or_null<BoolValue>(OptionalVal.getProperty("has_value")); + return HasValueVal != nullptr && Env.flowConditionImplies(*HasValueVal); +} + +void transferUnwrapCall(const Expr *UnwrapExpr, const Expr *ObjectExpr, + LatticeTransferState &State) { + if (auto *OptionalVal = + State.Env.getValue(*ObjectExpr, SkipPast::ReferenceThenPointer)) { + if (State.Env.getStorageLocation(*UnwrapExpr, SkipPast::None) == nullptr) + if (auto *Loc = maybeInitializeOptionalValueMember( + UnwrapExpr->getType(), *OptionalVal, State.Env)) + State.Env.setStorageLocation(*UnwrapExpr, *Loc); + } +} + +void transferMakeOptionalCall(const CallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + auto &Loc = State.Env.createStorageLocation(*E); + State.Env.setStorageLocation(*E, Loc); + State.Env.setValue( + Loc, createOptionalValue(State.Env, State.Env.getBoolLiteralValue(true))); +} + +void transferOptionalHasValueCall(const CXXMemberCallExpr *CallExpr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + if (auto *HasValueVal = getHasValue( + State.Env, State.Env.getValue(*CallExpr->getImplicitObjectArgument(), + SkipPast::ReferenceThenPointer))) { + auto &CallExprLoc = State.Env.createStorageLocation(*CallExpr); + State.Env.setValue(CallExprLoc, *HasValueVal); + State.Env.setStorageLocation(*CallExpr, CallExprLoc); + } +} + +/// `ModelPred` builds a logical formula relating the predicate in +/// `ValueOrPredExpr` to the optional's `has_value` property. +void transferValueOrImpl(const clang::Expr *ValueOrPredExpr, + const MatchFinder::MatchResult &Result, + LatticeTransferState &State, + BoolValue &(*ModelPred)(Environment &Env, + BoolValue &ExprVal, + BoolValue &HasValueVal)) { + auto &Env = State.Env; + + const auto *ObjectArgumentExpr = + Result.Nodes.getNodeAs<clang::CXXMemberCallExpr>(ValueOrCallID) + ->getImplicitObjectArgument(); + + auto *HasValueVal = getHasValue( + State.Env, + State.Env.getValue(*ObjectArgumentExpr, SkipPast::ReferenceThenPointer)); + if (HasValueVal == nullptr) + return; + + Env.addToFlowCondition( + ModelPred(Env, forceBoolValue(Env, *ValueOrPredExpr), *HasValueVal)); +} + +void transferValueOrStringEmptyCall(const clang::Expr *ComparisonExpr, + const MatchFinder::MatchResult &Result, + LatticeTransferState &State) { + return transferValueOrImpl(ComparisonExpr, Result, State, + [](Environment &Env, BoolValue &ExprVal, + BoolValue &HasValueVal) -> BoolValue & { + // If the result is *not* empty, then we know the + // optional must have been holding a value. If + // `ExprVal` is true, though, we don't learn + // anything definite about `has_value`, so we + // don't add any corresponding implications to + // the flow condition. + return Env.makeImplication(Env.makeNot(ExprVal), + HasValueVal); + }); +} + +void transferValueOrNotEqX(const Expr *ComparisonExpr, + const MatchFinder::MatchResult &Result, + LatticeTransferState &State) { + transferValueOrImpl(ComparisonExpr, Result, State, + [](Environment &Env, BoolValue &ExprVal, + BoolValue &HasValueVal) -> BoolValue & { + // We know that if `(opt.value_or(X) != X)` then + // `opt.hasValue()`, even without knowing further + // details about the contents of `opt`. 
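+ // The converse does not hold: if `opt` is engaged but
+ // happens to hold exactly `X`, the comparison is false
+ // even though `has_value` is true, so only the one-way
+ // implication is sound.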
+ return Env.makeImplication(ExprVal, HasValueVal); + }); +} + +void transferCallReturningOptional(const CallExpr *E, + const MatchFinder::MatchResult &Result, + LatticeTransferState &State) { + if (State.Env.getStorageLocation(*E, SkipPast::None) != nullptr) + return; + + auto &Loc = State.Env.createStorageLocation(*E); + State.Env.setStorageLocation(*E, Loc); + State.Env.setValue( + Loc, createOptionalValue(State.Env, State.Env.makeAtomicBoolValue())); +} + +void assignOptionalValue(const Expr &E, Environment &Env, + BoolValue &HasValueVal) { + if (auto *OptionalLoc = + Env.getStorageLocation(E, SkipPast::ReferenceThenPointer)) { + Env.setValue(*OptionalLoc, createOptionalValue(Env, HasValueVal)); + } +} + +/// Returns a symbolic value for the "has_value" property of an `optional<T>` +/// value that is constructed/assigned from a value of type `U` or `optional<U>` +/// where `T` is constructible from `U`. +BoolValue &valueOrConversionHasValue(const FunctionDecl &F, const Expr &E, + const MatchFinder::MatchResult &MatchRes, + LatticeTransferState &State) { + assert(F.getTemplateSpecializationArgs() != nullptr); + assert(F.getTemplateSpecializationArgs()->size() > 0); + + const int TemplateParamOptionalWrappersCount = countOptionalWrappers( + *MatchRes.Context, + stripReference(F.getTemplateSpecializationArgs()->get(0).getAsType())); + const int ArgTypeOptionalWrappersCount = + countOptionalWrappers(*MatchRes.Context, stripReference(E.getType())); + + // Check if this is a constructor/assignment call for `optional<T>` with + // argument of type `U` such that `T` is constructible from `U`. + if (TemplateParamOptionalWrappersCount == ArgTypeOptionalWrappersCount) + return State.Env.getBoolLiteralValue(true); + + // This is a constructor/assignment call for `optional<T>` with argument of + // type `optional<U>` such that `T` is constructible from `U`. + if (auto *HasValueVal = + getHasValue(State.Env, State.Env.getValue(E, SkipPast::Reference))) + return *HasValueVal; + return State.Env.makeAtomicBoolValue(); +} + +void transferValueOrConversionConstructor( + const CXXConstructExpr *E, const MatchFinder::MatchResult &MatchRes, + LatticeTransferState &State) { + assert(E->getNumArgs() > 0); + + assignOptionalValue(*E, State.Env, + valueOrConversionHasValue(*E->getConstructor(), + *E->getArg(0), MatchRes, + State)); +} + +void transferAssignment(const CXXOperatorCallExpr *E, BoolValue &HasValueVal, + LatticeTransferState &State) { + assert(E->getNumArgs() > 0); + + auto *OptionalLoc = + State.Env.getStorageLocation(*E->getArg(0), SkipPast::Reference); + if (OptionalLoc == nullptr) + return; + + State.Env.setValue(*OptionalLoc, createOptionalValue(State.Env, HasValueVal)); + + // Assign a storage location for the whole expression. 
+ State.Env.setStorageLocation(*E, *OptionalLoc); +} + +void transferValueOrConversionAssignment( + const CXXOperatorCallExpr *E, const MatchFinder::MatchResult &MatchRes, + LatticeTransferState &State) { + assert(E->getNumArgs() > 1); + transferAssignment(E, + valueOrConversionHasValue(*E->getDirectCallee(), + *E->getArg(1), MatchRes, State), + State); +} + +void transferNulloptAssignment(const CXXOperatorCallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferAssignment(E, State.Env.getBoolLiteralValue(false), State); +} + +void transferSwap(const StorageLocation &OptionalLoc1, + const StorageLocation &OptionalLoc2, + LatticeTransferState &State) { + auto *OptionalVal1 = State.Env.getValue(OptionalLoc1); + assert(OptionalVal1 != nullptr); + + auto *OptionalVal2 = State.Env.getValue(OptionalLoc2); + assert(OptionalVal2 != nullptr); + + State.Env.setValue(OptionalLoc1, *OptionalVal2); + State.Env.setValue(OptionalLoc2, *OptionalVal1); +} + +void transferSwapCall(const CXXMemberCallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assert(E->getNumArgs() == 1); + + auto *OptionalLoc1 = State.Env.getStorageLocation( + *E->getImplicitObjectArgument(), SkipPast::ReferenceThenPointer); + assert(OptionalLoc1 != nullptr); + + auto *OptionalLoc2 = + State.Env.getStorageLocation(*E->getArg(0), SkipPast::Reference); + assert(OptionalLoc2 != nullptr); + + transferSwap(*OptionalLoc1, *OptionalLoc2, State); +} + +void transferStdSwapCall(const CallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assert(E->getNumArgs() == 2); + + auto *OptionalLoc1 = + State.Env.getStorageLocation(*E->getArg(0), SkipPast::Reference); + assert(OptionalLoc1 != nullptr); + + auto *OptionalLoc2 = + State.Env.getStorageLocation(*E->getArg(1), SkipPast::Reference); + assert(OptionalLoc2 != nullptr); + + transferSwap(*OptionalLoc1, *OptionalLoc2, State); +} + +BoolValue &evaluateEquality(Environment &Env, BoolValue &EqVal, BoolValue &LHS, + BoolValue &RHS) { + // Logically, an optional<T> object is composed of two values - a `has_value` + // bit and a value of type T. Equality of optional objects compares both + // values. Therefore, merely comparing the `has_value` bits isn't sufficient: + // when two optional objects are engaged, the equality of their respective + // values of type T matters. Since we only track the `has_value` bits, we + // can't make any conclusions about equality when we know that two optional + // objects are engaged. + // + // We express this as two facts about the equality: + // a) EqVal => (LHS & RHS) v (!RHS & !LHS) + // If they are equal, then either both are set or both are unset. + // b) (!LHS & !RHS) => EqVal + // If neither is set, then they are equal. + // We rewrite b) as !EqVal => (LHS v RHS), for a more compact formula. 
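+ // For instance, if `LHS` is true and `RHS` is false, a) forces `EqVal` to
+ // be false; and if both are false, b) forces `EqVal` to be true (two
+ // disengaged optionals always compare equal).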
+ return Env.makeAnd( + Env.makeImplication( + EqVal, Env.makeOr(Env.makeAnd(LHS, RHS), + Env.makeAnd(Env.makeNot(LHS), Env.makeNot(RHS)))), + Env.makeImplication(Env.makeNot(EqVal), Env.makeOr(LHS, RHS))); +} + +void transferOptionalAndOptionalCmp(const clang::CXXOperatorCallExpr *CmpExpr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + Environment &Env = State.Env; + auto *CmpValue = &forceBoolValue(Env, *CmpExpr); + if (auto *LHasVal = getHasValue( + Env, Env.getValue(*CmpExpr->getArg(0), SkipPast::Reference))) + if (auto *RHasVal = getHasValue( + Env, Env.getValue(*CmpExpr->getArg(1), SkipPast::Reference))) { + if (CmpExpr->getOperator() == clang::OO_ExclaimEqual) + CmpValue = &State.Env.makeNot(*CmpValue); + Env.addToFlowCondition( + evaluateEquality(Env, *CmpValue, *LHasVal, *RHasVal)); + } +} + +void transferOptionalAndValueCmp(const clang::CXXOperatorCallExpr *CmpExpr, + const clang::Expr *E, Environment &Env) { + auto *CmpValue = &forceBoolValue(Env, *CmpExpr); + if (auto *HasVal = getHasValue(Env, Env.getValue(*E, SkipPast::Reference))) { + if (CmpExpr->getOperator() == clang::OO_ExclaimEqual) + CmpValue = &Env.makeNot(*CmpValue); + Env.addToFlowCondition(evaluateEquality(Env, *CmpValue, *HasVal, + Env.getBoolLiteralValue(true))); + } +} + +std::optional<StatementMatcher> +ignorableOptional(const UncheckedOptionalAccessModelOptions &Options) { + if (Options.IgnoreSmartPointerDereference) { + auto SmartPtrUse = expr(ignoringParenImpCasts(cxxOperatorCallExpr( + anyOf(hasOverloadedOperatorName("->"), hasOverloadedOperatorName("*")), + unless(hasArgument(0, expr(hasOptionalType())))))); + return expr( + anyOf(SmartPtrUse, memberExpr(hasObjectExpression(SmartPtrUse)))); + } + return std::nullopt; +} + +StatementMatcher +valueCall(const std::optional<StatementMatcher> &IgnorableOptional) { + return isOptionalMemberCallWithName("value", IgnorableOptional); +} + +StatementMatcher +valueOperatorCall(const std::optional<StatementMatcher> &IgnorableOptional) { + return expr(anyOf(isOptionalOperatorCallWithName("*", IgnorableOptional), + isOptionalOperatorCallWithName("->", IgnorableOptional))); +} + +auto buildTransferMatchSwitch() { + // FIXME: Evaluate the efficiency of matchers. If using matchers results in a + // lot of duplicated work (e.g. string comparisons), consider providing APIs + // that avoid it through memoization. + return CFGMatchSwitchBuilder<LatticeTransferState>() + // Attach a symbolic "has_value" state to optional values that we see for + // the first time. 
+ .CaseOfCFGStmt<Expr>( + expr(anyOf(declRefExpr(), memberExpr()), hasOptionalType()), + initializeOptionalReference) + + // make_optional + .CaseOfCFGStmt<CallExpr>(isMakeOptionalCall(), transferMakeOptionalCall) + + // optional::optional (in place) + .CaseOfCFGStmt<CXXConstructExpr>( + isOptionalInPlaceConstructor(), + [](const CXXConstructExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assignOptionalValue(*E, State.Env, + State.Env.getBoolLiteralValue(true)); + }) + // nullopt_t::nullopt_t + .CaseOfCFGStmt<CXXConstructExpr>( + isNulloptConstructor(), + [](const CXXConstructExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assignOptionalValue(*E, State.Env, + State.Env.getBoolLiteralValue(false)); + }) + // optional::optional(nullopt_t) + .CaseOfCFGStmt<CXXConstructExpr>( + isOptionalNulloptConstructor(), + [](const CXXConstructExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assignOptionalValue(*E, State.Env, + State.Env.getBoolLiteralValue(false)); + }) + // optional::optional (value/conversion) + .CaseOfCFGStmt<CXXConstructExpr>(isOptionalValueOrConversionConstructor(), + transferValueOrConversionConstructor) + + + // optional::operator= + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isOptionalValueOrConversionAssignment(), + transferValueOrConversionAssignment) + .CaseOfCFGStmt<CXXOperatorCallExpr>(isOptionalNulloptAssignment(), + transferNulloptAssignment) + + // optional::value + .CaseOfCFGStmt<CXXMemberCallExpr>( + valueCall(std::nullopt), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferUnwrapCall(E, E->getImplicitObjectArgument(), State); + }) + + // optional::operator*, optional::operator-> + .CaseOfCFGStmt<CallExpr>(valueOperatorCall(std::nullopt), + [](const CallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferUnwrapCall(E, E->getArg(0), State); + }) + + // optional::has_value + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithName("has_value"), + transferOptionalHasValueCall) + + // optional::operator bool + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithName("operator bool"), + transferOptionalHasValueCall) + + // optional::emplace + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithName("emplace"), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assignOptionalValue(*E->getImplicitObjectArgument(), State.Env, + State.Env.getBoolLiteralValue(true)); + }) + + // optional::reset + .CaseOfCFGStmt<CXXMemberCallExpr>( + isOptionalMemberCallWithName("reset"), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + LatticeTransferState &State) { + assignOptionalValue(*E->getImplicitObjectArgument(), State.Env, + State.Env.getBoolLiteralValue(false)); + }) + + // optional::swap + .CaseOfCFGStmt<CXXMemberCallExpr>(isOptionalMemberCallWithName("swap"), + transferSwapCall) + + // std::swap + .CaseOfCFGStmt<CallExpr>(isStdSwapCall(), transferStdSwapCall) + + // opt.value_or("").empty() + .CaseOfCFGStmt<Expr>(isValueOrStringEmptyCall(), + transferValueOrStringEmptyCall) + + // opt.value_or(X) != X + .CaseOfCFGStmt<Expr>(isValueOrNotEqX(), transferValueOrNotEqX) + + // Comparisons (==, !=): + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall(hasAnyOptionalType(), hasAnyOptionalType()), + transferOptionalAndOptionalCmp) + .CaseOfCFGStmt<CXXOperatorCallExpr>( + 
isComparisonOperatorCall(hasOptionalType(), + unless(hasAnyOptionalType())), + [](const clang::CXXOperatorCallExpr *Cmp, + const MatchFinder::MatchResult &, LatticeTransferState &State) { + transferOptionalAndValueCmp(Cmp, Cmp->getArg(0), State.Env); + }) + .CaseOfCFGStmt<CXXOperatorCallExpr>( + isComparisonOperatorCall(unless(hasAnyOptionalType()), + hasOptionalType()), + [](const clang::CXXOperatorCallExpr *Cmp, + const MatchFinder::MatchResult &, LatticeTransferState &State) { + transferOptionalAndValueCmp(Cmp, Cmp->getArg(1), State.Env); + }) + + // returns optional + .CaseOfCFGStmt<CallExpr>(isCallReturningOptional(), + transferCallReturningOptional) + + .Build(); +} + +std::vector<SourceLocation> diagnoseUnwrapCall(const Expr *UnwrapExpr, + const Expr *ObjectExpr, + const Environment &Env) { + if (auto *OptionalVal = + Env.getValue(*ObjectExpr, SkipPast::ReferenceThenPointer)) { + auto *Prop = OptionalVal->getProperty("has_value"); + if (auto *HasValueVal = cast_or_null<BoolValue>(Prop)) { + if (Env.flowConditionImplies(*HasValueVal)) + return {}; + } + } + + // Record that this unwrap is *not* provably safe. + // FIXME: include either the name of the optional (if applicable) or a source + // range of the access for easier interpretation of the result. + return {ObjectExpr->getBeginLoc()}; +} + +auto buildDiagnoseMatchSwitch( + const UncheckedOptionalAccessModelOptions &Options) { + // FIXME: Evaluate the efficiency of matchers. If using matchers results in a + // lot of duplicated work (e.g. string comparisons), consider providing APIs + // that avoid it through memoization. + auto IgnorableOptional = ignorableOptional(Options); + return CFGMatchSwitchBuilder<const Environment, std::vector<SourceLocation>>() + // optional::value + .CaseOfCFGStmt<CXXMemberCallExpr>( + valueCall(IgnorableOptional), + [](const CXXMemberCallExpr *E, const MatchFinder::MatchResult &, + const Environment &Env) { + return diagnoseUnwrapCall(E, E->getImplicitObjectArgument(), Env); + }) + + // optional::operator*, optional::operator-> + .CaseOfCFGStmt<CallExpr>( + valueOperatorCall(IgnorableOptional), + [](const CallExpr *E, const MatchFinder::MatchResult &, + const Environment &Env) { + return diagnoseUnwrapCall(E, E->getArg(0), Env); + }) + .Build(); +} + +} // namespace + +ast_matchers::DeclarationMatcher +UncheckedOptionalAccessModel::optionalClassDecl() { + return optionalClass(); +} + +UncheckedOptionalAccessModel::UncheckedOptionalAccessModel(ASTContext &Ctx) + : DataflowAnalysis<UncheckedOptionalAccessModel, NoopLattice>(Ctx), + TransferMatchSwitch(buildTransferMatchSwitch()) {} + +void UncheckedOptionalAccessModel::transfer(const CFGElement *Elt, + NoopLattice &L, Environment &Env) { + LatticeTransferState State(L, Env); + TransferMatchSwitch(*Elt, getASTContext(), State); +} + +ComparisonResult UncheckedOptionalAccessModel::compare( + QualType Type, const Value &Val1, const Environment &Env1, + const Value &Val2, const Environment &Env2) { + if (!isOptionalType(Type)) + return ComparisonResult::Unknown; + bool MustNonEmpty1 = isNonEmptyOptional(Val1, Env1); + bool MustNonEmpty2 = isNonEmptyOptional(Val2, Env2); + if (MustNonEmpty1 && MustNonEmpty2) return ComparisonResult::Same; + // If exactly one is true, then they're different, no reason to check whether + // they're definitely empty. + if (MustNonEmpty1 || MustNonEmpty2) return ComparisonResult::Different; + // Check if they're both definitely empty. + return (isEmptyOptional(Val1, Env1) && isEmptyOptional(Val2, Env2)) + ? 
ComparisonResult::Same + : ComparisonResult::Different; +} + +bool UncheckedOptionalAccessModel::merge(QualType Type, const Value &Val1, + const Environment &Env1, + const Value &Val2, + const Environment &Env2, + Value &MergedVal, + Environment &MergedEnv) { + if (!isOptionalType(Type)) + return true; + // FIXME: uses same approach as join for `BoolValues`. Requires non-const + // values, though, so will require updating the interface. + auto &HasValueVal = MergedEnv.makeAtomicBoolValue(); + bool MustNonEmpty1 = isNonEmptyOptional(Val1, Env1); + bool MustNonEmpty2 = isNonEmptyOptional(Val2, Env2); + if (MustNonEmpty1 && MustNonEmpty2) + MergedEnv.addToFlowCondition(HasValueVal); + else if ( + // Only make the costly calls to `isEmptyOptional` if we got "unknown" + // (false) for both calls to `isNonEmptyOptional`. + !MustNonEmpty1 && !MustNonEmpty2 && isEmptyOptional(Val1, Env1) && + isEmptyOptional(Val2, Env2)) + MergedEnv.addToFlowCondition(MergedEnv.makeNot(HasValueVal)); + setHasValue(MergedVal, HasValueVal); + return true; +} + +Value *UncheckedOptionalAccessModel::widen(QualType Type, Value &Prev, + const Environment &PrevEnv, + Value &Current, + Environment &CurrentEnv) { + switch (compare(Type, Prev, PrevEnv, Current, CurrentEnv)) { + case ComparisonResult::Same: + return &Prev; + case ComparisonResult::Different: + if (auto *PrevHasVal = + cast_or_null<BoolValue>(Prev.getProperty("has_value"))) { + if (isa<TopBoolValue>(PrevHasVal)) + return &Prev; + } + if (auto *CurrentHasVal = + cast_or_null<BoolValue>(Current.getProperty("has_value"))) { + if (isa<TopBoolValue>(CurrentHasVal)) + return &Current; + } + return &createOptionalValue(CurrentEnv, CurrentEnv.makeTopBoolValue()); + case ComparisonResult::Unknown: + return nullptr; + } + llvm_unreachable("all cases covered in switch"); +} + +UncheckedOptionalAccessDiagnoser::UncheckedOptionalAccessDiagnoser( + UncheckedOptionalAccessModelOptions Options) + : DiagnoseMatchSwitch(buildDiagnoseMatchSwitch(Options)) {} + +std::vector<SourceLocation> UncheckedOptionalAccessDiagnoser::diagnose( + ASTContext &Ctx, const CFGElement *Elt, const Environment &Env) { + return DiagnoseMatchSwitch(*Elt, Ctx, Env); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp new file mode 100644 index 000000000000..0e6c484b67e7 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -0,0 +1,839 @@ +//===-- Transfer.cpp --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines transfer functions that evaluate program statements and +// update an environment accordingly. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/Transfer.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/OperationKinds.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/Basic/OperatorKinds.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <memory>
+#include <tuple>
+
+namespace clang {
+namespace dataflow {
+
+static BoolValue &evaluateBooleanEquality(const Expr &LHS, const Expr &RHS,
+ Environment &Env) {
+ if (auto *LHSValue =
+ dyn_cast_or_null<BoolValue>(Env.getValue(LHS, SkipPast::Reference)))
+ if (auto *RHSValue =
+ dyn_cast_or_null<BoolValue>(Env.getValue(RHS, SkipPast::Reference)))
+ return Env.makeIff(*LHSValue, *RHSValue);
+
+ return Env.makeAtomicBoolValue();
+}
+
+// Functionally updates `V` such that any instances of `TopBool` are replaced
+// with fresh atomic bools. Note: This implementation assumes that `V` is a
+// tree; if `V` is a DAG, it will lose any sharing between subvalues that was
+// present in the original.
+static BoolValue &unpackValue(BoolValue &V, Environment &Env);
+
+template <typename Derived, typename M>
+BoolValue &unpackBinaryBoolValue(Environment &Env, BoolValue &B, M build) {
+ auto &V = *cast<Derived>(&B);
+ BoolValue &Left = V.getLeftSubValue();
+ BoolValue &Right = V.getRightSubValue();
+ BoolValue &ULeft = unpackValue(Left, Env);
+ BoolValue &URight = unpackValue(Right, Env);
+
+ if (&ULeft == &Left && &URight == &Right)
+ return V;
+
+ return (Env.*build)(ULeft, URight);
+}
+
+static BoolValue &unpackValue(BoolValue &V, Environment &Env) {
+ switch (V.getKind()) {
+ case Value::Kind::Integer:
+ case Value::Kind::Reference:
+ case Value::Kind::Pointer:
+ case Value::Kind::Struct:
+ llvm_unreachable("BoolValue cannot have any of these kinds.");
+
+ case Value::Kind::AtomicBool:
+ return V;
+
+ case Value::Kind::TopBool:
+ // Unpack `TopBool` into a fresh atomic bool.
+ return Env.makeAtomicBoolValue();
+
+ case Value::Kind::Negation: {
+ auto &N = *cast<NegationValue>(&V);
+ BoolValue &Sub = N.getSubVal();
+ BoolValue &USub = unpackValue(Sub, Env);
+
+ if (&USub == &Sub)
+ return V;
+ return Env.makeNot(USub);
+ }
+ case Value::Kind::Conjunction:
+ return unpackBinaryBoolValue<ConjunctionValue>(Env, V,
+ &Environment::makeAnd);
+ case Value::Kind::Disjunction:
+ return unpackBinaryBoolValue<DisjunctionValue>(Env, V,
+ &Environment::makeOr);
+ case Value::Kind::Implication:
+ return unpackBinaryBoolValue<ImplicationValue>(
+ Env, V, &Environment::makeImplication);
+ case Value::Kind::Biconditional:
+ return unpackBinaryBoolValue<BiconditionalValue>(Env, V,
+ &Environment::makeIff);
+ }
+ llvm_unreachable("All reachable cases in switch return");
+}
+
+// Unpacks the value (if any) associated with `E` and updates `E` to the new
+// value, if any unpacking occurred.
+static Value *maybeUnpackLValueExpr(const Expr &E, Environment &Env) { + // FIXME: this is too flexible: it _allows_ a reference, while it should + // _require_ one, since lvalues should always be wrapped in `ReferenceValue`. + auto *Loc = Env.getStorageLocation(E, SkipPast::Reference); + if (Loc == nullptr) + return nullptr; + auto *Val = Env.getValue(*Loc); + + auto *B = dyn_cast_or_null<BoolValue>(Val); + if (B == nullptr) + return Val; + + auto &UnpackedVal = unpackValue(*B, Env); + if (&UnpackedVal == Val) + return Val; + Env.setValue(*Loc, UnpackedVal); + return &UnpackedVal; +} + +class TransferVisitor : public ConstStmtVisitor<TransferVisitor> { +public: + TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env) + : StmtToEnv(StmtToEnv), Env(Env) {} + + void VisitBinaryOperator(const BinaryOperator *S) { + const Expr *LHS = S->getLHS(); + assert(LHS != nullptr); + + const Expr *RHS = S->getRHS(); + assert(RHS != nullptr); + + switch (S->getOpcode()) { + case BO_Assign: { + auto *LHSLoc = Env.getStorageLocation(*LHS, SkipPast::Reference); + if (LHSLoc == nullptr) + break; + + auto *RHSVal = Env.getValue(*RHS, SkipPast::Reference); + if (RHSVal == nullptr) + break; + + // Assign a value to the storage location of the left-hand side. + Env.setValue(*LHSLoc, *RHSVal); + + // Assign a storage location for the whole expression. + Env.setStorageLocation(*S, *LHSLoc); + break; + } + case BO_LAnd: + case BO_LOr: { + BoolValue &LHSVal = getLogicOperatorSubExprValue(*LHS); + BoolValue &RHSVal = getLogicOperatorSubExprValue(*RHS); + + auto &Loc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, Loc); + if (S->getOpcode() == BO_LAnd) + Env.setValue(Loc, Env.makeAnd(LHSVal, RHSVal)); + else + Env.setValue(Loc, Env.makeOr(LHSVal, RHSVal)); + break; + } + case BO_NE: + case BO_EQ: { + auto &LHSEqRHSValue = evaluateBooleanEquality(*LHS, *RHS, Env); + auto &Loc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, Loc); + Env.setValue(Loc, S->getOpcode() == BO_EQ ? LHSEqRHSValue + : Env.makeNot(LHSEqRHSValue)); + break; + } + case BO_Comma: { + if (auto *Loc = Env.getStorageLocation(*RHS, SkipPast::None)) + Env.setStorageLocation(*S, *Loc); + break; + } + default: + break; + } + } + + void VisitDeclRefExpr(const DeclRefExpr *S) { + const ValueDecl *VD = S->getDecl(); + assert(VD != nullptr); + auto *DeclLoc = Env.getStorageLocation(*VD, SkipPast::None); + if (DeclLoc == nullptr) + return; + + if (VD->getType()->isReferenceType()) { + assert(isa_and_nonnull<ReferenceValue>(Env.getValue((*DeclLoc))) && + "reference-typed declarations map to `ReferenceValue`s"); + Env.setStorageLocation(*S, *DeclLoc); + } else { + auto &Loc = Env.createStorageLocation(*S); + auto &Val = Env.takeOwnership(std::make_unique<ReferenceValue>(*DeclLoc)); + Env.setStorageLocation(*S, Loc); + Env.setValue(Loc, Val); + } + } + + void VisitDeclStmt(const DeclStmt *S) { + // Group decls are converted into single decls in the CFG so the cast below + // is safe. + const auto &D = *cast<VarDecl>(S->getSingleDecl()); + + // Static local vars are already initialized in `Environment`. + if (D.hasGlobalStorage()) + return; + + // The storage location for `D` could have been created earlier, before the + // variable's declaration statement (for example, in the case of + // BindingDecls). 
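+ // For example, with a structured binding `auto [X, Y] = APair;` over a
+ // tuple-like type, the holding variable's location is created while the
+ // DecompositionDecl's bindings are evaluated, before the holding var's own
+ // declaration statement is visited.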
+    auto *MaybeLoc = Env.getStorageLocation(D, SkipPast::None);
+    if (MaybeLoc == nullptr) {
+      MaybeLoc = &Env.createStorageLocation(D);
+      Env.setStorageLocation(D, *MaybeLoc);
+    }
+    auto &Loc = *MaybeLoc;
+
+    const Expr *InitExpr = D.getInit();
+    if (InitExpr == nullptr) {
+      // No initializer expression - associate `Loc` with a new value.
+      if (Value *Val = Env.createValue(D.getType()))
+        Env.setValue(Loc, *Val);
+      return;
+    }
+
+    if (D.getType()->isReferenceType()) {
+      // Initializing a reference variable - do not create a reference to
+      // reference.
+      if (auto *InitExprLoc =
+              Env.getStorageLocation(*InitExpr, SkipPast::Reference)) {
+        auto &Val =
+            Env.takeOwnership(std::make_unique<ReferenceValue>(*InitExprLoc));
+        Env.setValue(Loc, Val);
+      }
+    } else if (auto *InitExprVal = Env.getValue(*InitExpr, SkipPast::None)) {
+      Env.setValue(Loc, *InitExprVal);
+    }
+
+    if (Env.getValue(Loc) == nullptr) {
+      // We arrive here in (the few) cases where an expression is intentionally
+      // "uninterpreted". There are two ways to handle this situation: propagate
+      // the status, so that uninterpreted initializers result in uninterpreted
+      // variables, or provide a default value. We choose the latter so that
+      // later refinements of the variable can be used for reasoning about the
+      // surrounding code.
+      //
+      // FIXME: If and when we interpret all language cases, change this to
+      // assert that `InitExpr` is interpreted, rather than supplying a default
+      // value (assuming we don't update the environment API to return
+      // references).
+      if (Value *Val = Env.createValue(D.getType()))
+        Env.setValue(Loc, *Val);
+    }
+
+    if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) {
+      // If VarDecl is a DecompositionDecl, evaluate each of its bindings. This
+      // needs to be evaluated after initializing the values in the storage for
+      // VarDecl, as the bindings refer to them.
+      // FIXME: Add support for ArraySubscriptExpr.
+      // FIXME: Consider adding AST nodes used in BindingDecls to the CFG.
+      for (const auto *B : Decomp->bindings()) {
+        if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) {
+          auto *DE = dyn_cast_or_null<DeclRefExpr>(ME->getBase());
+          if (DE == nullptr)
+            continue;
+
+          // ME and its base haven't been visited because they aren't included
+          // in the statements of the CFG basic block.
+          VisitDeclRefExpr(DE);
+          VisitMemberExpr(ME);
+
+          if (auto *Loc = Env.getStorageLocation(*ME, SkipPast::Reference))
+            Env.setStorageLocation(*B, *Loc);
+        } else if (auto *VD = B->getHoldingVar()) {
+          // Holding vars are used to back the BindingDecls of tuple-like
+          // types. The holding var declarations appear *after* this statement,
+          // so we have to create a location for them here to share with `B`. We
+          // don't visit the binding, because we know it will be a DeclRefExpr
+          // to `VD`.
+          auto &VDLoc = Env.createStorageLocation(*VD);
+          Env.setStorageLocation(*VD, VDLoc);
+          Env.setStorageLocation(*B, VDLoc);
+        }
+      }
+    }
+  }
+
+  void VisitImplicitCastExpr(const ImplicitCastExpr *S) {
+    const Expr *SubExpr = S->getSubExpr();
+    assert(SubExpr != nullptr);
+
+    switch (S->getCastKind()) {
+    case CK_IntegralToBoolean: {
+      // This cast creates a new, boolean value from the integral value. We
+      // model that with a fresh value in the environment, unless it's already a
+      // boolean.
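+      // For example, in `bool B = N;` where `N` is an `int`, the value of the
+      // cast is a fresh atomic boolean that is unconstrained by `N`, since the
+      // framework does not model the contents of integers.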
+      auto &Loc = Env.createStorageLocation(*S);
+      Env.setStorageLocation(*S, Loc);
+      if (auto *SubExprVal = dyn_cast_or_null<BoolValue>(
+              Env.getValue(*SubExpr, SkipPast::Reference)))
+        Env.setValue(Loc, *SubExprVal);
+      else
+        // FIXME: If integer modeling is added, then update this code to create
+        // the boolean based on the integer model.
+        Env.setValue(Loc, Env.makeAtomicBoolValue());
+      break;
+    }
+
+    case CK_LValueToRValue: {
+      // When an L-value is used as an R-value, it may result in sharing, so we
+      // need to unpack any nested `Top`s.
+      auto *SubExprVal = maybeUnpackLValueExpr(*SubExpr, Env);
+      if (SubExprVal == nullptr)
+        break;
+
+      auto &ExprLoc = Env.createStorageLocation(*S);
+      Env.setStorageLocation(*S, ExprLoc);
+      Env.setValue(ExprLoc, *SubExprVal);
+      break;
+    }
+
+    case CK_IntegralCast:
+      // FIXME: This cast creates a new integral value from the
+      // subexpression. But, because we don't model integers, we don't
+      // distinguish between this new value and the underlying one. If integer
+      // modeling is added, then update this code to create a fresh location and
+      // value.
+    case CK_UncheckedDerivedToBase:
+    case CK_ConstructorConversion:
+    case CK_UserDefinedConversion:
+      // FIXME: Add tests that exercise CK_UncheckedDerivedToBase,
+      // CK_ConstructorConversion, and CK_UserDefinedConversion.
+    case CK_NoOp: {
+      // FIXME: Consider making `Environment::getStorageLocation` skip noop
+      // expressions (this and other similar expressions in the file) instead of
+      // assigning them storage locations.
+      auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None);
+      if (SubExprLoc == nullptr)
+        break;
+
+      Env.setStorageLocation(*S, *SubExprLoc);
+      break;
+    }
+    case CK_NullToPointer:
+    case CK_NullToMemberPointer: {
+      auto &Loc = Env.createStorageLocation(S->getType());
+      Env.setStorageLocation(*S, Loc);
+
+      auto &NullPointerVal =
+          Env.getOrCreateNullPointerValue(S->getType()->getPointeeType());
+      Env.setValue(Loc, NullPointerVal);
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  void VisitUnaryOperator(const UnaryOperator *S) {
+    const Expr *SubExpr = S->getSubExpr();
+    assert(SubExpr != nullptr);
+
+    switch (S->getOpcode()) {
+    case UO_Deref: {
+      // Skip past a reference to handle dereference of a dependent pointer.
+      const auto *SubExprVal = cast_or_null<PointerValue>(
+          Env.getValue(*SubExpr, SkipPast::Reference));
+      if (SubExprVal == nullptr)
+        break;
+
+      auto &Loc = Env.createStorageLocation(*S);
+      Env.setStorageLocation(*S, Loc);
+      Env.setValue(Loc, Env.takeOwnership(std::make_unique<ReferenceValue>(
+                            SubExprVal->getPointeeLoc())));
+      break;
+    }
+    case UO_AddrOf: {
+      // Do not form a pointer to a reference. If `SubExpr` is assigned a
+      // `ReferenceValue` then form a value that points to the location of its
+      // pointee.
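+      // For example, given `int I; int &R = I;`, the expression `&R` produces
+      // a `PointerValue` whose pointee location is the location of `I`, rather
+      // than a location holding the `ReferenceValue` for `R`.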
+      StorageLocation *PointeeLoc =
+          Env.getStorageLocation(*SubExpr, SkipPast::Reference);
+      if (PointeeLoc == nullptr)
+        break;
+
+      auto &PointerLoc = Env.createStorageLocation(*S);
+      auto &PointerVal =
+          Env.takeOwnership(std::make_unique<PointerValue>(*PointeeLoc));
+      Env.setStorageLocation(*S, PointerLoc);
+      Env.setValue(PointerLoc, PointerVal);
+      break;
+    }
+    case UO_LNot: {
+      auto *SubExprVal =
+          dyn_cast_or_null<BoolValue>(Env.getValue(*SubExpr, SkipPast::None));
+      if (SubExprVal == nullptr)
+        break;
+
+      auto &ExprLoc = Env.createStorageLocation(*S);
+      Env.setStorageLocation(*S, ExprLoc);
+      Env.setValue(ExprLoc, Env.makeNot(*SubExprVal));
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  void VisitCXXThisExpr(const CXXThisExpr *S) {
+    auto *ThisPointeeLoc = Env.getThisPointeeStorageLocation();
+    if (ThisPointeeLoc == nullptr)
+      // Unions are not supported yet, and will not have a location for the
+      // `this` expression's pointee.
+      return;
+
+    auto &Loc = Env.createStorageLocation(*S);
+    Env.setStorageLocation(*S, Loc);
+    Env.setValue(Loc, Env.takeOwnership(
+                          std::make_unique<PointerValue>(*ThisPointeeLoc)));
+  }
+
+  void VisitReturnStmt(const ReturnStmt *S) {
+    if (!Env.getAnalysisOptions().ContextSensitiveOpts)
+      return;
+
+    auto *Ret = S->getRetValue();
+    if (Ret == nullptr)
+      return;
+
+    auto *Val = Env.getValue(*Ret, SkipPast::None);
+    if (Val == nullptr)
+      return;
+
+    // FIXME: Support reference-type returns.
+    if (Val->getKind() == Value::Kind::Reference)
+      return;
+
+    auto *Loc = Env.getReturnStorageLocation();
+    assert(Loc != nullptr);
+    // FIXME: Support reference-type returns.
+    if (Loc->getType()->isReferenceType())
+      return;
+
+    // FIXME: Model NRVO.
+    Env.setValue(*Loc, *Val);
+  }
+
+  void VisitMemberExpr(const MemberExpr *S) {
+    ValueDecl *Member = S->getMemberDecl();
+    assert(Member != nullptr);
+
+    // FIXME: Consider assigning pointer values to function member expressions.
+    if (Member->isFunctionOrFunctionTemplate())
+      return;
+
+    // FIXME: if/when we add support for modeling enums, use that support here.
+    if (isa<EnumConstantDecl>(Member))
+      return;
+
+    if (auto *D = dyn_cast<VarDecl>(Member)) {
+      if (D->hasGlobalStorage()) {
+        auto *VarDeclLoc = Env.getStorageLocation(*D, SkipPast::None);
+        if (VarDeclLoc == nullptr)
+          return;
+
+        if (VarDeclLoc->getType()->isReferenceType()) {
+          assert(isa_and_nonnull<ReferenceValue>(Env.getValue(*VarDeclLoc)) &&
+                 "reference-typed declarations map to `ReferenceValue`s");
+          Env.setStorageLocation(*S, *VarDeclLoc);
+        } else {
+          auto &Loc = Env.createStorageLocation(*S);
+          Env.setStorageLocation(*S, Loc);
+          Env.setValue(Loc, Env.takeOwnership(
+                                std::make_unique<ReferenceValue>(*VarDeclLoc)));
+        }
+        return;
+      }
+    }
+
+    // The receiver can be either a value or a pointer to a value. Skip past the
+    // indirection to handle both cases.
+    auto *BaseLoc = cast_or_null<AggregateStorageLocation>(
+        Env.getStorageLocation(*S->getBase(), SkipPast::ReferenceThenPointer));
+    if (BaseLoc == nullptr)
+      return;
+
+    auto &MemberLoc = BaseLoc->getChild(*Member);
+    if (MemberLoc.getType()->isReferenceType()) {
+      // Based on its type, `MemberLoc` must be mapped either to nothing or to a
+      // `ReferenceValue`. For the former, we won't set a storage location for
+      // this expression, so as to maintain an invariant for lvalue expressions;
+      // namely, that their location maps to a `ReferenceValue`. In this
+      // respect, lvalues are unlike other expressions, where it is valid for
+      // their location to map to nothing (because they are not modeled).
+ // + // Note: we need this invariant for lvalues so that, when accessing a + // value, we can distinguish an rvalue from an lvalue. An alternative + // design, which takes the expression's value category into account, would + // avoid the need for this invariant. + if (auto *V = Env.getValue(MemberLoc)) { + assert(isa<ReferenceValue>(V) && + "reference-typed declarations map to `ReferenceValue`s"); + Env.setStorageLocation(*S, MemberLoc); + } + } else { + auto &Loc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, Loc); + Env.setValue( + Loc, Env.takeOwnership(std::make_unique<ReferenceValue>(MemberLoc))); + } + } + + void VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *S) { + const Expr *InitExpr = S->getExpr(); + assert(InitExpr != nullptr); + + Value *InitExprVal = Env.getValue(*InitExpr, SkipPast::None); + if (InitExprVal == nullptr) + return; + + const FieldDecl *Field = S->getField(); + assert(Field != nullptr); + + auto &ThisLoc = + *cast<AggregateStorageLocation>(Env.getThisPointeeStorageLocation()); + auto &FieldLoc = ThisLoc.getChild(*Field); + Env.setValue(FieldLoc, *InitExprVal); + } + + void VisitCXXConstructExpr(const CXXConstructExpr *S) { + const CXXConstructorDecl *ConstructorDecl = S->getConstructor(); + assert(ConstructorDecl != nullptr); + + if (ConstructorDecl->isCopyOrMoveConstructor()) { + assert(S->getNumArgs() == 1); + + const Expr *Arg = S->getArg(0); + assert(Arg != nullptr); + + if (S->isElidable()) { + auto *ArgLoc = Env.getStorageLocation(*Arg, SkipPast::Reference); + if (ArgLoc == nullptr) + return; + + Env.setStorageLocation(*S, *ArgLoc); + } else if (auto *ArgVal = Env.getValue(*Arg, SkipPast::Reference)) { + auto &Loc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, Loc); + Env.setValue(Loc, *ArgVal); + } + return; + } + + auto &Loc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, Loc); + if (Value *Val = Env.createValue(S->getType())) + Env.setValue(Loc, *Val); + + transferInlineCall(S, ConstructorDecl); + } + + void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *S) { + if (S->getOperator() == OO_Equal) { + assert(S->getNumArgs() == 2); + + const Expr *Arg0 = S->getArg(0); + assert(Arg0 != nullptr); + + const Expr *Arg1 = S->getArg(1); + assert(Arg1 != nullptr); + + // Evaluate only copy and move assignment operators. + auto *Arg0Type = Arg0->getType()->getUnqualifiedDesugaredType(); + auto *Arg1Type = Arg1->getType()->getUnqualifiedDesugaredType(); + if (Arg0Type != Arg1Type) + return; + + auto *ObjectLoc = Env.getStorageLocation(*Arg0, SkipPast::Reference); + if (ObjectLoc == nullptr) + return; + + auto *Val = Env.getValue(*Arg1, SkipPast::Reference); + if (Val == nullptr) + return; + + // Assign a value to the storage location of the object. + Env.setValue(*ObjectLoc, *Val); + + // FIXME: Add a test for the value of the whole expression. + // Assign a storage location for the whole expression. 
+ Env.setStorageLocation(*S, *ObjectLoc); + } + } + + void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *S) { + if (S->getCastKind() == CK_ConstructorConversion) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); + if (SubExprLoc == nullptr) + return; + + Env.setStorageLocation(*S, *SubExprLoc); + } + } + + void VisitCXXTemporaryObjectExpr(const CXXTemporaryObjectExpr *S) { + auto &Loc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, Loc); + if (Value *Val = Env.createValue(S->getType())) + Env.setValue(Loc, *Val); + } + + void VisitCallExpr(const CallExpr *S) { + // Of clang's builtins, only `__builtin_expect` is handled explicitly, since + // others (like trap, debugtrap, and unreachable) are handled by CFG + // construction. + if (S->isCallToStdMove()) { + assert(S->getNumArgs() == 1); + + const Expr *Arg = S->getArg(0); + assert(Arg != nullptr); + + auto *ArgLoc = Env.getStorageLocation(*Arg, SkipPast::None); + if (ArgLoc == nullptr) + return; + + Env.setStorageLocation(*S, *ArgLoc); + } else if (S->getDirectCallee() != nullptr && + S->getDirectCallee()->getBuiltinID() == + Builtin::BI__builtin_expect) { + assert(S->getNumArgs() > 0); + assert(S->getArg(0) != nullptr); + // `__builtin_expect` returns by-value, so strip away any potential + // references in the argument. + auto *ArgLoc = Env.getStorageLocation(*S->getArg(0), SkipPast::Reference); + if (ArgLoc == nullptr) + return; + Env.setStorageLocation(*S, *ArgLoc); + } else if (const FunctionDecl *F = S->getDirectCallee()) { + transferInlineCall(S, F); + } + } + + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *S) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); + if (SubExprLoc == nullptr) + return; + + Env.setStorageLocation(*S, *SubExprLoc); + } + + void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *S) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); + if (SubExprLoc == nullptr) + return; + + Env.setStorageLocation(*S, *SubExprLoc); + } + + void VisitCXXStaticCastExpr(const CXXStaticCastExpr *S) { + if (S->getCastKind() == CK_NoOp) { + const Expr *SubExpr = S->getSubExpr(); + assert(SubExpr != nullptr); + + auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); + if (SubExprLoc == nullptr) + return; + + Env.setStorageLocation(*S, *SubExprLoc); + } + } + + void VisitConditionalOperator(const ConditionalOperator *S) { + // FIXME: Revisit this once flow conditions are added to the framework. For + // `a = b ? c : d` we can add `b => a == c && !b => a == d` to the flow + // condition. 
+    auto &Loc = Env.createStorageLocation(*S);
+    Env.setStorageLocation(*S, Loc);
+    if (Value *Val = Env.createValue(S->getType()))
+      Env.setValue(Loc, *Val);
+  }
+
+  void VisitInitListExpr(const InitListExpr *S) {
+    QualType Type = S->getType();
+
+    auto &Loc = Env.createStorageLocation(*S);
+    Env.setStorageLocation(*S, Loc);
+
+    auto *Val = Env.createValue(Type);
+    if (Val == nullptr)
+      return;
+
+    Env.setValue(Loc, *Val);
+
+    if (Type->isStructureOrClassType()) {
+      for (auto It : llvm::zip(Type->getAsRecordDecl()->fields(), S->inits())) {
+        const FieldDecl *Field = std::get<0>(It);
+        assert(Field != nullptr);
+
+        const Expr *Init = std::get<1>(It);
+        assert(Init != nullptr);
+
+        if (Value *InitVal = Env.getValue(*Init, SkipPast::None))
+          cast<StructValue>(Val)->setChild(*Field, *InitVal);
+      }
+    }
+    // FIXME: Implement array initialization.
+  }
+
+  void VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *S) {
+    auto &Loc = Env.createStorageLocation(*S);
+    Env.setStorageLocation(*S, Loc);
+    Env.setValue(Loc, Env.getBoolLiteralValue(S->getValue()));
+  }
+
+  void VisitParenExpr(const ParenExpr *S) {
+    // The CFG does not contain `ParenExpr` as a top-level statement in basic
+    // blocks; however, manual traversal to sub-expressions may encounter it.
+    // Redirect to the sub-expression.
+    auto *SubExpr = S->getSubExpr();
+    assert(SubExpr != nullptr);
+    Visit(SubExpr);
+  }
+
+  void VisitExprWithCleanups(const ExprWithCleanups *S) {
+    // The CFG does not contain `ExprWithCleanups` as a top-level statement in
+    // basic blocks; however, manual traversal to sub-expressions may encounter
+    // it. Redirect to the sub-expression.
+    auto *SubExpr = S->getSubExpr();
+    assert(SubExpr != nullptr);
+    Visit(SubExpr);
+  }
+
+private:
+  BoolValue &getLogicOperatorSubExprValue(const Expr &SubExpr) {
+    // `SubExpr` and its parent logic operator might be part of different basic
+    // blocks. We try to access the value that is assigned to `SubExpr` in the
+    // corresponding environment.
+    if (const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr)) {
+      if (auto *Val = dyn_cast_or_null<BoolValue>(
+              SubExprEnv->getValue(SubExpr, SkipPast::Reference)))
+        return *Val;
+    }
+
+    if (Env.getStorageLocation(SubExpr, SkipPast::None) == nullptr) {
+      // Sub-expressions that are logic operators are not added in basic blocks
+      // (e.g. see CFG for `bool d = a && (b || c);`). If `SubExpr` is a logic
+      // operator, it may not have been evaluated and assigned a value yet. In
+      // that case, we need to first visit `SubExpr` and then try to get the
+      // value that gets assigned to it.
+      Visit(&SubExpr);
+    }
+
+    if (auto *Val = dyn_cast_or_null<BoolValue>(
+            Env.getValue(SubExpr, SkipPast::Reference)))
+      return *Val;
+
+    // If the value of `SubExpr` is still unknown, we create a fresh symbolic
+    // boolean value for it.
+    return Env.makeAtomicBoolValue();
+  }
+
+  // If context sensitivity is enabled, try to analyze the body of the callee
+  // `F` of `S`. The type `E` must be either `CallExpr` or `CXXConstructExpr`.
+  template <typename E>
+  void transferInlineCall(const E *S, const FunctionDecl *F) {
+    const auto &Options = Env.getAnalysisOptions();
+    if (!(Options.ContextSensitiveOpts &&
+          Env.canDescend(Options.ContextSensitiveOpts->Depth, F)))
+      return;
+
+    const ControlFlowContext *CFCtx = Env.getControlFlowContext(F);
+    if (!CFCtx)
+      return;
+
+    // FIXME: We don't support context-sensitive analysis of recursion, so
+    // we should return early here if `F` is the same as the `FunctionDecl`
+    // holding `S` itself.
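+    // The steps below: assign `S` a storage location (for calls, this becomes
+    // the callee's return location), push an environment for the callee, run
+    // a separate analysis over the callee's CFG, and merge the state of its
+    // exit block back into the caller via `popCall`.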
+ + auto ExitBlock = CFCtx->getCFG().getExit().getBlockID(); + + if (const auto *NonConstructExpr = dyn_cast<CallExpr>(S)) { + // Note that it is important for the storage location of `S` to be set + // before `pushCall`, because the latter uses it to set the storage + // location for `return`. + auto &ReturnLoc = Env.createStorageLocation(*S); + Env.setStorageLocation(*S, ReturnLoc); + } + auto CalleeEnv = Env.pushCall(S); + + // FIXME: Use the same analysis as the caller for the callee. Note, + // though, that doing so would require support for changing the analysis's + // ASTContext. + assert(CFCtx->getDecl() != nullptr && + "ControlFlowContexts in the environment should always carry a decl"); + auto Analysis = NoopAnalysis(CFCtx->getDecl()->getASTContext(), + DataflowAnalysisOptions{Options}); + + auto BlockToOutputState = + dataflow::runDataflowAnalysis(*CFCtx, Analysis, CalleeEnv); + assert(BlockToOutputState); + assert(ExitBlock < BlockToOutputState->size()); + + auto ExitState = (*BlockToOutputState)[ExitBlock]; + assert(ExitState); + + Env.popCall(ExitState->Env); + } + + const StmtToEnvMap &StmtToEnv; + Environment &Env; +}; + +void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env) { + TransferVisitor(StmtToEnv, Env).Visit(&S); +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp new file mode 100644 index 000000000000..b125701212c9 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -0,0 +1,500 @@ +//===- TypeErasedDataflowAnalysis.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines type-erased base types and functions for building dataflow +// analyses that run over Control-Flow Graphs (CFGs). 
+// +//===----------------------------------------------------------------------===// + +#include <algorithm> +#include <memory> +#include <optional> +#include <system_error> +#include <utility> +#include <vector> + +#include "clang/AST/DeclCXX.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "clang/Analysis/FlowSensitive/Transfer.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" + +#define DEBUG_TYPE "clang-dataflow" + +namespace clang { +namespace dataflow { + +class StmtToEnvMapImpl : public StmtToEnvMap { +public: + StmtToEnvMapImpl( + const ControlFlowContext &CFCtx, + llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> + BlockToState) + : CFCtx(CFCtx), BlockToState(BlockToState) {} + + const Environment *getEnvironment(const Stmt &S) const override { + auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S)); + assert(BlockIt != CFCtx.getStmtToBlock().end()); + const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()]; + assert(State); + return &State->Env; + } + +private: + const ControlFlowContext &CFCtx; + llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockToState; +}; + +/// Returns the index of `Block` in the successors of `Pred`. +static int blockIndexInPredecessor(const CFGBlock &Pred, + const CFGBlock &Block) { + auto BlockPos = llvm::find_if( + Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) { + return Succ && Succ->getBlockID() == Block.getBlockID(); + }); + return BlockPos - Pred.succ_begin(); +} + +static bool isLoopHead(const CFGBlock &B) { + if (const auto *T = B.getTerminatorStmt()) + switch (T->getStmtClass()) { + case Stmt::WhileStmtClass: + case Stmt::DoStmtClass: + case Stmt::ForStmtClass: + return true; + default: + return false; + } + + return false; +} + +// The return type of the visit functions in TerminatorVisitor. The first +// element represents the terminator expression (that is the conditional +// expression in case of a path split in the CFG). The second element +// represents whether the condition was true or false. +using TerminatorVisitorRetTy = std::pair<const Expr *, bool>; + +/// Extends the flow condition of an environment based on a terminator +/// statement. 
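+///
+/// For example, given the terminator of `if (Cond) ...`, the value of `Cond`
+/// is added to the flow condition of the "then" successor (index 0), while
+/// the "else" successor (index 1) receives its negation.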
+class TerminatorVisitor + : public ConstStmtVisitor<TerminatorVisitor, TerminatorVisitorRetTy> { +public: + TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env, + int BlockSuccIdx) + : StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx) {} + + TerminatorVisitorRetTy VisitIfStmt(const IfStmt *S) { + auto *Cond = S->getCond(); + assert(Cond != nullptr); + return extendFlowCondition(*Cond); + } + + TerminatorVisitorRetTy VisitWhileStmt(const WhileStmt *S) { + auto *Cond = S->getCond(); + assert(Cond != nullptr); + return extendFlowCondition(*Cond); + } + + TerminatorVisitorRetTy VisitDoStmt(const DoStmt *S) { + auto *Cond = S->getCond(); + assert(Cond != nullptr); + return extendFlowCondition(*Cond); + } + + TerminatorVisitorRetTy VisitForStmt(const ForStmt *S) { + auto *Cond = S->getCond(); + if (Cond != nullptr) + return extendFlowCondition(*Cond); + return {nullptr, false}; + } + + TerminatorVisitorRetTy VisitBinaryOperator(const BinaryOperator *S) { + assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr); + auto *LHS = S->getLHS(); + assert(LHS != nullptr); + return extendFlowCondition(*LHS); + } + + TerminatorVisitorRetTy + VisitConditionalOperator(const ConditionalOperator *S) { + auto *Cond = S->getCond(); + assert(Cond != nullptr); + return extendFlowCondition(*Cond); + } + +private: + TerminatorVisitorRetTy extendFlowCondition(const Expr &Cond) { + // The terminator sub-expression might not be evaluated. + if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr) + transfer(StmtToEnv, Cond, Env); + + // FIXME: The flow condition must be an r-value, so `SkipPast::None` should + // suffice. + auto *Val = + cast_or_null<BoolValue>(Env.getValue(Cond, SkipPast::Reference)); + // Value merging depends on flow conditions from different environments + // being mutually exclusive -- that is, they cannot both be true in their + // entirety (even if they may share some clauses). So, we need *some* value + // for the condition expression, even if just an atom. + if (Val == nullptr) { + // FIXME: Consider introducing a helper for this get-or-create pattern. + auto *Loc = Env.getStorageLocation(Cond, SkipPast::None); + if (Loc == nullptr) { + Loc = &Env.createStorageLocation(Cond); + Env.setStorageLocation(Cond, *Loc); + } + Val = &Env.makeAtomicBoolValue(); + Env.setValue(*Loc, *Val); + } + + bool ConditionValue = true; + // The condition must be inverted for the successor that encompasses the + // "else" branch, if such exists. + if (BlockSuccIdx == 1) { + Val = &Env.makeNot(*Val); + ConditionValue = false; + } + + Env.addToFlowCondition(*Val); + return {&Cond, ConditionValue}; + } + + const StmtToEnvMap &StmtToEnv; + Environment &Env; + int BlockSuccIdx; +}; + +/// Holds data structures required for running dataflow analysis. +struct AnalysisContext { + AnalysisContext(const ControlFlowContext &CFCtx, + TypeErasedDataflowAnalysis &Analysis, + const Environment &InitEnv, + llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> + BlockStates) + : CFCtx(CFCtx), Analysis(Analysis), InitEnv(InitEnv), + BlockStates(BlockStates) {} + + /// Contains the CFG being analyzed. + const ControlFlowContext &CFCtx; + /// The analysis to be run. + TypeErasedDataflowAnalysis &Analysis; + /// Initial state to start the analysis. + const Environment &InitEnv; + /// Stores the state of a CFG block if it has been evaluated by the analysis. + /// The indices correspond to the block IDs. 
+  llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates;
+};
+
+/// Computes the input state for a given basic block by joining the output
+/// states of its predecessors.
+///
+/// Requirements:
+///
+/// All predecessors of `Block` except those with loop back edges must have
+/// already been transferred. States in `AC.BlockStates` that are set to
+/// `std::nullopt` represent basic blocks that are not evaluated yet.
+static TypeErasedDataflowAnalysisState
+computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
+  llvm::DenseSet<const CFGBlock *> Preds;
+  Preds.insert(Block.pred_begin(), Block.pred_end());
+  if (Block.getTerminator().isTemporaryDtorsBranch()) {
+    // This handles a special case where the code that produced the CFG includes
+    // a conditional operator with a branch that constructs a temporary and
+    // calls a destructor annotated as noreturn. The CFG models this as follows:
+    //
+    // B1 (contains the condition of the conditional operator) - succs: B2, B3
+    // B2 (contains code that does not call a noreturn destructor) - succs: B4
+    // B3 (contains code that calls a noreturn destructor) - succs: B4
+    // B4 (has temporary destructor terminator) - succs: B5, B6
+    // B5 (noreturn block that is associated with the noreturn destructor call)
+    // B6 (contains code that follows the conditional operator statement)
+    //
+    // The first successor (B5 above) of a basic block with a temporary
+    // destructor terminator (B4 above) is the block that evaluates the
+    // destructor. If that block has a noreturn element then the predecessor
+    // block that constructed the temporary object (B3 above) is effectively a
+    // noreturn block and its state should not be used as input for the state
+    // of the block that has a temporary destructor terminator (B4 above). This
+    // holds regardless of which branch of the ternary operator calls the
+    // noreturn destructor. However, it doesn't handle cases where a nested
+    // ternary operator includes a branch that contains a noreturn destructor
+    // call.
+    //
+    // See `NoreturnDestructorTest` for concrete examples.
+    if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) {
+      auto &StmtToBlock = AC.CFCtx.getStmtToBlock();
+      auto StmtBlock = StmtToBlock.find(Block.getTerminatorStmt());
+      assert(StmtBlock != StmtToBlock.end());
+      Preds.erase(StmtBlock->getSecond());
+    }
+  }
+
+  std::optional<TypeErasedDataflowAnalysisState> MaybeState;
+
+  auto &Analysis = AC.Analysis;
+  for (const CFGBlock *Pred : Preds) {
+    // Skip if `Pred` is unreachable or control flow cannot get past it.
+    if (!Pred || Pred->hasNoReturnElement())
+      continue;
+
+    // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
+    // loop back edge to `Block`.
+    const std::optional<TypeErasedDataflowAnalysisState> &MaybePredState =
+        AC.BlockStates[Pred->getBlockID()];
+    if (!MaybePredState)
+      continue;
+
+    TypeErasedDataflowAnalysisState PredState = *MaybePredState;
+    if (Analysis.builtinOptions()) {
+      if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
+        const StmtToEnvMapImpl StmtToEnv(AC.CFCtx, AC.BlockStates);
+        auto [Cond, CondValue] =
+            TerminatorVisitor(StmtToEnv, PredState.Env,
+                              blockIndexInPredecessor(*Pred, Block))
+                .Visit(PredTerminatorStmt);
+        if (Cond != nullptr)
+          // FIXME: Call transferBranchTypeErased even if BuiltinTransferOpts
+          // are not set.
+ Analysis.transferBranchTypeErased(CondValue, Cond, PredState.Lattice, + PredState.Env); + } + } + + if (MaybeState) { + Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice); + MaybeState->Env.join(PredState.Env, Analysis); + } else { + MaybeState = std::move(PredState); + } + } + if (!MaybeState) { + // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()` + // to enable building analyses like computation of dominators that + // initialize the state of each basic block differently. + MaybeState.emplace(Analysis.typeErasedInitialElement(), AC.InitEnv); + } + return *MaybeState; +} + +/// Built-in transfer function for `CFGStmt`. +void builtinTransferStatement(const CFGStmt &Elt, + TypeErasedDataflowAnalysisState &InputState, + AnalysisContext &AC) { + const Stmt *S = Elt.getStmt(); + assert(S != nullptr); + transfer(StmtToEnvMapImpl(AC.CFCtx, AC.BlockStates), *S, InputState.Env); +} + +/// Built-in transfer function for `CFGInitializer`. +void builtinTransferInitializer(const CFGInitializer &Elt, + TypeErasedDataflowAnalysisState &InputState) { + const CXXCtorInitializer *Init = Elt.getInitializer(); + assert(Init != nullptr); + + auto &Env = InputState.Env; + const auto &ThisLoc = + *cast<AggregateStorageLocation>(Env.getThisPointeeStorageLocation()); + + const FieldDecl *Member = Init->getMember(); + if (Member == nullptr) + // Not a field initializer. + return; + + auto *InitStmt = Init->getInit(); + assert(InitStmt != nullptr); + + auto *InitStmtLoc = Env.getStorageLocation(*InitStmt, SkipPast::Reference); + if (InitStmtLoc == nullptr) + return; + + auto *InitStmtVal = Env.getValue(*InitStmtLoc); + if (InitStmtVal == nullptr) + return; + + if (Member->getType()->isReferenceType()) { + auto &MemberLoc = ThisLoc.getChild(*Member); + Env.setValue(MemberLoc, Env.takeOwnership(std::make_unique<ReferenceValue>( + *InitStmtLoc))); + } else { + auto &MemberLoc = ThisLoc.getChild(*Member); + Env.setValue(MemberLoc, *InitStmtVal); + } +} + +void builtinTransfer(const CFGElement &Elt, + TypeErasedDataflowAnalysisState &State, + AnalysisContext &AC) { + switch (Elt.getKind()) { + case CFGElement::Statement: + builtinTransferStatement(Elt.castAs<CFGStmt>(), State, AC); + break; + case CFGElement::Initializer: + builtinTransferInitializer(Elt.castAs<CFGInitializer>(), State); + break; + default: + // FIXME: Evaluate other kinds of `CFGElement`. + break; + } +} + +/// Transfers `State` by evaluating each element in the `Block` based on the +/// `AC.Analysis` specified. +/// +/// Built-in transfer functions (if the option for `ApplyBuiltinTransfer` is set +/// by the analysis) will be applied to the element before evaluation by the +/// user-specified analysis. +/// `PostVisitCFG` (if provided) will be applied to the element after evaluation +/// by the user-specified analysis. 
+TypeErasedDataflowAnalysisState +transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC, + std::function<void(const CFGElement &, + const TypeErasedDataflowAnalysisState &)> + PostVisitCFG = nullptr) { + auto State = computeBlockInputState(Block, AC); + for (const auto &Element : Block) { + // Built-in analysis + if (AC.Analysis.builtinOptions()) { + builtinTransfer(Element, State, AC); + } + + // User-provided analysis + AC.Analysis.transferTypeErased(&Element, State.Lattice, State.Env); + + // Post processing + if (PostVisitCFG) { + PostVisitCFG(Element, State); + } + } + return State; +} + +TypeErasedDataflowAnalysisState transferBlock( + const ControlFlowContext &CFCtx, + llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates, + const CFGBlock &Block, const Environment &InitEnv, + TypeErasedDataflowAnalysis &Analysis, + std::function<void(const CFGElement &, + const TypeErasedDataflowAnalysisState &)> + PostVisitCFG) { + AnalysisContext AC(CFCtx, Analysis, InitEnv, BlockStates); + return transferCFGBlock(Block, AC, PostVisitCFG); +} + +llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>> +runTypeErasedDataflowAnalysis( + const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis, + const Environment &InitEnv, + std::function<void(const CFGElement &, + const TypeErasedDataflowAnalysisState &)> + PostVisitCFG) { + PostOrderCFGView POV(&CFCtx.getCFG()); + ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV); + + std::vector<std::optional<TypeErasedDataflowAnalysisState>> BlockStates( + CFCtx.getCFG().size(), std::nullopt); + + // The entry basic block doesn't contain statements so it can be skipped. + const CFGBlock &Entry = CFCtx.getCFG().getEntry(); + BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(), + InitEnv}; + Worklist.enqueueSuccessors(&Entry); + + AnalysisContext AC(CFCtx, Analysis, InitEnv, BlockStates); + + // Bugs in lattices and transfer functions can prevent the analysis from + // converging. To limit the damage (infinite loops) that these bugs can cause, + // limit the number of iterations. + // FIXME: Consider making the maximum number of iterations configurable. + // FIXME: Consider restricting the number of backedges followed, rather than + // iterations. + // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number + // of iterations, number of functions that time out, etc. 
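+  // As a rough worked example of the bound computed from the constants below:
+  // for a function whose CFG has 100 blocks, the analysis performs at most
+  // min(4 * 100, 2^16) = 400 block visits before failing with a timeout.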
+ static constexpr uint32_t MaxAverageVisitsPerBlock = 4; + static constexpr uint32_t AbsoluteMaxIterations = 1 << 16; + const uint32_t RelativeMaxIterations = + MaxAverageVisitsPerBlock * BlockStates.size(); + const uint32_t MaxIterations = + std::min(RelativeMaxIterations, AbsoluteMaxIterations); + uint32_t Iterations = 0; + while (const CFGBlock *Block = Worklist.dequeue()) { + LLVM_DEBUG(llvm::dbgs() + << "Processing Block " << Block->getBlockID() << "\n"); + if (++Iterations > MaxIterations) { + return llvm::createStringError(std::errc::timed_out, + "maximum number of iterations reached"); + } + + const std::optional<TypeErasedDataflowAnalysisState> &OldBlockState = + BlockStates[Block->getBlockID()]; + TypeErasedDataflowAnalysisState NewBlockState = + transferCFGBlock(*Block, AC); + LLVM_DEBUG({ + llvm::errs() << "New Env:\n"; + NewBlockState.Env.dump(); + }); + + if (OldBlockState) { + LLVM_DEBUG({ + llvm::errs() << "Old Env:\n"; + OldBlockState->Env.dump(); + }); + if (isLoopHead(*Block)) { + LatticeJoinEffect Effect1 = Analysis.widenTypeErased( + NewBlockState.Lattice, OldBlockState->Lattice); + LatticeJoinEffect Effect2 = + NewBlockState.Env.widen(OldBlockState->Env, Analysis); + if (Effect1 == LatticeJoinEffect::Unchanged && + Effect2 == LatticeJoinEffect::Unchanged) + // The state of `Block` didn't change from widening so there's no need + // to revisit its successors. + continue; + } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice, + NewBlockState.Lattice) && + OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { + // The state of `Block` didn't change after transfer so there's no need + // to revisit its successors. + continue; + } + } + + BlockStates[Block->getBlockID()] = std::move(NewBlockState); + + // Do not add unreachable successor blocks to `Worklist`. + if (Block->hasNoReturnElement()) + continue; + + Worklist.enqueueSuccessors(Block); + } + // FIXME: Consider evaluating unreachable basic blocks (those that have a + // state set to `std::nullopt` at this point) to also analyze dead code. + + if (PostVisitCFG) { + for (const CFGBlock *Block : CFCtx.getCFG()) { + // Skip blocks that were not evaluated. + if (!BlockStates[Block->getBlockID()]) + continue; + transferCFGBlock(*Block, AC, PostVisitCFG); + } + } + + return BlockStates; +} + +} // namespace dataflow +} // namespace clang diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Value.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Value.cpp new file mode 100644 index 000000000000..59affa80bdce --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/Value.cpp @@ -0,0 +1,56 @@ +//===-- Value.cpp -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines support functions for the `Value` type. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/Value.h"
+#include "clang/Analysis/FlowSensitive/DebugSupport.h"
+#include "llvm/Support/Casting.h"
+
+namespace clang {
+namespace dataflow {
+
+static bool areEquivalentIndirectionValues(const Value &Val1,
+                                           const Value &Val2) {
+  if (auto *IndVal1 = dyn_cast<ReferenceValue>(&Val1)) {
+    auto *IndVal2 = cast<ReferenceValue>(&Val2);
+    return &IndVal1->getReferentLoc() == &IndVal2->getReferentLoc();
+  }
+  if (auto *IndVal1 = dyn_cast<PointerValue>(&Val1)) {
+    auto *IndVal2 = cast<PointerValue>(&Val2);
+    return &IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc();
+  }
+  return false;
+}
+
+bool areEquivalentValues(const Value &Val1, const Value &Val2) {
+  return &Val1 == &Val2 || (Val1.getKind() == Val2.getKind() &&
+                            (isa<TopBoolValue>(&Val1) ||
+                             areEquivalentIndirectionValues(Val1, Val2)));
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Value &Val) {
+  switch (Val.getKind()) {
+  case Value::Kind::Reference: {
+    const auto *RV = cast<ReferenceValue>(&Val);
+    return OS << "Reference(" << &RV->getReferentLoc() << ")";
+  }
+  case Value::Kind::Pointer: {
+    const auto *PV = cast<PointerValue>(&Val);
+    return OS << "Pointer(" << &PV->getPointeeLoc() << ")";
+  }
+  // FIXME: support remaining cases.
+  default:
+    return OS << debugString(Val.getKind());
+  }
+}
+
+} // namespace dataflow
+} // namespace clang
diff --git a/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
new file mode 100644
index 000000000000..caa1ed266c5f
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
@@ -0,0 +1,721 @@
+//===- WatchedLiteralsSolver.cpp --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a SAT solver implementation that can be used by dataflow
+// analyses.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <queue>
+#include <vector>
+
+#include "clang/Analysis/FlowSensitive/Solver.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
+#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace clang {
+namespace dataflow {
+
+// `WatchedLiteralsSolver` is an implementation of Algorithm D from Knuth's
+// The Art of Computer Programming Volume 4: Satisfiability, Fascicle 6. It is
+// based on the backtracking DPLL algorithm [1], keeps references to a single
+// "watched" literal per clause, and uses a set of "active" variables to perform
+// unit propagation.
+//
+// The solver expects that its input is a boolean formula in conjunctive normal
+// form that consists of clauses of at least one literal. A literal is either a
+// boolean variable or its negation. Below we define types, data structures, and
+// utilities that are used to represent boolean formulas in conjunctive normal
+// form.
+// +// [1] https://en.wikipedia.org/wiki/DPLL_algorithm + +/// Boolean variables are represented as positive integers. +using Variable = uint32_t; + +/// A null boolean variable is used as a placeholder in various data structures +/// and algorithms. +static constexpr Variable NullVar = 0; + +/// Literals are represented as positive integers. Specifically, for a boolean +/// variable `V` that is represented as the positive integer `I`, the positive +/// literal `V` is represented as the integer `2*I` and the negative literal +/// `!V` is represented as the integer `2*I+1`. +using Literal = uint32_t; + +/// A null literal is used as a placeholder in various data structures and +/// algorithms. +static constexpr Literal NullLit = 0; + +/// Returns the positive literal `V`. +static constexpr Literal posLit(Variable V) { return 2 * V; } + +/// Returns the negative literal `!V`. +static constexpr Literal negLit(Variable V) { return 2 * V + 1; } + +/// Returns the negated literal `!L`. +static constexpr Literal notLit(Literal L) { return L ^ 1; } + +/// Returns the variable of `L`. +static constexpr Variable var(Literal L) { return L >> 1; } + +/// Clause identifiers are represented as positive integers. +using ClauseID = uint32_t; + +/// A null clause identifier is used as a placeholder in various data structures +/// and algorithms. +static constexpr ClauseID NullClause = 0; + +/// A boolean formula in conjunctive normal form. +struct BooleanFormula { + /// `LargestVar` is equal to the largest positive integer that represents a + /// variable in the formula. + const Variable LargestVar; + + /// Literals of all clauses in the formula. + /// + /// The element at index 0 stands for the literal in the null clause. It is + /// set to 0 and isn't used. Literals of clauses in the formula start from the + /// element at index 1. + /// + /// For example, for the formula `(L1 v L2) ^ (L2 v L3 v L4)` the elements of + /// `Clauses` will be `[0, L1, L2, L2, L3, L4]`. + std::vector<Literal> Clauses; + + /// Start indices of clauses of the formula in `Clauses`. + /// + /// The element at index 0 stands for the start index of the null clause. It + /// is set to 0 and isn't used. Start indices of clauses in the formula start + /// from the element at index 1. + /// + /// For example, for the formula `(L1 v L2) ^ (L2 v L3 v L4)` the elements of + /// `ClauseStarts` will be `[0, 1, 3]`. Note that the literals of the first + /// clause always start at index 1. The start index for the literals of the + /// second clause depends on the size of the first clause and so on. + std::vector<size_t> ClauseStarts; + + /// Maps literals (indices of the vector) to clause identifiers (elements of + /// the vector) that watch the respective literals. + /// + /// For a given clause, its watched literal is always its first literal in + /// `Clauses`. This invariant is maintained when watched literals change. + std::vector<ClauseID> WatchedHead; + + /// Maps clause identifiers (elements of the vector) to identifiers of other + /// clauses that watch the same literals, forming a set of linked lists. + /// + /// The element at index 0 stands for the identifier of the clause that + /// follows the null clause. It is set to 0 and isn't used. Identifiers of + /// clauses in the formula start from the element at index 1. + std::vector<ClauseID> NextWatched; + + /// Stores the variable identifier and value location for atomic booleans in + /// the formula. 
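+  ///
+  /// Note that only atomic booleans have entries here; variables introduced
+  /// for compound sub-values are internal to the solver and have no
+  /// corresponding `AtomicBoolValue`.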
+  llvm::DenseMap<Variable, AtomicBoolValue *> Atomics;
+
+  explicit BooleanFormula(Variable LargestVar,
+                          llvm::DenseMap<Variable, AtomicBoolValue *> Atomics)
+      : LargestVar(LargestVar), Atomics(std::move(Atomics)) {
+    Clauses.push_back(0);
+    ClauseStarts.push_back(0);
+    NextWatched.push_back(0);
+    const size_t NumLiterals = 2 * LargestVar + 1;
+    WatchedHead.resize(NumLiterals + 1, 0);
+  }
+
+  /// Adds the `L1 v L2 v L3` clause to the formula. If `L2` or `L3` are
+  /// `NullLit` they are respectively omitted from the clause.
+  ///
+  /// Requirements:
+  ///
+  /// `L1` must not be `NullLit`.
+  ///
+  /// All literals in the input that are not `NullLit` must be distinct.
+  void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) {
+    // The literals are guaranteed to be distinct by properties of `BoolValue`
+    // and the construction in `buildBooleanFormula`.
+    assert(L1 != NullLit && L1 != L2 && L1 != L3 &&
+           (L2 != L3 || L2 == NullLit));
+
+    const ClauseID C = ClauseStarts.size();
+    const size_t S = Clauses.size();
+    ClauseStarts.push_back(S);
+
+    Clauses.push_back(L1);
+    if (L2 != NullLit)
+      Clauses.push_back(L2);
+    if (L3 != NullLit)
+      Clauses.push_back(L3);
+
+    // Designate the first literal as the "watched" literal of the clause.
+    NextWatched.push_back(WatchedHead[L1]);
+    WatchedHead[L1] = C;
+  }
+
+  /// Returns the number of literals in clause `C`.
+  size_t clauseSize(ClauseID C) const {
+    return C == ClauseStarts.size() - 1 ? Clauses.size() - ClauseStarts[C]
+                                        : ClauseStarts[C + 1] - ClauseStarts[C];
+  }
+
+  /// Returns the literals of clause `C`.
+  llvm::ArrayRef<Literal> clauseLiterals(ClauseID C) const {
+    return llvm::ArrayRef<Literal>(&Clauses[ClauseStarts[C]], clauseSize(C));
+  }
+};
+
+/// Converts the conjunction of `Vals` into a formula in conjunctive normal
+/// form where each clause has at least one and at most three literals.
+BooleanFormula buildBooleanFormula(const llvm::DenseSet<BoolValue *> &Vals) {
+  // The general strategy of the algorithm implemented below is to map each
+  // of the sub-values in `Vals` to a unique variable and use these variables in
+  // the resulting CNF expression to avoid exponential blow up. The number of
+  // literals in the resulting formula is guaranteed to be linear in the number
+  // of sub-values in `Vals`.
+
+  // Map each sub-value in `Vals` to a unique variable.
+  llvm::DenseMap<BoolValue *, Variable> SubValsToVar;
+  // Store variable identifiers and value locations of atomic booleans.
+  llvm::DenseMap<Variable, AtomicBoolValue *> Atomics;
+  Variable NextVar = 1;
+  {
+    std::queue<BoolValue *> UnprocessedSubVals;
+    for (BoolValue *Val : Vals)
+      UnprocessedSubVals.push(Val);
+    while (!UnprocessedSubVals.empty()) {
+      Variable Var = NextVar;
+      BoolValue *Val = UnprocessedSubVals.front();
+      UnprocessedSubVals.pop();
+
+      if (!SubValsToVar.try_emplace(Val, Var).second)
+        continue;
+      ++NextVar;
+
+      // Visit the sub-values of `Val`.
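+      // Assigning a dedicated variable to each sub-value in this way is, in
+      // effect, the Tseitin transformation, which is what keeps the resulting
+      // CNF linear in the number of sub-values.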
+ switch (Val->getKind()) { + case Value::Kind::Conjunction: { + auto *C = cast<ConjunctionValue>(Val); + UnprocessedSubVals.push(&C->getLeftSubValue()); + UnprocessedSubVals.push(&C->getRightSubValue()); + break; + } + case Value::Kind::Disjunction: { + auto *D = cast<DisjunctionValue>(Val); + UnprocessedSubVals.push(&D->getLeftSubValue()); + UnprocessedSubVals.push(&D->getRightSubValue()); + break; + } + case Value::Kind::Negation: { + auto *N = cast<NegationValue>(Val); + UnprocessedSubVals.push(&N->getSubVal()); + break; + } + case Value::Kind::Implication: { + auto *I = cast<ImplicationValue>(Val); + UnprocessedSubVals.push(&I->getLeftSubValue()); + UnprocessedSubVals.push(&I->getRightSubValue()); + break; + } + case Value::Kind::Biconditional: { + auto *B = cast<BiconditionalValue>(Val); + UnprocessedSubVals.push(&B->getLeftSubValue()); + UnprocessedSubVals.push(&B->getRightSubValue()); + break; + } + case Value::Kind::TopBool: + // Nothing more to do. This `TopBool` instance has already been mapped + // to a fresh solver variable (`NextVar`, above) and is thereafter + // anonymous. The solver never sees `Top`. + break; + case Value::Kind::AtomicBool: { + Atomics[Var] = cast<AtomicBoolValue>(Val); + break; + } + default: + llvm_unreachable("buildBooleanFormula: unhandled value kind"); + } + } + } + + auto GetVar = [&SubValsToVar](const BoolValue *Val) { + auto ValIt = SubValsToVar.find(Val); + assert(ValIt != SubValsToVar.end()); + return ValIt->second; + }; + + BooleanFormula Formula(NextVar - 1, std::move(Atomics)); + std::vector<bool> ProcessedSubVals(NextVar, false); + + // Add a conjunct for each variable that represents a top-level conjunction + // value in `Vals`. + for (BoolValue *Val : Vals) + Formula.addClause(posLit(GetVar(Val))); + + // Add conjuncts that represent the mapping between newly-created variables + // and their corresponding sub-values. + std::queue<BoolValue *> UnprocessedSubVals; + for (BoolValue *Val : Vals) + UnprocessedSubVals.push(Val); + while (!UnprocessedSubVals.empty()) { + const BoolValue *Val = UnprocessedSubVals.front(); + UnprocessedSubVals.pop(); + const Variable Var = GetVar(Val); + + if (ProcessedSubVals[Var]) + continue; + ProcessedSubVals[Var] = true; + + if (auto *C = dyn_cast<ConjunctionValue>(Val)) { + const Variable LeftSubVar = GetVar(&C->getLeftSubValue()); + const Variable RightSubVar = GetVar(&C->getRightSubValue()); + + if (LeftSubVar == RightSubVar) { + // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is + // already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + Formula.addClause(negLit(Var), posLit(LeftSubVar)); + Formula.addClause(posLit(Var), negLit(LeftSubVar)); + + // Visit a sub-value of `Val` (pick any, they are identical). + UnprocessedSubVals.push(&C->getLeftSubValue()); + } else { + // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v !B)` + // which is already in conjunctive normal form. Below we add each of the + // conjuncts of the latter expression to the result. + Formula.addClause(negLit(Var), posLit(LeftSubVar)); + Formula.addClause(negLit(Var), posLit(RightSubVar)); + Formula.addClause(posLit(Var), negLit(LeftSubVar), negLit(RightSubVar)); + + // Visit the sub-values of `Val`. 
+        UnprocessedSubVals.push(&C->getLeftSubValue());
+        UnprocessedSubVals.push(&C->getRightSubValue());
+      }
+    } else if (auto *D = dyn_cast<DisjunctionValue>(Val)) {
+      const Variable LeftSubVar = GetVar(&D->getLeftSubValue());
+      const Variable RightSubVar = GetVar(&D->getRightSubValue());
+
+      if (LeftSubVar == RightSubVar) {
+        // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is
+        // already in conjunctive normal form. Below we add each of the
+        // conjuncts of the latter expression to the result.
+        Formula.addClause(negLit(Var), posLit(LeftSubVar));
+        Formula.addClause(posLit(Var), negLit(LeftSubVar));
+
+        // Visit a sub-value of `Val` (pick any, they are identical).
+        UnprocessedSubVals.push(&D->getLeftSubValue());
+      } else {
+        // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v !B)`
+        // which is already in conjunctive normal form. Below we add each of the
+        // conjuncts of the latter expression to the result.
+        Formula.addClause(negLit(Var), posLit(LeftSubVar), posLit(RightSubVar));
+        Formula.addClause(posLit(Var), negLit(LeftSubVar));
+        Formula.addClause(posLit(Var), negLit(RightSubVar));
+
+        // Visit the sub-values of `Val`.
+        UnprocessedSubVals.push(&D->getLeftSubValue());
+        UnprocessedSubVals.push(&D->getRightSubValue());
+      }
+    } else if (auto *N = dyn_cast<NegationValue>(Val)) {
+      const Variable SubVar = GetVar(&N->getSubVal());
+
+      // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is already in
+      // conjunctive normal form. Below we add each of the conjuncts of the
+      // latter expression to the result.
+      Formula.addClause(negLit(Var), negLit(SubVar));
+      Formula.addClause(posLit(Var), posLit(SubVar));
+
+      // Visit the sub-values of `Val`.
+      UnprocessedSubVals.push(&N->getSubVal());
+    } else if (auto *I = dyn_cast<ImplicationValue>(Val)) {
+      const Variable LeftSubVar = GetVar(&I->getLeftSubValue());
+      const Variable RightSubVar = GetVar(&I->getRightSubValue());
+
+      // `X <=> (A => B)` is equivalent to
+      // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in
+      // conjunctive normal form. Below we add each of the conjuncts of the
+      // latter expression to the result.
+      Formula.addClause(posLit(Var), posLit(LeftSubVar));
+      Formula.addClause(posLit(Var), negLit(RightSubVar));
+      Formula.addClause(negLit(Var), negLit(LeftSubVar), posLit(RightSubVar));
+
+      // Visit the sub-values of `Val`.
+      UnprocessedSubVals.push(&I->getLeftSubValue());
+      UnprocessedSubVals.push(&I->getRightSubValue());
+    } else if (auto *B = dyn_cast<BiconditionalValue>(Val)) {
+      const Variable LeftSubVar = GetVar(&B->getLeftSubValue());
+      const Variable RightSubVar = GetVar(&B->getRightSubValue());
+
+      if (LeftSubVar == RightSubVar) {
+        // `X <=> (A <=> A)` is equivalent to `X` which is already in
+        // conjunctive normal form. Below we add each of the conjuncts of the
+        // latter expression to the result.
+        Formula.addClause(posLit(Var));
+
+        // No need to visit the sub-values of `Val`.
+      } else {
+        // `X <=> (A <=> B)` is equivalent to
+        // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which is
+        // already in conjunctive normal form. Below we add each of the conjuncts
+        // of the latter expression to the result.
+        Formula.addClause(posLit(Var), posLit(LeftSubVar), posLit(RightSubVar));
+        Formula.addClause(posLit(Var), negLit(LeftSubVar), negLit(RightSubVar));
+        Formula.addClause(negLit(Var), posLit(LeftSubVar), negLit(RightSubVar));
+        Formula.addClause(negLit(Var), negLit(LeftSubVar), posLit(RightSubVar));
+
+        // Visit the sub-values of `Val`.
+        UnprocessedSubVals.push(&B->getLeftSubValue());
+        UnprocessedSubVals.push(&B->getRightSubValue());
+      }
+    }
+  }
+
+  return Formula;
+}
+
+class WatchedLiteralsSolverImpl {
+  /// A boolean formula in conjunctive normal form that the solver will attempt
+  /// to prove satisfiable. The formula will be modified in the process.
+  BooleanFormula Formula;
+
+  /// The search for a satisfying assignment of the variables in `Formula` will
+  /// proceed in levels, starting from 1 and going up to `Formula.LargestVar`
+  /// (inclusive). The current level is stored in `Level`. At each level the
+  /// solver will assign a value to an unassigned variable. If this leads to a
+  /// consistent partial assignment, `Level` will be incremented. Otherwise, if
+  /// it results in a conflict, the solver will backtrack by decrementing
+  /// `Level` until it reaches the most recent level where a decision was made.
+  size_t Level = 0;
+
+  /// Maps levels (indices of the vector) to variables (elements of the vector)
+  /// that are assigned values at the respective levels.
+  ///
+  /// The element at index 0 isn't used. Variables start from the element at
+  /// index 1.
+  std::vector<Variable> LevelVars;
+
+  /// State of the solver at a particular level.
+  enum class State : uint8_t {
+    /// Indicates that the solver made a decision.
+    Decision = 0,
+
+    /// Indicates that the solver made a forced move.
+    Forced = 1,
+  };
+
+  /// State of the solver at a particular level. It keeps track of previous
+  /// decisions that the solver can refer to when backtracking.
+  ///
+  /// The element at index 0 isn't used. States start from the element at index
+  /// 1.
+  std::vector<State> LevelStates;
+
+  enum class Assignment : int8_t {
+    Unassigned = -1,
+    AssignedFalse = 0,
+    AssignedTrue = 1
+  };
+
+  /// Maps variables (indices of the vector) to their assignments (elements of
+  /// the vector).
+  ///
+  /// The element at index 0 isn't used. Variable assignments start from the
+  /// element at index 1.
+  std::vector<Assignment> VarAssignments;
+
+  /// A set of unassigned variables that appear in watched literals in
+  /// `Formula`. The vector is guaranteed to contain unique elements.
+  std::vector<Variable> ActiveVars;
+
+public:
+  explicit WatchedLiteralsSolverImpl(const llvm::DenseSet<BoolValue *> &Vals)
+      : Formula(buildBooleanFormula(Vals)), LevelVars(Formula.LargestVar + 1),
+        LevelStates(Formula.LargestVar + 1) {
+    assert(!Vals.empty());
+
+    // Initialize the state at the root level to a decision so that in
+    // `reverseForcedMoves` we don't have to check that `Level >= 0` on each
+    // iteration.
+    LevelStates[0] = State::Decision;
+
+    // Initialize all variables as unassigned.
+    VarAssignments.resize(Formula.LargestVar + 1, Assignment::Unassigned);
+
+    // Initialize the active variables.
+    for (Variable Var = Formula.LargestVar; Var != NullVar; --Var) {
+      if (isWatched(posLit(Var)) || isWatched(negLit(Var)))
+        ActiveVars.push_back(Var);
+    }
+  }
+
+  Solver::Result solve() && {
+    size_t I = 0;
+    while (I < ActiveVars.size()) {
+      // Assert that the following invariants hold:
+      // 1. All active variables are unassigned.
+      // 2. All active variables form watched literals.
+      // 3. Unassigned variables that form watched literals are active.
+      // FIXME: Consider replacing these with test cases that fail if any of
+      // the invariants is broken. That might not be easy due to the
+      // transformations performed by `buildBooleanFormula`.
+ assert(activeVarsAreUnassigned()); + assert(activeVarsFormWatchedLiterals()); + assert(unassignedVarsFormingWatchedLiteralsAreActive()); + + const Variable ActiveVar = ActiveVars[I]; + + // Look for unit clauses that contain the active variable. + const bool unitPosLit = watchedByUnitClause(posLit(ActiveVar)); + const bool unitNegLit = watchedByUnitClause(negLit(ActiveVar)); + if (unitPosLit && unitNegLit) { + // We found a conflict! + + // Backtrack and rewind the `Level` until the most recent non-forced + // assignment. + reverseForcedMoves(); + + // If the root level is reached, then all possible assignments lead to + // a conflict. + if (Level == 0) + return Solver::Result::Unsatisfiable(); + + // Otherwise, take the other branch at the most recent level where a + // decision was made. + LevelStates[Level] = State::Forced; + const Variable Var = LevelVars[Level]; + VarAssignments[Var] = VarAssignments[Var] == Assignment::AssignedTrue + ? Assignment::AssignedFalse + : Assignment::AssignedTrue; + + updateWatchedLiterals(); + } else if (unitPosLit || unitNegLit) { + // We found a unit clause! The value of its unassigned variable is + // forced. + ++Level; + + LevelVars[Level] = ActiveVar; + LevelStates[Level] = State::Forced; + VarAssignments[ActiveVar] = + unitPosLit ? Assignment::AssignedTrue : Assignment::AssignedFalse; + + // Remove the variable that was just assigned from the set of active + // variables. + if (I + 1 < ActiveVars.size()) { + // Replace the variable that was just assigned with the last active + // variable for efficient removal. + ActiveVars[I] = ActiveVars.back(); + } else { + // This was the last active variable. Repeat the process from the + // beginning. + I = 0; + } + ActiveVars.pop_back(); + + updateWatchedLiterals(); + } else if (I + 1 == ActiveVars.size()) { + // There are no remaining unit clauses in the formula! Make a decision + // for one of the active variables at the current level. + ++Level; + + LevelVars[Level] = ActiveVar; + LevelStates[Level] = State::Decision; + VarAssignments[ActiveVar] = decideAssignment(ActiveVar); + + // Remove the variable that was just assigned from the set of active + // variables. + ActiveVars.pop_back(); + + updateWatchedLiterals(); + + // This was the last active variable. Repeat the process from the + // beginning. + I = 0; + } else { + ++I; + } + } + return Solver::Result::Satisfiable(buildSolution()); + } + +private: + /// Returns a satisfying truth assignment to the atomic values in the boolean + /// formula. + llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment> + buildSolution() { + llvm::DenseMap<AtomicBoolValue *, Solver::Result::Assignment> Solution; + for (auto &Atomic : Formula.Atomics) { + // A variable may have a definite true/false assignment, or it may be + // unassigned indicating its truth value does not affect the result of + // the formula. Unassigned variables are assigned to true as a default. + Solution[Atomic.second] = + VarAssignments[Atomic.first] == Assignment::AssignedFalse + ? Solver::Result::Assignment::AssignedFalse + : Solver::Result::Assignment::AssignedTrue; + } + return Solution; + } + + /// Reverses forced moves until the most recent level where a decision was + /// made on the assignment of a variable. + void reverseForcedMoves() { + for (; LevelStates[Level] == State::Forced; --Level) { + const Variable Var = LevelVars[Level]; + + VarAssignments[Var] = Assignment::Unassigned; + + // If the variable that we pass through is watched then we add it to the + // active variables. 
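+      // (The variable becomes unassigned again, so the main loop in `solve`
+      // must be able to reconsider it.)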
+      if (isWatched(posLit(Var)) || isWatched(negLit(Var)))
+        ActiveVars.push_back(Var);
+    }
+  }
+
+  /// Updates watched literals that are affected by a variable assignment.
+  void updateWatchedLiterals() {
+    const Variable Var = LevelVars[Level];
+
+    // Update the watched literals of clauses that currently watch the literal
+    // of `Var` that was falsified by the assignment.
+    const Literal FalseLit = VarAssignments[Var] == Assignment::AssignedTrue
+                                 ? negLit(Var)
+                                 : posLit(Var);
+    ClauseID FalseLitWatcher = Formula.WatchedHead[FalseLit];
+    Formula.WatchedHead[FalseLit] = NullClause;
+    while (FalseLitWatcher != NullClause) {
+      const ClauseID NextFalseLitWatcher = Formula.NextWatched[FalseLitWatcher];
+
+      // Pick the first non-false literal as the new watched literal.
+      const size_t FalseLitWatcherStart = Formula.ClauseStarts[FalseLitWatcher];
+      size_t NewWatchedLitIdx = FalseLitWatcherStart + 1;
+      while (isCurrentlyFalse(Formula.Clauses[NewWatchedLitIdx]))
+        ++NewWatchedLitIdx;
+      const Literal NewWatchedLit = Formula.Clauses[NewWatchedLitIdx];
+      const Variable NewWatchedLitVar = var(NewWatchedLit);
+
+      // Swap the old watched literal for the new one in `FalseLitWatcher` to
+      // maintain the invariant that the watched literal is at the beginning of
+      // the clause.
+      Formula.Clauses[NewWatchedLitIdx] = FalseLit;
+      Formula.Clauses[FalseLitWatcherStart] = NewWatchedLit;
+
+      // If the new watched literal isn't watched by any other clause and its
+      // variable isn't assigned, we need to add it to the active variables.
+      if (!isWatched(NewWatchedLit) && !isWatched(notLit(NewWatchedLit)) &&
+          VarAssignments[NewWatchedLitVar] == Assignment::Unassigned)
+        ActiveVars.push_back(NewWatchedLitVar);
+
+      Formula.NextWatched[FalseLitWatcher] = Formula.WatchedHead[NewWatchedLit];
+      Formula.WatchedHead[NewWatchedLit] = FalseLitWatcher;
+
+      // Go to the next clause that watches `FalseLit`.
+      FalseLitWatcher = NextFalseLitWatcher;
+    }
+  }
+
+  /// Returns true if and only if one of the clauses that watch `Lit` is a unit
+  /// clause.
+  bool watchedByUnitClause(Literal Lit) const {
+    for (ClauseID LitWatcher = Formula.WatchedHead[Lit];
+         LitWatcher != NullClause;
+         LitWatcher = Formula.NextWatched[LitWatcher]) {
+      llvm::ArrayRef<Literal> Clause = Formula.clauseLiterals(LitWatcher);
+
+      // Assert the invariant that the watched literal is always the first one
+      // in the clause.
+      // FIXME: Consider replacing this with a test case that fails if the
+      // invariant is broken by `updateWatchedLiterals`. That might not be easy
+      // due to the transformations performed by `buildBooleanFormula`.
+      assert(Clause.front() == Lit);
+
+      if (isUnit(Clause))
+        return true;
+    }
+    return false;
+  }
+
+  /// Returns true if and only if `Clause` is a unit clause.
+  bool isUnit(llvm::ArrayRef<Literal> Clause) const {
+    return llvm::all_of(Clause.drop_front(),
+                        [this](Literal L) { return isCurrentlyFalse(L); });
+  }
+
+  /// Returns true if and only if `Lit` evaluates to `false` in the current
+  /// partial assignment.
+  bool isCurrentlyFalse(Literal Lit) const {
+    return static_cast<int8_t>(VarAssignments[var(Lit)]) ==
+           static_cast<int8_t>(Lit & 1);
+  }
+
+  /// Returns true if and only if `Lit` is watched by a clause in `Formula`.
+  bool isWatched(Literal Lit) const {
+    return Formula.WatchedHead[Lit] != NullClause;
+  }
+
+  /// Returns an assignment for an unassigned variable.
+  Assignment decideAssignment(Variable Var) const {
+    return !isWatched(posLit(Var)) || isWatched(negLit(Var))
+               ? Assignment::AssignedFalse
+               : Assignment::AssignedTrue;
+  }
+
+  /// Returns a set of all watched literals.
+  llvm::DenseSet<Literal> watchedLiterals() const {
+    llvm::DenseSet<Literal> WatchedLiterals;
+    for (Literal Lit = 2; Lit < Formula.WatchedHead.size(); Lit++) {
+      if (Formula.WatchedHead[Lit] == NullClause)
+        continue;
+      WatchedLiterals.insert(Lit);
+    }
+    return WatchedLiterals;
+  }
+
+  /// Returns true if and only if all active variables are unassigned.
+  bool activeVarsAreUnassigned() const {
+    return llvm::all_of(ActiveVars, [this](Variable Var) {
+      return VarAssignments[Var] == Assignment::Unassigned;
+    });
+  }
+
+  /// Returns true if and only if all active variables form watched literals.
+  bool activeVarsFormWatchedLiterals() const {
+    const llvm::DenseSet<Literal> WatchedLiterals = watchedLiterals();
+    return llvm::all_of(ActiveVars, [&WatchedLiterals](Variable Var) {
+      return WatchedLiterals.contains(posLit(Var)) ||
+             WatchedLiterals.contains(negLit(Var));
+    });
+  }
+
+  /// Returns true if and only if all unassigned variables that form watched
+  /// literals are active.
+  bool unassignedVarsFormingWatchedLiteralsAreActive() const {
+    const llvm::DenseSet<Variable> ActiveVarsSet(ActiveVars.begin(),
+                                                 ActiveVars.end());
+    for (Literal Lit : watchedLiterals()) {
+      const Variable Var = var(Lit);
+      if (VarAssignments[Var] != Assignment::Unassigned)
+        continue;
+      if (ActiveVarsSet.contains(Var))
+        continue;
+      return false;
+    }
+    return true;
+  }
+};
+
+Solver::Result WatchedLiteralsSolver::solve(llvm::DenseSet<BoolValue *> Vals) {
+  return Vals.empty() ? Solver::Result::Satisfiable({{}})
+                      : WatchedLiteralsSolverImpl(Vals).solve();
+}
+
+} // namespace dataflow
+} // namespace clang
diff --git a/contrib/llvm-project/clang/lib/Analysis/IssueHash.cpp b/contrib/llvm-project/clang/lib/Analysis/IssueHash.cpp
new file mode 100644
index 000000000000..4d56e774b76a
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/IssueHash.cpp
@@ -0,0 +1,208 @@
+//===---------- IssueHash.cpp - Generate identification hashes --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/IssueHash.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Specifiers.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/Path.h"
+
+#include <functional>
+#include <optional>
+#include <sstream>
+#include <string>
+
+using namespace clang;
+
+// Get a string representation of the parts of the signature that can be
+// overloaded on.
+static std::string GetSignature(const FunctionDecl *Target) {
+  if (!Target)
+    return "";
+  std::string Signature;
+
+  // When a flow-sensitive bug happens in templated code we should not generate
+  // a distinct hash value for every instantiation. Use the signature from the
+  // primary template.
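+  // For example, `f<int>` and `f<long>` instantiated from
+  // `template <typename T> void f(T)` hash the signature of the primary
+  // template rather than two distinct signatures.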
+  if (const FunctionDecl *InstantiatedFrom =
+          Target->getTemplateInstantiationPattern())
+    Target = InstantiatedFrom;
+
+  if (!isa<CXXConstructorDecl>(Target) && !isa<CXXDestructorDecl>(Target) &&
+      !isa<CXXConversionDecl>(Target))
+    Signature.append(Target->getReturnType().getAsString()).append(" ");
+  Signature.append(Target->getQualifiedNameAsString()).append("(");
+
+  for (int i = 0, paramsCount = Target->getNumParams(); i < paramsCount; ++i) {
+    if (i)
+      Signature.append(", ");
+    Signature.append(Target->getParamDecl(i)->getType().getAsString());
+  }
+
+  if (Target->isVariadic())
+    Signature.append(", ...");
+  Signature.append(")");
+
+  const auto *TargetT =
+      llvm::dyn_cast_or_null<FunctionType>(Target->getType().getTypePtr());
+
+  if (!TargetT || !isa<CXXMethodDecl>(Target))
+    return Signature;
+
+  if (TargetT->isConst())
+    Signature.append(" const");
+  if (TargetT->isVolatile())
+    Signature.append(" volatile");
+  if (TargetT->isRestrict())
+    Signature.append(" restrict");
+
+  if (const auto *TargetPT =
+          dyn_cast_or_null<FunctionProtoType>(Target->getType().getTypePtr())) {
+    switch (TargetPT->getRefQualifier()) {
+    case RQ_LValue:
+      Signature.append(" &");
+      break;
+    case RQ_RValue:
+      Signature.append(" &&");
+      break;
+    default:
+      break;
+    }
+  }
+
+  return Signature;
+}
+
+static std::string GetEnclosingDeclContextSignature(const Decl *D) {
+  if (!D)
+    return "";
+
+  if (const auto *ND = dyn_cast<NamedDecl>(D)) {
+    std::string DeclName;
+
+    switch (ND->getKind()) {
+    case Decl::Namespace:
+    case Decl::Record:
+    case Decl::CXXRecord:
+    case Decl::Enum:
+      DeclName = ND->getQualifiedNameAsString();
+      break;
+    case Decl::CXXConstructor:
+    case Decl::CXXDestructor:
+    case Decl::CXXConversion:
+    case Decl::CXXMethod:
+    case Decl::Function:
+      DeclName = GetSignature(dyn_cast_or_null<FunctionDecl>(ND));
+      break;
+    case Decl::ObjCMethod:
+      // ObjC methods cannot be overloaded; the qualified name uniquely
+      // identifies the method.
+      DeclName = ND->getQualifiedNameAsString();
+      break;
+    default:
+      break;
+    }
+
+    return DeclName;
+  }
+
+  return "";
+}
+
+static StringRef GetNthLineOfFile(std::optional<llvm::MemoryBufferRef> Buffer,
+                                  int Line) {
+  if (!Buffer)
+    return "";
+
+  llvm::line_iterator LI(*Buffer, false);
+  for (; !LI.is_at_eof() && LI.line_number() != Line; ++LI)
+    ;
+
+  return *LI;
+}
+
+static std::string NormalizeLine(const SourceManager &SM, const FullSourceLoc &L,
+                                 const LangOptions &LangOpts) {
+  static StringRef Whitespaces = " \t\n";
+
+  StringRef Str = GetNthLineOfFile(SM.getBufferOrNone(L.getFileID(), L),
+                                   L.getExpansionLineNumber());
+  StringRef::size_type col = Str.find_first_not_of(Whitespaces);
+  if (col == StringRef::npos)
+    col = 1; // The line only contains whitespace.
+ else + col++; + SourceLocation StartOfLine = + SM.translateLineCol(SM.getFileID(L), L.getExpansionLineNumber(), col); + std::optional<llvm::MemoryBufferRef> Buffer = + SM.getBufferOrNone(SM.getFileID(StartOfLine), StartOfLine); + if (!Buffer) + return {}; + + const char *BufferPos = SM.getCharacterData(StartOfLine); + + Token Token; + Lexer Lexer(SM.getLocForStartOfFile(SM.getFileID(StartOfLine)), LangOpts, + Buffer->getBufferStart(), BufferPos, Buffer->getBufferEnd()); + + size_t NextStart = 0; + std::ostringstream LineBuff; + while (!Lexer.LexFromRawLexer(Token) && NextStart < 2) { + if (Token.isAtStartOfLine() && NextStart++ > 0) + continue; + LineBuff << std::string(SM.getCharacterData(Token.getLocation()), + Token.getLength()); + } + + return LineBuff.str(); +} + +static llvm::SmallString<32> GetMD5HashOfContent(StringRef Content) { + llvm::MD5 Hash; + llvm::MD5::MD5Result MD5Res; + SmallString<32> Res; + + Hash.update(Content); + Hash.final(MD5Res); + llvm::MD5::stringifyResult(MD5Res, Res); + + return Res; +} + +std::string clang::getIssueString(const FullSourceLoc &IssueLoc, + StringRef CheckerName, + StringRef WarningMessage, + const Decl *IssueDecl, + const LangOptions &LangOpts) { + static StringRef Delimiter = "$"; + + return (llvm::Twine(CheckerName) + Delimiter + + GetEnclosingDeclContextSignature(IssueDecl) + Delimiter + + Twine(IssueLoc.getExpansionColumnNumber()) + Delimiter + + NormalizeLine(IssueLoc.getManager(), IssueLoc, LangOpts) + + Delimiter + WarningMessage) + .str(); +} + +SmallString<32> clang::getIssueHash(const FullSourceLoc &IssueLoc, + StringRef CheckerName, + StringRef WarningMessage, + const Decl *IssueDecl, + const LangOptions &LangOpts) { + + return GetMD5HashOfContent(getIssueString( + IssueLoc, CheckerName, WarningMessage, IssueDecl, LangOpts)); +} diff --git a/contrib/llvm-project/clang/lib/Analysis/LiveVariables.cpp b/contrib/llvm-project/clang/lib/Analysis/LiveVariables.cpp new file mode 100644 index 000000000000..6d03dd05ca3d --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/LiveVariables.cpp @@ -0,0 +1,643 @@ +//=- LiveVariables.cpp - Live Variable Analysis for Source CFGs ----------*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements Live Variables analysis for source-level CFGs. 
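+// It is a backward may-analysis: a variable or expression is live at a point
+// if some path from that point may read its value before it is overwritten.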
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/LiveVariables.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <optional>
+#include <vector>
+
+using namespace clang;
+
+namespace {
+class LiveVariablesImpl {
+public:
+  AnalysisDeclContext &analysisContext;
+  llvm::ImmutableSet<const Expr *>::Factory ESetFact;
+  llvm::ImmutableSet<const VarDecl *>::Factory DSetFact;
+  llvm::ImmutableSet<const BindingDecl *>::Factory BSetFact;
+  llvm::DenseMap<const CFGBlock *, LiveVariables::LivenessValues> blocksEndToLiveness;
+  llvm::DenseMap<const CFGBlock *, LiveVariables::LivenessValues> blocksBeginToLiveness;
+  llvm::DenseMap<const Stmt *, LiveVariables::LivenessValues> stmtsToLiveness;
+  llvm::DenseMap<const DeclRefExpr *, unsigned> inAssignment;
+  const bool killAtAssign;
+
+  LiveVariables::LivenessValues
+  merge(LiveVariables::LivenessValues valsA,
+        LiveVariables::LivenessValues valsB);
+
+  LiveVariables::LivenessValues
+  runOnBlock(const CFGBlock *block, LiveVariables::LivenessValues val,
+             LiveVariables::Observer *obs = nullptr);
+
+  void dumpBlockLiveness(const SourceManager& M);
+  void dumpExprLiveness(const SourceManager& M);
+
+  LiveVariablesImpl(AnalysisDeclContext &ac, bool KillAtAssign)
+      : analysisContext(ac),
+        ESetFact(false), // Do not canonicalize ImmutableSets by default.
+        DSetFact(false), // This is a *major* performance win.
+        BSetFact(false), killAtAssign(KillAtAssign) {}
+};
+} // namespace
+
+static LiveVariablesImpl &getImpl(void *x) {
+  return *((LiveVariablesImpl *) x);
+}
+
+//===----------------------------------------------------------------------===//
+// Operations and queries on LivenessValues.
+//===----------------------------------------------------------------------===//
+
+bool LiveVariables::LivenessValues::isLive(const Expr *E) const {
+  return liveExprs.contains(E);
+}
+
+bool LiveVariables::LivenessValues::isLive(const VarDecl *D) const {
+  if (const auto *DD = dyn_cast<DecompositionDecl>(D)) {
+    bool alive = false;
+    for (const BindingDecl *BD : DD->bindings())
+      alive |= liveBindings.contains(BD);
+
+    // Note: the only known case where this condition is necessary is when a
+    // binding to a tuple-like structure is created. The HoldingVar
+    // initializers have a DeclRefExpr to the DecompositionDecl.
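+    // For example, in `auto [a, b] = std::pair<int, int>(1, 2);` each binding
+    // has a holding variable whose initializer refers back to the
+    // DecompositionDecl.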
+ alive |= liveDecls.contains(DD); + return alive; + } + return liveDecls.contains(D); +} + +namespace { + template <typename SET> + SET mergeSets(SET A, SET B) { + if (A.isEmpty()) + return B; + + for (typename SET::iterator it = B.begin(), ei = B.end(); it != ei; ++it) { + A = A.add(*it); + } + return A; + } +} // namespace + +void LiveVariables::Observer::anchor() { } + +LiveVariables::LivenessValues +LiveVariablesImpl::merge(LiveVariables::LivenessValues valsA, + LiveVariables::LivenessValues valsB) { + + llvm::ImmutableSetRef<const Expr *> SSetRefA( + valsA.liveExprs.getRootWithoutRetain(), ESetFact.getTreeFactory()), + SSetRefB(valsB.liveExprs.getRootWithoutRetain(), + ESetFact.getTreeFactory()); + + llvm::ImmutableSetRef<const VarDecl *> + DSetRefA(valsA.liveDecls.getRootWithoutRetain(), DSetFact.getTreeFactory()), + DSetRefB(valsB.liveDecls.getRootWithoutRetain(), DSetFact.getTreeFactory()); + + llvm::ImmutableSetRef<const BindingDecl *> + BSetRefA(valsA.liveBindings.getRootWithoutRetain(), BSetFact.getTreeFactory()), + BSetRefB(valsB.liveBindings.getRootWithoutRetain(), BSetFact.getTreeFactory()); + + SSetRefA = mergeSets(SSetRefA, SSetRefB); + DSetRefA = mergeSets(DSetRefA, DSetRefB); + BSetRefA = mergeSets(BSetRefA, BSetRefB); + + // asImmutableSet() canonicalizes the tree, allowing us to do an easy + // comparison afterwards. + return LiveVariables::LivenessValues(SSetRefA.asImmutableSet(), + DSetRefA.asImmutableSet(), + BSetRefA.asImmutableSet()); +} + +bool LiveVariables::LivenessValues::equals(const LivenessValues &V) const { + return liveExprs == V.liveExprs && liveDecls == V.liveDecls; +} + +//===----------------------------------------------------------------------===// +// Query methods. +//===----------------------------------------------------------------------===// + +static bool isAlwaysAlive(const VarDecl *D) { + return D->hasGlobalStorage(); +} + +bool LiveVariables::isLive(const CFGBlock *B, const VarDecl *D) { + return isAlwaysAlive(D) || getImpl(impl).blocksEndToLiveness[B].isLive(D); +} + +bool LiveVariables::isLive(const Stmt *S, const VarDecl *D) { + return isAlwaysAlive(D) || getImpl(impl).stmtsToLiveness[S].isLive(D); +} + +bool LiveVariables::isLive(const Stmt *Loc, const Expr *Val) { + return getImpl(impl).stmtsToLiveness[Loc].isLive(Val); +} + +//===----------------------------------------------------------------------===// +// Dataflow computation. 
+//===----------------------------------------------------------------------===// + +namespace { +class TransferFunctions : public StmtVisitor<TransferFunctions> { + LiveVariablesImpl &LV; + LiveVariables::LivenessValues &val; + LiveVariables::Observer *observer; + const CFGBlock *currentBlock; +public: + TransferFunctions(LiveVariablesImpl &im, + LiveVariables::LivenessValues &Val, + LiveVariables::Observer *Observer, + const CFGBlock *CurrentBlock) + : LV(im), val(Val), observer(Observer), currentBlock(CurrentBlock) {} + + void VisitBinaryOperator(BinaryOperator *BO); + void VisitBlockExpr(BlockExpr *BE); + void VisitDeclRefExpr(DeclRefExpr *DR); + void VisitDeclStmt(DeclStmt *DS); + void VisitObjCForCollectionStmt(ObjCForCollectionStmt *OS); + void VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *UE); + void VisitUnaryOperator(UnaryOperator *UO); + void Visit(Stmt *S); +}; +} // namespace + +static const VariableArrayType *FindVA(QualType Ty) { + const Type *ty = Ty.getTypePtr(); + while (const ArrayType *VT = dyn_cast<ArrayType>(ty)) { + if (const VariableArrayType *VAT = dyn_cast<VariableArrayType>(VT)) + if (VAT->getSizeExpr()) + return VAT; + + ty = VT->getElementType().getTypePtr(); + } + + return nullptr; +} + +static const Expr *LookThroughExpr(const Expr *E) { + while (E) { + if (const Expr *Ex = dyn_cast<Expr>(E)) + E = Ex->IgnoreParens(); + if (const FullExpr *FE = dyn_cast<FullExpr>(E)) { + E = FE->getSubExpr(); + continue; + } + if (const OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E)) { + E = OVE->getSourceExpr(); + continue; + } + break; + } + return E; +} + +static void AddLiveExpr(llvm::ImmutableSet<const Expr *> &Set, + llvm::ImmutableSet<const Expr *>::Factory &F, + const Expr *E) { + Set = F.add(Set, LookThroughExpr(E)); +} + +void TransferFunctions::Visit(Stmt *S) { + if (observer) + observer->observeStmt(S, currentBlock, val); + + StmtVisitor<TransferFunctions>::Visit(S); + + if (const auto *E = dyn_cast<Expr>(S)) { + val.liveExprs = LV.ESetFact.remove(val.liveExprs, E); + } + + // Mark all children expressions live. + + switch (S->getStmtClass()) { + default: + break; + case Stmt::StmtExprClass: { + // For statement expressions, look through the compound statement. + S = cast<StmtExpr>(S)->getSubStmt(); + break; + } + case Stmt::CXXMemberCallExprClass: { + // Include the implicit "this" pointer as being live. + CXXMemberCallExpr *CE = cast<CXXMemberCallExpr>(S); + if (Expr *ImplicitObj = CE->getImplicitObjectArgument()) { + AddLiveExpr(val.liveExprs, LV.ESetFact, ImplicitObj); + } + break; + } + case Stmt::ObjCMessageExprClass: { + // In calls to super, include the implicit "self" pointer as being live. + ObjCMessageExpr *CE = cast<ObjCMessageExpr>(S); + if (CE->getReceiverKind() == ObjCMessageExpr::SuperInstance) + val.liveDecls = LV.DSetFact.add(val.liveDecls, + LV.analysisContext.getSelfDecl()); + break; + } + case Stmt::DeclStmtClass: { + const DeclStmt *DS = cast<DeclStmt>(S); + if (const VarDecl *VD = dyn_cast<VarDecl>(DS->getSingleDecl())) { + for (const VariableArrayType* VA = FindVA(VD->getType()); + VA != nullptr; VA = FindVA(VA->getElementType())) { + AddLiveExpr(val.liveExprs, LV.ESetFact, VA->getSizeExpr()); + } + } + break; + } + case Stmt::PseudoObjectExprClass: { + // A pseudo-object operation only directly consumes its result + // expression. 
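+    // (For instance, an Objective-C property access is rewritten in terms of
+    // getter/setter calls, and only the resulting expression is consumed.)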
+ Expr *child = cast<PseudoObjectExpr>(S)->getResultExpr(); + if (!child) return; + if (OpaqueValueExpr *OV = dyn_cast<OpaqueValueExpr>(child)) + child = OV->getSourceExpr(); + child = child->IgnoreParens(); + val.liveExprs = LV.ESetFact.add(val.liveExprs, child); + return; + } + + // FIXME: These cases eventually shouldn't be needed. + case Stmt::ExprWithCleanupsClass: { + S = cast<ExprWithCleanups>(S)->getSubExpr(); + break; + } + case Stmt::CXXBindTemporaryExprClass: { + S = cast<CXXBindTemporaryExpr>(S)->getSubExpr(); + break; + } + case Stmt::UnaryExprOrTypeTraitExprClass: { + // No need to unconditionally visit subexpressions. + return; + } + case Stmt::IfStmtClass: { + // If one of the branches is an expression rather than a compound + // statement, it will be bad if we mark it as live at the terminator + // of the if-statement (i.e., immediately after the condition expression). + AddLiveExpr(val.liveExprs, LV.ESetFact, cast<IfStmt>(S)->getCond()); + return; + } + case Stmt::WhileStmtClass: { + // If the loop body is an expression rather than a compound statement, + // it will be bad if we mark it as live at the terminator of the loop + // (i.e., immediately after the condition expression). + AddLiveExpr(val.liveExprs, LV.ESetFact, cast<WhileStmt>(S)->getCond()); + return; + } + case Stmt::DoStmtClass: { + // If the loop body is an expression rather than a compound statement, + // it will be bad if we mark it as live at the terminator of the loop + // (i.e., immediately after the condition expression). + AddLiveExpr(val.liveExprs, LV.ESetFact, cast<DoStmt>(S)->getCond()); + return; + } + case Stmt::ForStmtClass: { + // If the loop body is an expression rather than a compound statement, + // it will be bad if we mark it as live at the terminator of the loop + // (i.e., immediately after the condition expression). + AddLiveExpr(val.liveExprs, LV.ESetFact, cast<ForStmt>(S)->getCond()); + return; + } + + } + + // HACK + FIXME: What is this? One could only guess that this is an attempt to + // fish for live values, for example, arguments from a call expression. + // Maybe we could take inspiration from UninitializedVariable analysis? + for (Stmt *Child : S->children()) { + if (const auto *E = dyn_cast_or_null<Expr>(Child)) + AddLiveExpr(val.liveExprs, LV.ESetFact, E); + } +} + +static bool writeShouldKill(const VarDecl *VD) { + return VD && !VD->getType()->isReferenceType() && + !isAlwaysAlive(VD); +} + +void TransferFunctions::VisitBinaryOperator(BinaryOperator *B) { + if (LV.killAtAssign && B->getOpcode() == BO_Assign) { + if (const auto *DR = dyn_cast<DeclRefExpr>(B->getLHS()->IgnoreParens())) { + LV.inAssignment[DR] = 1; + } + } + if (B->isAssignmentOp()) { + if (!LV.killAtAssign) + return; + + // Assigning to a variable? 
+    Expr *LHS = B->getLHS()->IgnoreParens();
+
+    if (DeclRefExpr *DR = dyn_cast<DeclRefExpr>(LHS)) {
+      const Decl* D = DR->getDecl();
+      bool Killed = false;
+
+      if (const BindingDecl* BD = dyn_cast<BindingDecl>(D)) {
+        Killed = !BD->getType()->isReferenceType();
+        if (Killed) {
+          if (const auto *HV = BD->getHoldingVar())
+            val.liveDecls = LV.DSetFact.remove(val.liveDecls, HV);
+
+          val.liveBindings = LV.BSetFact.remove(val.liveBindings, BD);
+        }
+      } else if (const auto *VD = dyn_cast<VarDecl>(D)) {
+        Killed = writeShouldKill(VD);
+        if (Killed)
+          val.liveDecls = LV.DSetFact.remove(val.liveDecls, VD);
+
+      }
+
+      if (Killed && observer)
+        observer->observerKill(DR);
+    }
+  }
+}
+
+void TransferFunctions::VisitBlockExpr(BlockExpr *BE) {
+  for (const VarDecl *VD :
+       LV.analysisContext.getReferencedBlockVars(BE->getBlockDecl())) {
+    if (isAlwaysAlive(VD))
+      continue;
+    val.liveDecls = LV.DSetFact.add(val.liveDecls, VD);
+  }
+}
+
+void TransferFunctions::VisitDeclRefExpr(DeclRefExpr *DR) {
+  const Decl* D = DR->getDecl();
+  bool InAssignment = LV.inAssignment[DR];
+  if (const auto *BD = dyn_cast<BindingDecl>(D)) {
+    if (!InAssignment) {
+      if (const auto *HV = BD->getHoldingVar())
+        val.liveDecls = LV.DSetFact.add(val.liveDecls, HV);
+
+      val.liveBindings = LV.BSetFact.add(val.liveBindings, BD);
+    }
+  } else if (const auto *VD = dyn_cast<VarDecl>(D)) {
+    if (!InAssignment && !isAlwaysAlive(VD))
+      val.liveDecls = LV.DSetFact.add(val.liveDecls, VD);
+  }
+}
+
+void TransferFunctions::VisitDeclStmt(DeclStmt *DS) {
+  for (const auto *DI : DS->decls()) {
+    if (const auto *DD = dyn_cast<DecompositionDecl>(DI)) {
+      for (const auto *BD : DD->bindings()) {
+        if (const auto *HV = BD->getHoldingVar())
+          val.liveDecls = LV.DSetFact.remove(val.liveDecls, HV);
+
+        val.liveBindings = LV.BSetFact.remove(val.liveBindings, BD);
+      }
+
+      // When a binding to a tuple-like structure is created, the HoldingVar
+      // initializers have a DeclRefExpr to the DecompositionDecl.
+      val.liveDecls = LV.DSetFact.remove(val.liveDecls, DD);
+    } else if (const auto *VD = dyn_cast<VarDecl>(DI)) {
+      if (!isAlwaysAlive(VD))
+        val.liveDecls = LV.DSetFact.remove(val.liveDecls, VD);
+    }
+  }
+}
+
+void TransferFunctions::VisitObjCForCollectionStmt(ObjCForCollectionStmt *OS) {
+  // Kill the iteration variable.
+  DeclRefExpr *DR = nullptr;
+  const VarDecl *VD = nullptr;
+
+  Stmt *element = OS->getElement();
+  if (DeclStmt *DS = dyn_cast<DeclStmt>(element)) {
+    VD = cast<VarDecl>(DS->getSingleDecl());
+  }
+  else if ((DR = dyn_cast<DeclRefExpr>(cast<Expr>(element)->IgnoreParens()))) {
+    VD = cast<VarDecl>(DR->getDecl());
+  }
+
+  if (VD) {
+    val.liveDecls = LV.DSetFact.remove(val.liveDecls, VD);
+    if (observer && DR)
+      observer->observerKill(DR);
+  }
+}
+
+void TransferFunctions::
+VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *UE)
+{
+  // While sizeof(var) doesn't technically extend the liveness of 'var', it
+  // does extend the liveness of metadata if 'var' is a VariableArrayType.
+  // We handle that special case here.
+  if (UE->getKind() != UETT_SizeOf || UE->isArgumentType())
+    return;
+
+  const Expr *subEx = UE->getArgumentExpr();
+  if (subEx->getType()->isVariableArrayType()) {
+    assert(subEx->isLValue());
+    val.liveExprs = LV.ESetFact.add(val.liveExprs, subEx->IgnoreParens());
+  }
+}
+
+void TransferFunctions::VisitUnaryOperator(UnaryOperator *UO) {
+  // Treat ++/-- as a kill.
+  // Note we don't actually have to do anything if we don't have an observer,
+  // since a ++/-- acts as both a kill and a "use".
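+  // (The operand is both read and written, so the set of live variables does
+  // not change either way.)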
+ if (!observer) + return; + + switch (UO->getOpcode()) { + default: + return; + case UO_PostInc: + case UO_PostDec: + case UO_PreInc: + case UO_PreDec: + break; + } + + if (auto *DR = dyn_cast<DeclRefExpr>(UO->getSubExpr()->IgnoreParens())) { + const Decl *D = DR->getDecl(); + if (isa<VarDecl>(D) || isa<BindingDecl>(D)) { + // Treat ++/-- as a kill. + observer->observerKill(DR); + } + } +} + +LiveVariables::LivenessValues +LiveVariablesImpl::runOnBlock(const CFGBlock *block, + LiveVariables::LivenessValues val, + LiveVariables::Observer *obs) { + + TransferFunctions TF(*this, val, obs, block); + + // Visit the terminator (if any). + if (const Stmt *term = block->getTerminatorStmt()) + TF.Visit(const_cast<Stmt*>(term)); + + // Apply the transfer function for all Stmts in the block. + for (CFGBlock::const_reverse_iterator it = block->rbegin(), + ei = block->rend(); it != ei; ++it) { + const CFGElement &elem = *it; + + if (std::optional<CFGAutomaticObjDtor> Dtor = + elem.getAs<CFGAutomaticObjDtor>()) { + val.liveDecls = DSetFact.add(val.liveDecls, Dtor->getVarDecl()); + continue; + } + + if (!elem.getAs<CFGStmt>()) + continue; + + const Stmt *S = elem.castAs<CFGStmt>().getStmt(); + TF.Visit(const_cast<Stmt*>(S)); + stmtsToLiveness[S] = val; + } + return val; +} + +void LiveVariables::runOnAllBlocks(LiveVariables::Observer &obs) { + const CFG *cfg = getImpl(impl).analysisContext.getCFG(); + for (CFG::const_iterator it = cfg->begin(), ei = cfg->end(); it != ei; ++it) + getImpl(impl).runOnBlock(*it, getImpl(impl).blocksEndToLiveness[*it], &obs); +} + +LiveVariables::LiveVariables(void *im) : impl(im) {} + +LiveVariables::~LiveVariables() { + delete (LiveVariablesImpl*) impl; +} + +std::unique_ptr<LiveVariables> +LiveVariables::computeLiveness(AnalysisDeclContext &AC, bool killAtAssign) { + + // No CFG? Bail out. + CFG *cfg = AC.getCFG(); + if (!cfg) + return nullptr; + + // The analysis currently has scalability issues for very large CFGs. + // Bail out if it looks too large. + if (cfg->getNumBlockIDs() > 300000) + return nullptr; + + LiveVariablesImpl *LV = new LiveVariablesImpl(AC, killAtAssign); + + // Construct the dataflow worklist. Enqueue the exit block as the + // start of the analysis. + BackwardDataflowWorklist worklist(*cfg, AC); + llvm::BitVector everAnalyzedBlock(cfg->getNumBlockIDs()); + + // FIXME: we should enqueue using post order. + for (const CFGBlock *B : cfg->nodes()) { + worklist.enqueueBlock(B); + } + + while (const CFGBlock *block = worklist.dequeue()) { + // Determine if the block's end value has changed. If not, we + // have nothing left to do for this block. + LivenessValues &prevVal = LV->blocksEndToLiveness[block]; + + // Merge the values of all successor blocks. + LivenessValues val; + for (CFGBlock::const_succ_iterator it = block->succ_begin(), + ei = block->succ_end(); it != ei; ++it) { + if (const CFGBlock *succ = *it) { + val = LV->merge(val, LV->blocksBeginToLiveness[succ]); + } + } + + if (!everAnalyzedBlock[block->getBlockID()]) + everAnalyzedBlock[block->getBlockID()] = true; + else if (prevVal.equals(val)) + continue; + + prevVal = val; + + // Update the dataflow value for the start of this block. + LV->blocksBeginToLiveness[block] = LV->runOnBlock(block, val); + + // Enqueue the value to the predecessors. 
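+    // (Liveness flows backwards: a change at this block's entry can affect
+    // the liveness at its predecessors' exits.)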
+ worklist.enqueuePredecessors(block); + } + + return std::unique_ptr<LiveVariables>(new LiveVariables(LV)); +} + +void LiveVariables::dumpBlockLiveness(const SourceManager &M) { + getImpl(impl).dumpBlockLiveness(M); +} + +void LiveVariablesImpl::dumpBlockLiveness(const SourceManager &M) { + std::vector<const CFGBlock *> vec; + for (llvm::DenseMap<const CFGBlock *, LiveVariables::LivenessValues>::iterator + it = blocksEndToLiveness.begin(), ei = blocksEndToLiveness.end(); + it != ei; ++it) { + vec.push_back(it->first); + } + llvm::sort(vec, [](const CFGBlock *A, const CFGBlock *B) { + return A->getBlockID() < B->getBlockID(); + }); + + std::vector<const VarDecl*> declVec; + + for (std::vector<const CFGBlock *>::iterator + it = vec.begin(), ei = vec.end(); it != ei; ++it) { + llvm::errs() << "\n[ B" << (*it)->getBlockID() + << " (live variables at block exit) ]\n"; + + LiveVariables::LivenessValues vals = blocksEndToLiveness[*it]; + declVec.clear(); + + for (llvm::ImmutableSet<const VarDecl *>::iterator si = + vals.liveDecls.begin(), + se = vals.liveDecls.end(); si != se; ++si) { + declVec.push_back(*si); + } + + llvm::sort(declVec, [](const Decl *A, const Decl *B) { + return A->getBeginLoc() < B->getBeginLoc(); + }); + + for (std::vector<const VarDecl*>::iterator di = declVec.begin(), + de = declVec.end(); di != de; ++di) { + llvm::errs() << " " << (*di)->getDeclName().getAsString() + << " <"; + (*di)->getLocation().print(llvm::errs(), M); + llvm::errs() << ">\n"; + } + } + llvm::errs() << "\n"; +} + +void LiveVariables::dumpExprLiveness(const SourceManager &M) { + getImpl(impl).dumpExprLiveness(M); +} + +void LiveVariablesImpl::dumpExprLiveness(const SourceManager &M) { + // Don't iterate over blockEndsToLiveness directly because it's not sorted. + for (const CFGBlock *B : *analysisContext.getCFG()) { + + llvm::errs() << "\n[ B" << B->getBlockID() + << " (live expressions at block exit) ]\n"; + for (const Expr *E : blocksEndToLiveness[B].liveExprs) { + llvm::errs() << "\n"; + E->dump(); + } + llvm::errs() << "\n"; + } +} + +const void *LiveVariables::getTag() { static int x; return &x; } +const void *RelaxedLiveVariables::getTag() { static int x; return &x; } diff --git a/contrib/llvm-project/clang/lib/Analysis/MacroExpansionContext.cpp b/contrib/llvm-project/clang/lib/Analysis/MacroExpansionContext.cpp new file mode 100644 index 000000000000..564e359668a5 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/MacroExpansionContext.cpp @@ -0,0 +1,232 @@ +//===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/MacroExpansionContext.h" +#include "llvm/Support/Debug.h" +#include <optional> + +#define DEBUG_TYPE "macro-expansion-context" + +static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS, + clang::Token Tok); + +namespace clang { +namespace detail { +class MacroExpansionRangeRecorder : public PPCallbacks { + const Preprocessor &PP; + SourceManager &SM; + MacroExpansionContext::ExpansionRangeMap &ExpansionRanges; + +public: + explicit MacroExpansionRangeRecorder( + const Preprocessor &PP, SourceManager &SM, + MacroExpansionContext::ExpansionRangeMap &ExpansionRanges) + : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {} + + void MacroExpands(const Token &MacroName, const MacroDefinition &MD, + SourceRange Range, const MacroArgs *Args) override { + // Ignore annotation tokens like: _Pragma("pack(push, 1)") + if (MacroName.getIdentifierInfo()->getName() == "_Pragma") + return; + + SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation()); + assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin())); + + const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] { + // If the range is empty, use the length of the macro. + if (Range.getBegin() == Range.getEnd()) + return SM.getExpansionLoc( + MacroName.getLocation().getLocWithOffset(MacroName.getLength())); + + // Include the last character. + return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1); + }(); + + (void)PP; + LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '"; + dumpTokenInto(PP, llvm::dbgs(), MacroName); + llvm::dbgs() + << "' with length " << MacroName.getLength() << " at "; + MacroNameBegin.print(llvm::dbgs(), SM); + llvm::dbgs() << ", expansion end at "; + ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';); + + // If the expansion range is empty, use the identifier of the macro as a + // range. + MacroExpansionContext::ExpansionRangeMap::iterator It; + bool Inserted; + std::tie(It, Inserted) = + ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd); + if (Inserted) { + LLVM_DEBUG(llvm::dbgs() << "maps "; + It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to "; + It->getSecond().print(llvm::dbgs(), SM); + llvm::dbgs() << '\n';); + } else { + if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) { + It->getSecond() = ExpansionEnd; + LLVM_DEBUG( + llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM); + llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM); + llvm::dbgs() << '\n';); + } + } + } +}; +} // namespace detail +} // namespace clang + +using namespace clang; + +MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts) + : LangOpts(LangOpts) {} + +void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) { + PP = &NewPP; + SM = &NewPP.getSourceManager(); + + // Make sure that the Preprocessor does not outlive the MacroExpansionContext. + PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>( + *PP, *SM, ExpansionRanges)); + // Same applies here. + PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); }); +} + +std::optional<StringRef> +MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const { + if (MacroExpansionLoc.isMacroID()) + return std::nullopt; + + // If there was no macro expansion at that location, return std::nullopt. 
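+  // (A location that is not the start of any recorded expansion has no entry
+  // in `ExpansionRanges`; an expansion that produced no tokens, such as one
+  // of an empty `#define`, has a range but no entry in `ExpandedTokens`.)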
+  if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
+    return std::nullopt;
+
+  // There was a macro expansion, but it resulted in no tokens; return the
+  // empty string.
+  const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
+  if (It == ExpandedTokens.end())
+    return StringRef{""};
+
+  // Otherwise we have the actual token sequence as a string.
+  return It->getSecond().str();
+}
+
+std::optional<StringRef>
+MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const {
+  if (MacroExpansionLoc.isMacroID())
+    return std::nullopt;
+
+  const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
+  if (It == ExpansionRanges.end())
+    return std::nullopt;
+
+  assert(It->getFirst() != It->getSecond() &&
+         "Every macro expansion must cover a non-empty range.");
+
+  return Lexer::getSourceText(
+      CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
+      LangOpts);
+}
+
+void MacroExpansionContext::dumpExpansionRanges() const {
+  dumpExpansionRangesToStream(llvm::dbgs());
+}
+void MacroExpansionContext::dumpExpandedTexts() const {
+  dumpExpandedTextsToStream(llvm::dbgs());
+}
+
+void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const {
+  std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
+  LocalExpansionRanges.reserve(ExpansionRanges.size());
+  for (const auto &Record : ExpansionRanges)
+    LocalExpansionRanges.emplace_back(
+        std::make_pair(Record.getFirst(), Record.getSecond()));
+  llvm::sort(LocalExpansionRanges);
+
+  OS << "\n=============== ExpansionRanges ===============\n";
+  for (const auto &Record : LocalExpansionRanges) {
+    OS << "> ";
+    Record.first.print(OS, *SM);
+    OS << ", ";
+    Record.second.print(OS, *SM);
+    OS << '\n';
+  }
+}
+
+void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const {
+  std::vector<std::pair<SourceLocation, MacroExpansionText>>
+      LocalExpandedTokens;
+  LocalExpandedTokens.reserve(ExpandedTokens.size());
+  for (const auto &Record : ExpandedTokens)
+    LocalExpandedTokens.emplace_back(
+        std::make_pair(Record.getFirst(), Record.getSecond()));
+  llvm::sort(LocalExpandedTokens);
+
+  OS << "\n=============== ExpandedTokens ===============\n";
+  for (const auto &Record : LocalExpandedTokens) {
+    OS << "> ";
+    Record.first.print(OS, *SM);
+    OS << " -> '" << Record.second << "'\n";
+  }
+}
+
+static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
+  assert(Tok.isNot(tok::raw_identifier));
+
+  // Ignore annotation tokens like: _Pragma("pack(push, 1)")
+  if (Tok.isAnnotation())
+    return;
+
+  if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
+    // FIXME: For now, we don't respect whitespaces between macro expanded
+    // tokens. We just emit a space after every identifier to produce valid
+    // code for `int a ;`-like expansions.
+    //              ^-^-- Space after the 'int' and 'a' identifiers.
+    OS << II->getName() << ' ';
+  } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
+    OS << StringRef(Tok.getLiteralData(), Tok.getLength());
+  } else {
+    char Tmp[256];
+    if (Tok.getLength() < sizeof(Tmp)) {
+      const char *TokPtr = Tmp;
+      // FIXME: Might use a different overload for a cleaner callsite.
+ unsigned Len = PP.getSpelling(Tok, TokPtr); + OS.write(TokPtr, Len); + } else { + OS << "<too long token>"; + } + } +} + +void MacroExpansionContext::onTokenLexed(const Token &Tok) { + SourceLocation SLoc = Tok.getLocation(); + if (SLoc.isFileID()) + return; + + LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '"; + dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at "; + SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';); + + // Remove spelling location. + SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc); + + MacroExpansionText TokenAsString; + llvm::raw_svector_ostream OS(TokenAsString); + + // FIXME: Prepend newlines and space to produce the exact same output as the + // preprocessor would for this token. + + dumpTokenInto(*PP, OS, Tok); + + ExpansionMap::iterator It; + bool Inserted; + std::tie(It, Inserted) = + ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString)); + if (!Inserted) + It->getSecond().append(TokenAsString); +} + diff --git a/contrib/llvm-project/clang/lib/Analysis/ObjCNoReturn.cpp b/contrib/llvm-project/clang/lib/Analysis/ObjCNoReturn.cpp new file mode 100644 index 000000000000..9d7c365c3b99 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/ObjCNoReturn.cpp @@ -0,0 +1,63 @@ +//= ObjCNoReturn.cpp - Handling of Cocoa APIs known not to return --*- C++ -*--- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements special handling of recognizing ObjC API hooks that +// do not return but aren't marked as such in API headers. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ASTContext.h" +#include "clang/AST/ExprObjC.h" +#include "clang/Analysis/DomainSpecific/ObjCNoReturn.h" + +using namespace clang; + +static bool isSubclass(const ObjCInterfaceDecl *Class, IdentifierInfo *II) { + if (!Class) + return false; + if (Class->getIdentifier() == II) + return true; + return isSubclass(Class->getSuperClass(), II); +} + +ObjCNoReturn::ObjCNoReturn(ASTContext &C) + : RaiseSel(GetNullarySelector("raise", C)), + NSExceptionII(&C.Idents.get("NSException")) +{ + // Generate selectors. + SmallVector<IdentifierInfo*, 3> II; + + // raise:format: + II.push_back(&C.Idents.get("raise")); + II.push_back(&C.Idents.get("format")); + NSExceptionInstanceRaiseSelectors[0] = + C.Selectors.getSelector(II.size(), &II[0]); + + // raise:format:arguments: + II.push_back(&C.Idents.get("arguments")); + NSExceptionInstanceRaiseSelectors[1] = + C.Selectors.getSelector(II.size(), &II[0]); +} + + +bool ObjCNoReturn::isImplicitNoReturn(const ObjCMessageExpr *ME) { + Selector S = ME->getSelector(); + + if (ME->isInstanceMessage()) { + // Check for the "raise" message. 
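+    // (`RaiseSel` is the nullary `raise` selector, as in `[exception raise]`.)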
+ return S == RaiseSel; + } + + if (const ObjCInterfaceDecl *ID = ME->getReceiverInterface()) { + if (isSubclass(ID, NSExceptionII) && + llvm::is_contained(NSExceptionInstanceRaiseSelectors, S)) + return true; + } + + return false; +} diff --git a/contrib/llvm-project/clang/lib/Analysis/PathDiagnostic.cpp b/contrib/llvm-project/clang/lib/Analysis/PathDiagnostic.cpp new file mode 100644 index 000000000000..ac1306fd8071 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/PathDiagnostic.cpp @@ -0,0 +1,1236 @@ +//===- PathDiagnostic.cpp - Path-Specific Diagnostic Handling -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the PathDiagnostic-related interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/PathDiagnostic.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/ParentMap.h" +#include "clang/AST/PrettyPrinter.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/ProgramPoint.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstring> +#include <memory> +#include <optional> +#include <utility> +#include <vector> + +using namespace clang; +using namespace ento; + +static StringRef StripTrailingDots(StringRef s) { + for (StringRef::size_type i = s.size(); i != 0; --i) + if (s[i - 1] != '.') + return s.substr(0, i); + return {}; +} + +PathDiagnosticPiece::PathDiagnosticPiece(StringRef s, + Kind k, DisplayHint hint) + : str(StripTrailingDots(s)), kind(k), Hint(hint) {} + +PathDiagnosticPiece::PathDiagnosticPiece(Kind k, DisplayHint hint) + : kind(k), Hint(hint) {} + +PathDiagnosticPiece::~PathDiagnosticPiece() = default; + +PathDiagnosticEventPiece::~PathDiagnosticEventPiece() = default; + +PathDiagnosticCallPiece::~PathDiagnosticCallPiece() = default; + +PathDiagnosticControlFlowPiece::~PathDiagnosticControlFlowPiece() = default; + +PathDiagnosticMacroPiece::~PathDiagnosticMacroPiece() = default; + +PathDiagnosticNotePiece::~PathDiagnosticNotePiece() = default; + +PathDiagnosticPopUpPiece::~PathDiagnosticPopUpPiece() = default; + +void PathPieces::flattenTo(PathPieces &Primary, PathPieces &Current, + bool ShouldFlattenMacros) const { + for (auto &Piece : *this) { + switch (Piece->getKind()) { + case PathDiagnosticPiece::Call: { + auto &Call = cast<PathDiagnosticCallPiece>(*Piece); + if (auto CallEnter = Call.getCallEnterEvent()) + Current.push_back(std::move(CallEnter)); + Call.path.flattenTo(Primary, Primary, 
ShouldFlattenMacros); + if (auto callExit = Call.getCallExitEvent()) + Current.push_back(std::move(callExit)); + break; + } + case PathDiagnosticPiece::Macro: { + auto &Macro = cast<PathDiagnosticMacroPiece>(*Piece); + if (ShouldFlattenMacros) { + Macro.subPieces.flattenTo(Primary, Primary, ShouldFlattenMacros); + } else { + Current.push_back(Piece); + PathPieces NewPath; + Macro.subPieces.flattenTo(Primary, NewPath, ShouldFlattenMacros); + // FIXME: This probably shouldn't mutate the original path piece. + Macro.subPieces = NewPath; + } + break; + } + case PathDiagnosticPiece::Event: + case PathDiagnosticPiece::ControlFlow: + case PathDiagnosticPiece::Note: + case PathDiagnosticPiece::PopUp: + Current.push_back(Piece); + break; + } + } +} + +PathDiagnostic::~PathDiagnostic() = default; + +PathDiagnostic::PathDiagnostic( + StringRef CheckerName, const Decl *declWithIssue, StringRef bugtype, + StringRef verboseDesc, StringRef shortDesc, StringRef category, + PathDiagnosticLocation LocationToUnique, const Decl *DeclToUnique, + std::unique_ptr<FilesToLineNumsMap> ExecutedLines) + : CheckerName(CheckerName), DeclWithIssue(declWithIssue), + BugType(StripTrailingDots(bugtype)), + VerboseDesc(StripTrailingDots(verboseDesc)), + ShortDesc(StripTrailingDots(shortDesc)), + Category(StripTrailingDots(category)), UniqueingLoc(LocationToUnique), + UniqueingDecl(DeclToUnique), ExecutedLines(std::move(ExecutedLines)), + path(pathImpl) {} + +void PathDiagnosticConsumer::anchor() {} + +PathDiagnosticConsumer::~PathDiagnosticConsumer() { + // Delete the contents of the FoldingSet if it isn't empty already. + for (auto &Diag : Diags) + delete &Diag; +} + +void PathDiagnosticConsumer::HandlePathDiagnostic( + std::unique_ptr<PathDiagnostic> D) { + if (!D || D->path.empty()) + return; + + // We need to flatten the locations (convert Stmt* to locations) because + // the referenced statements may be freed by the time the diagnostics + // are emitted. + D->flattenLocations(); + + // If the PathDiagnosticConsumer does not support diagnostics that + // cross file boundaries, prune out such diagnostics now. + if (!supportsCrossFileDiagnostics()) { + // Verify that the entire path is from the same FileID. + FileID FID; + const SourceManager &SMgr = D->path.front()->getLocation().getManager(); + SmallVector<const PathPieces *, 5> WorkList; + WorkList.push_back(&D->path); + SmallString<128> buf; + llvm::raw_svector_ostream warning(buf); + warning << "warning: Path diagnostic report is not generated. Current " + << "output format does not support diagnostics that cross file " + << "boundaries. Refer to --analyzer-output for valid output " + << "formats\n"; + + while (!WorkList.empty()) { + const PathPieces &path = *WorkList.pop_back_val(); + + for (const auto &I : path) { + const PathDiagnosticPiece *piece = I.get(); + FullSourceLoc L = piece->getLocation().asLocation().getExpansionLoc(); + + if (FID.isInvalid()) { + FID = SMgr.getFileID(L); + } else if (SMgr.getFileID(L) != FID) { + llvm::errs() << warning.str(); + return; + } + + // Check the source ranges. 
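+          // (Every highlighted range must also expand to the same file as the
+          // rest of the path.)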
+ ArrayRef<SourceRange> Ranges = piece->getRanges(); + for (const auto &I : Ranges) { + SourceLocation L = SMgr.getExpansionLoc(I.getBegin()); + if (!L.isFileID() || SMgr.getFileID(L) != FID) { + llvm::errs() << warning.str(); + return; + } + L = SMgr.getExpansionLoc(I.getEnd()); + if (!L.isFileID() || SMgr.getFileID(L) != FID) { + llvm::errs() << warning.str(); + return; + } + } + + if (const auto *call = dyn_cast<PathDiagnosticCallPiece>(piece)) + WorkList.push_back(&call->path); + else if (const auto *macro = dyn_cast<PathDiagnosticMacroPiece>(piece)) + WorkList.push_back(¯o->subPieces); + } + } + + if (FID.isInvalid()) + return; // FIXME: Emit a warning? + } + + // Profile the node to see if we already have something matching it + llvm::FoldingSetNodeID profile; + D->Profile(profile); + void *InsertPos = nullptr; + + if (PathDiagnostic *orig = Diags.FindNodeOrInsertPos(profile, InsertPos)) { + // Keep the PathDiagnostic with the shorter path. + // Note, the enclosing routine is called in deterministic order, so the + // results will be consistent between runs (no reason to break ties if the + // size is the same). + const unsigned orig_size = orig->full_size(); + const unsigned new_size = D->full_size(); + if (orig_size <= new_size) + return; + + assert(orig != D.get()); + Diags.RemoveNode(orig); + delete orig; + } + + Diags.InsertNode(D.release()); +} + +static std::optional<bool> comparePath(const PathPieces &X, + const PathPieces &Y); + +static std::optional<bool> +compareControlFlow(const PathDiagnosticControlFlowPiece &X, + const PathDiagnosticControlFlowPiece &Y) { + FullSourceLoc XSL = X.getStartLocation().asLocation(); + FullSourceLoc YSL = Y.getStartLocation().asLocation(); + if (XSL != YSL) + return XSL.isBeforeInTranslationUnitThan(YSL); + FullSourceLoc XEL = X.getEndLocation().asLocation(); + FullSourceLoc YEL = Y.getEndLocation().asLocation(); + if (XEL != YEL) + return XEL.isBeforeInTranslationUnitThan(YEL); + return std::nullopt; +} + +static std::optional<bool> compareMacro(const PathDiagnosticMacroPiece &X, + const PathDiagnosticMacroPiece &Y) { + return comparePath(X.subPieces, Y.subPieces); +} + +static std::optional<bool> compareCall(const PathDiagnosticCallPiece &X, + const PathDiagnosticCallPiece &Y) { + FullSourceLoc X_CEL = X.callEnter.asLocation(); + FullSourceLoc Y_CEL = Y.callEnter.asLocation(); + if (X_CEL != Y_CEL) + return X_CEL.isBeforeInTranslationUnitThan(Y_CEL); + FullSourceLoc X_CEWL = X.callEnterWithin.asLocation(); + FullSourceLoc Y_CEWL = Y.callEnterWithin.asLocation(); + if (X_CEWL != Y_CEWL) + return X_CEWL.isBeforeInTranslationUnitThan(Y_CEWL); + FullSourceLoc X_CRL = X.callReturn.asLocation(); + FullSourceLoc Y_CRL = Y.callReturn.asLocation(); + if (X_CRL != Y_CRL) + return X_CRL.isBeforeInTranslationUnitThan(Y_CRL); + return comparePath(X.path, Y.path); +} + +static std::optional<bool> comparePiece(const PathDiagnosticPiece &X, + const PathDiagnosticPiece &Y) { + if (X.getKind() != Y.getKind()) + return X.getKind() < Y.getKind(); + + FullSourceLoc XL = X.getLocation().asLocation(); + FullSourceLoc YL = Y.getLocation().asLocation(); + if (XL != YL) + return XL.isBeforeInTranslationUnitThan(YL); + + if (X.getString() != Y.getString()) + return X.getString() < Y.getString(); + + if (X.getRanges().size() != Y.getRanges().size()) + return X.getRanges().size() < Y.getRanges().size(); + + const SourceManager &SM = XL.getManager(); + + for (unsigned i = 0, n = X.getRanges().size(); i < n; ++i) { + SourceRange XR = X.getRanges()[i]; + SourceRange YR = 
Y.getRanges()[i]; + if (XR != YR) { + if (XR.getBegin() != YR.getBegin()) + return SM.isBeforeInTranslationUnit(XR.getBegin(), YR.getBegin()); + return SM.isBeforeInTranslationUnit(XR.getEnd(), YR.getEnd()); + } + } + + switch (X.getKind()) { + case PathDiagnosticPiece::ControlFlow: + return compareControlFlow(cast<PathDiagnosticControlFlowPiece>(X), + cast<PathDiagnosticControlFlowPiece>(Y)); + case PathDiagnosticPiece::Macro: + return compareMacro(cast<PathDiagnosticMacroPiece>(X), + cast<PathDiagnosticMacroPiece>(Y)); + case PathDiagnosticPiece::Call: + return compareCall(cast<PathDiagnosticCallPiece>(X), + cast<PathDiagnosticCallPiece>(Y)); + case PathDiagnosticPiece::Event: + case PathDiagnosticPiece::Note: + case PathDiagnosticPiece::PopUp: + return std::nullopt; + } + llvm_unreachable("all cases handled"); +} + +static std::optional<bool> comparePath(const PathPieces &X, + const PathPieces &Y) { + if (X.size() != Y.size()) + return X.size() < Y.size(); + + PathPieces::const_iterator X_I = X.begin(), X_end = X.end(); + PathPieces::const_iterator Y_I = Y.begin(), Y_end = Y.end(); + + for (; X_I != X_end && Y_I != Y_end; ++X_I, ++Y_I) + if (std::optional<bool> b = comparePiece(**X_I, **Y_I)) + return *b; + + return std::nullopt; +} + +static bool compareCrossTUSourceLocs(FullSourceLoc XL, FullSourceLoc YL) { + if (XL.isInvalid() && YL.isValid()) + return true; + if (XL.isValid() && YL.isInvalid()) + return false; + std::pair<FileID, unsigned> XOffs = XL.getDecomposedLoc(); + std::pair<FileID, unsigned> YOffs = YL.getDecomposedLoc(); + const SourceManager &SM = XL.getManager(); + std::pair<bool, bool> InSameTU = SM.isInTheSameTranslationUnit(XOffs, YOffs); + if (InSameTU.first) + return XL.isBeforeInTranslationUnitThan(YL); + const FileEntry *XFE = SM.getFileEntryForID(XL.getSpellingLoc().getFileID()); + const FileEntry *YFE = SM.getFileEntryForID(YL.getSpellingLoc().getFileID()); + if (!XFE || !YFE) + return XFE && !YFE; + int NameCmp = XFE->getName().compare(YFE->getName()); + if (NameCmp != 0) + return NameCmp < 0; + // Last resort: Compare raw file IDs that are possibly expansions. 
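+  // (FileIDs are assigned in creation order, so this comparison is still
+  // deterministic for a given invocation.)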
+ return XL.getFileID() < YL.getFileID(); +} + +static bool compare(const PathDiagnostic &X, const PathDiagnostic &Y) { + FullSourceLoc XL = X.getLocation().asLocation(); + FullSourceLoc YL = Y.getLocation().asLocation(); + if (XL != YL) + return compareCrossTUSourceLocs(XL, YL); + FullSourceLoc XUL = X.getUniqueingLoc().asLocation(); + FullSourceLoc YUL = Y.getUniqueingLoc().asLocation(); + if (XUL != YUL) + return compareCrossTUSourceLocs(XUL, YUL); + if (X.getBugType() != Y.getBugType()) + return X.getBugType() < Y.getBugType(); + if (X.getCategory() != Y.getCategory()) + return X.getCategory() < Y.getCategory(); + if (X.getVerboseDescription() != Y.getVerboseDescription()) + return X.getVerboseDescription() < Y.getVerboseDescription(); + if (X.getShortDescription() != Y.getShortDescription()) + return X.getShortDescription() < Y.getShortDescription(); + auto CompareDecls = [&XL](const Decl *D1, + const Decl *D2) -> std::optional<bool> { + if (D1 == D2) + return std::nullopt; + if (!D1) + return true; + if (!D2) + return false; + SourceLocation D1L = D1->getLocation(); + SourceLocation D2L = D2->getLocation(); + if (D1L != D2L) { + const SourceManager &SM = XL.getManager(); + return compareCrossTUSourceLocs(FullSourceLoc(D1L, SM), + FullSourceLoc(D2L, SM)); + } + return std::nullopt; + }; + if (auto Result = CompareDecls(X.getDeclWithIssue(), Y.getDeclWithIssue())) + return *Result; + if (XUL.isValid()) { + if (auto Result = CompareDecls(X.getUniqueingDecl(), Y.getUniqueingDecl())) + return *Result; + } + PathDiagnostic::meta_iterator XI = X.meta_begin(), XE = X.meta_end(); + PathDiagnostic::meta_iterator YI = Y.meta_begin(), YE = Y.meta_end(); + if (XE - XI != YE - YI) + return (XE - XI) < (YE - YI); + for ( ; XI != XE ; ++XI, ++YI) { + if (*XI != *YI) + return (*XI) < (*YI); + } + return *comparePath(X.path, Y.path); +} + +void PathDiagnosticConsumer::FlushDiagnostics( + PathDiagnosticConsumer::FilesMade *Files) { + if (flushed) + return; + + flushed = true; + + std::vector<const PathDiagnostic *> BatchDiags; + for (const auto &D : Diags) + BatchDiags.push_back(&D); + + // Sort the diagnostics so that they are always emitted in a deterministic + // order. + int (*Comp)(const PathDiagnostic *const *, const PathDiagnostic *const *) = + [](const PathDiagnostic *const *X, const PathDiagnostic *const *Y) { + assert(*X != *Y && "PathDiagnostics not uniqued!"); + if (compare(**X, **Y)) + return -1; + assert(compare(**Y, **X) && "Not a total order!"); + return 1; + }; + array_pod_sort(BatchDiags.begin(), BatchDiags.end(), Comp); + + FlushDiagnosticsImpl(BatchDiags, Files); + + // Delete the flushed diagnostics. + for (const auto D : BatchDiags) + delete D; + + // Clear out the FoldingSet. + Diags.clear(); +} + +PathDiagnosticConsumer::FilesMade::~FilesMade() { + for (auto It = Set.begin(); It != Set.end();) + (It++)->~PDFileEntry(); +} + +void PathDiagnosticConsumer::FilesMade::addDiagnostic(const PathDiagnostic &PD, + StringRef ConsumerName, + StringRef FileName) { + llvm::FoldingSetNodeID NodeID; + NodeID.Add(PD); + void *InsertPos; + PDFileEntry *Entry = Set.FindNodeOrInsertPos(NodeID, InsertPos); + if (!Entry) { + Entry = Alloc.Allocate<PDFileEntry>(); + Entry = new (Entry) PDFileEntry(NodeID); + Set.InsertNode(Entry, InsertPos); + } + + // Allocate persistent storage for the file name. 
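+ // A StringRef does not own its characters; the incoming FileName buffer + // may go away once this call returns, so the name is copied into the + // consumer's BumpPtrAllocator to keep the stored entry valid.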
+ char *FileName_cstr = (char*) Alloc.Allocate(FileName.size(), 1); + memcpy(FileName_cstr, FileName.data(), FileName.size()); + + Entry->files.push_back(std::make_pair(ConsumerName, + StringRef(FileName_cstr, + FileName.size()))); +} + +PathDiagnosticConsumer::PDFileEntry::ConsumerFiles * +PathDiagnosticConsumer::FilesMade::getFiles(const PathDiagnostic &PD) { + llvm::FoldingSetNodeID NodeID; + NodeID.Add(PD); + void *InsertPos; + PDFileEntry *Entry = Set.FindNodeOrInsertPos(NodeID, InsertPos); + if (!Entry) + return nullptr; + return &Entry->files; +} + +//===----------------------------------------------------------------------===// +// PathDiagnosticLocation methods. +//===----------------------------------------------------------------------===// + +SourceLocation PathDiagnosticLocation::getValidSourceLocation( + const Stmt *S, LocationOrAnalysisDeclContext LAC, bool UseEndOfStatement) { + SourceLocation L = UseEndOfStatement ? S->getEndLoc() : S->getBeginLoc(); + assert(!LAC.isNull() && + "A valid LocationContext or AnalysisDeclContext should be passed to " + "PathDiagnosticLocation upon creation."); + + // S might be a temporary statement that does not have a location in the + // source code, so find an enclosing statement and use its location. + if (!L.isValid()) { + AnalysisDeclContext *ADC; + if (LAC.is<const LocationContext*>()) + ADC = LAC.get<const LocationContext*>()->getAnalysisDeclContext(); + else + ADC = LAC.get<AnalysisDeclContext*>(); + + ParentMap &PM = ADC->getParentMap(); + + const Stmt *Parent = S; + do { + Parent = PM.getParent(Parent); + + // In rare cases, we have implicit top-level expressions, + // such as arguments for implicit member initializers. + // In this case, fall back to the start of the body (even if we were + // asked for the statement end location). + if (!Parent) { + const Stmt *Body = ADC->getBody(); + if (Body) + L = Body->getBeginLoc(); + else + L = ADC->getDecl()->getEndLoc(); + break; + } + + L = UseEndOfStatement ? Parent->getEndLoc() : Parent->getBeginLoc(); + } while (!L.isValid()); + } + + // FIXME: Ironically, this assert actually fails in some cases. 
+ //assert(L.isValid()); + return L; +} + +static PathDiagnosticLocation +getLocationForCaller(const StackFrameContext *SFC, + const LocationContext *CallerCtx, + const SourceManager &SM) { + const CFGBlock &Block = *SFC->getCallSiteBlock(); + CFGElement Source = Block[SFC->getIndex()]; + + switch (Source.getKind()) { + case CFGElement::Statement: + case CFGElement::Constructor: + case CFGElement::CXXRecordTypedCall: + return PathDiagnosticLocation(Source.castAs<CFGStmt>().getStmt(), + SM, CallerCtx); + case CFGElement::Initializer: { + const CFGInitializer &Init = Source.castAs<CFGInitializer>(); + return PathDiagnosticLocation(Init.getInitializer()->getInit(), + SM, CallerCtx); + } + case CFGElement::AutomaticObjectDtor: { + const CFGAutomaticObjDtor &Dtor = Source.castAs<CFGAutomaticObjDtor>(); + return PathDiagnosticLocation::createEnd(Dtor.getTriggerStmt(), + SM, CallerCtx); + } + case CFGElement::DeleteDtor: { + const CFGDeleteDtor &Dtor = Source.castAs<CFGDeleteDtor>(); + return PathDiagnosticLocation(Dtor.getDeleteExpr(), SM, CallerCtx); + } + case CFGElement::BaseDtor: + case CFGElement::MemberDtor: { + const AnalysisDeclContext *CallerInfo = CallerCtx->getAnalysisDeclContext(); + if (const Stmt *CallerBody = CallerInfo->getBody()) + return PathDiagnosticLocation::createEnd(CallerBody, SM, CallerCtx); + return PathDiagnosticLocation::create(CallerInfo->getDecl(), SM); + } + case CFGElement::NewAllocator: { + const CFGNewAllocator &Alloc = Source.castAs<CFGNewAllocator>(); + return PathDiagnosticLocation(Alloc.getAllocatorExpr(), SM, CallerCtx); + } + case CFGElement::TemporaryDtor: { + // Temporary destructors are for temporaries. They die immediately, around + // the location of the CXXBindTemporaryExpr. If they are lifetime-extended, + // they'd be dealt with via an AutomaticObjectDtor instead.
+ const auto &Dtor = Source.castAs<CFGTemporaryDtor>(); + return PathDiagnosticLocation::createEnd(Dtor.getBindTemporaryExpr(), SM, + CallerCtx); + } + case CFGElement::ScopeBegin: + case CFGElement::ScopeEnd: + llvm_unreachable("not yet implemented!"); + case CFGElement::LifetimeEnds: + case CFGElement::LoopExit: + llvm_unreachable("CFGElement kind should not be on callsite!"); + } + + llvm_unreachable("Unknown CFGElement kind"); +} + +PathDiagnosticLocation +PathDiagnosticLocation::createBegin(const Decl *D, + const SourceManager &SM) { + return PathDiagnosticLocation(D->getBeginLoc(), SM, SingleLocK); +} + +PathDiagnosticLocation +PathDiagnosticLocation::createBegin(const Stmt *S, + const SourceManager &SM, + LocationOrAnalysisDeclContext LAC) { + return PathDiagnosticLocation(getValidSourceLocation(S, LAC), + SM, SingleLocK); +} + +PathDiagnosticLocation +PathDiagnosticLocation::createEnd(const Stmt *S, + const SourceManager &SM, + LocationOrAnalysisDeclContext LAC) { + if (const auto *CS = dyn_cast<CompoundStmt>(S)) + return createEndBrace(CS, SM); + return PathDiagnosticLocation(getValidSourceLocation(S, LAC, /*End=*/true), + SM, SingleLocK); +} + +PathDiagnosticLocation +PathDiagnosticLocation::createOperatorLoc(const BinaryOperator *BO, + const SourceManager &SM) { + return PathDiagnosticLocation(BO->getOperatorLoc(), SM, SingleLocK); +} + +PathDiagnosticLocation +PathDiagnosticLocation::createConditionalColonLoc( + const ConditionalOperator *CO, + const SourceManager &SM) { + return PathDiagnosticLocation(CO->getColonLoc(), SM, SingleLocK); +} + +PathDiagnosticLocation +PathDiagnosticLocation::createMemberLoc(const MemberExpr *ME, + const SourceManager &SM) { + + assert(ME->getMemberLoc().isValid() || ME->getBeginLoc().isValid()); + + // In some cases, getMemberLoc isn't valid -- in this case we'll return + // some other related valid SourceLocation. + if (ME->getMemberLoc().isValid()) + return PathDiagnosticLocation(ME->getMemberLoc(), SM, SingleLocK); + + return PathDiagnosticLocation(ME->getBeginLoc(), SM, SingleLocK); +} + +PathDiagnosticLocation +PathDiagnosticLocation::createBeginBrace(const CompoundStmt *CS, + const SourceManager &SM) { + SourceLocation L = CS->getLBracLoc(); + return PathDiagnosticLocation(L, SM, SingleLocK); +} + +PathDiagnosticLocation +PathDiagnosticLocation::createEndBrace(const CompoundStmt *CS, + const SourceManager &SM) { + SourceLocation L = CS->getRBracLoc(); + return PathDiagnosticLocation(L, SM, SingleLocK); +} + +PathDiagnosticLocation +PathDiagnosticLocation::createDeclBegin(const LocationContext *LC, + const SourceManager &SM) { + // FIXME: Should handle CXXTryStmt if the analyzer starts supporting C++. + if (const auto *CS = dyn_cast_or_null<CompoundStmt>(LC->getDecl()->getBody())) + if (!CS->body_empty()) { + SourceLocation Loc = (*CS->body_begin())->getBeginLoc(); + return PathDiagnosticLocation(Loc, SM, SingleLocK); + } + + return PathDiagnosticLocation(); +} + +PathDiagnosticLocation +PathDiagnosticLocation::createDeclEnd(const LocationContext *LC, + const SourceManager &SM) { + SourceLocation L = LC->getDecl()->getBodyRBrace(); + return PathDiagnosticLocation(L, SM, SingleLocK); +} + +PathDiagnosticLocation +PathDiagnosticLocation::create(const ProgramPoint& P, + const SourceManager &SMng) { + const Stmt* S = nullptr; + if (std::optional<BlockEdge> BE = P.getAs<BlockEdge>()) { + const CFGBlock *BSrc = BE->getSrc(); + if (BSrc->getTerminator().isVirtualBaseBranch()) { + // TODO: VirtualBaseBranches should also appear for destructors.
+ // In this case we should put the diagnostic at the end of the decl. + return PathDiagnosticLocation::createBegin( + P.getLocationContext()->getDecl(), SMng); + + } else { + S = BSrc->getTerminatorCondition(); + if (!S) { + // If the BlockEdge has no terminator condition statement but its + // source is the entry of the CFG (e.g. a checker created the branch at + // the beginning of a function), use the function's declaration instead. + assert(BSrc == &BSrc->getParent()->getEntry() && "CFGBlock has no " + "TerminatorCondition and is not the entry block of the CFG"); + return PathDiagnosticLocation::createBegin( + P.getLocationContext()->getDecl(), SMng); + } + } + } else if (std::optional<StmtPoint> SP = P.getAs<StmtPoint>()) { + S = SP->getStmt(); + if (P.getAs<PostStmtPurgeDeadSymbols>()) + return PathDiagnosticLocation::createEnd(S, SMng, P.getLocationContext()); + } else if (std::optional<PostInitializer> PIP = P.getAs<PostInitializer>()) { + return PathDiagnosticLocation(PIP->getInitializer()->getSourceLocation(), + SMng); + } else if (std::optional<PreImplicitCall> PIC = P.getAs<PreImplicitCall>()) { + return PathDiagnosticLocation(PIC->getLocation(), SMng); + } else if (std::optional<PostImplicitCall> PIE = + P.getAs<PostImplicitCall>()) { + return PathDiagnosticLocation(PIE->getLocation(), SMng); + } else if (std::optional<CallEnter> CE = P.getAs<CallEnter>()) { + return getLocationForCaller(CE->getCalleeContext(), + CE->getLocationContext(), + SMng); + } else if (std::optional<CallExitEnd> CEE = P.getAs<CallExitEnd>()) { + return getLocationForCaller(CEE->getCalleeContext(), + CEE->getLocationContext(), + SMng); + } else if (auto CEB = P.getAs<CallExitBegin>()) { + if (const ReturnStmt *RS = CEB->getReturnStmt()) + return PathDiagnosticLocation::createBegin(RS, SMng, + CEB->getLocationContext()); + return PathDiagnosticLocation( + CEB->getLocationContext()->getDecl()->getSourceRange().getEnd(), SMng); + } else if (std::optional<BlockEntrance> BE = P.getAs<BlockEntrance>()) { + if (std::optional<CFGElement> BlockFront = BE->getFirstElement()) { + if (auto StmtElt = BlockFront->getAs<CFGStmt>()) { + return PathDiagnosticLocation(StmtElt->getStmt()->getBeginLoc(), SMng); + } else if (auto NewAllocElt = BlockFront->getAs<CFGNewAllocator>()) { + return PathDiagnosticLocation( + NewAllocElt->getAllocatorExpr()->getBeginLoc(), SMng); + } + llvm_unreachable("Unexpected CFG element at front of block"); + } + + return PathDiagnosticLocation( + BE->getBlock()->getTerminatorStmt()->getBeginLoc(), SMng); + } else if (std::optional<FunctionExitPoint> FE = + P.getAs<FunctionExitPoint>()) { + return PathDiagnosticLocation(FE->getStmt(), SMng, + FE->getLocationContext()); + } else { + llvm_unreachable("Unexpected ProgramPoint"); + } + + return PathDiagnosticLocation(S, SMng, P.getLocationContext()); +} + +PathDiagnosticLocation PathDiagnosticLocation::createSingleLocation( + const PathDiagnosticLocation &PDL) { + FullSourceLoc L = PDL.asLocation(); + return PathDiagnosticLocation(L, L.getManager(), SingleLocK); +} + +FullSourceLoc + PathDiagnosticLocation::genLocation(SourceLocation L, + LocationOrAnalysisDeclContext LAC) const { + assert(isValid()); + // Note that we want a 'switch' here so that the compiler can warn us in + // case we add more cases. + switch (K) { + case SingleLocK: + case RangeK: + break; + case StmtK: + // Defensive checking. + if (!S) + break; + return FullSourceLoc(getValidSourceLocation(S, LAC), + const_cast<SourceManager&>(*SM)); + case DeclK: + // Defensive checking.
+ if (!D) + break; + return FullSourceLoc(D->getLocation(), const_cast<SourceManager&>(*SM)); + } + + return FullSourceLoc(L, const_cast<SourceManager&>(*SM)); +} + +PathDiagnosticRange + PathDiagnosticLocation::genRange(LocationOrAnalysisDeclContext LAC) const { + assert(isValid()); + // Note that we want a 'switch' here so that the compiler can warn us in + // case we add more cases. + switch (K) { + case SingleLocK: + return PathDiagnosticRange(SourceRange(Loc,Loc), true); + case RangeK: + break; + case StmtK: { + const Stmt *S = asStmt(); + switch (S->getStmtClass()) { + default: + break; + case Stmt::DeclStmtClass: { + const auto *DS = cast<DeclStmt>(S); + if (DS->isSingleDecl()) { + // Should always be the case, but we'll be defensive. + return SourceRange(DS->getBeginLoc(), + DS->getSingleDecl()->getLocation()); + } + break; + } + // FIXME: Provide better range information for different + // terminators. + case Stmt::IfStmtClass: + case Stmt::WhileStmtClass: + case Stmt::DoStmtClass: + case Stmt::ForStmtClass: + case Stmt::ChooseExprClass: + case Stmt::IndirectGotoStmtClass: + case Stmt::SwitchStmtClass: + case Stmt::BinaryConditionalOperatorClass: + case Stmt::ConditionalOperatorClass: + case Stmt::ObjCForCollectionStmtClass: { + SourceLocation L = getValidSourceLocation(S, LAC); + return SourceRange(L, L); + } + } + SourceRange R = S->getSourceRange(); + if (R.isValid()) + return R; + break; + } + case DeclK: + if (const auto *MD = dyn_cast<ObjCMethodDecl>(D)) + return MD->getSourceRange(); + if (const auto *FD = dyn_cast<FunctionDecl>(D)) { + if (Stmt *Body = FD->getBody()) + return Body->getSourceRange(); + } + else { + SourceLocation L = D->getLocation(); + return PathDiagnosticRange(SourceRange(L, L), true); + } + } + + return SourceRange(Loc, Loc); +} + +void PathDiagnosticLocation::flatten() { + if (K == StmtK) { + K = RangeK; + S = nullptr; + D = nullptr; + } + else if (K == DeclK) { + K = SingleLocK; + S = nullptr; + D = nullptr; + } +} + +//===----------------------------------------------------------------------===// +// Manipulation of PathDiagnosticCallPieces. +//===----------------------------------------------------------------------===// + +std::shared_ptr<PathDiagnosticCallPiece> +PathDiagnosticCallPiece::construct(const CallExitEnd &CE, + const SourceManager &SM) { + const Decl *caller = CE.getLocationContext()->getDecl(); + PathDiagnosticLocation pos = getLocationForCaller(CE.getCalleeContext(), + CE.getLocationContext(), + SM); + return std::shared_ptr<PathDiagnosticCallPiece>( + new PathDiagnosticCallPiece(caller, pos)); +} + +PathDiagnosticCallPiece * +PathDiagnosticCallPiece::construct(PathPieces &path, + const Decl *caller) { + std::shared_ptr<PathDiagnosticCallPiece> C( + new PathDiagnosticCallPiece(path, caller)); + path.clear(); + auto *R = C.get(); + path.push_front(std::move(C)); + return R; +} + +void PathDiagnosticCallPiece::setCallee(const CallEnter &CE, + const SourceManager &SM) { + const StackFrameContext *CalleeCtx = CE.getCalleeContext(); + Callee = CalleeCtx->getDecl(); + + callEnterWithin = PathDiagnosticLocation::createBegin(Callee, SM); + callEnter = getLocationForCaller(CalleeCtx, CE.getLocationContext(), SM); + + // Autosynthesized property accessors are special because we'd never + // pop back up to non-autosynthesized code until we leave them. + // This is not generally true for autosynthesized callees, which may call + // non-autosynthesized callbacks. 
+ // Unless set here, the IsCalleeAnAutosynthesizedPropertyAccessor flag + // defaults to false. + if (const auto *MD = dyn_cast<ObjCMethodDecl>(Callee)) + IsCalleeAnAutosynthesizedPropertyAccessor = ( + MD->isPropertyAccessor() && + CalleeCtx->getAnalysisDeclContext()->isBodyAutosynthesized()); +} + +static void describeTemplateParameters(raw_ostream &Out, + const ArrayRef<TemplateArgument> TAList, + const LangOptions &LO, + StringRef Prefix = StringRef(), + StringRef Postfix = StringRef()); + +static void describeTemplateParameter(raw_ostream &Out, + const TemplateArgument &TArg, + const LangOptions &LO) { + + if (TArg.getKind() == TemplateArgument::ArgKind::Pack) { + describeTemplateParameters(Out, TArg.getPackAsArray(), LO); + } else { + TArg.print(PrintingPolicy(LO), Out, /*IncludeType*/ true); + } +} + +static void describeTemplateParameters(raw_ostream &Out, + const ArrayRef<TemplateArgument> TAList, + const LangOptions &LO, + StringRef Prefix, StringRef Postfix) { + if (TAList.empty()) + return; + + Out << Prefix; + for (int I = 0, Last = TAList.size() - 1; I != Last; ++I) { + describeTemplateParameter(Out, TAList[I], LO); + Out << ", "; + } + describeTemplateParameter(Out, TAList[TAList.size() - 1], LO); + Out << Postfix; +} + +static void describeClass(raw_ostream &Out, const CXXRecordDecl *D, + StringRef Prefix = StringRef()) { + if (!D->getIdentifier()) + return; + Out << Prefix << '\'' << *D; + if (const auto T = dyn_cast<ClassTemplateSpecializationDecl>(D)) + describeTemplateParameters(Out, T->getTemplateArgs().asArray(), + D->getLangOpts(), "<", ">"); + + Out << '\''; +} + +static bool describeCodeDecl(raw_ostream &Out, const Decl *D, + bool ExtendedDescription, + StringRef Prefix = StringRef()) { + if (!D) + return false; + + if (isa<BlockDecl>(D)) { + if (ExtendedDescription) + Out << Prefix << "anonymous block"; + return ExtendedDescription; + } + + if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) { + Out << Prefix; + if (ExtendedDescription && !MD->isUserProvided()) { + if (MD->isExplicitlyDefaulted()) + Out << "defaulted "; + else + Out << "implicit "; + } + + if (const auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { + if (CD->isDefaultConstructor()) + Out << "default "; + else if (CD->isCopyConstructor()) + Out << "copy "; + else if (CD->isMoveConstructor()) + Out << "move "; + + Out << "constructor"; + describeClass(Out, MD->getParent(), " for "); + } else if (isa<CXXDestructorDecl>(MD)) { + if (!MD->isUserProvided()) { + Out << "destructor"; + describeClass(Out, MD->getParent(), " for "); + } else { + // Use ~Foo for explicitly-written destructors. + Out << "'" << *MD << "'"; + } + } else if (MD->isCopyAssignmentOperator()) { + Out << "copy assignment operator"; + describeClass(Out, MD->getParent(), " for "); + } else if (MD->isMoveAssignmentOperator()) { + Out << "move assignment operator"; + describeClass(Out, MD->getParent(), " for "); + } else { + if (MD->getParent()->getIdentifier()) + Out << "'" << *MD->getParent() << "::" << *MD << "'"; + else + Out << "'" << *MD << "'"; + } + + return true; + } + + Out << Prefix << '\'' << cast<NamedDecl>(*D); + + // Adding template parameters. 
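+ // e.g. a specialization of 'template <typename T> void f(T)' with T = int + // would be described as 'f<int>' (illustrative example).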
+ if (const auto FD = dyn_cast<FunctionDecl>(D)) + if (const TemplateArgumentList *TAList = + FD->getTemplateSpecializationArgs()) + describeTemplateParameters(Out, TAList->asArray(), FD->getLangOpts(), "<", + ">"); + + Out << '\''; + return true; +} + +std::shared_ptr<PathDiagnosticEventPiece> +PathDiagnosticCallPiece::getCallEnterEvent() const { + // We do not produce call enters and call exits for autosynthesized property + // accessors. We do generally produce them for other functions coming from + // the body farm because they may call callbacks that bring us back into + // visible code. + if (!Callee || IsCalleeAnAutosynthesizedPropertyAccessor) + return nullptr; + + SmallString<256> buf; + llvm::raw_svector_ostream Out(buf); + + Out << "Calling "; + describeCodeDecl(Out, Callee, /*ExtendedDescription=*/true); + + assert(callEnter.asLocation().isValid()); + return std::make_shared<PathDiagnosticEventPiece>(callEnter, Out.str()); +} + +std::shared_ptr<PathDiagnosticEventPiece> +PathDiagnosticCallPiece::getCallEnterWithinCallerEvent() const { + if (!callEnterWithin.asLocation().isValid()) + return nullptr; + if (Callee->isImplicit() || !Callee->hasBody()) + return nullptr; + if (const auto *MD = dyn_cast<CXXMethodDecl>(Callee)) + if (MD->isDefaulted()) + return nullptr; + + SmallString<256> buf; + llvm::raw_svector_ostream Out(buf); + + Out << "Entered call"; + describeCodeDecl(Out, Caller, /*ExtendedDescription=*/false, " from "); + + return std::make_shared<PathDiagnosticEventPiece>(callEnterWithin, Out.str()); +} + +std::shared_ptr<PathDiagnosticEventPiece> +PathDiagnosticCallPiece::getCallExitEvent() const { + // We do not produce call enters and call exits for autosynthesized property + // accessors. We do generally produce them for other functions coming from + // the body farm because they may call callbacks that bring us back into + // visible code. + if (NoExit || IsCalleeAnAutosynthesizedPropertyAccessor) + return nullptr; + + SmallString<256> buf; + llvm::raw_svector_ostream Out(buf); + + if (!CallStackMessage.empty()) { + Out << CallStackMessage; + } else { + bool DidDescribe = describeCodeDecl(Out, Callee, + /*ExtendedDescription=*/false, + "Returning from "); + if (!DidDescribe) + Out << "Returning to caller"; + } + + assert(callReturn.asLocation().isValid()); + return std::make_shared<PathDiagnosticEventPiece>(callReturn, Out.str()); +} + +static void compute_path_size(const PathPieces &pieces, unsigned &size) { + for (const auto &I : pieces) { + const PathDiagnosticPiece *piece = I.get(); + if (const auto *cp = dyn_cast<PathDiagnosticCallPiece>(piece)) + compute_path_size(cp->path, size); + else + ++size; + } +} + +unsigned PathDiagnostic::full_size() { + unsigned size = 0; + compute_path_size(path, size); + return size; +} + +//===----------------------------------------------------------------------===// +// FoldingSet profiling methods. +//===----------------------------------------------------------------------===// + +void PathDiagnosticLocation::Profile(llvm::FoldingSetNodeID &ID) const { + ID.Add(Range.getBegin()); + ID.Add(Range.getEnd()); + ID.Add(static_cast<const SourceLocation &>(Loc)); +} + +void PathDiagnosticPiece::Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger((unsigned) getKind()); + ID.AddString(str); + // FIXME: Add profiling support for code hints. 
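+ // These fields form each piece's folding-set identity; FullProfile() below + // folds in every piece of the path, so two diagnostics with identical + // paths profile the same way.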
+ ID.AddInteger((unsigned) getDisplayHint()); + ArrayRef<SourceRange> Ranges = getRanges(); + for (const auto &I : Ranges) { + ID.Add(I.getBegin()); + ID.Add(I.getEnd()); + } +} + +void PathDiagnosticCallPiece::Profile(llvm::FoldingSetNodeID &ID) const { + PathDiagnosticPiece::Profile(ID); + for (const auto &I : path) + ID.Add(*I); +} + +void PathDiagnosticSpotPiece::Profile(llvm::FoldingSetNodeID &ID) const { + PathDiagnosticPiece::Profile(ID); + ID.Add(Pos); +} + +void PathDiagnosticControlFlowPiece::Profile(llvm::FoldingSetNodeID &ID) const { + PathDiagnosticPiece::Profile(ID); + for (const auto &I : *this) + ID.Add(I); +} + +void PathDiagnosticMacroPiece::Profile(llvm::FoldingSetNodeID &ID) const { + PathDiagnosticSpotPiece::Profile(ID); + for (const auto &I : subPieces) + ID.Add(*I); +} + +void PathDiagnosticNotePiece::Profile(llvm::FoldingSetNodeID &ID) const { + PathDiagnosticSpotPiece::Profile(ID); +} + +void PathDiagnosticPopUpPiece::Profile(llvm::FoldingSetNodeID &ID) const { + PathDiagnosticSpotPiece::Profile(ID); +} + +void PathDiagnostic::Profile(llvm::FoldingSetNodeID &ID) const { + ID.Add(getLocation()); + ID.Add(getUniqueingLoc()); + ID.AddString(BugType); + ID.AddString(VerboseDesc); + ID.AddString(Category); +} + +void PathDiagnostic::FullProfile(llvm::FoldingSetNodeID &ID) const { + Profile(ID); + for (const auto &I : path) + ID.Add(*I); + for (meta_iterator I = meta_begin(), E = meta_end(); I != E; ++I) + ID.AddString(*I); +} + +LLVM_DUMP_METHOD void PathPieces::dump() const { + unsigned index = 0; + for (PathPieces::const_iterator I = begin(), E = end(); I != E; ++I) { + llvm::errs() << "[" << index++ << "] "; + (*I)->dump(); + llvm::errs() << "\n"; + } +} + +LLVM_DUMP_METHOD void PathDiagnosticCallPiece::dump() const { + llvm::errs() << "CALL\n--------------\n"; + + if (const Stmt *SLoc = getLocation().getStmtOrNull()) + SLoc->dump(); + else if (const auto *ND = dyn_cast_or_null<NamedDecl>(getCallee())) + llvm::errs() << *ND << "\n"; + else + getLocation().dump(); +} + +LLVM_DUMP_METHOD void PathDiagnosticEventPiece::dump() const { + llvm::errs() << "EVENT\n--------------\n"; + llvm::errs() << getString() << "\n"; + llvm::errs() << " ---- at ----\n"; + getLocation().dump(); +} + +LLVM_DUMP_METHOD void PathDiagnosticControlFlowPiece::dump() const { + llvm::errs() << "CONTROL\n--------------\n"; + getStartLocation().dump(); + llvm::errs() << " ---- to ----\n"; + getEndLocation().dump(); +} + +LLVM_DUMP_METHOD void PathDiagnosticMacroPiece::dump() const { + llvm::errs() << "MACRO\n--------------\n"; + // FIXME: Print which macro is being invoked. +} + +LLVM_DUMP_METHOD void PathDiagnosticNotePiece::dump() const { + llvm::errs() << "NOTE\n--------------\n"; + llvm::errs() << getString() << "\n"; + llvm::errs() << " ---- at ----\n"; + getLocation().dump(); +} + +LLVM_DUMP_METHOD void PathDiagnosticPopUpPiece::dump() const { + llvm::errs() << "POP-UP\n--------------\n"; + llvm::errs() << getString() << "\n"; + llvm::errs() << " ---- at ----\n"; + getLocation().dump(); +} + +LLVM_DUMP_METHOD void PathDiagnosticLocation::dump() const { + if (!isValid()) { + llvm::errs() << "<INVALID>\n"; + return; + } + + switch (K) { + case RangeK: + // FIXME: actually print the range. 
+ llvm::errs() << "<range>\n"; + break; + case SingleLocK: + asLocation().dump(); + llvm::errs() << "\n"; + break; + case StmtK: + if (S) + S->dump(); + else + llvm::errs() << "<NULL STMT>\n"; + break; + case DeclK: + if (const auto *ND = dyn_cast_or_null<NamedDecl>(D)) + llvm::errs() << *ND << "\n"; + else if (isa<BlockDecl>(D)) + // FIXME: Make this nicer. + llvm::errs() << "<block>\n"; + else if (D) + llvm::errs() << "<unknown decl>\n"; + else + llvm::errs() << "<NULL DECL>\n"; + break; + } +} diff --git a/contrib/llvm-project/clang/lib/Analysis/PostOrderCFGView.cpp b/contrib/llvm-project/clang/lib/Analysis/PostOrderCFGView.cpp new file mode 100644 index 000000000000..0c09c0f97ff6 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/PostOrderCFGView.cpp @@ -0,0 +1,50 @@ +//===- PostOrderCFGView.cpp - Post order view of CFG blocks ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a post-order view of the blocks in a CFG. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" + +using namespace clang; + +void PostOrderCFGView::anchor() {} + +PostOrderCFGView::PostOrderCFGView(const CFG *cfg) { + Blocks.reserve(cfg->getNumBlockIDs()); + CFGBlockSet BSet(cfg); + + for (po_iterator I = po_iterator::begin(cfg, BSet), + E = po_iterator::end(cfg, BSet); I != E; ++I) { + BlockOrder[*I] = Blocks.size() + 1; + Blocks.push_back(*I); + } +} + +std::unique_ptr<PostOrderCFGView> +PostOrderCFGView::create(AnalysisDeclContext &ctx) { + const CFG *cfg = ctx.getCFG(); + if (!cfg) + return nullptr; + return std::make_unique<PostOrderCFGView>(cfg); +} + +const void *PostOrderCFGView::getTag() { static int x; return &x; } + +bool PostOrderCFGView::BlockOrderCompare::operator()(const CFGBlock *b1, + const CFGBlock *b2) const { + PostOrderCFGView::BlockOrderTy::const_iterator b1It = POV.BlockOrder.find(b1); + PostOrderCFGView::BlockOrderTy::const_iterator b2It = POV.BlockOrder.find(b2); + + unsigned b1V = (b1It == POV.BlockOrder.end()) ? 0 : b1It->second; + unsigned b2V = (b2It == POV.BlockOrder.end()) ? 0 : b2It->second; + return b1V > b2V; +} diff --git a/contrib/llvm-project/clang/lib/Analysis/ProgramPoint.cpp b/contrib/llvm-project/clang/lib/Analysis/ProgramPoint.cpp new file mode 100644 index 000000000000..2a91749affd2 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/ProgramPoint.cpp @@ -0,0 +1,243 @@ +//==- ProgramPoint.cpp - Program Points for Path-Sensitive Analysis -*- C++ -*-/ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface ProgramPoint, which identifies a +// distinct location in a function.
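+// A ProgramPoint pairs a kind (e.g. pre- or post-statement, block entrance, +// block edge, implicit call) with a LocationContext, plus an optional +// ProgramPointTag identifying the entity that created the point.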
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/ProgramPoint.h" +#include "clang/AST/ASTContext.h" +#include "clang/Basic/JsonSupport.h" + +using namespace clang; + +ProgramPointTag::~ProgramPointTag() {} + +ProgramPoint ProgramPoint::getProgramPoint(const Stmt *S, ProgramPoint::Kind K, + const LocationContext *LC, + const ProgramPointTag *tag){ + switch (K) { + default: + llvm_unreachable("Unhandled ProgramPoint kind"); + case ProgramPoint::PreStmtKind: + return PreStmt(S, LC, tag); + case ProgramPoint::PostStmtKind: + return PostStmt(S, LC, tag); + case ProgramPoint::PreLoadKind: + return PreLoad(S, LC, tag); + case ProgramPoint::PostLoadKind: + return PostLoad(S, LC, tag); + case ProgramPoint::PreStoreKind: + return PreStore(S, LC, tag); + case ProgramPoint::PostLValueKind: + return PostLValue(S, LC, tag); + case ProgramPoint::PostStmtPurgeDeadSymbolsKind: + return PostStmtPurgeDeadSymbols(S, LC, tag); + case ProgramPoint::PreStmtPurgeDeadSymbolsKind: + return PreStmtPurgeDeadSymbols(S, LC, tag); + } +} + +LLVM_DUMP_METHOD void ProgramPoint::dump() const { + return printJson(llvm::errs()); +} + +void ProgramPoint::printJson(llvm::raw_ostream &Out, const char *NL) const { + const ASTContext &Context = + getLocationContext()->getAnalysisDeclContext()->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const PrintingPolicy &PP = Context.getPrintingPolicy(); + const bool AddQuotes = true; + + Out << "\"kind\": \""; + switch (getKind()) { + case ProgramPoint::BlockEntranceKind: + Out << "BlockEntrance\"" + << ", \"block_id\": " + << castAs<BlockEntrance>().getBlock()->getBlockID(); + break; + + case ProgramPoint::FunctionExitKind: { + auto FEP = getAs<FunctionExitPoint>(); + Out << "FunctionExit\"" + << ", \"block_id\": " << FEP->getBlock()->getBlockID() + << ", \"stmt_id\": "; + + if (const ReturnStmt *RS = FEP->getStmt()) { + Out << RS->getID(Context) << ", \"stmt\": "; + RS->printJson(Out, nullptr, PP, AddQuotes); + } else { + Out << "null, \"stmt\": null"; + } + break; + } + case ProgramPoint::BlockExitKind: + llvm_unreachable("BlockExitKind"); + break; + case ProgramPoint::CallEnterKind: + Out << "CallEnter\""; + break; + case ProgramPoint::CallExitBeginKind: + Out << "CallExitBegin\""; + break; + case ProgramPoint::CallExitEndKind: + Out << "CallExitEnd\""; + break; + case ProgramPoint::EpsilonKind: + Out << "EpsilonPoint\""; + break; + + case ProgramPoint::LoopExitKind: + Out << "LoopExit\", \"stmt\": \"" + << castAs<LoopExit>().getLoopStmt()->getStmtClassName() << '\"'; + break; + + case ProgramPoint::PreImplicitCallKind: { + ImplicitCallPoint PC = castAs<ImplicitCallPoint>(); + Out << "PreCall\", \"decl\": \"" + << PC.getDecl()->getAsFunction()->getQualifiedNameAsString() + << "\", \"location\": "; + printSourceLocationAsJson(Out, PC.getLocation(), SM); + break; + } + + case ProgramPoint::PostImplicitCallKind: { + ImplicitCallPoint PC = castAs<ImplicitCallPoint>(); + Out << "PostCall\", \"decl\": \"" + << PC.getDecl()->getAsFunction()->getQualifiedNameAsString() + << "\", \"location\": "; + printSourceLocationAsJson(Out, PC.getLocation(), SM); + break; + } + + case ProgramPoint::PostInitializerKind: { + Out << "PostInitializer\", "; + const CXXCtorInitializer *Init = castAs<PostInitializer>().getInitializer(); + if (const FieldDecl *FD = Init->getAnyMember()) { + Out << "\"field_decl\": \"" << *FD << '\"'; + } else { + Out << "\"type\": \""; + QualType Ty = Init->getTypeSourceInfo()->getType(); 
+ Ty = Ty.getLocalUnqualifiedType(); + Ty.print(Out, Context.getLangOpts()); + Out << '\"'; + } + break; + } + + case ProgramPoint::BlockEdgeKind: { + const BlockEdge &E = castAs<BlockEdge>(); + const Stmt *T = E.getSrc()->getTerminatorStmt(); + Out << "Edge\", \"src_id\": " << E.getSrc()->getBlockID() + << ", \"dst_id\": " << E.getDst()->getBlockID() << ", \"terminator\": "; + + if (!T) { + Out << "null, \"term_kind\": null"; + break; + } + + E.getSrc()->printTerminatorJson(Out, Context.getLangOpts(), + /*AddQuotes=*/true); + Out << ", \"location\": "; + printSourceLocationAsJson(Out, T->getBeginLoc(), SM); + + Out << ", \"term_kind\": \""; + if (isa<SwitchStmt>(T)) { + Out << "SwitchStmt\", \"case\": "; + if (const Stmt *Label = E.getDst()->getLabel()) { + if (const auto *C = dyn_cast<CaseStmt>(Label)) { + Out << "{ \"lhs\": "; + if (const Stmt *LHS = C->getLHS()) { + LHS->printJson(Out, nullptr, PP, AddQuotes); + } else { + Out << "null"; + } + + Out << ", \"rhs\": "; + if (const Stmt *RHS = C->getRHS()) { + RHS->printJson(Out, nullptr, PP, AddQuotes); + } else { + Out << "null"; + } + Out << " }"; + } else { + assert(isa<DefaultStmt>(Label)); + Out << "\"default\""; + } + } else { + Out << "\"implicit default\""; + } + } else if (isa<IndirectGotoStmt>(T)) { + // FIXME: More info. + Out << "IndirectGotoStmt\""; + } else { + Out << "Condition\", \"value\": " + << (*E.getSrc()->succ_begin() == E.getDst() ? "true" : "false"); + } + break; + } + + default: { + const Stmt *S = castAs<StmtPoint>().getStmt(); + assert(S != nullptr && "Expecting non-null Stmt"); + + Out << "Statement\", \"stmt_kind\": \"" << S->getStmtClassName() + << "\", \"stmt_id\": " << S->getID(Context) + << ", \"pointer\": \"" << (const void *)S << "\", "; + if (const auto *CS = dyn_cast<CastExpr>(S)) + Out << "\"cast_kind\": \"" << CS->getCastKindName() << "\", "; + + Out << "\"pretty\": "; + + S->printJson(Out, nullptr, PP, AddQuotes); + + Out << ", \"location\": "; + printSourceLocationAsJson(Out, S->getBeginLoc(), SM); + + Out << ", \"stmt_point_kind\": \""; + if (getAs<PreLoad>()) + Out << "PreLoad"; + else if (getAs<PreStore>()) + Out << "PreStore"; + else if (getAs<PostAllocatorCall>()) + Out << "PostAllocatorCall"; + else if (getAs<PostCondition>()) + Out << "PostCondition"; + else if (getAs<PostLoad>()) + Out << "PostLoad"; + else if (getAs<PostLValue>()) + Out << "PostLValue"; + else if (getAs<PostStore>()) + Out << "PostStore"; + else if (getAs<PostStmt>()) + Out << "PostStmt"; + else if (getAs<PostStmtPurgeDeadSymbols>()) + Out << "PostStmtPurgeDeadSymbols"; + else if (getAs<PreStmtPurgeDeadSymbols>()) + Out << "PreStmtPurgeDeadSymbols"; + else if (getAs<PreStmt>()) + Out << "PreStmt"; + else { + Out << "\nKind: '" << getKind(); + llvm_unreachable("' is unhandled StmtPoint kind!"); + } + + Out << '\"'; + break; + } + } +} + +SimpleProgramPointTag::SimpleProgramPointTag(StringRef MsgProvider, + StringRef Msg) + : Desc((MsgProvider + " : " + Msg).str()) {} + +StringRef SimpleProgramPointTag::getTagDescription() const { + return Desc; +} diff --git a/contrib/llvm-project/clang/lib/Analysis/ReachableCode.cpp b/contrib/llvm-project/clang/lib/Analysis/ReachableCode.cpp new file mode 100644 index 000000000000..5cc63bb17b09 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/ReachableCode.cpp @@ -0,0 +1,723 @@ +//===-- ReachableCode.cpp - Code Reachability Analysis --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a flow-sensitive, path-insensitive analysis for +// determining reachable blocks within a CFG. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/ReachableCode.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprObjC.h" +#include "clang/AST/ParentMap.h" +#include "clang/AST/StmtCXX.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include <optional> + +using namespace clang; + +//===----------------------------------------------------------------------===// +// Core Reachability Analysis routines. +//===----------------------------------------------------------------------===// + +static bool isEnumConstant(const Expr *Ex) { + const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Ex); + if (!DR) + return false; + return isa<EnumConstantDecl>(DR->getDecl()); +} + +static bool isTrivialExpression(const Expr *Ex) { + Ex = Ex->IgnoreParenCasts(); + return isa<IntegerLiteral>(Ex) || isa<StringLiteral>(Ex) || + isa<CXXBoolLiteralExpr>(Ex) || isa<ObjCBoolLiteralExpr>(Ex) || + isa<CharacterLiteral>(Ex) || + isEnumConstant(Ex); +} + +static bool isTrivialDoWhile(const CFGBlock *B, const Stmt *S) { + // Check if the block ends with a do...while() and see if 'S' is the + // condition. + if (const Stmt *Term = B->getTerminatorStmt()) { + if (const DoStmt *DS = dyn_cast<DoStmt>(Term)) { + const Expr *Cond = DS->getCond()->IgnoreParenCasts(); + return Cond == S && isTrivialExpression(Cond); + } + } + return false; +} + +static bool isBuiltinUnreachable(const Stmt *S) { + if (const auto *DRE = dyn_cast<DeclRefExpr>(S)) + if (const auto *FDecl = dyn_cast<FunctionDecl>(DRE->getDecl())) + return FDecl->getIdentifier() && + FDecl->getBuiltinID() == Builtin::BI__builtin_unreachable; + return false; +} + +static bool isBuiltinAssumeFalse(const CFGBlock *B, const Stmt *S, + ASTContext &C) { + if (B->empty()) { + // Happens if S is B's terminator and B contains nothing else + // (e.g. a CFGBlock containing only a goto). + return false; + } + if (std::optional<CFGStmt> CS = B->back().getAs<CFGStmt>()) { + if (const auto *CE = dyn_cast<CallExpr>(CS->getStmt())) { + return CE->getCallee()->IgnoreCasts() == S && CE->isBuiltinAssumeFalse(C); + } + } + return false; +} + +static bool isDeadReturn(const CFGBlock *B, const Stmt *S) { + // Look to see if the current control flow ends with a 'return', and see if + // 'S' is a substatement. The 'return' may not be the last element in the + // block, or may be in a subsequent block because of destructors. + const CFGBlock *Current = B; + while (true) { + for (const CFGElement &CE : llvm::reverse(*Current)) { + if (std::optional<CFGStmt> CS = CE.getAs<CFGStmt>()) { + if (const ReturnStmt *RS = dyn_cast<ReturnStmt>(CS->getStmt())) { + if (RS == S) + return true; + if (const Expr *RE = RS->getRetValue()) { + RE = RE->IgnoreParenCasts(); + if (RE == S) + return true; + ParentMap PM(const_cast<Expr *>(RE)); + // If 'S' is in the ParentMap, it is a subexpression of + // the return statement.
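+ // (getParent returns a null Stmt* when 'S' is not in the map, so the + // implicit pointer-to-bool conversion yields the desired result.)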
+ return PM.getParent(S); + } + } + break; + } + } + // Note also that we are restricting the search for the return statement + // to stop at control-flow; only part of a return statement may be dead, + // without the whole return statement being dead. + if (Current->getTerminator().isTemporaryDtorsBranch()) { + // Temporary destructors have a predictable control flow, thus we want to + // look into the next block for the return statement. + // We look into the false branch, as we know the true branch only contains + // the call to the destructor. + assert(Current->succ_size() == 2); + Current = *(Current->succ_begin() + 1); + } else if (!Current->getTerminatorStmt() && Current->succ_size() == 1) { + // If there is only one successor, we're not dealing with outgoing control + // flow. Thus, look into the next block. + Current = *Current->succ_begin(); + if (Current->pred_size() > 1) { + // If there is more than one predecessor, we're dealing with incoming + // control flow - if the return statement is in that block, it might + // well be reachable via a different control flow, thus it's not dead. + return false; + } + } else { + // We hit control flow or a dead end. Stop searching. + return false; + } + } + llvm_unreachable("Broke out of infinite loop."); +} + +static SourceLocation getTopMostMacro(SourceLocation Loc, SourceManager &SM) { + assert(Loc.isMacroID()); + SourceLocation Last; + do { + Last = Loc; + Loc = SM.getImmediateMacroCallerLoc(Loc); + } while (Loc.isMacroID()); + return Last; +} + +/// Returns true if the statement is expanded from a configuration macro. +static bool isExpandedFromConfigurationMacro(const Stmt *S, + Preprocessor &PP, + bool IgnoreYES_NO = false) { + // FIXME: This is not very precise. Here we just check to see if the + // value comes from a macro, but we can do much better. This is likely + // to be overly conservative. This logic is factored into a separate function + // so that we can refine it later. + SourceLocation L = S->getBeginLoc(); + if (L.isMacroID()) { + SourceManager &SM = PP.getSourceManager(); + if (IgnoreYES_NO) { + // The Objective-C constants 'YES' and 'NO' + // are defined as macros. Do not treat them + // as configuration values. + SourceLocation TopL = getTopMostMacro(L, SM); + StringRef MacroName = PP.getImmediateMacroName(TopL); + if (MacroName == "YES" || MacroName == "NO") + return false; + } else if (!PP.getLangOpts().CPlusPlus) { + // Do not treat C 'false' and 'true' macros as configuration values. + SourceLocation TopL = getTopMostMacro(L, SM); + StringRef MacroName = PP.getImmediateMacroName(TopL); + if (MacroName == "false" || MacroName == "true") + return false; + } + return true; + } + return false; +} + +static bool isConfigurationValue(const ValueDecl *D, Preprocessor &PP); + +/// Returns true if the statement represents a configuration value. +/// +/// A configuration value is something usually determined at compile-time +/// that makes a branch either always execute or never execute. Such guards +/// are for "sometimes unreachable" code. Such code is usually not interesting +/// to report as unreachable, and may mask truly unreachable code within +/// those blocks.
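+/// For example, a guard like 'if (ENABLE_EXTRA_CHECKS)' (a hypothetical +/// configuration macro) makes the guarded block only "sometimes +/// unreachable", which is exactly the case this predicate detects.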
+static bool isConfigurationValue(const Stmt *S, + Preprocessor &PP, + SourceRange *SilenceableCondVal = nullptr, + bool IncludeIntegers = true, + bool WrappedInParens = false) { + if (!S) + return false; + + if (const auto *Ex = dyn_cast<Expr>(S)) + S = Ex->IgnoreImplicit(); + + if (const auto *Ex = dyn_cast<Expr>(S)) + S = Ex->IgnoreCasts(); + + // Special case looking for the sigil '()' around an integer literal. + if (const ParenExpr *PE = dyn_cast<ParenExpr>(S)) + if (!PE->getBeginLoc().isMacroID()) + return isConfigurationValue(PE->getSubExpr(), PP, SilenceableCondVal, + IncludeIntegers, true); + + if (const Expr *Ex = dyn_cast<Expr>(S)) + S = Ex->IgnoreCasts(); + + bool IgnoreYES_NO = false; + + switch (S->getStmtClass()) { + case Stmt::CallExprClass: { + const FunctionDecl *Callee = + dyn_cast_or_null<FunctionDecl>(cast<CallExpr>(S)->getCalleeDecl()); + return Callee ? Callee->isConstexpr() : false; + } + case Stmt::DeclRefExprClass: + return isConfigurationValue(cast<DeclRefExpr>(S)->getDecl(), PP); + case Stmt::ObjCBoolLiteralExprClass: + IgnoreYES_NO = true; + [[fallthrough]]; + case Stmt::CXXBoolLiteralExprClass: + case Stmt::IntegerLiteralClass: { + const Expr *E = cast<Expr>(S); + if (IncludeIntegers) { + if (SilenceableCondVal && !SilenceableCondVal->getBegin().isValid()) + *SilenceableCondVal = E->getSourceRange(); + return WrappedInParens || + isExpandedFromConfigurationMacro(E, PP, IgnoreYES_NO); + } + return false; + } + case Stmt::MemberExprClass: + return isConfigurationValue(cast<MemberExpr>(S)->getMemberDecl(), PP); + case Stmt::UnaryExprOrTypeTraitExprClass: + return true; + case Stmt::BinaryOperatorClass: { + const BinaryOperator *B = cast<BinaryOperator>(S); + // Only include raw integers (not enums) as configuration + // values if they are used in a logical or comparison operator + // (not arithmetic). + IncludeIntegers &= (B->isLogicalOp() || B->isComparisonOp()); + return isConfigurationValue(B->getLHS(), PP, SilenceableCondVal, + IncludeIntegers) || + isConfigurationValue(B->getRHS(), PP, SilenceableCondVal, + IncludeIntegers); + } + case Stmt::UnaryOperatorClass: { + const UnaryOperator *UO = cast<UnaryOperator>(S); + if (UO->getOpcode() != UO_LNot && UO->getOpcode() != UO_Minus) + return false; + bool SilenceableCondValNotSet = + SilenceableCondVal && SilenceableCondVal->getBegin().isInvalid(); + bool IsSubExprConfigValue = + isConfigurationValue(UO->getSubExpr(), PP, SilenceableCondVal, + IncludeIntegers, WrappedInParens); + // Update the silenceable condition value source range only if the range + // was set directly by the child expression. + if (SilenceableCondValNotSet && + SilenceableCondVal->getBegin().isValid() && + *SilenceableCondVal == + UO->getSubExpr()->IgnoreCasts()->getSourceRange()) + *SilenceableCondVal = UO->getSourceRange(); + return IsSubExprConfigValue; + } + default: + return false; + } +} + +static bool isConfigurationValue(const ValueDecl *D, Preprocessor &PP) { + if (const EnumConstantDecl *ED = dyn_cast<EnumConstantDecl>(D)) + return isConfigurationValue(ED->getInitExpr(), PP); + if (const VarDecl *VD = dyn_cast<VarDecl>(D)) { + // As a heuristic, treat globals as configuration values. Note + // that we only will get here if Sema evaluated this + // condition to a constant expression, which means the global + // had to be declared in a way to be a truly constant value. + // We could generalize this to local variables, but it isn't + // clear if those truly represent configuration values that + // gate unreachable code. 
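+ // e.g. a global such as 'static const bool EnableTracing = false;' used in + // a branch condition is treated as configuration rather than as dead code + // (illustrative example).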
+ if (!VD->hasLocalStorage()) + return true; + + // As a heuristic, locals that have been marked 'const' explicitly + // can be treated as configuration values as well. + return VD->getType().isLocalConstQualified(); + } + return false; +} + +/// Returns true if we should always explore all successors of a block. +static bool shouldTreatSuccessorsAsReachable(const CFGBlock *B, + Preprocessor &PP) { + if (const Stmt *Term = B->getTerminatorStmt()) { + if (isa<SwitchStmt>(Term)) + return true; + // Specially handle '||' and '&&'. + if (isa<BinaryOperator>(Term)) { + return isConfigurationValue(Term, PP); + } + // Do not treat constexpr if statement successors as unreachable in warnings + // since the point of these statements is to determine branches at compile + // time. + if (const auto *IS = dyn_cast<IfStmt>(Term); + IS != nullptr && IS->isConstexpr()) + return true; + } + + const Stmt *Cond = B->getTerminatorCondition(/* stripParens */ false); + return isConfigurationValue(Cond, PP); +} + +static unsigned scanFromBlock(const CFGBlock *Start, + llvm::BitVector &Reachable, + Preprocessor *PP, + bool IncludeSometimesUnreachableEdges) { + unsigned count = 0; + + // Prep the work queue. + SmallVector<const CFGBlock*, 32> WL; + + // The entry block may have already been marked reachable + // by the caller. + if (!Reachable[Start->getBlockID()]) { + ++count; + Reachable[Start->getBlockID()] = true; + } + + WL.push_back(Start); + + // Find the reachable blocks from 'Start'. + while (!WL.empty()) { + const CFGBlock *item = WL.pop_back_val(); + + // There are cases where we want to treat all successors as reachable. + // The idea is that some "sometimes unreachable" code is not interesting, + // and that we should forge ahead and explore those branches anyway. + // This allows us to potentially uncover some "always unreachable" code + // within the "sometimes unreachable" code. + // Look at the successors and mark them reachable. + std::optional<bool> TreatAllSuccessorsAsReachable; + if (!IncludeSometimesUnreachableEdges) + TreatAllSuccessorsAsReachable = false; + + for (CFGBlock::const_succ_iterator I = item->succ_begin(), + E = item->succ_end(); I != E; ++I) { + const CFGBlock *B = *I; + if (!B) do { + const CFGBlock *UB = I->getPossiblyUnreachableBlock(); + if (!UB) + break; + + if (!TreatAllSuccessorsAsReachable) { + assert(PP); + TreatAllSuccessorsAsReachable = + shouldTreatSuccessorsAsReachable(item, *PP); + } + + if (*TreatAllSuccessorsAsReachable) { + B = UB; + break; + } + } + while (false); + + if (B) { + unsigned blockID = B->getBlockID(); + if (!Reachable[blockID]) { + Reachable.set(blockID); + WL.push_back(B); + ++count; + } + } + } + } + return count; +} + +static unsigned scanMaybeReachableFromBlock(const CFGBlock *Start, + Preprocessor &PP, + llvm::BitVector &Reachable) { + return scanFromBlock(Start, Reachable, &PP, true); +} + +//===----------------------------------------------------------------------===// +// Dead Code Scanner.
+//===----------------------------------------------------------------------===// + +namespace { + class DeadCodeScan { + llvm::BitVector Visited; + llvm::BitVector &Reachable; + SmallVector<const CFGBlock *, 10> WorkList; + Preprocessor &PP; + ASTContext &C; + + typedef SmallVector<std::pair<const CFGBlock *, const Stmt *>, 12> + DeferredLocsTy; + + DeferredLocsTy DeferredLocs; + + public: + DeadCodeScan(llvm::BitVector &reachable, Preprocessor &PP, ASTContext &C) + : Visited(reachable.size()), + Reachable(reachable), + PP(PP), C(C) {} + + void enqueue(const CFGBlock *block); + unsigned scanBackwards(const CFGBlock *Start, + clang::reachable_code::Callback &CB); + + bool isDeadCodeRoot(const CFGBlock *Block); + + const Stmt *findDeadCode(const CFGBlock *Block); + + void reportDeadCode(const CFGBlock *B, + const Stmt *S, + clang::reachable_code::Callback &CB); + }; +} + +void DeadCodeScan::enqueue(const CFGBlock *block) { + unsigned blockID = block->getBlockID(); + if (Reachable[blockID] || Visited[blockID]) + return; + Visited[blockID] = true; + WorkList.push_back(block); +} + +bool DeadCodeScan::isDeadCodeRoot(const clang::CFGBlock *Block) { + bool isDeadRoot = true; + + for (CFGBlock::const_pred_iterator I = Block->pred_begin(), + E = Block->pred_end(); I != E; ++I) { + if (const CFGBlock *PredBlock = *I) { + unsigned blockID = PredBlock->getBlockID(); + if (Visited[blockID]) { + isDeadRoot = false; + continue; + } + if (!Reachable[blockID]) { + isDeadRoot = false; + Visited[blockID] = true; + WorkList.push_back(PredBlock); + continue; + } + } + } + + return isDeadRoot; +} + +static bool isValidDeadStmt(const Stmt *S) { + if (S->getBeginLoc().isInvalid()) + return false; + if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(S)) + return BO->getOpcode() != BO_Comma; + return true; +} + +const Stmt *DeadCodeScan::findDeadCode(const clang::CFGBlock *Block) { + for (CFGBlock::const_iterator I = Block->begin(), E = Block->end(); I!=E; ++I) + if (std::optional<CFGStmt> CS = I->getAs<CFGStmt>()) { + const Stmt *S = CS->getStmt(); + if (isValidDeadStmt(S)) + return S; + } + + CFGTerminator T = Block->getTerminator(); + if (T.isStmtBranch()) { + const Stmt *S = T.getStmt(); + if (S && isValidDeadStmt(S)) + return S; + } + + return nullptr; +} + +static int SrcCmp(const std::pair<const CFGBlock *, const Stmt *> *p1, + const std::pair<const CFGBlock *, const Stmt *> *p2) { + if (p1->second->getBeginLoc() < p2->second->getBeginLoc()) + return -1; + if (p2->second->getBeginLoc() < p1->second->getBeginLoc()) + return 1; + return 0; +} + +unsigned DeadCodeScan::scanBackwards(const clang::CFGBlock *Start, + clang::reachable_code::Callback &CB) { + + unsigned count = 0; + enqueue(Start); + + while (!WorkList.empty()) { + const CFGBlock *Block = WorkList.pop_back_val(); + + // It is possible that this block has been marked reachable after + // it was enqueued. + if (Reachable[Block->getBlockID()]) + continue; + + // Look for any dead code within the block. + const Stmt *S = findDeadCode(Block); + + if (!S) { + // No dead code. Possibly an empty block. Look at dead predecessors. + for (CFGBlock::const_pred_iterator I = Block->pred_begin(), + E = Block->pred_end(); I != E; ++I) { + if (const CFGBlock *predBlock = *I) + enqueue(predBlock); + } + continue; + } + + // Specially handle macro-expanded code. 
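+ // Statements expanded from macros are not reported as dead. Instead, the + // block is rescanned as "maybe reachable" so its successors are not flagged + // either; macro expansions commonly contain branches that are intentionally + // unreachable at a particular expansion site.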
+ if (S->getBeginLoc().isMacroID()) { + count += scanMaybeReachableFromBlock(Block, PP, Reachable); + continue; + } + + if (isDeadCodeRoot(Block)) { + reportDeadCode(Block, S, CB); + count += scanMaybeReachableFromBlock(Block, PP, Reachable); + } + else { + // Record this statement as the possibly best location in a + // strongly-connected component of dead code for emitting a + // warning. + DeferredLocs.push_back(std::make_pair(Block, S)); + } + } + + // If we didn't find a dead root, then report the dead code with the + // earliest location. + if (!DeferredLocs.empty()) { + llvm::array_pod_sort(DeferredLocs.begin(), DeferredLocs.end(), SrcCmp); + for (const auto &I : DeferredLocs) { + const CFGBlock *Block = I.first; + if (Reachable[Block->getBlockID()]) + continue; + reportDeadCode(Block, I.second, CB); + count += scanMaybeReachableFromBlock(Block, PP, Reachable); + } + } + + return count; +} + +static SourceLocation GetUnreachableLoc(const Stmt *S, + SourceRange &R1, + SourceRange &R2) { + R1 = R2 = SourceRange(); + + if (const Expr *Ex = dyn_cast<Expr>(S)) + S = Ex->IgnoreParenImpCasts(); + + switch (S->getStmtClass()) { + case Expr::BinaryOperatorClass: { + const BinaryOperator *BO = cast<BinaryOperator>(S); + return BO->getOperatorLoc(); + } + case Expr::UnaryOperatorClass: { + const UnaryOperator *UO = cast<UnaryOperator>(S); + R1 = UO->getSubExpr()->getSourceRange(); + return UO->getOperatorLoc(); + } + case Expr::CompoundAssignOperatorClass: { + const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(S); + R1 = CAO->getLHS()->getSourceRange(); + R2 = CAO->getRHS()->getSourceRange(); + return CAO->getOperatorLoc(); + } + case Expr::BinaryConditionalOperatorClass: + case Expr::ConditionalOperatorClass: { + const AbstractConditionalOperator *CO = + cast<AbstractConditionalOperator>(S); + return CO->getQuestionLoc(); + } + case Expr::MemberExprClass: { + const MemberExpr *ME = cast<MemberExpr>(S); + R1 = ME->getSourceRange(); + return ME->getMemberLoc(); + } + case Expr::ArraySubscriptExprClass: { + const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(S); + R1 = ASE->getLHS()->getSourceRange(); + R2 = ASE->getRHS()->getSourceRange(); + return ASE->getRBracketLoc(); + } + case Expr::CStyleCastExprClass: { + const CStyleCastExpr *CSC = cast<CStyleCastExpr>(S); + R1 = CSC->getSubExpr()->getSourceRange(); + return CSC->getLParenLoc(); + } + case Expr::CXXFunctionalCastExprClass: { + const CXXFunctionalCastExpr *CE = cast <CXXFunctionalCastExpr>(S); + R1 = CE->getSubExpr()->getSourceRange(); + return CE->getBeginLoc(); + } + case Stmt::CXXTryStmtClass: { + return cast<CXXTryStmt>(S)->getHandler(0)->getCatchLoc(); + } + case Expr::ObjCBridgedCastExprClass: { + const ObjCBridgedCastExpr *CSC = cast<ObjCBridgedCastExpr>(S); + R1 = CSC->getSubExpr()->getSourceRange(); + return CSC->getLParenLoc(); + } + default: ; + } + R1 = S->getSourceRange(); + return S->getBeginLoc(); +} + +void DeadCodeScan::reportDeadCode(const CFGBlock *B, + const Stmt *S, + clang::reachable_code::Callback &CB) { + // Classify the unreachable code found, or suppress it in some cases. 
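+ // Breaks, dead returns, and loop increments get dedicated kinds so the + // caller can word its diagnostic accordingly; trivial do-while conditions + // and __builtin_unreachable()/__builtin_assume(false) idioms are suppressed + // entirely.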
+ reachable_code::UnreachableKind UK = reachable_code::UK_Other; + + if (isa<BreakStmt>(S)) { + UK = reachable_code::UK_Break; + } else if (isTrivialDoWhile(B, S) || isBuiltinUnreachable(S) || + isBuiltinAssumeFalse(B, S, C)) { + return; + } + else if (isDeadReturn(B, S)) { + UK = reachable_code::UK_Return; + } + + SourceRange SilenceableCondVal; + + if (UK == reachable_code::UK_Other) { + // Check if the dead code is part of the "loop target" of + // a for/for-range loop. This is the block that contains + // the increment code. + if (const Stmt *LoopTarget = B->getLoopTarget()) { + SourceLocation Loc = LoopTarget->getBeginLoc(); + SourceRange R1(Loc, Loc), R2; + + if (const ForStmt *FS = dyn_cast<ForStmt>(LoopTarget)) { + const Expr *Inc = FS->getInc(); + Loc = Inc->getBeginLoc(); + R2 = Inc->getSourceRange(); + } + + CB.HandleUnreachable(reachable_code::UK_Loop_Increment, + Loc, SourceRange(), SourceRange(Loc, Loc), R2); + return; + } + + // Check if the dead block has a predecessor whose branch has + // a configuration value that *could* be modified to + // silence the warning. + CFGBlock::const_pred_iterator PI = B->pred_begin(); + if (PI != B->pred_end()) { + if (const CFGBlock *PredBlock = PI->getPossiblyUnreachableBlock()) { + const Stmt *TermCond = + PredBlock->getTerminatorCondition(/* strip parens */ false); + isConfigurationValue(TermCond, PP, &SilenceableCondVal); + } + } + } + + SourceRange R1, R2; + SourceLocation Loc = GetUnreachableLoc(S, R1, R2); + CB.HandleUnreachable(UK, Loc, SilenceableCondVal, R1, R2); +} + +//===----------------------------------------------------------------------===// +// Reachability APIs. +//===----------------------------------------------------------------------===// + +namespace clang { namespace reachable_code { + +void Callback::anchor() { } + +unsigned ScanReachableFromBlock(const CFGBlock *Start, + llvm::BitVector &Reachable) { + return scanFromBlock(Start, Reachable, /* SourceManager* */ nullptr, false); +} + +void FindUnreachableCode(AnalysisDeclContext &AC, Preprocessor &PP, + Callback &CB) { + + CFG *cfg = AC.getCFG(); + if (!cfg) + return; + + // Scan for reachable blocks from the entrance of the CFG. + // If there are no unreachable blocks, we're done. + llvm::BitVector reachable(cfg->getNumBlockIDs()); + unsigned numReachable = + scanMaybeReachableFromBlock(&cfg->getEntry(), PP, reachable); + if (numReachable == cfg->getNumBlockIDs()) + return; + + // If there aren't explicit EH edges, we should include the 'try' dispatch + // blocks as roots. + if (!AC.getCFGBuildOptions().AddEHEdges) { + for (const CFGBlock *B : cfg->try_blocks()) + numReachable += scanMaybeReachableFromBlock(B, PP, reachable); + if (numReachable == cfg->getNumBlockIDs()) + return; + } + + // There are some unreachable blocks. We need to find the root blocks that + // contain code that should be considered unreachable. + for (const CFGBlock *block : *cfg) { + // A block may have been marked reachable during this loop. 
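+ // (Each DeadCodeScan below can mark further blocks reachable through
+ // scanMaybeReachableFromBlock, so the bit vector must be rechecked.)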
+ if (reachable[block->getBlockID()])
+ continue;
+
+ DeadCodeScan DS(reachable, PP, AC.getASTContext());
+ numReachable += DS.scanBackwards(block, CB);
+
+ if (numReachable == cfg->getNumBlockIDs())
+ return;
+ }
+}
+
+}} // end namespace clang::reachable_code
diff --git a/contrib/llvm-project/clang/lib/Analysis/RetainSummaryManager.cpp b/contrib/llvm-project/clang/lib/Analysis/RetainSummaryManager.cpp
new file mode 100644
index 000000000000..468e94b23c3a
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/RetainSummaryManager.cpp
@@ -0,0 +1,1297 @@
+//== RetainSummaryManager.cpp - Summaries for reference counting --*- C++ -*--//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the summaries for retain counting, which back a
+// reference count checker for Core Foundation, Cocoa,
+// and OSObject (on Mac OS X).
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/DomainSpecific/CocoaConventions.h"
+#include "clang/Analysis/RetainSummaryManager.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/AST/ParentMap.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include <optional>
+
+using namespace clang;
+using namespace ento;
+
+/// Base case: an empty candidate list never matches.
+template <class T>
+constexpr static bool isOneOf() {
+ return false;
+}
+
+/// Helper function to check whether the class T matches any of the
+/// remaining candidate classes in the parameter pack.
+template <class T, class P, class... ToCompare>
+constexpr static bool isOneOf() {
+ return std::is_same_v<T, P> || isOneOf<T, ToCompare...>();
+}
+
+namespace {
+
+/// Fake attribute class for RC* attributes.
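+/// Each class below matches an 'annotate' attribute with a fixed spelling;
+/// for instance, a (hypothetical) declaration such as
+///   void *make_rc(void)
+///       __attribute__((annotate("rc_ownership_returns_retained")));
+/// is treated as if it carried GeneralizedReturnsRetainedAttr.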
+struct GeneralizedReturnsRetainedAttr { + static bool classof(const Attr *A) { + if (auto AA = dyn_cast<AnnotateAttr>(A)) + return AA->getAnnotation() == "rc_ownership_returns_retained"; + return false; + } +}; + +struct GeneralizedReturnsNotRetainedAttr { + static bool classof(const Attr *A) { + if (auto AA = dyn_cast<AnnotateAttr>(A)) + return AA->getAnnotation() == "rc_ownership_returns_not_retained"; + return false; + } +}; + +struct GeneralizedConsumedAttr { + static bool classof(const Attr *A) { + if (auto AA = dyn_cast<AnnotateAttr>(A)) + return AA->getAnnotation() == "rc_ownership_consumed"; + return false; + } +}; + +} + +template <class T> +std::optional<ObjKind> RetainSummaryManager::hasAnyEnabledAttrOf(const Decl *D, + QualType QT) { + ObjKind K; + if (isOneOf<T, CFConsumedAttr, CFReturnsRetainedAttr, + CFReturnsNotRetainedAttr>()) { + if (!TrackObjCAndCFObjects) + return std::nullopt; + + K = ObjKind::CF; + } else if (isOneOf<T, NSConsumedAttr, NSConsumesSelfAttr, + NSReturnsAutoreleasedAttr, NSReturnsRetainedAttr, + NSReturnsNotRetainedAttr, NSConsumesSelfAttr>()) { + + if (!TrackObjCAndCFObjects) + return std::nullopt; + + if (isOneOf<T, NSReturnsRetainedAttr, NSReturnsAutoreleasedAttr, + NSReturnsNotRetainedAttr>() && + !cocoa::isCocoaObjectRef(QT)) + return std::nullopt; + K = ObjKind::ObjC; + } else if (isOneOf<T, OSConsumedAttr, OSConsumesThisAttr, + OSReturnsNotRetainedAttr, OSReturnsRetainedAttr, + OSReturnsRetainedOnZeroAttr, + OSReturnsRetainedOnNonZeroAttr>()) { + if (!TrackOSObjects) + return std::nullopt; + K = ObjKind::OS; + } else if (isOneOf<T, GeneralizedReturnsNotRetainedAttr, + GeneralizedReturnsRetainedAttr, + GeneralizedConsumedAttr>()) { + K = ObjKind::Generalized; + } else { + llvm_unreachable("Unexpected attribute"); + } + if (D->hasAttr<T>()) + return K; + return std::nullopt; +} + +template <class T1, class T2, class... Others> +std::optional<ObjKind> RetainSummaryManager::hasAnyEnabledAttrOf(const Decl *D, + QualType QT) { + if (auto Out = hasAnyEnabledAttrOf<T1>(D, QT)) + return Out; + return hasAnyEnabledAttrOf<T2, Others...>(D, QT); +} + +const RetainSummary * +RetainSummaryManager::getPersistentSummary(const RetainSummary &OldSumm) { + // Unique "simple" summaries -- those without ArgEffects. 
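+ // Simple summaries are profiled into a FoldingSet so structurally equal
+ // summaries share one bump-allocated node; summaries with per-argument
+ // effects are allocated fresh below.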
+ if (OldSumm.isSimple()) { + ::llvm::FoldingSetNodeID ID; + OldSumm.Profile(ID); + + void *Pos; + CachedSummaryNode *N = SimpleSummaries.FindNodeOrInsertPos(ID, Pos); + + if (!N) { + N = (CachedSummaryNode *) BPAlloc.Allocate<CachedSummaryNode>(); + new (N) CachedSummaryNode(OldSumm); + SimpleSummaries.InsertNode(N, Pos); + } + + return &N->getValue(); + } + + RetainSummary *Summ = (RetainSummary *) BPAlloc.Allocate<RetainSummary>(); + new (Summ) RetainSummary(OldSumm); + return Summ; +} + +static bool isSubclass(const Decl *D, + StringRef ClassName) { + using namespace ast_matchers; + DeclarationMatcher SubclassM = + cxxRecordDecl(isSameOrDerivedFrom(std::string(ClassName))); + return !(match(SubclassM, *D, D->getASTContext()).empty()); +} + +static bool isExactClass(const Decl *D, StringRef ClassName) { + using namespace ast_matchers; + DeclarationMatcher sameClassM = + cxxRecordDecl(hasName(std::string(ClassName))); + return !(match(sameClassM, *D, D->getASTContext()).empty()); +} + +static bool isOSObjectSubclass(const Decl *D) { + return D && isSubclass(D, "OSMetaClassBase") && + !isExactClass(D, "OSMetaClass"); +} + +static bool isOSObjectDynamicCast(StringRef S) { return S == "safeMetaCast"; } + +static bool isOSObjectRequiredCast(StringRef S) { + return S == "requiredMetaCast"; +} + +static bool isOSObjectThisCast(StringRef S) { + return S == "metaCast"; +} + + +static bool isOSObjectPtr(QualType QT) { + return isOSObjectSubclass(QT->getPointeeCXXRecordDecl()); +} + +static bool isISLObjectRef(QualType Ty) { + return StringRef(Ty.getAsString()).startswith("isl_"); +} + +static bool isOSIteratorSubclass(const Decl *D) { + return isSubclass(D, "OSIterator"); +} + +static bool hasRCAnnotation(const Decl *D, StringRef rcAnnotation) { + for (const auto *Ann : D->specific_attrs<AnnotateAttr>()) { + if (Ann->getAnnotation() == rcAnnotation) + return true; + } + return false; +} + +static bool isRetain(const FunctionDecl *FD, StringRef FName) { + return FName.startswith_insensitive("retain") || + FName.endswith_insensitive("retain"); +} + +static bool isRelease(const FunctionDecl *FD, StringRef FName) { + return FName.startswith_insensitive("release") || + FName.endswith_insensitive("release"); +} + +static bool isAutorelease(const FunctionDecl *FD, StringRef FName) { + return FName.startswith_insensitive("autorelease") || + FName.endswith_insensitive("autorelease"); +} + +static bool isMakeCollectable(StringRef FName) { + return FName.contains_insensitive("MakeCollectable"); +} + +/// A function is OSObject related if it is declared on a subclass +/// of OSObject, or any of the parameters is a subclass of an OSObject. 
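+/// For example (hypothetical declarations):
+///   void OSArray::flushCollection();   // related: method of an OSObject
+///   void log(OSObject *obj);           // related: parameter points to one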
+static bool isOSObjectRelated(const CXXMethodDecl *MD) {
+ if (isOSObjectSubclass(MD->getParent()))
+ return true;
+
+ for (ParmVarDecl *Param : MD->parameters()) {
+ QualType PT = Param->getType()->getPointeeType();
+ if (!PT.isNull())
+ if (CXXRecordDecl *RD = PT->getAsCXXRecordDecl())
+ if (isOSObjectSubclass(RD))
+ return true;
+ }
+
+ return false;
+}
+
+bool
+RetainSummaryManager::isKnownSmartPointer(QualType QT) {
+ QT = QT.getCanonicalType();
+ const auto *RD = QT->getAsCXXRecordDecl();
+ if (!RD)
+ return false;
+ const IdentifierInfo *II = RD->getIdentifier();
+ if (II && II->getName() == "smart_ptr")
+ if (const auto *ND = dyn_cast<NamespaceDecl>(RD->getDeclContext()))
+ if (ND->getNameAsString() == "os")
+ return true;
+ return false;
+}
+
+const RetainSummary *
+RetainSummaryManager::getSummaryForOSObject(const FunctionDecl *FD,
+ StringRef FName, QualType RetTy) {
+ assert(TrackOSObjects &&
+ "Requesting a summary for an OSObject but OSObjects are not tracked");
+
+ if (RetTy->isPointerType()) {
+ const CXXRecordDecl *PD = RetTy->getPointeeType()->getAsCXXRecordDecl();
+ if (PD && isOSObjectSubclass(PD)) {
+ if (isOSObjectDynamicCast(FName) || isOSObjectRequiredCast(FName) ||
+ isOSObjectThisCast(FName))
+ return getDefaultSummary();
+
+ // TODO: Add support for the fairly common *Matching(table) idiom.
+ // Cf. IOService::nameMatching() etc. - these functions have an unusual
+ // contract of returning at +0 or +1 depending on their last argument.
+ if (FName.endswith("Matching")) {
+ return getPersistentStopSummary();
+ }
+
+ // All objects returned by functions *not* starting with 'get',
+ // or iterators, are returned at +1.
+ if ((!FName.startswith("get") && !FName.startswith("Get")) ||
+ isOSIteratorSubclass(PD)) {
+ return getOSSummaryCreateRule(FD);
+ } else {
+ return getOSSummaryGetRule(FD);
+ }
+ }
+ }
+
+ if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
+ const CXXRecordDecl *Parent = MD->getParent();
+ if (Parent && isOSObjectSubclass(Parent)) {
+ if (FName == "release" || FName == "taggedRelease")
+ return getOSSummaryReleaseRule(FD);
+
+ if (FName == "retain" || FName == "taggedRetain")
+ return getOSSummaryRetainRule(FD);
+
+ if (FName == "free")
+ return getOSSummaryFreeRule(FD);
+
+ if (MD->getOverloadedOperator() == OO_New)
+ return getOSSummaryCreateRule(MD);
+ }
+ }
+
+ return nullptr;
+}
+
+const RetainSummary *RetainSummaryManager::getSummaryForObjCOrCFObject(
+ const FunctionDecl *FD,
+ StringRef FName,
+ QualType RetTy,
+ const FunctionType *FT,
+ bool &AllowAnnotations) {
+
+ ArgEffects ScratchArgs(AF.getEmptyMap());
+
+ std::string RetTyName = RetTy.getAsString();
+ if (FName == "pthread_create" || FName == "pthread_setspecific") {
+ // Part of: <rdar://problem/7299394> and <rdar://problem/11282706>.
+ // This will be addressed better with IPA.
+ return getPersistentStopSummary();
+ } else if (FName == "NSMakeCollectable") {
+ // Handle: id NSMakeCollectable(CFTypeRef)
+ AllowAnnotations = false;
+ return RetTy->isObjCIdType() ? getUnarySummary(FT, DoNothing)
+ : getPersistentStopSummary();
+ } else if (FName == "CMBufferQueueDequeueAndRetain" ||
+ FName == "CMBufferQueueDequeueIfDataReadyAndRetain") {
+ // Part of: <rdar://problem/39390714>.
+ return getPersistentSummary(RetEffect::MakeOwned(ObjKind::CF),
+ ScratchArgs,
+ ArgEffect(DoNothing),
+ ArgEffect(DoNothing));
+ } else if (FName == "CFPlugInInstanceCreate") {
+ return getPersistentSummary(RetEffect::MakeNoRet(), ScratchArgs);
+ } else if (FName == "IORegistryEntrySearchCFProperty" ||
+ (RetTyName == "CFMutableDictionaryRef" &&
+ (FName == "IOBSDNameMatching" || FName == "IOServiceMatching" ||
+ FName == "IOServiceNameMatching" ||
+ FName == "IORegistryEntryIDMatching" ||
+ FName == "IOOpenFirmwarePathMatching"))) {
+ // Part of <rdar://problem/6961230>. (IOKit)
+ // This should be addressed using an API table.
+ return getPersistentSummary(RetEffect::MakeOwned(ObjKind::CF), ScratchArgs,
+ ArgEffect(DoNothing), ArgEffect(DoNothing));
+ } else if (FName == "IOServiceGetMatchingService" ||
+ FName == "IOServiceGetMatchingServices") {
+ // FIXES: <rdar://problem/6326900>
+ // This should be addressed using an API table. This strcmp is also
+ // a little gross, but there is no need to over-optimize here.
+ ScratchArgs = AF.add(ScratchArgs, 1, ArgEffect(DecRef, ObjKind::CF));
+ return getPersistentSummary(RetEffect::MakeNoRet(),
+ ScratchArgs,
+ ArgEffect(DoNothing), ArgEffect(DoNothing));
+ } else if (FName == "IOServiceAddNotification" ||
+ FName == "IOServiceAddMatchingNotification") {
+ // Part of <rdar://problem/6961230>. (IOKit)
+ // This should be addressed using an API table.
+ ScratchArgs = AF.add(ScratchArgs, 2, ArgEffect(DecRef, ObjKind::CF));
+ return getPersistentSummary(RetEffect::MakeNoRet(),
+ ScratchArgs,
+ ArgEffect(DoNothing), ArgEffect(DoNothing));
+ } else if (FName == "CVPixelBufferCreateWithBytes") {
+ // FIXES: <rdar://problem/7283567>
+ // Eventually this can be improved by recognizing that the pixel
+ // buffer passed to CVPixelBufferCreateWithBytes is released via
+ // a callback and doing full IPA to make sure this is done correctly.
+ // FIXME: This function has an out parameter that returns an
+ // allocated object.
+ ScratchArgs = AF.add(ScratchArgs, 7, ArgEffect(StopTracking));
+ return getPersistentSummary(RetEffect::MakeNoRet(),
+ ScratchArgs,
+ ArgEffect(DoNothing), ArgEffect(DoNothing));
+ } else if (FName == "CGBitmapContextCreateWithData") {
+ // FIXES: <rdar://problem/7358899>
+ // Eventually this can be improved by recognizing that 'releaseInfo'
+ // passed to CGBitmapContextCreateWithData is released via
+ // a callback and doing full IPA to make sure this is done correctly.
+ ScratchArgs = AF.add(ScratchArgs, 8, ArgEffect(StopTracking));
+ return getPersistentSummary(RetEffect::MakeOwned(ObjKind::CF), ScratchArgs,
+ ArgEffect(DoNothing), ArgEffect(DoNothing));
+ } else if (FName == "CVPixelBufferCreateWithPlanarBytes") {
+ // FIXES: <rdar://problem/7283567>
+ // Eventually this can be improved by recognizing that the pixel
+ // buffer passed to CVPixelBufferCreateWithPlanarBytes is released
+ // via a callback and doing full IPA to make sure this is done
+ // correctly.
+ ScratchArgs = AF.add(ScratchArgs, 12, ArgEffect(StopTracking));
+ return getPersistentSummary(RetEffect::MakeNoRet(),
+ ScratchArgs,
+ ArgEffect(DoNothing), ArgEffect(DoNothing));
+ } else if (FName == "VTCompressionSessionEncodeFrame") {
+ // The context argument passed to VTCompressionSessionEncodeFrame()
+ // is passed to the callback specified when creating the session
+ // (e.g. with VTCompressionSessionCreate()) which can release it.
+ // To account for this possibility, conservatively stop tracking
+ // the context.
+ ScratchArgs = AF.add(ScratchArgs, 5, ArgEffect(StopTracking));
+ return getPersistentSummary(RetEffect::MakeNoRet(),
+ ScratchArgs,
+ ArgEffect(DoNothing), ArgEffect(DoNothing));
+ } else if (FName == "dispatch_set_context" ||
+ FName == "xpc_connection_set_context") {
+ // <rdar://problem/11059275> - The analyzer currently doesn't have
+ // a good way to reason about the finalizer function for libdispatch.
+ // If we pass a context object that is memory managed, stop tracking it.
+ // <rdar://problem/13783514> - Same problem, but for XPC.
+ // FIXME: this hack should possibly go away once we can handle
+ // libdispatch and XPC finalizers.
+ ScratchArgs = AF.add(ScratchArgs, 1, ArgEffect(StopTracking));
+ return getPersistentSummary(RetEffect::MakeNoRet(),
+ ScratchArgs,
+ ArgEffect(DoNothing), ArgEffect(DoNothing));
+ } else if (FName.startswith("NSLog")) {
+ return getDoNothingSummary();
+ } else if (FName.startswith("NS") && FName.contains("Insert")) {
+ // Allowlist NSXXInsertXX, for example NSMapInsertIfAbsent, since they can
+ // be deallocated by NSMapRemove. (radar://11152419)
+ ScratchArgs = AF.add(ScratchArgs, 1, ArgEffect(StopTracking));
+ ScratchArgs = AF.add(ScratchArgs, 2, ArgEffect(StopTracking));
+ return getPersistentSummary(RetEffect::MakeNoRet(),
+ ScratchArgs, ArgEffect(DoNothing),
+ ArgEffect(DoNothing));
+ }
+
+ if (RetTy->isPointerType()) {
+
+ // For CoreFoundation ('CF') types.
+ if (cocoa::isRefType(RetTy, "CF", FName)) {
+ if (isRetain(FD, FName)) {
+ // CFRetain isn't supposed to be annotated. However, this may as
+ // well be a user-made "safe" CFRetain function that is incorrectly
+ // annotated as cf_returns_retained due to lack of better options.
+ // We want to ignore such annotation.
+ AllowAnnotations = false;
+
+ return getUnarySummary(FT, IncRef);
+ } else if (isAutorelease(FD, FName)) {
+ // The headers use cf_consumed, but we can fully model CFAutorelease
+ // ourselves.
+ AllowAnnotations = false;
+
+ return getUnarySummary(FT, Autorelease);
+ } else if (isMakeCollectable(FName)) {
+ AllowAnnotations = false;
+ return getUnarySummary(FT, DoNothing);
+ } else {
+ return getCFCreateGetRuleSummary(FD);
+ }
+ }
+
+ // For CoreGraphics ('CG') and CoreVideo ('CV') types.
+ if (cocoa::isRefType(RetTy, "CG", FName) ||
+ cocoa::isRefType(RetTy, "CV", FName)) {
+ if (isRetain(FD, FName))
+ return getUnarySummary(FT, IncRef);
+ else
+ return getCFCreateGetRuleSummary(FD);
+ }
+
+ // For all other CF-style types, use the Create/Get
+ // rule for summaries but don't support Retain functions
+ // with framework-specific prefixes.
+ if (coreFoundation::isCFObjectRef(RetTy)) {
+ return getCFCreateGetRuleSummary(FD);
+ }
+
+ if (FD->hasAttr<CFAuditedTransferAttr>()) {
+ return getCFCreateGetRuleSummary(FD);
+ }
+ }
+
+ // Check for release functions, the only kind of functions that we care
+ // about that don't return a pointer type.
+ if (FName.startswith("CG") || FName.startswith("CF")) {
+ // Test for 'CGCF'.
+ FName = FName.substr(FName.startswith("CGCF") ? 4 : 2);
+
+ if (isRelease(FD, FName))
+ return getUnarySummary(FT, DecRef);
+ else {
+ assert(ScratchArgs.isEmpty());
+ // Remaining CoreFoundation and CoreGraphics functions.
+ // We used to assume that they all strictly followed the ownership idiom
+ // and that ownership cannot be transferred. While this is technically
+ // correct, many methods allow a tracked object to escape. For example:
+ //
+ // CFMutableDictionaryRef x = CFDictionaryCreateMutable(...);
+ // CFDictionaryAddValue(y, key, x);
+ // CFRelease(x);
+ // ... it is okay to use 'x' since 'y' has a reference to it
+ //
+ // We handle this and similar cases with the following heuristic. If the
+ // function name contains "InsertValue", "SetValue", "AddValue",
+ // "AppendValue", or "SetAttribute", then we assume that arguments may
+ // "escape." This means that something else holds on to the object,
+ // allowing it to be used even after its local retain count drops to 0.
+ ArgEffectKind E =
+ (StrInStrNoCase(FName, "InsertValue") != StringRef::npos ||
+ StrInStrNoCase(FName, "AddValue") != StringRef::npos ||
+ StrInStrNoCase(FName, "SetValue") != StringRef::npos ||
+ StrInStrNoCase(FName, "AppendValue") != StringRef::npos ||
+ StrInStrNoCase(FName, "SetAttribute") != StringRef::npos)
+ ? MayEscape
+ : DoNothing;
+
+ return getPersistentSummary(RetEffect::MakeNoRet(), ScratchArgs,
+ ArgEffect(DoNothing), ArgEffect(E, ObjKind::CF));
+ }
+ }
+
+ return nullptr;
+}
+
+const RetainSummary *
+RetainSummaryManager::generateSummary(const FunctionDecl *FD,
+ bool &AllowAnnotations) {
+ // We generate "stop" summaries for implicitly defined functions.
+ if (FD->isImplicit())
+ return getPersistentStopSummary();
+
+ const IdentifierInfo *II = FD->getIdentifier();
+
+ StringRef FName = II ? II->getName() : "";
+
+ // Strip away preceding '_'. Doing this here will affect all the checks
+ // down below.
+ FName = FName.substr(FName.find_first_not_of('_'));
+
+ // Inspect the result type. Strip away any typedefs.
+ const auto *FT = FD->getType()->castAs<FunctionType>();
+ QualType RetTy = FT->getReturnType();
+
+ if (TrackOSObjects)
+ if (const RetainSummary *S = getSummaryForOSObject(FD, FName, RetTy))
+ return S;
+
+ if (const auto *MD = dyn_cast<CXXMethodDecl>(FD))
+ if (!isOSObjectRelated(MD))
+ return getPersistentSummary(RetEffect::MakeNoRet(),
+ ArgEffects(AF.getEmptyMap()),
+ ArgEffect(DoNothing),
+ ArgEffect(StopTracking),
+ ArgEffect(DoNothing));
+
+ if (TrackObjCAndCFObjects)
+ if (const RetainSummary *S =
+ getSummaryForObjCOrCFObject(FD, FName, RetTy, FT, AllowAnnotations))
+ return S;
+
+ return getDefaultSummary();
+}
+
+const RetainSummary *
+RetainSummaryManager::getFunctionSummary(const FunctionDecl *FD) {
+ // If we don't know what function we're calling, use our default summary.
+ if (!FD)
+ return getDefaultSummary();
+
+ // Look up a summary in our cache of FunctionDecls -> Summaries.
+ FuncSummariesTy::iterator I = FuncSummaries.find(FD);
+ if (I != FuncSummaries.end())
+ return I->second;
+
+ // No summary? Generate one.
+ bool AllowAnnotations = true;
+ const RetainSummary *S = generateSummary(FD, AllowAnnotations);
+
+ // Annotations override defaults.
+ if (AllowAnnotations)
+ updateSummaryFromAnnotations(S, FD);
+
+ FuncSummaries[FD] = S;
+ return S;
+}
+
+//===----------------------------------------------------------------------===//
+// Summary creation for functions (largely uses of Core Foundation).
+//===----------------------------------------------------------------------===// + +static ArgEffect getStopTrackingHardEquivalent(ArgEffect E) { + switch (E.getKind()) { + case DoNothing: + case Autorelease: + case DecRefBridgedTransferred: + case IncRef: + case UnretainedOutParameter: + case RetainedOutParameter: + case RetainedOutParameterOnZero: + case RetainedOutParameterOnNonZero: + case MayEscape: + case StopTracking: + case StopTrackingHard: + return E.withKind(StopTrackingHard); + case DecRef: + case DecRefAndStopTrackingHard: + return E.withKind(DecRefAndStopTrackingHard); + case Dealloc: + return E.withKind(Dealloc); + } + + llvm_unreachable("Unknown ArgEffect kind"); +} + +const RetainSummary * +RetainSummaryManager::updateSummaryForNonZeroCallbackArg(const RetainSummary *S, + AnyCall &C) { + ArgEffect RecEffect = getStopTrackingHardEquivalent(S->getReceiverEffect()); + ArgEffect DefEffect = getStopTrackingHardEquivalent(S->getDefaultArgEffect()); + + ArgEffects ScratchArgs(AF.getEmptyMap()); + ArgEffects CustomArgEffects = S->getArgEffects(); + for (ArgEffects::iterator I = CustomArgEffects.begin(), + E = CustomArgEffects.end(); + I != E; ++I) { + ArgEffect Translated = getStopTrackingHardEquivalent(I->second); + if (Translated.getKind() != DefEffect.getKind()) + ScratchArgs = AF.add(ScratchArgs, I->first, Translated); + } + + RetEffect RE = RetEffect::MakeNoRetHard(); + + // Special cases where the callback argument CANNOT free the return value. + // This can generally only happen if we know that the callback will only be + // called when the return value is already being deallocated. + if (const IdentifierInfo *Name = C.getIdentifier()) { + // When the CGBitmapContext is deallocated, the callback here will free + // the associated data buffer. + // The callback in dispatch_data_create frees the buffer, but not + // the data object. + if (Name->isStr("CGBitmapContextCreateWithData") || + Name->isStr("dispatch_data_create")) + RE = S->getRetEffect(); + } + + return getPersistentSummary(RE, ScratchArgs, RecEffect, DefEffect); +} + +void RetainSummaryManager::updateSummaryForReceiverUnconsumedSelf( + const RetainSummary *&S) { + + RetainSummaryTemplate Template(S, *this); + + Template->setReceiverEffect(ArgEffect(DoNothing)); + Template->setRetEffect(RetEffect::MakeNoRet()); +} + + +void RetainSummaryManager::updateSummaryForArgumentTypes( + const AnyCall &C, const RetainSummary *&RS) { + RetainSummaryTemplate Template(RS, *this); + + unsigned parm_idx = 0; + for (auto pi = C.param_begin(), pe = C.param_end(); pi != pe; + ++pi, ++parm_idx) { + QualType QT = (*pi)->getType(); + + // Skip already created values. 
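+ // (Effects recorded earlier -- from the generated summary or from
+ // explicit annotations -- take precedence over these type-based
+ // defaults.)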
+ if (RS->getArgEffects().contains(parm_idx)) + continue; + + ObjKind K = ObjKind::AnyObj; + + if (isISLObjectRef(QT)) { + K = ObjKind::Generalized; + } else if (isOSObjectPtr(QT)) { + K = ObjKind::OS; + } else if (cocoa::isCocoaObjectRef(QT)) { + K = ObjKind::ObjC; + } else if (coreFoundation::isCFObjectRef(QT)) { + K = ObjKind::CF; + } + + if (K != ObjKind::AnyObj) + Template->addArg(AF, parm_idx, + ArgEffect(RS->getDefaultArgEffect().getKind(), K)); + } +} + +const RetainSummary * +RetainSummaryManager::getSummary(AnyCall C, + bool HasNonZeroCallbackArg, + bool IsReceiverUnconsumedSelf, + QualType ReceiverType) { + const RetainSummary *Summ; + switch (C.getKind()) { + case AnyCall::Function: + case AnyCall::Constructor: + case AnyCall::InheritedConstructor: + case AnyCall::Allocator: + case AnyCall::Deallocator: + Summ = getFunctionSummary(cast_or_null<FunctionDecl>(C.getDecl())); + break; + case AnyCall::Block: + case AnyCall::Destructor: + // FIXME: These calls are currently unsupported. + return getPersistentStopSummary(); + case AnyCall::ObjCMethod: { + const auto *ME = cast_or_null<ObjCMessageExpr>(C.getExpr()); + if (!ME) { + Summ = getMethodSummary(cast<ObjCMethodDecl>(C.getDecl())); + } else if (ME->isInstanceMessage()) { + Summ = getInstanceMethodSummary(ME, ReceiverType); + } else { + Summ = getClassMethodSummary(ME); + } + break; + } + } + + if (HasNonZeroCallbackArg) + Summ = updateSummaryForNonZeroCallbackArg(Summ, C); + + if (IsReceiverUnconsumedSelf) + updateSummaryForReceiverUnconsumedSelf(Summ); + + updateSummaryForArgumentTypes(C, Summ); + + assert(Summ && "Unknown call type?"); + return Summ; +} + + +const RetainSummary * +RetainSummaryManager::getCFCreateGetRuleSummary(const FunctionDecl *FD) { + if (coreFoundation::followsCreateRule(FD)) + return getCFSummaryCreateRule(FD); + + return getCFSummaryGetRule(FD); +} + +bool RetainSummaryManager::isTrustedReferenceCountImplementation( + const Decl *FD) { + return hasRCAnnotation(FD, "rc_ownership_trusted_implementation"); +} + +std::optional<RetainSummaryManager::BehaviorSummary> +RetainSummaryManager::canEval(const CallExpr *CE, const FunctionDecl *FD, + bool &hasTrustedImplementationAnnotation) { + + IdentifierInfo *II = FD->getIdentifier(); + if (!II) + return std::nullopt; + + StringRef FName = II->getName(); + FName = FName.substr(FName.find_first_not_of('_')); + + QualType ResultTy = CE->getCallReturnType(Ctx); + if (ResultTy->isObjCIdType()) { + if (II->isStr("NSMakeCollectable")) + return BehaviorSummary::Identity; + } else if (ResultTy->isPointerType()) { + // Handle: (CF|CG|CV)Retain + // CFAutorelease + // It's okay to be a little sloppy here. + if (FName == "CMBufferQueueDequeueAndRetain" || + FName == "CMBufferQueueDequeueIfDataReadyAndRetain") { + // Part of: <rdar://problem/39390714>. + // These are not retain. They just return something and retain it. + return std::nullopt; + } + if (CE->getNumArgs() == 1 && + (cocoa::isRefType(ResultTy, "CF", FName) || + cocoa::isRefType(ResultTy, "CG", FName) || + cocoa::isRefType(ResultTy, "CV", FName)) && + (isRetain(FD, FName) || isAutorelease(FD, FName) || + isMakeCollectable(FName))) + return BehaviorSummary::Identity; + + // safeMetaCast is called by OSDynamicCast. + // We assume that OSDynamicCast is either an identity (cast is OK, + // the input was non-zero), + // or that it returns zero (when the cast failed, or the input + // was zero). 
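+ // For instance (hypothetical use):
+ //   OSArray *A = OSDynamicCast(OSArray, Obj); // 'A' is 'Obj' or null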
+ if (TrackOSObjects) { + if (isOSObjectDynamicCast(FName) && FD->param_size() >= 1) { + return BehaviorSummary::IdentityOrZero; + } else if (isOSObjectRequiredCast(FName) && FD->param_size() >= 1) { + return BehaviorSummary::Identity; + } else if (isOSObjectThisCast(FName) && isa<CXXMethodDecl>(FD) && + !cast<CXXMethodDecl>(FD)->isStatic()) { + return BehaviorSummary::IdentityThis; + } + } + + const FunctionDecl* FDD = FD->getDefinition(); + if (FDD && isTrustedReferenceCountImplementation(FDD)) { + hasTrustedImplementationAnnotation = true; + return BehaviorSummary::Identity; + } + } + + if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { + const CXXRecordDecl *Parent = MD->getParent(); + if (TrackOSObjects && Parent && isOSObjectSubclass(Parent)) + if (FName == "release" || FName == "retain") + return BehaviorSummary::NoOp; + } + + return std::nullopt; +} + +const RetainSummary * +RetainSummaryManager::getUnarySummary(const FunctionType* FT, + ArgEffectKind AE) { + + // Unary functions have no arg effects by definition. + ArgEffects ScratchArgs(AF.getEmptyMap()); + + // Verify that this is *really* a unary function. This can + // happen if people do weird things. + const FunctionProtoType* FTP = dyn_cast<FunctionProtoType>(FT); + if (!FTP || FTP->getNumParams() != 1) + return getPersistentStopSummary(); + + ArgEffect Effect(AE, ObjKind::CF); + + ScratchArgs = AF.add(ScratchArgs, 0, Effect); + return getPersistentSummary(RetEffect::MakeNoRet(), + ScratchArgs, + ArgEffect(DoNothing), ArgEffect(DoNothing)); +} + +const RetainSummary * +RetainSummaryManager::getOSSummaryRetainRule(const FunctionDecl *FD) { + return getPersistentSummary(RetEffect::MakeNoRet(), + AF.getEmptyMap(), + /*ReceiverEff=*/ArgEffect(DoNothing), + /*DefaultEff=*/ArgEffect(DoNothing), + /*ThisEff=*/ArgEffect(IncRef, ObjKind::OS)); +} + +const RetainSummary * +RetainSummaryManager::getOSSummaryReleaseRule(const FunctionDecl *FD) { + return getPersistentSummary(RetEffect::MakeNoRet(), + AF.getEmptyMap(), + /*ReceiverEff=*/ArgEffect(DoNothing), + /*DefaultEff=*/ArgEffect(DoNothing), + /*ThisEff=*/ArgEffect(DecRef, ObjKind::OS)); +} + +const RetainSummary * +RetainSummaryManager::getOSSummaryFreeRule(const FunctionDecl *FD) { + return getPersistentSummary(RetEffect::MakeNoRet(), + AF.getEmptyMap(), + /*ReceiverEff=*/ArgEffect(DoNothing), + /*DefaultEff=*/ArgEffect(DoNothing), + /*ThisEff=*/ArgEffect(Dealloc, ObjKind::OS)); +} + +const RetainSummary * +RetainSummaryManager::getOSSummaryCreateRule(const FunctionDecl *FD) { + return getPersistentSummary(RetEffect::MakeOwned(ObjKind::OS), + AF.getEmptyMap()); +} + +const RetainSummary * +RetainSummaryManager::getOSSummaryGetRule(const FunctionDecl *FD) { + return getPersistentSummary(RetEffect::MakeNotOwned(ObjKind::OS), + AF.getEmptyMap()); +} + +const RetainSummary * +RetainSummaryManager::getCFSummaryCreateRule(const FunctionDecl *FD) { + return getPersistentSummary(RetEffect::MakeOwned(ObjKind::CF), + ArgEffects(AF.getEmptyMap())); +} + +const RetainSummary * +RetainSummaryManager::getCFSummaryGetRule(const FunctionDecl *FD) { + return getPersistentSummary(RetEffect::MakeNotOwned(ObjKind::CF), + ArgEffects(AF.getEmptyMap()), + ArgEffect(DoNothing), ArgEffect(DoNothing)); +} + + + + +//===----------------------------------------------------------------------===// +// Summary creation for Selectors. 
+//===----------------------------------------------------------------------===// + +std::optional<RetEffect> +RetainSummaryManager::getRetEffectFromAnnotations(QualType RetTy, + const Decl *D) { + if (hasAnyEnabledAttrOf<NSReturnsRetainedAttr>(D, RetTy)) + return ObjCAllocRetE; + + if (auto K = hasAnyEnabledAttrOf<CFReturnsRetainedAttr, OSReturnsRetainedAttr, + GeneralizedReturnsRetainedAttr>(D, RetTy)) + return RetEffect::MakeOwned(*K); + + if (auto K = hasAnyEnabledAttrOf< + CFReturnsNotRetainedAttr, OSReturnsNotRetainedAttr, + GeneralizedReturnsNotRetainedAttr, NSReturnsNotRetainedAttr, + NSReturnsAutoreleasedAttr>(D, RetTy)) + return RetEffect::MakeNotOwned(*K); + + if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) + for (const auto *PD : MD->overridden_methods()) + if (auto RE = getRetEffectFromAnnotations(RetTy, PD)) + return RE; + + return std::nullopt; +} + +/// \return Whether the chain of typedefs starting from @c QT +/// has a typedef with a given name @c Name. +static bool hasTypedefNamed(QualType QT, + StringRef Name) { + while (auto *T = QT->getAs<TypedefType>()) { + const auto &Context = T->getDecl()->getASTContext(); + if (T->getDecl()->getIdentifier() == &Context.Idents.get(Name)) + return true; + QT = T->getDecl()->getUnderlyingType(); + } + return false; +} + +static QualType getCallableReturnType(const NamedDecl *ND) { + if (const auto *FD = dyn_cast<FunctionDecl>(ND)) { + return FD->getReturnType(); + } else if (const auto *MD = dyn_cast<ObjCMethodDecl>(ND)) { + return MD->getReturnType(); + } else { + llvm_unreachable("Unexpected decl"); + } +} + +bool RetainSummaryManager::applyParamAnnotationEffect( + const ParmVarDecl *pd, unsigned parm_idx, const NamedDecl *FD, + RetainSummaryTemplate &Template) { + QualType QT = pd->getType(); + if (auto K = + hasAnyEnabledAttrOf<NSConsumedAttr, CFConsumedAttr, OSConsumedAttr, + GeneralizedConsumedAttr>(pd, QT)) { + Template->addArg(AF, parm_idx, ArgEffect(DecRef, *K)); + return true; + } else if (auto K = hasAnyEnabledAttrOf< + CFReturnsRetainedAttr, OSReturnsRetainedAttr, + OSReturnsRetainedOnNonZeroAttr, OSReturnsRetainedOnZeroAttr, + GeneralizedReturnsRetainedAttr>(pd, QT)) { + + // For OSObjects, we try to guess whether the object is created based + // on the return value. + if (K == ObjKind::OS) { + QualType QT = getCallableReturnType(FD); + + bool HasRetainedOnZero = pd->hasAttr<OSReturnsRetainedOnZeroAttr>(); + bool HasRetainedOnNonZero = pd->hasAttr<OSReturnsRetainedOnNonZeroAttr>(); + + // The usual convention is to create an object on non-zero return, but + // it's reverted if the typedef chain has a typedef kern_return_t, + // because kReturnSuccess constant is defined as zero. + // The convention can be overwritten by custom attributes. + bool SuccessOnZero = + HasRetainedOnZero || + (hasTypedefNamed(QT, "kern_return_t") && !HasRetainedOnNonZero); + bool ShouldSplit = !QT.isNull() && !QT->isVoidType(); + ArgEffectKind AK = RetainedOutParameter; + if (ShouldSplit && SuccessOnZero) { + AK = RetainedOutParameterOnZero; + } else if (ShouldSplit && (!SuccessOnZero || HasRetainedOnNonZero)) { + AK = RetainedOutParameterOnNonZero; + } + Template->addArg(AF, parm_idx, ArgEffect(AK, ObjKind::OS)); + } + + // For others: + // Do nothing. Retained out parameters will either point to a +1 reference + // or NULL, but the way you check for failure differs depending on the + // API. Consequently, we don't have a good way to track them yet. 
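+ // For instance, a (hypothetical) 'OSStatus create(CFTypeRef *out)' may
+ // return the +1 reference in '*out' only on success, and callers check
+ // for failure differently from one API to the next.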
+ return true; + } else if (auto K = hasAnyEnabledAttrOf<CFReturnsNotRetainedAttr, + OSReturnsNotRetainedAttr, + GeneralizedReturnsNotRetainedAttr>( + pd, QT)) { + Template->addArg(AF, parm_idx, ArgEffect(UnretainedOutParameter, *K)); + return true; + } + + if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { + for (const auto *OD : MD->overridden_methods()) { + const ParmVarDecl *OP = OD->parameters()[parm_idx]; + if (applyParamAnnotationEffect(OP, parm_idx, OD, Template)) + return true; + } + } + + return false; +} + +void +RetainSummaryManager::updateSummaryFromAnnotations(const RetainSummary *&Summ, + const FunctionDecl *FD) { + if (!FD) + return; + + assert(Summ && "Must have a summary to add annotations to."); + RetainSummaryTemplate Template(Summ, *this); + + // Effects on the parameters. + unsigned parm_idx = 0; + for (auto pi = FD->param_begin(), + pe = FD->param_end(); pi != pe; ++pi, ++parm_idx) + applyParamAnnotationEffect(*pi, parm_idx, FD, Template); + + QualType RetTy = FD->getReturnType(); + if (std::optional<RetEffect> RetE = getRetEffectFromAnnotations(RetTy, FD)) + Template->setRetEffect(*RetE); + + if (hasAnyEnabledAttrOf<OSConsumesThisAttr>(FD, RetTy)) + Template->setThisEffect(ArgEffect(DecRef, ObjKind::OS)); +} + +void +RetainSummaryManager::updateSummaryFromAnnotations(const RetainSummary *&Summ, + const ObjCMethodDecl *MD) { + if (!MD) + return; + + assert(Summ && "Must have a valid summary to add annotations to"); + RetainSummaryTemplate Template(Summ, *this); + + // Effects on the receiver. + if (hasAnyEnabledAttrOf<NSConsumesSelfAttr>(MD, MD->getReturnType())) + Template->setReceiverEffect(ArgEffect(DecRef, ObjKind::ObjC)); + + // Effects on the parameters. + unsigned parm_idx = 0; + for (auto pi = MD->param_begin(), pe = MD->param_end(); pi != pe; + ++pi, ++parm_idx) + applyParamAnnotationEffect(*pi, parm_idx, MD, Template); + + QualType RetTy = MD->getReturnType(); + if (std::optional<RetEffect> RetE = getRetEffectFromAnnotations(RetTy, MD)) + Template->setRetEffect(*RetE); +} + +const RetainSummary * +RetainSummaryManager::getStandardMethodSummary(const ObjCMethodDecl *MD, + Selector S, QualType RetTy) { + // Any special effects? + ArgEffect ReceiverEff = ArgEffect(DoNothing, ObjKind::ObjC); + RetEffect ResultEff = RetEffect::MakeNoRet(); + + // Check the method family, and apply any default annotations. + switch (MD ? MD->getMethodFamily() : S.getMethodFamily()) { + case OMF_None: + case OMF_initialize: + case OMF_performSelector: + // Assume all Objective-C methods follow Cocoa Memory Management rules. + // FIXME: Does the non-threaded performSelector family really belong here? + // The selector could be, say, @selector(copy). + if (cocoa::isCocoaObjectRef(RetTy)) + ResultEff = RetEffect::MakeNotOwned(ObjKind::ObjC); + else if (coreFoundation::isCFObjectRef(RetTy)) { + // ObjCMethodDecl currently doesn't consider CF objects as valid return + // values for alloc, new, copy, or mutableCopy, so we have to + // double-check with the selector. This is ugly, but there aren't that + // many Objective-C methods that return CF objects, right? 
+ if (MD) { + switch (S.getMethodFamily()) { + case OMF_alloc: + case OMF_new: + case OMF_copy: + case OMF_mutableCopy: + ResultEff = RetEffect::MakeOwned(ObjKind::CF); + break; + default: + ResultEff = RetEffect::MakeNotOwned(ObjKind::CF); + break; + } + } else { + ResultEff = RetEffect::MakeNotOwned(ObjKind::CF); + } + } + break; + case OMF_init: + ResultEff = ObjCInitRetE; + ReceiverEff = ArgEffect(DecRef, ObjKind::ObjC); + break; + case OMF_alloc: + case OMF_new: + case OMF_copy: + case OMF_mutableCopy: + if (cocoa::isCocoaObjectRef(RetTy)) + ResultEff = ObjCAllocRetE; + else if (coreFoundation::isCFObjectRef(RetTy)) + ResultEff = RetEffect::MakeOwned(ObjKind::CF); + break; + case OMF_autorelease: + ReceiverEff = ArgEffect(Autorelease, ObjKind::ObjC); + break; + case OMF_retain: + ReceiverEff = ArgEffect(IncRef, ObjKind::ObjC); + break; + case OMF_release: + ReceiverEff = ArgEffect(DecRef, ObjKind::ObjC); + break; + case OMF_dealloc: + ReceiverEff = ArgEffect(Dealloc, ObjKind::ObjC); + break; + case OMF_self: + // -self is handled specially by the ExprEngine to propagate the receiver. + break; + case OMF_retainCount: + case OMF_finalize: + // These methods don't return objects. + break; + } + + // If one of the arguments in the selector has the keyword 'delegate' we + // should stop tracking the reference count for the receiver. This is + // because the reference count is quite possibly handled by a delegate + // method. + if (S.isKeywordSelector()) { + for (unsigned i = 0, e = S.getNumArgs(); i != e; ++i) { + StringRef Slot = S.getNameForSlot(i); + if (Slot.substr(Slot.size() - 8).equals_insensitive("delegate")) { + if (ResultEff == ObjCInitRetE) + ResultEff = RetEffect::MakeNoRetHard(); + else + ReceiverEff = ArgEffect(StopTrackingHard, ObjKind::ObjC); + } + } + } + + if (ReceiverEff.getKind() == DoNothing && + ResultEff.getKind() == RetEffect::NoRet) + return getDefaultSummary(); + + return getPersistentSummary(ResultEff, ArgEffects(AF.getEmptyMap()), + ArgEffect(ReceiverEff), ArgEffect(MayEscape)); +} + +const RetainSummary * +RetainSummaryManager::getClassMethodSummary(const ObjCMessageExpr *ME) { + assert(!ME->isInstanceMessage()); + const ObjCInterfaceDecl *Class = ME->getReceiverInterface(); + + return getMethodSummary(ME->getSelector(), Class, ME->getMethodDecl(), + ME->getType(), ObjCClassMethodSummaries); +} + +const RetainSummary *RetainSummaryManager::getInstanceMethodSummary( + const ObjCMessageExpr *ME, + QualType ReceiverType) { + const ObjCInterfaceDecl *ReceiverClass = nullptr; + + // We do better tracking of the type of the object than the core ExprEngine. + // See if we have its type in our private state. + if (!ReceiverType.isNull()) + if (const auto *PT = ReceiverType->getAs<ObjCObjectPointerType>()) + ReceiverClass = PT->getInterfaceDecl(); + + // If we don't know what kind of object this is, fall back to its static type. + if (!ReceiverClass) + ReceiverClass = ME->getReceiverInterface(); + + // FIXME: The receiver could be a reference to a class, meaning that + // we should use the class method. + // id x = [NSObject class]; + // [x performSelector:... withObject:... 
afterDelay:...]; + Selector S = ME->getSelector(); + const ObjCMethodDecl *Method = ME->getMethodDecl(); + if (!Method && ReceiverClass) + Method = ReceiverClass->getInstanceMethod(S); + + return getMethodSummary(S, ReceiverClass, Method, ME->getType(), + ObjCMethodSummaries); +} + +const RetainSummary * +RetainSummaryManager::getMethodSummary(Selector S, + const ObjCInterfaceDecl *ID, + const ObjCMethodDecl *MD, QualType RetTy, + ObjCMethodSummariesTy &CachedSummaries) { + + // Objective-C method summaries are only applicable to ObjC and CF objects. + if (!TrackObjCAndCFObjects) + return getDefaultSummary(); + + // Look up a summary in our summary cache. + const RetainSummary *Summ = CachedSummaries.find(ID, S); + + if (!Summ) { + Summ = getStandardMethodSummary(MD, S, RetTy); + + // Annotations override defaults. + updateSummaryFromAnnotations(Summ, MD); + + // Memoize the summary. + CachedSummaries[ObjCSummaryKey(ID, S)] = Summ; + } + + return Summ; +} + +void RetainSummaryManager::InitializeClassMethodSummaries() { + ArgEffects ScratchArgs = AF.getEmptyMap(); + + // Create the [NSAssertionHandler currentHander] summary. + addClassMethSummary("NSAssertionHandler", "currentHandler", + getPersistentSummary(RetEffect::MakeNotOwned(ObjKind::ObjC), + ScratchArgs)); + + // Create the [NSAutoreleasePool addObject:] summary. + ScratchArgs = AF.add(ScratchArgs, 0, ArgEffect(Autorelease)); + addClassMethSummary("NSAutoreleasePool", "addObject", + getPersistentSummary(RetEffect::MakeNoRet(), ScratchArgs, + ArgEffect(DoNothing), + ArgEffect(Autorelease))); +} + +void RetainSummaryManager::InitializeMethodSummaries() { + + ArgEffects ScratchArgs = AF.getEmptyMap(); + // Create the "init" selector. It just acts as a pass-through for the + // receiver. + const RetainSummary *InitSumm = getPersistentSummary( + ObjCInitRetE, ScratchArgs, ArgEffect(DecRef, ObjKind::ObjC)); + addNSObjectMethSummary(GetNullarySelector("init", Ctx), InitSumm); + + // awakeAfterUsingCoder: behaves basically like an 'init' method. It + // claims the receiver and returns a retained object. + addNSObjectMethSummary(GetUnarySelector("awakeAfterUsingCoder", Ctx), + InitSumm); + + // The next methods are allocators. + const RetainSummary *AllocSumm = getPersistentSummary(ObjCAllocRetE, + ScratchArgs); + const RetainSummary *CFAllocSumm = + getPersistentSummary(RetEffect::MakeOwned(ObjKind::CF), ScratchArgs); + + // Create the "retain" selector. + RetEffect NoRet = RetEffect::MakeNoRet(); + const RetainSummary *Summ = getPersistentSummary( + NoRet, ScratchArgs, ArgEffect(IncRef, ObjKind::ObjC)); + addNSObjectMethSummary(GetNullarySelector("retain", Ctx), Summ); + + // Create the "release" selector. + Summ = getPersistentSummary(NoRet, ScratchArgs, + ArgEffect(DecRef, ObjKind::ObjC)); + addNSObjectMethSummary(GetNullarySelector("release", Ctx), Summ); + + // Create the -dealloc summary. + Summ = getPersistentSummary(NoRet, ScratchArgs, ArgEffect(Dealloc, + ObjKind::ObjC)); + addNSObjectMethSummary(GetNullarySelector("dealloc", Ctx), Summ); + + // Create the "autorelease" selector. + Summ = getPersistentSummary(NoRet, ScratchArgs, ArgEffect(Autorelease, + ObjKind::ObjC)); + addNSObjectMethSummary(GetNullarySelector("autorelease", Ctx), Summ); + + // For NSWindow, allocated objects are (initially) self-owned. + // FIXME: For now we opt for false negatives with NSWindow, as these objects + // self-own themselves. However, they only do this once they are displayed. + // Thus, we need to track an NSWindow's display status. 
+ // This is tracked in <rdar://problem/6062711>.
+ // See also http://llvm.org/bugs/show_bug.cgi?id=3714.
+ const RetainSummary *NoTrackYet =
+ getPersistentSummary(RetEffect::MakeNoRet(), ScratchArgs,
+ ArgEffect(StopTracking), ArgEffect(StopTracking));
+
+ addClassMethSummary("NSWindow", "alloc", NoTrackYet);
+
+ // For NSPanel (which subclasses NSWindow), allocated objects are not
+ // self-owned.
+ // FIXME: For now we don't track NSPanel objects, for the same reason
+ // as for NSWindow objects.
+ addClassMethSummary("NSPanel", "alloc", NoTrackYet);
+
+ // For NSNull, objects returned by +null are singletons that ignore
+ // retain/release semantics. Just don't track them.
+ // <rdar://problem/12858915>
+ addClassMethSummary("NSNull", "null", NoTrackYet);
+
+ // Don't track allocated autorelease pools, as it is okay to prematurely
+ // exit a method.
+ addClassMethSummary("NSAutoreleasePool", "alloc", NoTrackYet);
+ addClassMethSummary("NSAutoreleasePool", "allocWithZone", NoTrackYet, false);
+ addClassMethSummary("NSAutoreleasePool", "new", NoTrackYet);
+
+ // Create summaries for QCRenderer/QCView -createSnapshotImageOfType:.
+ addInstMethSummary("QCRenderer", AllocSumm, "createSnapshotImageOfType");
+ addInstMethSummary("QCView", AllocSumm, "createSnapshotImageOfType");
+
+ // Create summaries for CIContext, 'createCGImage' and
+ // 'createCGLayerWithSize'. These objects are CF objects, and are not
+ // automatically garbage collected.
+ addInstMethSummary("CIContext", CFAllocSumm, "createCGImage", "fromRect");
+ addInstMethSummary("CIContext", CFAllocSumm, "createCGImage", "fromRect",
+ "format", "colorSpace");
+ addInstMethSummary("CIContext", CFAllocSumm, "createCGLayerWithSize", "info");
+}
+
+const RetainSummary *
+RetainSummaryManager::getMethodSummary(const ObjCMethodDecl *MD) {
+ const ObjCInterfaceDecl *ID = MD->getClassInterface();
+ Selector S = MD->getSelector();
+ QualType ResultTy = MD->getReturnType();
+
+ ObjCMethodSummariesTy *CachedSummaries;
+ if (MD->isInstanceMethod())
+ CachedSummaries = &ObjCMethodSummaries;
+ else
+ CachedSummaries = &ObjCClassMethodSummaries;
+
+ return getMethodSummary(S, ID, MD, ResultTy, *CachedSummaries);
+}
diff --git a/contrib/llvm-project/clang/lib/Analysis/ThreadSafety.cpp b/contrib/llvm-project/clang/lib/Analysis/ThreadSafety.cpp
new file mode 100644
index 000000000000..899c6018895e
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/ThreadSafety.cpp
@@ -0,0 +1,2515 @@
+//===- ThreadSafety.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// An intra-procedural analysis for thread safety (e.g. deadlocks and race
+// conditions), based on an annotation system.
+//
+// See http://clang.llvm.org/docs/ThreadSafetyAnalysis.html
+// for more information.
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/ThreadSafety.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclGroup.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/Analyses/ThreadSafetyCommon.h" +#include "clang/Analysis/Analyses/ThreadSafetyTIL.h" +#include "clang/Analysis/Analyses/ThreadSafetyTraverse.h" +#include "clang/Analysis/Analyses/ThreadSafetyUtil.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/OperatorKinds.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/Specifiers.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <functional> +#include <iterator> +#include <memory> +#include <optional> +#include <string> +#include <type_traits> +#include <utility> +#include <vector> + +using namespace clang; +using namespace threadSafety; + +// Key method definition +ThreadSafetyHandler::~ThreadSafetyHandler() = default; + +/// Issue a warning about an invalid lock expression +static void warnInvalidLock(ThreadSafetyHandler &Handler, + const Expr *MutexExp, const NamedDecl *D, + const Expr *DeclExp, StringRef Kind) { + SourceLocation Loc; + if (DeclExp) + Loc = DeclExp->getExprLoc(); + + // FIXME: add a note about the attribute location in MutexExp or D + if (Loc.isValid()) + Handler.handleInvalidLockExp(Loc); +} + +namespace { + +/// A set of CapabilityExpr objects, which are compiled from thread safety +/// attributes on a function. +class CapExprSet : public SmallVector<CapabilityExpr, 4> { +public: + /// Push M onto list, but discard duplicates. + void push_back_nodup(const CapabilityExpr &CapE) { + if (llvm::none_of(*this, [=](const CapabilityExpr &CapE2) { + return CapE.equals(CapE2); + })) + push_back(CapE); + } +}; + +class FactManager; +class FactSet; + +/// This is a helper class that stores a fact that is known at a +/// particular point in program execution. Currently, a fact is a capability, +/// along with additional information, such as where it was acquired, whether +/// it is exclusive or shared, etc. +/// +/// FIXME: this analysis does not currently support re-entrant locking. +class FactEntry : public CapabilityExpr { +public: + /// Where a fact comes from. + enum SourceKind { + Acquired, ///< The fact has been directly acquired. + Asserted, ///< The fact has been asserted to be held. + Declared, ///< The fact is assumed to be held by callers. + Managed, ///< The fact has been acquired through a scoped capability. + }; + +private: + /// Exclusive or shared. + LockKind LKind : 8; + + // How it was acquired. + SourceKind Source : 8; + + /// Where it was acquired. 
+ SourceLocation AcquireLoc; + +public: + FactEntry(const CapabilityExpr &CE, LockKind LK, SourceLocation Loc, + SourceKind Src) + : CapabilityExpr(CE), LKind(LK), Source(Src), AcquireLoc(Loc) {} + virtual ~FactEntry() = default; + + LockKind kind() const { return LKind; } + SourceLocation loc() const { return AcquireLoc; } + + bool asserted() const { return Source == Asserted; } + bool declared() const { return Source == Declared; } + bool managed() const { return Source == Managed; } + + virtual void + handleRemovalFromIntersection(const FactSet &FSet, FactManager &FactMan, + SourceLocation JoinLoc, LockErrorKind LEK, + ThreadSafetyHandler &Handler) const = 0; + virtual void handleLock(FactSet &FSet, FactManager &FactMan, + const FactEntry &entry, + ThreadSafetyHandler &Handler) const = 0; + virtual void handleUnlock(FactSet &FSet, FactManager &FactMan, + const CapabilityExpr &Cp, SourceLocation UnlockLoc, + bool FullyRemove, + ThreadSafetyHandler &Handler) const = 0; + + // Return true if LKind >= LK, where exclusive > shared + bool isAtLeast(LockKind LK) const { + return (LKind == LK_Exclusive) || (LK == LK_Shared); + } +}; + +using FactID = unsigned short; + +/// FactManager manages the memory for all facts that are created during +/// the analysis of a single routine. +class FactManager { +private: + std::vector<std::unique_ptr<const FactEntry>> Facts; + +public: + FactID newFact(std::unique_ptr<FactEntry> Entry) { + Facts.push_back(std::move(Entry)); + return static_cast<unsigned short>(Facts.size() - 1); + } + + const FactEntry &operator[](FactID F) const { return *Facts[F]; } +}; + +/// A FactSet is the set of facts that are known to be true at a +/// particular program point. FactSets must be small, because they are +/// frequently copied, and are thus implemented as a set of indices into a +/// table maintained by a FactManager. A typical FactSet only holds 1 or 2 +/// locks, so we can get away with doing a linear search for lookup. Note +/// that a hashtable or map is inappropriate in this case, because lookups +/// may involve partial pattern matches, rather than exact matches. 
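+/// (findPartialMatch below is one such lookup: it lets diagnostics point
+/// at a held capability that is related to, but not identical with, the
+/// one being checked.)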
+class FactSet { +private: + using FactVec = SmallVector<FactID, 4>; + + FactVec FactIDs; + +public: + using iterator = FactVec::iterator; + using const_iterator = FactVec::const_iterator; + + iterator begin() { return FactIDs.begin(); } + const_iterator begin() const { return FactIDs.begin(); } + + iterator end() { return FactIDs.end(); } + const_iterator end() const { return FactIDs.end(); } + + bool isEmpty() const { return FactIDs.size() == 0; } + + // Return true if the set contains only negative facts + bool isEmpty(FactManager &FactMan) const { + for (const auto FID : *this) { + if (!FactMan[FID].negative()) + return false; + } + return true; + } + + void addLockByID(FactID ID) { FactIDs.push_back(ID); } + + FactID addLock(FactManager &FM, std::unique_ptr<FactEntry> Entry) { + FactID F = FM.newFact(std::move(Entry)); + FactIDs.push_back(F); + return F; + } + + bool removeLock(FactManager& FM, const CapabilityExpr &CapE) { + unsigned n = FactIDs.size(); + if (n == 0) + return false; + + for (unsigned i = 0; i < n-1; ++i) { + if (FM[FactIDs[i]].matches(CapE)) { + FactIDs[i] = FactIDs[n-1]; + FactIDs.pop_back(); + return true; + } + } + if (FM[FactIDs[n-1]].matches(CapE)) { + FactIDs.pop_back(); + return true; + } + return false; + } + + iterator findLockIter(FactManager &FM, const CapabilityExpr &CapE) { + return std::find_if(begin(), end(), [&](FactID ID) { + return FM[ID].matches(CapE); + }); + } + + const FactEntry *findLock(FactManager &FM, const CapabilityExpr &CapE) const { + auto I = std::find_if(begin(), end(), [&](FactID ID) { + return FM[ID].matches(CapE); + }); + return I != end() ? &FM[*I] : nullptr; + } + + const FactEntry *findLockUniv(FactManager &FM, + const CapabilityExpr &CapE) const { + auto I = std::find_if(begin(), end(), [&](FactID ID) -> bool { + return FM[ID].matchesUniv(CapE); + }); + return I != end() ? &FM[*I] : nullptr; + } + + const FactEntry *findPartialMatch(FactManager &FM, + const CapabilityExpr &CapE) const { + auto I = std::find_if(begin(), end(), [&](FactID ID) -> bool { + return FM[ID].partiallyMatches(CapE); + }); + return I != end() ? &FM[*I] : nullptr; + } + + bool containsMutexDecl(FactManager &FM, const ValueDecl* Vd) const { + auto I = std::find_if(begin(), end(), [&](FactID ID) -> bool { + return FM[ID].valueDecl() == Vd; + }); + return I != end(); + } +}; + +class ThreadSafetyAnalyzer; + +} // namespace + +namespace clang { +namespace threadSafety { + +class BeforeSet { +private: + using BeforeVect = SmallVector<const ValueDecl *, 4>; + + struct BeforeInfo { + BeforeVect Vect; + int Visited = 0; + + BeforeInfo() = default; + BeforeInfo(BeforeInfo &&) = default; + }; + + using BeforeMap = + llvm::DenseMap<const ValueDecl *, std::unique_ptr<BeforeInfo>>; + using CycleMap = llvm::DenseMap<const ValueDecl *, bool>; + +public: + BeforeSet() = default; + + BeforeInfo* insertAttrExprs(const ValueDecl* Vd, + ThreadSafetyAnalyzer& Analyzer); + + BeforeInfo *getBeforeInfoForDecl(const ValueDecl *Vd, + ThreadSafetyAnalyzer &Analyzer); + + void checkBeforeAfter(const ValueDecl* Vd, + const FactSet& FSet, + ThreadSafetyAnalyzer& Analyzer, + SourceLocation Loc, StringRef CapKind); + +private: + BeforeMap BMap; + CycleMap CycMap; +}; + +} // namespace threadSafety +} // namespace clang + +namespace { + +class LocalVariableMap; + +using LocalVarContext = llvm::ImmutableMap<const NamedDecl *, unsigned>; + +/// A side (entry or exit) of a CFG node. 
+enum CFGBlockSide { CBS_Entry, CBS_Exit };
+
+/// CFGBlockInfo is a struct which contains all the information that is
+/// maintained for each block in the CFG. See LocalVariableMap for more
+/// information about the contexts.
+struct CFGBlockInfo {
+  // Lockset held at entry to block
+  FactSet EntrySet;
+
+  // Lockset held at exit from block
+  FactSet ExitSet;
+
+  // Context held at entry to block
+  LocalVarContext EntryContext;
+
+  // Context held at exit from block
+  LocalVarContext ExitContext;
+
+  // Location of first statement in block
+  SourceLocation EntryLoc;
+
+  // Location of last statement in block.
+  SourceLocation ExitLoc;
+
+  // Used to replay contexts later
+  unsigned EntryIndex;
+
+  // Is this block reachable?
+  bool Reachable = false;
+
+  const FactSet &getSet(CFGBlockSide Side) const {
+    return Side == CBS_Entry ? EntrySet : ExitSet;
+  }
+
+  SourceLocation getLocation(CFGBlockSide Side) const {
+    return Side == CBS_Entry ? EntryLoc : ExitLoc;
+  }
+
+private:
+  CFGBlockInfo(LocalVarContext EmptyCtx)
+      : EntryContext(EmptyCtx), ExitContext(EmptyCtx) {}
+
+public:
+  static CFGBlockInfo getEmptyBlockInfo(LocalVariableMap &M);
+};
+
+// A LocalVariableMap maintains a map from local variables to their currently
+// valid definitions. It provides SSA-like functionality when traversing the
+// CFG. Like SSA, each definition or assignment to a variable is assigned a
+// unique name (an integer), which acts as the SSA name for that definition.
+// The total set of names is shared among all CFG basic blocks.
+// Unlike SSA, we do not rewrite expressions to replace DeclRefExprs of local
+// variables with their SSA names. Instead, we compute a Context for each
+// point in the code, which maps local variables to the appropriate SSA name.
+// This map changes with each assignment.
+//
+// The map is computed in a single pass over the CFG. Subsequent analyses can
+// then query the map to find the appropriate Context for a statement, and use
+// that Context to look up the definitions of variables.
+class LocalVariableMap {
+public:
+  using Context = LocalVarContext;
+
+  /// A VarDefinition consists of an expression, representing the value of the
+  /// variable, along with the context in which that expression should be
+  /// interpreted. A reference VarDefinition does not itself contain this
+  /// information, but instead contains a pointer to a previous VarDefinition.
+  struct VarDefinition {
+  public:
+    friend class LocalVariableMap;
+
+    // The original declaration for this variable.
+    const NamedDecl *Dec;
+
+    // The expression for this variable, OR
+    const Expr *Exp = nullptr;
+
+    // Reference to another VarDefinition
+    unsigned Ref = 0;
+
+    // The map with which Exp should be interpreted.
+    Context Ctx;
+
+    bool isReference() { return !Exp; }
+
+  private:
+    // Create ordinary variable definition
+    VarDefinition(const NamedDecl *D, const Expr *E, Context C)
+        : Dec(D), Exp(E), Ctx(C) {}
+
+    // Create reference to previous definition
+    VarDefinition(const NamedDecl *D, unsigned R, Context C)
+        : Dec(D), Ref(R), Ctx(C) {}
+  };
+
+private:
+  Context::Factory ContextFactory;
+  std::vector<VarDefinition> VarDefinitions;
+  std::vector<std::pair<const Stmt *, Context>> SavedContexts;
+
+public:
+  LocalVariableMap() {
+    // index 0 is a placeholder for undefined variables (aka phi-nodes).
+    VarDefinitions.push_back(VarDefinition(nullptr, 0u, getEmptyContext()));
+  }
+
+  /// Look up a definition, within the given context.
+ const VarDefinition* lookup(const NamedDecl *D, Context Ctx) { + const unsigned *i = Ctx.lookup(D); + if (!i) + return nullptr; + assert(*i < VarDefinitions.size()); + return &VarDefinitions[*i]; + } + + /// Look up the definition for D within the given context. Returns + /// NULL if the expression is not statically known. If successful, also + /// modifies Ctx to hold the context of the return Expr. + const Expr* lookupExpr(const NamedDecl *D, Context &Ctx) { + const unsigned *P = Ctx.lookup(D); + if (!P) + return nullptr; + + unsigned i = *P; + while (i > 0) { + if (VarDefinitions[i].Exp) { + Ctx = VarDefinitions[i].Ctx; + return VarDefinitions[i].Exp; + } + i = VarDefinitions[i].Ref; + } + return nullptr; + } + + Context getEmptyContext() { return ContextFactory.getEmptyMap(); } + + /// Return the next context after processing S. This function is used by + /// clients of the class to get the appropriate context when traversing the + /// CFG. It must be called for every assignment or DeclStmt. + Context getNextContext(unsigned &CtxIndex, const Stmt *S, Context C) { + if (SavedContexts[CtxIndex+1].first == S) { + CtxIndex++; + Context Result = SavedContexts[CtxIndex].second; + return Result; + } + return C; + } + + void dumpVarDefinitionName(unsigned i) { + if (i == 0) { + llvm::errs() << "Undefined"; + return; + } + const NamedDecl *Dec = VarDefinitions[i].Dec; + if (!Dec) { + llvm::errs() << "<<NULL>>"; + return; + } + Dec->printName(llvm::errs()); + llvm::errs() << "." << i << " " << ((const void*) Dec); + } + + /// Dumps an ASCII representation of the variable map to llvm::errs() + void dump() { + for (unsigned i = 1, e = VarDefinitions.size(); i < e; ++i) { + const Expr *Exp = VarDefinitions[i].Exp; + unsigned Ref = VarDefinitions[i].Ref; + + dumpVarDefinitionName(i); + llvm::errs() << " = "; + if (Exp) Exp->dump(); + else { + dumpVarDefinitionName(Ref); + llvm::errs() << "\n"; + } + } + } + + /// Dumps an ASCII representation of a Context to llvm::errs() + void dumpContext(Context C) { + for (Context::iterator I = C.begin(), E = C.end(); I != E; ++I) { + const NamedDecl *D = I.getKey(); + D->printName(llvm::errs()); + const unsigned *i = C.lookup(D); + llvm::errs() << " -> "; + dumpVarDefinitionName(*i); + llvm::errs() << "\n"; + } + } + + /// Builds the variable map. + void traverseCFG(CFG *CFGraph, const PostOrderCFGView *SortedGraph, + std::vector<CFGBlockInfo> &BlockInfo); + +protected: + friend class VarMapBuilder; + + // Get the current context index + unsigned getContextIndex() { return SavedContexts.size()-1; } + + // Save the current context for later replay + void saveContext(const Stmt *S, Context C) { + SavedContexts.push_back(std::make_pair(S, C)); + } + + // Adds a new definition to the given context, and returns a new context. + // This method should be called when declaring a new variable. + Context addDefinition(const NamedDecl *D, const Expr *Exp, Context Ctx) { + assert(!Ctx.contains(D)); + unsigned newID = VarDefinitions.size(); + Context NewCtx = ContextFactory.add(Ctx, D, newID); + VarDefinitions.push_back(VarDefinition(D, Exp, Ctx)); + return NewCtx; + } + + // Add a new reference to an existing definition. + Context addReference(const NamedDecl *D, unsigned i, Context Ctx) { + unsigned newID = VarDefinitions.size(); + Context NewCtx = ContextFactory.add(Ctx, D, newID); + VarDefinitions.push_back(VarDefinition(D, i, Ctx)); + return NewCtx; + } + + // Updates a definition only if that definition is already in the map. 
+  // This method should be called when assigning to an existing variable.
+  Context updateDefinition(const NamedDecl *D, Expr *Exp, Context Ctx) {
+    if (Ctx.contains(D)) {
+      unsigned newID = VarDefinitions.size();
+      Context NewCtx = ContextFactory.remove(Ctx, D);
+      NewCtx = ContextFactory.add(NewCtx, D, newID);
+      VarDefinitions.push_back(VarDefinition(D, Exp, Ctx));
+      return NewCtx;
+    }
+    return Ctx;
+  }
+
+  // Removes a definition from the context, but keeps the variable name
+  // as a valid variable. The index 0 is a placeholder for cleared definitions.
+  Context clearDefinition(const NamedDecl *D, Context Ctx) {
+    Context NewCtx = Ctx;
+    if (NewCtx.contains(D)) {
+      NewCtx = ContextFactory.remove(NewCtx, D);
+      NewCtx = ContextFactory.add(NewCtx, D, 0);
+    }
+    return NewCtx;
+  }
+
+  // Remove a definition entirely from the context.
+  Context removeDefinition(const NamedDecl *D, Context Ctx) {
+    Context NewCtx = Ctx;
+    if (NewCtx.contains(D)) {
+      NewCtx = ContextFactory.remove(NewCtx, D);
+    }
+    return NewCtx;
+  }
+
+  Context intersectContexts(Context C1, Context C2);
+  Context createReferenceContext(Context C);
+  void intersectBackEdge(Context C1, Context C2);
+};
+
+} // namespace
+
+// This has to be defined after LocalVariableMap.
+CFGBlockInfo CFGBlockInfo::getEmptyBlockInfo(LocalVariableMap &M) {
+  return CFGBlockInfo(M.getEmptyContext());
+}
+
+namespace {
+
+/// Visitor which builds a LocalVariableMap
+class VarMapBuilder : public ConstStmtVisitor<VarMapBuilder> {
+public:
+  LocalVariableMap* VMap;
+  LocalVariableMap::Context Ctx;
+
+  VarMapBuilder(LocalVariableMap *VM, LocalVariableMap::Context C)
+      : VMap(VM), Ctx(C) {}
+
+  void VisitDeclStmt(const DeclStmt *S);
+  void VisitBinaryOperator(const BinaryOperator *BO);
+};
+
+} // namespace
+
+// Add new local variables to the variable map
+void VarMapBuilder::VisitDeclStmt(const DeclStmt *S) {
+  bool modifiedCtx = false;
+  const DeclGroupRef DGrp = S->getDeclGroup();
+  for (const auto *D : DGrp) {
+    if (const auto *VD = dyn_cast_or_null<VarDecl>(D)) {
+      const Expr *E = VD->getInit();
+
+      // Add local variables with trivial type to the variable map
+      QualType T = VD->getType();
+      if (T.isTrivialType(VD->getASTContext())) {
+        Ctx = VMap->addDefinition(VD, E, Ctx);
+        modifiedCtx = true;
+      }
+    }
+  }
+  if (modifiedCtx)
+    VMap->saveContext(S, Ctx);
+}
+
+// Update local variable definitions in variable map
+void VarMapBuilder::VisitBinaryOperator(const BinaryOperator *BO) {
+  if (!BO->isAssignmentOp())
+    return;
+
+  Expr *LHSExp = BO->getLHS()->IgnoreParenCasts();
+
+  // Update the variable map and current context.
+  if (const auto *DRE = dyn_cast<DeclRefExpr>(LHSExp)) {
+    const ValueDecl *VDec = DRE->getDecl();
+    if (Ctx.lookup(VDec)) {
+      if (BO->getOpcode() == BO_Assign)
+        Ctx = VMap->updateDefinition(VDec, BO->getRHS(), Ctx);
+      else
+        // FIXME -- handle compound assignment operators
+        Ctx = VMap->clearDefinition(VDec, Ctx);
+      VMap->saveContext(BO, Ctx);
+    }
+  }
+}
+
+// Computes the intersection of two contexts. The intersection is the
+// set of variables which have the same definition in both contexts;
+// variables with different definitions are discarded.
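+// For example, if C1 = { x -> x2, y -> y1 } and C2 = { x -> x3, y -> y1 },
+// the result keeps y -> y1 (both paths agree) and clears x: the two paths
+// assign different definitions, so x is reset to the placeholder index 0.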
+LocalVariableMap::Context +LocalVariableMap::intersectContexts(Context C1, Context C2) { + Context Result = C1; + for (const auto &P : C1) { + const NamedDecl *Dec = P.first; + const unsigned *i2 = C2.lookup(Dec); + if (!i2) // variable doesn't exist on second path + Result = removeDefinition(Dec, Result); + else if (*i2 != P.second) // variable exists, but has different definition + Result = clearDefinition(Dec, Result); + } + return Result; +} + +// For every variable in C, create a new variable that refers to the +// definition in C. Return a new context that contains these new variables. +// (We use this for a naive implementation of SSA on loop back-edges.) +LocalVariableMap::Context LocalVariableMap::createReferenceContext(Context C) { + Context Result = getEmptyContext(); + for (const auto &P : C) + Result = addReference(P.first, P.second, Result); + return Result; +} + +// This routine also takes the intersection of C1 and C2, but it does so by +// altering the VarDefinitions. C1 must be the result of an earlier call to +// createReferenceContext. +void LocalVariableMap::intersectBackEdge(Context C1, Context C2) { + for (const auto &P : C1) { + unsigned i1 = P.second; + VarDefinition *VDef = &VarDefinitions[i1]; + assert(VDef->isReference()); + + const unsigned *i2 = C2.lookup(P.first); + if (!i2 || (*i2 != i1)) + VDef->Ref = 0; // Mark this variable as undefined + } +} + +// Traverse the CFG in topological order, so all predecessors of a block +// (excluding back-edges) are visited before the block itself. At +// each point in the code, we calculate a Context, which holds the set of +// variable definitions which are visible at that point in execution. +// Visible variables are mapped to their definitions using an array that +// contains all definitions. +// +// At join points in the CFG, the set is computed as the intersection of +// the incoming sets along each edge, E.g. +// +// { Context | VarDefinitions } +// int x = 0; { x -> x1 | x1 = 0 } +// int y = 0; { x -> x1, y -> y1 | y1 = 0, x1 = 0 } +// if (b) x = 1; { x -> x2, y -> y1 | x2 = 1, y1 = 0, ... } +// else x = 2; { x -> x3, y -> y1 | x3 = 2, x2 = 1, ... } +// ... { y -> y1 (x is unknown) | x3 = 2, x2 = 1, ... } +// +// This is essentially a simpler and more naive version of the standard SSA +// algorithm. Those definitions that remain in the intersection are from blocks +// that strictly dominate the current block. We do not bother to insert proper +// phi nodes, because they are not used in our analysis; instead, wherever +// a phi node would be required, we simply remove that definition from the +// context (E.g. x above). +// +// The initial traversal does not capture back-edges, so those need to be +// handled on a separate pass. Whenever the first pass encounters an +// incoming back edge, it duplicates the context, creating new definitions +// that refer back to the originals. (These correspond to places where SSA +// might have to insert a phi node.) On the second pass, these definitions are +// set to NULL if the variable has changed on the back-edge (i.e. a phi +// node was actually required.) E.g. +// +// { Context | VarDefinitions } +// int x = 0, y = 0; { x -> x1, y -> y1 | y1 = 0, x1 = 0 } +// while (b) { x -> x2, y -> y1 | [1st:] x2=x1; [2nd:] x2=NULL; } +// x = x+1; { x -> x3, y -> y1 | x3 = x2 + 1, ... } +// ... { y -> y1 | x3 = 2, x2 = 1, ... 
}
+void LocalVariableMap::traverseCFG(CFG *CFGraph,
+                                   const PostOrderCFGView *SortedGraph,
+                                   std::vector<CFGBlockInfo> &BlockInfo) {
+  PostOrderCFGView::CFGBlockSet VisitedBlocks(CFGraph);
+
+  for (const auto *CurrBlock : *SortedGraph) {
+    unsigned CurrBlockID = CurrBlock->getBlockID();
+    CFGBlockInfo *CurrBlockInfo = &BlockInfo[CurrBlockID];
+
+    VisitedBlocks.insert(CurrBlock);
+
+    // Calculate the entry context for the current block
+    bool HasBackEdges = false;
+    bool CtxInit = true;
+    for (CFGBlock::const_pred_iterator PI = CurrBlock->pred_begin(),
+         PE = CurrBlock->pred_end(); PI != PE; ++PI) {
+      // If *PI -> CurrBlock is a back edge, skip it.
+      if (*PI == nullptr || !VisitedBlocks.alreadySet(*PI)) {
+        HasBackEdges = true;
+        continue;
+      }
+
+      unsigned PrevBlockID = (*PI)->getBlockID();
+      CFGBlockInfo *PrevBlockInfo = &BlockInfo[PrevBlockID];
+
+      if (CtxInit) {
+        CurrBlockInfo->EntryContext = PrevBlockInfo->ExitContext;
+        CtxInit = false;
+      }
+      else {
+        CurrBlockInfo->EntryContext =
+          intersectContexts(CurrBlockInfo->EntryContext,
+                            PrevBlockInfo->ExitContext);
+      }
+    }
+
+    // Duplicate the context if we have back-edges, so we can call
+    // intersectBackEdges later.
+    if (HasBackEdges)
+      CurrBlockInfo->EntryContext =
+        createReferenceContext(CurrBlockInfo->EntryContext);
+
+    // Create a starting context index for the current block
+    saveContext(nullptr, CurrBlockInfo->EntryContext);
+    CurrBlockInfo->EntryIndex = getContextIndex();
+
+    // Visit all the statements in the basic block.
+    VarMapBuilder VMapBuilder(this, CurrBlockInfo->EntryContext);
+    for (const auto &BI : *CurrBlock) {
+      switch (BI.getKind()) {
+      case CFGElement::Statement: {
+        CFGStmt CS = BI.castAs<CFGStmt>();
+        VMapBuilder.Visit(CS.getStmt());
+        break;
+      }
+      default:
+        break;
+      }
+    }
+    CurrBlockInfo->ExitContext = VMapBuilder.Ctx;
+
+    // Mark variables on back edges as "unknown" if they've been changed.
+    for (CFGBlock::const_succ_iterator SI = CurrBlock->succ_begin(),
+         SE = CurrBlock->succ_end(); SI != SE; ++SI) {
+      // If CurrBlock -> *SI is *not* a back edge, skip it.
+      if (*SI == nullptr || !VisitedBlocks.alreadySet(*SI))
+        continue;
+
+      CFGBlock *FirstLoopBlock = *SI;
+      Context LoopBegin = BlockInfo[FirstLoopBlock->getBlockID()].EntryContext;
+      Context LoopEnd = CurrBlockInfo->ExitContext;
+      intersectBackEdge(LoopBegin, LoopEnd);
+    }
+  }
+
+  // Put an extra entry at the end of the indexed context array
+  unsigned exitID = CFGraph->getExit().getBlockID();
+  saveContext(nullptr, BlockInfo[exitID].ExitContext);
+}
+
+/// Find the appropriate source locations to use when producing diagnostics for
+/// each block in the CFG.
+static void findBlockLocations(CFG *CFGraph,
+                               const PostOrderCFGView *SortedGraph,
+                               std::vector<CFGBlockInfo> &BlockInfo) {
+  for (const auto *CurrBlock : *SortedGraph) {
+    CFGBlockInfo *CurrBlockInfo = &BlockInfo[CurrBlock->getBlockID()];
+
+    // Find the source location of the last statement in the block, if the
+    // block is not empty.
+    if (const Stmt *S = CurrBlock->getTerminatorStmt()) {
+      CurrBlockInfo->EntryLoc = CurrBlockInfo->ExitLoc = S->getBeginLoc();
+    } else {
+      for (CFGBlock::const_reverse_iterator BI = CurrBlock->rbegin(),
+           BE = CurrBlock->rend(); BI != BE; ++BI) {
+        // FIXME: Handle other CFGElement kinds.
+        if (std::optional<CFGStmt> CS = BI->getAs<CFGStmt>()) {
+          CurrBlockInfo->ExitLoc = CS->getStmt()->getBeginLoc();
+          break;
+        }
+      }
+    }
+
+    if (CurrBlockInfo->ExitLoc.isValid()) {
+      // This block contains at least one statement.
Find the source location + // of the first statement in the block. + for (const auto &BI : *CurrBlock) { + // FIXME: Handle other CFGElement kinds. + if (std::optional<CFGStmt> CS = BI.getAs<CFGStmt>()) { + CurrBlockInfo->EntryLoc = CS->getStmt()->getBeginLoc(); + break; + } + } + } else if (CurrBlock->pred_size() == 1 && *CurrBlock->pred_begin() && + CurrBlock != &CFGraph->getExit()) { + // The block is empty, and has a single predecessor. Use its exit + // location. + CurrBlockInfo->EntryLoc = CurrBlockInfo->ExitLoc = + BlockInfo[(*CurrBlock->pred_begin())->getBlockID()].ExitLoc; + } else if (CurrBlock->succ_size() == 1 && *CurrBlock->succ_begin()) { + // The block is empty, and has a single successor. Use its entry + // location. + CurrBlockInfo->EntryLoc = CurrBlockInfo->ExitLoc = + BlockInfo[(*CurrBlock->succ_begin())->getBlockID()].EntryLoc; + } + } +} + +namespace { + +class LockableFactEntry : public FactEntry { +public: + LockableFactEntry(const CapabilityExpr &CE, LockKind LK, SourceLocation Loc, + SourceKind Src = Acquired) + : FactEntry(CE, LK, Loc, Src) {} + + void + handleRemovalFromIntersection(const FactSet &FSet, FactManager &FactMan, + SourceLocation JoinLoc, LockErrorKind LEK, + ThreadSafetyHandler &Handler) const override { + if (!asserted() && !negative() && !isUniversal()) { + Handler.handleMutexHeldEndOfScope(getKind(), toString(), loc(), JoinLoc, + LEK); + } + } + + void handleLock(FactSet &FSet, FactManager &FactMan, const FactEntry &entry, + ThreadSafetyHandler &Handler) const override { + Handler.handleDoubleLock(entry.getKind(), entry.toString(), loc(), + entry.loc()); + } + + void handleUnlock(FactSet &FSet, FactManager &FactMan, + const CapabilityExpr &Cp, SourceLocation UnlockLoc, + bool FullyRemove, + ThreadSafetyHandler &Handler) const override { + FSet.removeLock(FactMan, Cp); + if (!Cp.negative()) { + FSet.addLock(FactMan, std::make_unique<LockableFactEntry>( + !Cp, LK_Exclusive, UnlockLoc)); + } + } +}; + +class ScopedLockableFactEntry : public FactEntry { +private: + enum UnderlyingCapabilityKind { + UCK_Acquired, ///< Any kind of acquired capability. + UCK_ReleasedShared, ///< Shared capability that was released. + UCK_ReleasedExclusive, ///< Exclusive capability that was released. + }; + + struct UnderlyingCapability { + CapabilityExpr Cap; + UnderlyingCapabilityKind Kind; + }; + + SmallVector<UnderlyingCapability, 2> UnderlyingMutexes; + +public: + ScopedLockableFactEntry(const CapabilityExpr &CE, SourceLocation Loc) + : FactEntry(CE, LK_Exclusive, Loc, Acquired) {} + + void addLock(const CapabilityExpr &M) { + UnderlyingMutexes.push_back(UnderlyingCapability{M, UCK_Acquired}); + } + + void addExclusiveUnlock(const CapabilityExpr &M) { + UnderlyingMutexes.push_back(UnderlyingCapability{M, UCK_ReleasedExclusive}); + } + + void addSharedUnlock(const CapabilityExpr &M) { + UnderlyingMutexes.push_back(UnderlyingCapability{M, UCK_ReleasedShared}); + } + + void + handleRemovalFromIntersection(const FactSet &FSet, FactManager &FactMan, + SourceLocation JoinLoc, LockErrorKind LEK, + ThreadSafetyHandler &Handler) const override { + for (const auto &UnderlyingMutex : UnderlyingMutexes) { + const auto *Entry = FSet.findLock(FactMan, UnderlyingMutex.Cap); + if ((UnderlyingMutex.Kind == UCK_Acquired && Entry) || + (UnderlyingMutex.Kind != UCK_Acquired && !Entry)) { + // If this scoped lock manages another mutex, and if the underlying + // mutex is still/not held, then warn about the underlying mutex. 
+ Handler.handleMutexHeldEndOfScope(UnderlyingMutex.Cap.getKind(), + UnderlyingMutex.Cap.toString(), loc(), + JoinLoc, LEK); + } + } + } + + void handleLock(FactSet &FSet, FactManager &FactMan, const FactEntry &entry, + ThreadSafetyHandler &Handler) const override { + for (const auto &UnderlyingMutex : UnderlyingMutexes) { + if (UnderlyingMutex.Kind == UCK_Acquired) + lock(FSet, FactMan, UnderlyingMutex.Cap, entry.kind(), entry.loc(), + &Handler); + else + unlock(FSet, FactMan, UnderlyingMutex.Cap, entry.loc(), &Handler); + } + } + + void handleUnlock(FactSet &FSet, FactManager &FactMan, + const CapabilityExpr &Cp, SourceLocation UnlockLoc, + bool FullyRemove, + ThreadSafetyHandler &Handler) const override { + assert(!Cp.negative() && "Managing object cannot be negative."); + for (const auto &UnderlyingMutex : UnderlyingMutexes) { + // Remove/lock the underlying mutex if it exists/is still unlocked; warn + // on double unlocking/locking if we're not destroying the scoped object. + ThreadSafetyHandler *TSHandler = FullyRemove ? nullptr : &Handler; + if (UnderlyingMutex.Kind == UCK_Acquired) { + unlock(FSet, FactMan, UnderlyingMutex.Cap, UnlockLoc, TSHandler); + } else { + LockKind kind = UnderlyingMutex.Kind == UCK_ReleasedShared + ? LK_Shared + : LK_Exclusive; + lock(FSet, FactMan, UnderlyingMutex.Cap, kind, UnlockLoc, TSHandler); + } + } + if (FullyRemove) + FSet.removeLock(FactMan, Cp); + } + +private: + void lock(FactSet &FSet, FactManager &FactMan, const CapabilityExpr &Cp, + LockKind kind, SourceLocation loc, + ThreadSafetyHandler *Handler) const { + if (const FactEntry *Fact = FSet.findLock(FactMan, Cp)) { + if (Handler) + Handler->handleDoubleLock(Cp.getKind(), Cp.toString(), Fact->loc(), + loc); + } else { + FSet.removeLock(FactMan, !Cp); + FSet.addLock(FactMan, + std::make_unique<LockableFactEntry>(Cp, kind, loc, Managed)); + } + } + + void unlock(FactSet &FSet, FactManager &FactMan, const CapabilityExpr &Cp, + SourceLocation loc, ThreadSafetyHandler *Handler) const { + if (FSet.findLock(FactMan, Cp)) { + FSet.removeLock(FactMan, Cp); + FSet.addLock(FactMan, std::make_unique<LockableFactEntry>( + !Cp, LK_Exclusive, loc)); + } else if (Handler) { + SourceLocation PrevLoc; + if (const FactEntry *Neg = FSet.findLock(FactMan, !Cp)) + PrevLoc = Neg->loc(); + Handler->handleUnmatchedUnlock(Cp.getKind(), Cp.toString(), loc, PrevLoc); + } + } +}; + +/// Class which implements the core thread safety analysis routines. 
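+///
+/// As a minimal sketch of the client code this analysis consumes (assuming a
+/// Mutex class and the conventional annotation macros, e.g. GUARDED_BY and
+/// EXCLUSIVE_LOCKS_REQUIRED, from the thread safety documentation):
+///
+///   class Account {
+///     Mutex Mu;
+///     int Balance GUARDED_BY(Mu);
+///   public:
+///     void deposit(int N) EXCLUSIVE_LOCKS_REQUIRED(Mu) { Balance += N; }
+///     void withdraw(int N) {
+///       Mu.Lock();
+///       Balance -= N; // OK: Mu is in the lockset here
+///       Mu.Unlock();
+///     }
+///   };
+///
+/// The analyzer warns when an access or call is not covered by such facts.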
+class ThreadSafetyAnalyzer { + friend class BuildLockset; + friend class threadSafety::BeforeSet; + + llvm::BumpPtrAllocator Bpa; + threadSafety::til::MemRegionRef Arena; + threadSafety::SExprBuilder SxBuilder; + + ThreadSafetyHandler &Handler; + const CXXMethodDecl *CurrentMethod; + LocalVariableMap LocalVarMap; + FactManager FactMan; + std::vector<CFGBlockInfo> BlockInfo; + + BeforeSet *GlobalBeforeSet; + +public: + ThreadSafetyAnalyzer(ThreadSafetyHandler &H, BeforeSet* Bset) + : Arena(&Bpa), SxBuilder(Arena), Handler(H), GlobalBeforeSet(Bset) {} + + bool inCurrentScope(const CapabilityExpr &CapE); + + void addLock(FactSet &FSet, std::unique_ptr<FactEntry> Entry, + bool ReqAttr = false); + void removeLock(FactSet &FSet, const CapabilityExpr &CapE, + SourceLocation UnlockLoc, bool FullyRemove, LockKind Kind); + + template <typename AttrType> + void getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, const Expr *Exp, + const NamedDecl *D, til::SExpr *Self = nullptr); + + template <class AttrType> + void getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, const Expr *Exp, + const NamedDecl *D, + const CFGBlock *PredBlock, const CFGBlock *CurrBlock, + Expr *BrE, bool Neg); + + const CallExpr* getTrylockCallExpr(const Stmt *Cond, LocalVarContext C, + bool &Negate); + + void getEdgeLockset(FactSet &Result, const FactSet &ExitSet, + const CFGBlock* PredBlock, + const CFGBlock *CurrBlock); + + bool join(const FactEntry &a, const FactEntry &b, bool CanModify); + + void intersectAndWarn(FactSet &EntrySet, const FactSet &ExitSet, + SourceLocation JoinLoc, LockErrorKind EntryLEK, + LockErrorKind ExitLEK); + + void intersectAndWarn(FactSet &EntrySet, const FactSet &ExitSet, + SourceLocation JoinLoc, LockErrorKind LEK) { + intersectAndWarn(EntrySet, ExitSet, JoinLoc, LEK, LEK); + } + + void runAnalysis(AnalysisDeclContext &AC); +}; + +} // namespace + +/// Process acquired_before and acquired_after attributes on Vd. +BeforeSet::BeforeInfo* BeforeSet::insertAttrExprs(const ValueDecl* Vd, + ThreadSafetyAnalyzer& Analyzer) { + // Create a new entry for Vd. + BeforeInfo *Info = nullptr; + { + // Keep InfoPtr in its own scope in case BMap is modified later and the + // reference becomes invalid. + std::unique_ptr<BeforeInfo> &InfoPtr = BMap[Vd]; + if (!InfoPtr) + InfoPtr.reset(new BeforeInfo()); + Info = InfoPtr.get(); + } + + for (const auto *At : Vd->attrs()) { + switch (At->getKind()) { + case attr::AcquiredBefore: { + const auto *A = cast<AcquiredBeforeAttr>(At); + + // Read exprs from the attribute, and add them to BeforeVect. + for (const auto *Arg : A->args()) { + CapabilityExpr Cp = + Analyzer.SxBuilder.translateAttrExpr(Arg, nullptr); + if (const ValueDecl *Cpvd = Cp.valueDecl()) { + Info->Vect.push_back(Cpvd); + const auto It = BMap.find(Cpvd); + if (It == BMap.end()) + insertAttrExprs(Cpvd, Analyzer); + } + } + break; + } + case attr::AcquiredAfter: { + const auto *A = cast<AcquiredAfterAttr>(At); + + // Read exprs from the attribute, and add them to BeforeVect. 
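+      // Note that `Mutex mu2 ACQUIRED_AFTER(mu1);` is recorded by appending
+      // mu2 to mu1's before-set, i.e. it is treated the same as writing
+      // `ACQUIRED_BEFORE(mu2)` on mu1.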
+      for (const auto *Arg : A->args()) {
+        CapabilityExpr Cp =
+          Analyzer.SxBuilder.translateAttrExpr(Arg, nullptr);
+        if (const ValueDecl *ArgVd = Cp.valueDecl()) {
+          // Get entry for mutex listed in attribute
+          BeforeInfo *ArgInfo = getBeforeInfoForDecl(ArgVd, Analyzer);
+          ArgInfo->Vect.push_back(Vd);
+        }
+      }
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  return Info;
+}
+
+BeforeSet::BeforeInfo *
+BeforeSet::getBeforeInfoForDecl(const ValueDecl *Vd,
+                                ThreadSafetyAnalyzer &Analyzer) {
+  auto It = BMap.find(Vd);
+  BeforeInfo *Info = nullptr;
+  if (It == BMap.end())
+    Info = insertAttrExprs(Vd, Analyzer);
+  else
+    Info = It->second.get();
+  assert(Info && "BMap contained nullptr?");
+  return Info;
+}
+
+/// Warn if any mutexes in FSet are in the acquired_before set of Vd.
+void BeforeSet::checkBeforeAfter(const ValueDecl* StartVd,
+                                 const FactSet& FSet,
+                                 ThreadSafetyAnalyzer& Analyzer,
+                                 SourceLocation Loc, StringRef CapKind) {
+  SmallVector<BeforeInfo*, 8> InfoVect;
+
+  // Do a depth-first traversal of the acquired-before graph starting at Vd.
+  // Return true if there are cycles.
+  std::function<bool (const ValueDecl*)> traverse = [&](const ValueDecl* Vd) {
+    if (!Vd)
+      return false;
+
+    BeforeSet::BeforeInfo *Info = getBeforeInfoForDecl(Vd, Analyzer);
+
+    if (Info->Visited == 1)
+      return true;
+
+    if (Info->Visited == 2)
+      return false;
+
+    if (Info->Vect.empty())
+      return false;
+
+    InfoVect.push_back(Info);
+    Info->Visited = 1;
+    for (const auto *Vdb : Info->Vect) {
+      // Warn if a mutex in the before-set of Vd is already held.
+      if (FSet.containsMutexDecl(Analyzer.FactMan, Vdb)) {
+        StringRef L1 = StartVd->getName();
+        StringRef L2 = Vdb->getName();
+        Analyzer.Handler.handleLockAcquiredBefore(CapKind, L1, L2, Loc);
+      }
+      // Transitively search other before sets, and warn on cycles.
+      if (traverse(Vdb)) {
+        if (CycMap.find(Vd) == CycMap.end()) {
+          CycMap.insert(std::make_pair(Vd, true));
+          StringRef L1 = Vd->getName();
+          Analyzer.Handler.handleBeforeAfterCycle(L1, Vd->getLocation());
+        }
+      }
+    }
+    Info->Visited = 2;
+    return false;
+  };
+
+  traverse(StartVd);
+
+  for (auto *Info : InfoVect)
+    Info->Visited = 0;
+}
+
+/// Gets the value decl pointer from DeclRefExprs or MemberExprs.
+static const ValueDecl *getValueDecl(const Expr *Exp) {
+  if (const auto *CE = dyn_cast<ImplicitCastExpr>(Exp))
+    return getValueDecl(CE->getSubExpr());
+
+  if (const auto *DR = dyn_cast<DeclRefExpr>(Exp))
+    return DR->getDecl();
+
+  if (const auto *ME = dyn_cast<MemberExpr>(Exp))
+    return ME->getMemberDecl();
+
+  return nullptr;
+}
+
+namespace {
+
+template <typename Ty>
+class has_arg_iterator_range {
+  using yes = char[1];
+  using no = char[2];
+
+  template <typename Inner>
+  static yes& test(Inner *I, decltype(I->args()) * = nullptr);
+
+  template <typename>
+  static no& test(...);
+
+public:
+  static const bool value = sizeof(test<Ty>(nullptr)) == sizeof(yes);
+};
+
+} // namespace
+
+bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) {
+  const threadSafety::til::SExpr *SExp = CapE.sexpr();
+  assert(SExp && "Null expressions should be ignored");
+
+  if (const auto *LP = dyn_cast<til::LiteralPtr>(SExp)) {
+    const ValueDecl *VD = LP->clangDecl();
+    // Variables defined in a function are always inaccessible.
+    if (!VD || !VD->isDefinedOutsideFunctionOrMethod())
+      return false;
+    // For now we consider static class members to be inaccessible.
+    if (isa<CXXRecordDecl>(VD->getDeclContext()))
+      return false;
+    // Global variables are always in scope.
+    return true;
+  }
+
+  // Members are in scope from methods of the same class.
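+  // For example, given `class C { Mutex Mu; void f() REQUIRES(Mu); };`, the
+  // capability `Mu` named inside C::f is a til::Project whose declaration
+  // context is C, matching the context of the current method.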
+ if (const auto *P = dyn_cast<til::Project>(SExp)) { + if (!CurrentMethod) + return false; + const ValueDecl *VD = P->clangDecl(); + return VD->getDeclContext() == CurrentMethod->getDeclContext(); + } + + return false; +} + +/// Add a new lock to the lockset, warning if the lock is already there. +/// \param ReqAttr -- true if this is part of an initial Requires attribute. +void ThreadSafetyAnalyzer::addLock(FactSet &FSet, + std::unique_ptr<FactEntry> Entry, + bool ReqAttr) { + if (Entry->shouldIgnore()) + return; + + if (!ReqAttr && !Entry->negative()) { + // look for the negative capability, and remove it from the fact set. + CapabilityExpr NegC = !*Entry; + const FactEntry *Nen = FSet.findLock(FactMan, NegC); + if (Nen) { + FSet.removeLock(FactMan, NegC); + } + else { + if (inCurrentScope(*Entry) && !Entry->asserted()) + Handler.handleNegativeNotHeld(Entry->getKind(), Entry->toString(), + NegC.toString(), Entry->loc()); + } + } + + // Check before/after constraints + if (Handler.issueBetaWarnings() && + !Entry->asserted() && !Entry->declared()) { + GlobalBeforeSet->checkBeforeAfter(Entry->valueDecl(), FSet, *this, + Entry->loc(), Entry->getKind()); + } + + // FIXME: Don't always warn when we have support for reentrant locks. + if (const FactEntry *Cp = FSet.findLock(FactMan, *Entry)) { + if (!Entry->asserted()) + Cp->handleLock(FSet, FactMan, *Entry, Handler); + } else { + FSet.addLock(FactMan, std::move(Entry)); + } +} + +/// Remove a lock from the lockset, warning if the lock is not there. +/// \param UnlockLoc The source location of the unlock (only used in error msg) +void ThreadSafetyAnalyzer::removeLock(FactSet &FSet, const CapabilityExpr &Cp, + SourceLocation UnlockLoc, + bool FullyRemove, LockKind ReceivedKind) { + if (Cp.shouldIgnore()) + return; + + const FactEntry *LDat = FSet.findLock(FactMan, Cp); + if (!LDat) { + SourceLocation PrevLoc; + if (const FactEntry *Neg = FSet.findLock(FactMan, !Cp)) + PrevLoc = Neg->loc(); + Handler.handleUnmatchedUnlock(Cp.getKind(), Cp.toString(), UnlockLoc, + PrevLoc); + return; + } + + // Generic lock removal doesn't care about lock kind mismatches, but + // otherwise diagnose when the lock kinds are mismatched. + if (ReceivedKind != LK_Generic && LDat->kind() != ReceivedKind) { + Handler.handleIncorrectUnlockKind(Cp.getKind(), Cp.toString(), LDat->kind(), + ReceivedKind, LDat->loc(), UnlockLoc); + } + + LDat->handleUnlock(FSet, FactMan, Cp, UnlockLoc, FullyRemove, Handler); +} + +/// Extract the list of mutexIDs from the attribute on an expression, +/// and push them onto Mtxs, discarding any duplicates. +template <typename AttrType> +void ThreadSafetyAnalyzer::getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, + const Expr *Exp, const NamedDecl *D, + til::SExpr *Self) { + if (Attr->args_size() == 0) { + // The mutex held is the "this" object. + CapabilityExpr Cp = SxBuilder.translateAttrExpr(nullptr, D, Exp, Self); + if (Cp.isInvalid()) { + warnInvalidLock(Handler, nullptr, D, Exp, Cp.getKind()); + return; + } + //else + if (!Cp.shouldIgnore()) + Mtxs.push_back_nodup(Cp); + return; + } + + for (const auto *Arg : Attr->args()) { + CapabilityExpr Cp = SxBuilder.translateAttrExpr(Arg, D, Exp, Self); + if (Cp.isInvalid()) { + warnInvalidLock(Handler, nullptr, D, Exp, Cp.getKind()); + continue; + } + //else + if (!Cp.shouldIgnore()) + Mtxs.push_back_nodup(Cp); + } +} + +/// Extract the list of mutexIDs from a trylock attribute. If the +/// trylock applies to the given edge, then push them onto Mtxs, discarding +/// any duplicates. 
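+///
+/// For example (hypothetical client code, with TryLock annotated as a
+/// try-acquire function whose success value is true):
+///
+///   if (mu.TryLock()) {
+///     ... // mu is held only on this branch
+///     mu.Unlock();
+///   }
+///
+/// The success value, combined with which successor edge we are on (and any
+/// negation of the condition), decides whether mu is pushed onto Mtxs.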
+template <class AttrType> +void ThreadSafetyAnalyzer::getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, + const Expr *Exp, const NamedDecl *D, + const CFGBlock *PredBlock, + const CFGBlock *CurrBlock, + Expr *BrE, bool Neg) { + // Find out which branch has the lock + bool branch = false; + if (const auto *BLE = dyn_cast_or_null<CXXBoolLiteralExpr>(BrE)) + branch = BLE->getValue(); + else if (const auto *ILE = dyn_cast_or_null<IntegerLiteral>(BrE)) + branch = ILE->getValue().getBoolValue(); + + int branchnum = branch ? 0 : 1; + if (Neg) + branchnum = !branchnum; + + // If we've taken the trylock branch, then add the lock + int i = 0; + for (CFGBlock::const_succ_iterator SI = PredBlock->succ_begin(), + SE = PredBlock->succ_end(); SI != SE && i < 2; ++SI, ++i) { + if (*SI == CurrBlock && i == branchnum) + getMutexIDs(Mtxs, Attr, Exp, D); + } +} + +static bool getStaticBooleanValue(Expr *E, bool &TCond) { + if (isa<CXXNullPtrLiteralExpr>(E) || isa<GNUNullExpr>(E)) { + TCond = false; + return true; + } else if (const auto *BLE = dyn_cast<CXXBoolLiteralExpr>(E)) { + TCond = BLE->getValue(); + return true; + } else if (const auto *ILE = dyn_cast<IntegerLiteral>(E)) { + TCond = ILE->getValue().getBoolValue(); + return true; + } else if (auto *CE = dyn_cast<ImplicitCastExpr>(E)) + return getStaticBooleanValue(CE->getSubExpr(), TCond); + return false; +} + +// If Cond can be traced back to a function call, return the call expression. +// The negate variable should be called with false, and will be set to true +// if the function call is negated, e.g. if (!mu.tryLock(...)) +const CallExpr* ThreadSafetyAnalyzer::getTrylockCallExpr(const Stmt *Cond, + LocalVarContext C, + bool &Negate) { + if (!Cond) + return nullptr; + + if (const auto *CallExp = dyn_cast<CallExpr>(Cond)) { + if (CallExp->getBuiltinCallee() == Builtin::BI__builtin_expect) + return getTrylockCallExpr(CallExp->getArg(0), C, Negate); + return CallExp; + } + else if (const auto *PE = dyn_cast<ParenExpr>(Cond)) + return getTrylockCallExpr(PE->getSubExpr(), C, Negate); + else if (const auto *CE = dyn_cast<ImplicitCastExpr>(Cond)) + return getTrylockCallExpr(CE->getSubExpr(), C, Negate); + else if (const auto *FE = dyn_cast<FullExpr>(Cond)) + return getTrylockCallExpr(FE->getSubExpr(), C, Negate); + else if (const auto *DRE = dyn_cast<DeclRefExpr>(Cond)) { + const Expr *E = LocalVarMap.lookupExpr(DRE->getDecl(), C); + return getTrylockCallExpr(E, C, Negate); + } + else if (const auto *UOP = dyn_cast<UnaryOperator>(Cond)) { + if (UOP->getOpcode() == UO_LNot) { + Negate = !Negate; + return getTrylockCallExpr(UOP->getSubExpr(), C, Negate); + } + return nullptr; + } + else if (const auto *BOP = dyn_cast<BinaryOperator>(Cond)) { + if (BOP->getOpcode() == BO_EQ || BOP->getOpcode() == BO_NE) { + if (BOP->getOpcode() == BO_NE) + Negate = !Negate; + + bool TCond = false; + if (getStaticBooleanValue(BOP->getRHS(), TCond)) { + if (!TCond) Negate = !Negate; + return getTrylockCallExpr(BOP->getLHS(), C, Negate); + } + TCond = false; + if (getStaticBooleanValue(BOP->getLHS(), TCond)) { + if (!TCond) Negate = !Negate; + return getTrylockCallExpr(BOP->getRHS(), C, Negate); + } + return nullptr; + } + if (BOP->getOpcode() == BO_LAnd) { + // LHS must have been evaluated in a different block. 
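+      // (The CFG evaluates `&&` across separate blocks, so when this operator
+      // is the terminator condition, only its RHS belongs to the current
+      // block; the `||` case below is analogous.)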
+ return getTrylockCallExpr(BOP->getRHS(), C, Negate); + } + if (BOP->getOpcode() == BO_LOr) + return getTrylockCallExpr(BOP->getRHS(), C, Negate); + return nullptr; + } else if (const auto *COP = dyn_cast<ConditionalOperator>(Cond)) { + bool TCond, FCond; + if (getStaticBooleanValue(COP->getTrueExpr(), TCond) && + getStaticBooleanValue(COP->getFalseExpr(), FCond)) { + if (TCond && !FCond) + return getTrylockCallExpr(COP->getCond(), C, Negate); + if (!TCond && FCond) { + Negate = !Negate; + return getTrylockCallExpr(COP->getCond(), C, Negate); + } + } + } + return nullptr; +} + +/// Find the lockset that holds on the edge between PredBlock +/// and CurrBlock. The edge set is the exit set of PredBlock (passed +/// as the ExitSet parameter) plus any trylocks, which are conditionally held. +void ThreadSafetyAnalyzer::getEdgeLockset(FactSet& Result, + const FactSet &ExitSet, + const CFGBlock *PredBlock, + const CFGBlock *CurrBlock) { + Result = ExitSet; + + const Stmt *Cond = PredBlock->getTerminatorCondition(); + // We don't acquire try-locks on ?: branches, only when its result is used. + if (!Cond || isa<ConditionalOperator>(PredBlock->getTerminatorStmt())) + return; + + bool Negate = false; + const CFGBlockInfo *PredBlockInfo = &BlockInfo[PredBlock->getBlockID()]; + const LocalVarContext &LVarCtx = PredBlockInfo->ExitContext; + + const auto *Exp = getTrylockCallExpr(Cond, LVarCtx, Negate); + if (!Exp) + return; + + auto *FunDecl = dyn_cast_or_null<NamedDecl>(Exp->getCalleeDecl()); + if(!FunDecl || !FunDecl->hasAttrs()) + return; + + CapExprSet ExclusiveLocksToAdd; + CapExprSet SharedLocksToAdd; + + // If the condition is a call to a Trylock function, then grab the attributes + for (const auto *Attr : FunDecl->attrs()) { + switch (Attr->getKind()) { + case attr::TryAcquireCapability: { + auto *A = cast<TryAcquireCapabilityAttr>(Attr); + getMutexIDs(A->isShared() ? SharedLocksToAdd : ExclusiveLocksToAdd, A, + Exp, FunDecl, PredBlock, CurrBlock, A->getSuccessValue(), + Negate); + break; + }; + case attr::ExclusiveTrylockFunction: { + const auto *A = cast<ExclusiveTrylockFunctionAttr>(Attr); + getMutexIDs(ExclusiveLocksToAdd, A, Exp, FunDecl, PredBlock, CurrBlock, + A->getSuccessValue(), Negate); + break; + } + case attr::SharedTrylockFunction: { + const auto *A = cast<SharedTrylockFunctionAttr>(Attr); + getMutexIDs(SharedLocksToAdd, A, Exp, FunDecl, PredBlock, CurrBlock, + A->getSuccessValue(), Negate); + break; + } + default: + break; + } + } + + // Add and remove locks. + SourceLocation Loc = Exp->getExprLoc(); + for (const auto &ExclusiveLockToAdd : ExclusiveLocksToAdd) + addLock(Result, std::make_unique<LockableFactEntry>(ExclusiveLockToAdd, + LK_Exclusive, Loc)); + for (const auto &SharedLockToAdd : SharedLocksToAdd) + addLock(Result, std::make_unique<LockableFactEntry>(SharedLockToAdd, + LK_Shared, Loc)); +} + +namespace { + +/// We use this class to visit different types of expressions in +/// CFGBlocks, and build up the lockset. +/// An expression may cause us to add or remove locks from the lockset, or else +/// output error messages related to missing locks. +/// FIXME: In future, we may be able to not inherit from a visitor. +class BuildLockset : public ConstStmtVisitor<BuildLockset> { + friend class ThreadSafetyAnalyzer; + + ThreadSafetyAnalyzer *Analyzer; + FactSet FSet; + /// Maps constructed objects to `this` placeholder prior to initialization. 
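+  /// For example, while visiting `MutexLock lock(&mu);` the CXXConstructExpr
+  /// is mapped to a til::LiteralPtr placeholder; VisitDeclStmt later learns
+  /// the object is the variable `lock` and calls setClangDecl on it.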
+ llvm::SmallDenseMap<const Expr *, til::LiteralPtr *> ConstructedObjects; + LocalVariableMap::Context LVarCtx; + unsigned CtxIndex; + + // helper functions + void warnIfMutexNotHeld(const NamedDecl *D, const Expr *Exp, AccessKind AK, + Expr *MutexExp, ProtectedOperationKind POK, + til::LiteralPtr *Self, SourceLocation Loc); + void warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, Expr *MutexExp, + til::LiteralPtr *Self, SourceLocation Loc); + + void checkAccess(const Expr *Exp, AccessKind AK, + ProtectedOperationKind POK = POK_VarAccess); + void checkPtAccess(const Expr *Exp, AccessKind AK, + ProtectedOperationKind POK = POK_VarAccess); + + void handleCall(const Expr *Exp, const NamedDecl *D, + til::LiteralPtr *Self = nullptr, + SourceLocation Loc = SourceLocation()); + void examineArguments(const FunctionDecl *FD, + CallExpr::const_arg_iterator ArgBegin, + CallExpr::const_arg_iterator ArgEnd, + bool SkipFirstParam = false); + +public: + BuildLockset(ThreadSafetyAnalyzer *Anlzr, CFGBlockInfo &Info) + : ConstStmtVisitor<BuildLockset>(), Analyzer(Anlzr), FSet(Info.EntrySet), + LVarCtx(Info.EntryContext), CtxIndex(Info.EntryIndex) {} + + void VisitUnaryOperator(const UnaryOperator *UO); + void VisitBinaryOperator(const BinaryOperator *BO); + void VisitCastExpr(const CastExpr *CE); + void VisitCallExpr(const CallExpr *Exp); + void VisitCXXConstructExpr(const CXXConstructExpr *Exp); + void VisitDeclStmt(const DeclStmt *S); + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *Exp); +}; + +} // namespace + +/// Warn if the LSet does not contain a lock sufficient to protect access +/// of at least the passed in AccessKind. +void BuildLockset::warnIfMutexNotHeld(const NamedDecl *D, const Expr *Exp, + AccessKind AK, Expr *MutexExp, + ProtectedOperationKind POK, + til::LiteralPtr *Self, + SourceLocation Loc) { + LockKind LK = getLockKindFromAccessKind(AK); + + CapabilityExpr Cp = + Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self); + if (Cp.isInvalid()) { + warnInvalidLock(Analyzer->Handler, MutexExp, D, Exp, Cp.getKind()); + return; + } else if (Cp.shouldIgnore()) { + return; + } + + if (Cp.negative()) { + // Negative capabilities act like locks excluded + const FactEntry *LDat = FSet.findLock(Analyzer->FactMan, !Cp); + if (LDat) { + Analyzer->Handler.handleFunExcludesLock( + Cp.getKind(), D->getNameAsString(), (!Cp).toString(), Loc); + return; + } + + // If this does not refer to a negative capability in the same class, + // then stop here. + if (!Analyzer->inCurrentScope(Cp)) + return; + + // Otherwise the negative requirement must be propagated to the caller. + LDat = FSet.findLock(Analyzer->FactMan, Cp); + if (!LDat) { + Analyzer->Handler.handleNegativeNotHeld(D, Cp.toString(), Loc); + } + return; + } + + const FactEntry *LDat = FSet.findLockUniv(Analyzer->FactMan, Cp); + bool NoError = true; + if (!LDat) { + // No exact match found. Look for a partial match. + LDat = FSet.findPartialMatch(Analyzer->FactMan, Cp); + if (LDat) { + // Warn that there's no precise match. + std::string PartMatchStr = LDat->toString(); + StringRef PartMatchName(PartMatchStr); + Analyzer->Handler.handleMutexNotHeld(Cp.getKind(), D, POK, Cp.toString(), + LK, Loc, &PartMatchName); + } else { + // Warn that there's no match at all. + Analyzer->Handler.handleMutexNotHeld(Cp.getKind(), D, POK, Cp.toString(), + LK, Loc); + } + NoError = false; + } + // Make sure the mutex we found is the right kind. 
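+  // (e.g. holding a capability in shared mode does not satisfy an exclusive
+  // access requirement; isAtLeast treats exclusive as subsuming shared.)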
+  if (NoError && LDat && !LDat->isAtLeast(LK)) {
+    Analyzer->Handler.handleMutexNotHeld(Cp.getKind(), D, POK, Cp.toString(),
+                                         LK, Loc);
+  }
+}
+
+/// Warn if the LSet contains the given lock.
+void BuildLockset::warnIfMutexHeld(const NamedDecl *D, const Expr *Exp,
+                                   Expr *MutexExp, til::LiteralPtr *Self,
+                                   SourceLocation Loc) {
+  CapabilityExpr Cp =
+      Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self);
+  if (Cp.isInvalid()) {
+    warnInvalidLock(Analyzer->Handler, MutexExp, D, Exp, Cp.getKind());
+    return;
+  } else if (Cp.shouldIgnore()) {
+    return;
+  }
+
+  const FactEntry *LDat = FSet.findLock(Analyzer->FactMan, Cp);
+  if (LDat) {
+    Analyzer->Handler.handleFunExcludesLock(Cp.getKind(), D->getNameAsString(),
+                                            Cp.toString(), Loc);
+  }
+}
+
+/// Checks guarded_by and pt_guarded_by attributes.
+/// Whenever we identify an access (read or write) to a DeclRefExpr that is
+/// marked with guarded_by, we must ensure the appropriate mutexes are held.
+/// Similarly, we check if the access is to an expression that dereferences
+/// a pointer marked with pt_guarded_by.
+void BuildLockset::checkAccess(const Expr *Exp, AccessKind AK,
+                               ProtectedOperationKind POK) {
+  Exp = Exp->IgnoreImplicit()->IgnoreParenCasts();
+
+  SourceLocation Loc = Exp->getExprLoc();
+
+  // Local variables of reference type cannot be re-assigned;
+  // map them to their initializer.
+  while (const auto *DRE = dyn_cast<DeclRefExpr>(Exp)) {
+    const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()->getCanonicalDecl());
+    if (VD && VD->isLocalVarDecl() && VD->getType()->isReferenceType()) {
+      if (const auto *E = VD->getInit()) {
+        // Guard against self-initialization, e.g. int &i = i;
+        if (E == Exp)
+          break;
+        Exp = E;
+        continue;
+      }
+    }
+    break;
+  }
+
+  if (const auto *UO = dyn_cast<UnaryOperator>(Exp)) {
+    // For dereferences
+    if (UO->getOpcode() == UO_Deref)
+      checkPtAccess(UO->getSubExpr(), AK, POK);
+    return;
+  }
+
+  if (const auto *BO = dyn_cast<BinaryOperator>(Exp)) {
+    switch (BO->getOpcode()) {
+    case BO_PtrMemD: // .*
+      return checkAccess(BO->getLHS(), AK, POK);
+    case BO_PtrMemI: // ->*
+      return checkPtAccess(BO->getLHS(), AK, POK);
+    default:
+      return;
+    }
+  }
+
+  if (const auto *AE = dyn_cast<ArraySubscriptExpr>(Exp)) {
+    checkPtAccess(AE->getLHS(), AK, POK);
+    return;
+  }
+
+  if (const auto *ME = dyn_cast<MemberExpr>(Exp)) {
+    if (ME->isArrow())
+      checkPtAccess(ME->getBase(), AK, POK);
+    else
+      checkAccess(ME->getBase(), AK, POK);
+  }
+
+  const ValueDecl *D = getValueDecl(Exp);
+  if (!D || !D->hasAttrs())
+    return;
+
+  if (D->hasAttr<GuardedVarAttr>() && FSet.isEmpty(Analyzer->FactMan)) {
+    Analyzer->Handler.handleNoMutexHeld(D, POK, AK, Loc);
+  }
+
+  for (const auto *I : D->specific_attrs<GuardedByAttr>())
+    warnIfMutexNotHeld(D, Exp, AK, I->getArg(), POK, nullptr, Loc);
+}
+
+/// Checks pt_guarded_by and pt_guarded_var attributes.
+/// POK is the same operationKind that was passed to checkAccess.
+void BuildLockset::checkPtAccess(const Expr *Exp, AccessKind AK,
+                                 ProtectedOperationKind POK) {
+  while (true) {
+    if (const auto *PE = dyn_cast<ParenExpr>(Exp)) {
+      Exp = PE->getSubExpr();
+      continue;
+    }
+    if (const auto *CE = dyn_cast<CastExpr>(Exp)) {
+      if (CE->getCastKind() == CK_ArrayToPointerDecay) {
+        // If it's an actual array, and not a pointer, then its elements
+        // are protected by GUARDED_BY, not PT_GUARDED_BY.
+        checkAccess(CE->getSubExpr(), AK, POK);
+        return;
+      }
+      Exp = CE->getSubExpr();
+      continue;
+    }
+    break;
+  }
+
+  // Pass by reference warnings are under a different flag.
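+  // (In Clang these are grouped under -Wthread-safety-reference rather than
+  // the core -Wthread-safety-analysis checks, hence the distinct
+  // ProtectedOperationKind below.)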
+ ProtectedOperationKind PtPOK = POK_VarDereference; + if (POK == POK_PassByRef) PtPOK = POK_PtPassByRef; + + const ValueDecl *D = getValueDecl(Exp); + if (!D || !D->hasAttrs()) + return; + + if (D->hasAttr<PtGuardedVarAttr>() && FSet.isEmpty(Analyzer->FactMan)) + Analyzer->Handler.handleNoMutexHeld(D, PtPOK, AK, Exp->getExprLoc()); + + for (auto const *I : D->specific_attrs<PtGuardedByAttr>()) + warnIfMutexNotHeld(D, Exp, AK, I->getArg(), PtPOK, nullptr, + Exp->getExprLoc()); +} + +/// Process a function call, method call, constructor call, +/// or destructor call. This involves looking at the attributes on the +/// corresponding function/method/constructor/destructor, issuing warnings, +/// and updating the locksets accordingly. +/// +/// FIXME: For classes annotated with one of the guarded annotations, we need +/// to treat const method calls as reads and non-const method calls as writes, +/// and check that the appropriate locks are held. Non-const method calls with +/// the same signature as const method calls can be also treated as reads. +/// +/// \param Exp The call expression. +/// \param D The callee declaration. +/// \param Self If \p Exp = nullptr, the implicit this argument. +/// \param Loc If \p Exp = nullptr, the location. +void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + til::LiteralPtr *Self, SourceLocation Loc) { + CapExprSet ExclusiveLocksToAdd, SharedLocksToAdd; + CapExprSet ExclusiveLocksToRemove, SharedLocksToRemove, GenericLocksToRemove; + CapExprSet ScopedReqsAndExcludes; + + // Figure out if we're constructing an object of scoped lockable class + CapabilityExpr Scp; + if (Exp) { + assert(!Self); + const auto *TagT = Exp->getType()->getAs<TagType>(); + if (TagT && Exp->isPRValue()) { + std::pair<til::LiteralPtr *, StringRef> Placeholder = + Analyzer->SxBuilder.createThisPlaceholder(Exp); + [[maybe_unused]] auto inserted = + ConstructedObjects.insert({Exp, Placeholder.first}); + assert(inserted.second && "Are we visiting the same expression again?"); + if (isa<CXXConstructExpr>(Exp)) + Self = Placeholder.first; + if (TagT->getDecl()->hasAttr<ScopedLockableAttr>()) + Scp = CapabilityExpr(Placeholder.first, Placeholder.second, false); + } + + assert(Loc.isInvalid()); + Loc = Exp->getExprLoc(); + } + + for(const Attr *At : D->attrs()) { + switch (At->getKind()) { + // When we encounter a lock function, we need to add the lock to our + // lockset. + case attr::AcquireCapability: { + const auto *A = cast<AcquireCapabilityAttr>(At); + Analyzer->getMutexIDs(A->isShared() ? SharedLocksToAdd + : ExclusiveLocksToAdd, + A, Exp, D, Self); + break; + } + + // An assert will add a lock to the lockset, but will not generate + // a warning if it is already there, and will not generate a warning + // if it is not removed. 
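+    // For example (hypothetical client code, with AssertHeld annotated as an
+    // assert function for mu):
+    //
+    //   void process() {
+    //     mu.AssertHeld();
+    //     data++; // checked as if mu had been acquired here
+    //   }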
+ case attr::AssertExclusiveLock: { + const auto *A = cast<AssertExclusiveLockAttr>(At); + + CapExprSet AssertLocks; + Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); + for (const auto &AssertLock : AssertLocks) + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>( + AssertLock, LK_Exclusive, Loc, FactEntry::Asserted)); + break; + } + case attr::AssertSharedLock: { + const auto *A = cast<AssertSharedLockAttr>(At); + + CapExprSet AssertLocks; + Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); + for (const auto &AssertLock : AssertLocks) + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>( + AssertLock, LK_Shared, Loc, FactEntry::Asserted)); + break; + } + + case attr::AssertCapability: { + const auto *A = cast<AssertCapabilityAttr>(At); + CapExprSet AssertLocks; + Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); + for (const auto &AssertLock : AssertLocks) + Analyzer->addLock(FSet, std::make_unique<LockableFactEntry>( + AssertLock, + A->isShared() ? LK_Shared : LK_Exclusive, + Loc, FactEntry::Asserted)); + break; + } + + // When we encounter an unlock function, we need to remove unlocked + // mutexes from the lockset, and flag a warning if they are not there. + case attr::ReleaseCapability: { + const auto *A = cast<ReleaseCapabilityAttr>(At); + if (A->isGeneric()) + Analyzer->getMutexIDs(GenericLocksToRemove, A, Exp, D, Self); + else if (A->isShared()) + Analyzer->getMutexIDs(SharedLocksToRemove, A, Exp, D, Self); + else + Analyzer->getMutexIDs(ExclusiveLocksToRemove, A, Exp, D, Self); + break; + } + + case attr::RequiresCapability: { + const auto *A = cast<RequiresCapabilityAttr>(At); + for (auto *Arg : A->args()) { + warnIfMutexNotHeld(D, Exp, A->isShared() ? AK_Read : AK_Written, Arg, + POK_FunctionCall, Self, Loc); + // use for adopting a lock + if (!Scp.shouldIgnore()) + Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, Self); + } + break; + } + + case attr::LocksExcluded: { + const auto *A = cast<LocksExcludedAttr>(At); + for (auto *Arg : A->args()) { + warnIfMutexHeld(D, Exp, Arg, Self, Loc); + // use for deferring a lock + if (!Scp.shouldIgnore()) + Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, Self); + } + break; + } + + // Ignore attributes unrelated to thread-safety + default: + break; + } + } + + // Remove locks first to allow lock upgrading/downgrading. + // FIXME -- should only fully remove if the attribute refers to 'this'. + bool Dtor = isa<CXXDestructorDecl>(D); + for (const auto &M : ExclusiveLocksToRemove) + Analyzer->removeLock(FSet, M, Loc, Dtor, LK_Exclusive); + for (const auto &M : SharedLocksToRemove) + Analyzer->removeLock(FSet, M, Loc, Dtor, LK_Shared); + for (const auto &M : GenericLocksToRemove) + Analyzer->removeLock(FSet, M, Loc, Dtor, LK_Generic); + + // Add locks. + FactEntry::SourceKind Source = + !Scp.shouldIgnore() ? FactEntry::Managed : FactEntry::Acquired; + for (const auto &M : ExclusiveLocksToAdd) + Analyzer->addLock(FSet, std::make_unique<LockableFactEntry>(M, LK_Exclusive, + Loc, Source)); + for (const auto &M : SharedLocksToAdd) + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>(M, LK_Shared, Loc, Source)); + + if (!Scp.shouldIgnore()) { + // Add the managing object as a dummy mutex, mapped to the underlying mutex. 
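+    // For example, given a scoped-lockable class, `MutexLock lock(&mu);` adds
+    // a fact for `lock` whose underlying capability is `mu`; releasing or
+    // destroying `lock` then releases `mu` via handleUnlock above.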
+ auto ScopedEntry = std::make_unique<ScopedLockableFactEntry>(Scp, Loc); + for (const auto &M : ExclusiveLocksToAdd) + ScopedEntry->addLock(M); + for (const auto &M : SharedLocksToAdd) + ScopedEntry->addLock(M); + for (const auto &M : ScopedReqsAndExcludes) + ScopedEntry->addLock(M); + for (const auto &M : ExclusiveLocksToRemove) + ScopedEntry->addExclusiveUnlock(M); + for (const auto &M : SharedLocksToRemove) + ScopedEntry->addSharedUnlock(M); + Analyzer->addLock(FSet, std::move(ScopedEntry)); + } +} + +/// For unary operations which read and write a variable, we need to +/// check whether we hold any required mutexes. Reads are checked in +/// VisitCastExpr. +void BuildLockset::VisitUnaryOperator(const UnaryOperator *UO) { + switch (UO->getOpcode()) { + case UO_PostDec: + case UO_PostInc: + case UO_PreDec: + case UO_PreInc: + checkAccess(UO->getSubExpr(), AK_Written); + break; + default: + break; + } +} + +/// For binary operations which assign to a variable (writes), we need to check +/// whether we hold any required mutexes. +/// FIXME: Deal with non-primitive types. +void BuildLockset::VisitBinaryOperator(const BinaryOperator *BO) { + if (!BO->isAssignmentOp()) + return; + + // adjust the context + LVarCtx = Analyzer->LocalVarMap.getNextContext(CtxIndex, BO, LVarCtx); + + checkAccess(BO->getLHS(), AK_Written); +} + +/// Whenever we do an LValue to Rvalue cast, we are reading a variable and +/// need to ensure we hold any required mutexes. +/// FIXME: Deal with non-primitive types. +void BuildLockset::VisitCastExpr(const CastExpr *CE) { + if (CE->getCastKind() != CK_LValueToRValue) + return; + checkAccess(CE->getSubExpr(), AK_Read); +} + +void BuildLockset::examineArguments(const FunctionDecl *FD, + CallExpr::const_arg_iterator ArgBegin, + CallExpr::const_arg_iterator ArgEnd, + bool SkipFirstParam) { + // Currently we can't do anything if we don't know the function declaration. + if (!FD) + return; + + // NO_THREAD_SAFETY_ANALYSIS does double duty here. Normally it + // only turns off checking within the body of a function, but we also + // use it to turn off checking in arguments to the function. This + // could result in some false negatives, but the alternative is to + // create yet another attribute. + if (FD->hasAttr<NoThreadSafetyAnalysisAttr>()) + return; + + const ArrayRef<ParmVarDecl *> Params = FD->parameters(); + auto Param = Params.begin(); + if (SkipFirstParam) + ++Param; + + // There can be default arguments, so we stop when one iterator is at end(). + for (auto Arg = ArgBegin; Param != Params.end() && Arg != ArgEnd; + ++Param, ++Arg) { + QualType Qt = (*Param)->getType(); + if (Qt->isReferenceType()) + checkAccess(*Arg, AK_Read, POK_PassByRef); + } +} + +void BuildLockset::VisitCallExpr(const CallExpr *Exp) { + if (const auto *CE = dyn_cast<CXXMemberCallExpr>(Exp)) { + const auto *ME = dyn_cast<MemberExpr>(CE->getCallee()); + // ME can be null when calling a method pointer + const CXXMethodDecl *MD = CE->getMethodDecl(); + + if (ME && MD) { + if (ME->isArrow()) { + // Should perhaps be AK_Written if !MD->isConst(). + checkPtAccess(CE->getImplicitObjectArgument(), AK_Read); + } else { + // Should perhaps be AK_Written if !MD->isConst(). 
+ checkAccess(CE->getImplicitObjectArgument(), AK_Read); + } + } + + examineArguments(CE->getDirectCallee(), CE->arg_begin(), CE->arg_end()); + } else if (const auto *OE = dyn_cast<CXXOperatorCallExpr>(Exp)) { + OverloadedOperatorKind OEop = OE->getOperator(); + switch (OEop) { + case OO_Equal: + case OO_PlusEqual: + case OO_MinusEqual: + case OO_StarEqual: + case OO_SlashEqual: + case OO_PercentEqual: + case OO_CaretEqual: + case OO_AmpEqual: + case OO_PipeEqual: + case OO_LessLessEqual: + case OO_GreaterGreaterEqual: + checkAccess(OE->getArg(1), AK_Read); + [[fallthrough]]; + case OO_PlusPlus: + case OO_MinusMinus: + checkAccess(OE->getArg(0), AK_Written); + break; + case OO_Star: + case OO_ArrowStar: + case OO_Arrow: + case OO_Subscript: + if (!(OEop == OO_Star && OE->getNumArgs() > 1)) { + // Grrr. operator* can be multiplication... + checkPtAccess(OE->getArg(0), AK_Read); + } + [[fallthrough]]; + default: { + // TODO: get rid of this, and rely on pass-by-ref instead. + const Expr *Obj = OE->getArg(0); + checkAccess(Obj, AK_Read); + // Check the remaining arguments. For method operators, the first + // argument is the implicit self argument, and doesn't appear in the + // FunctionDecl, but for non-methods it does. + const FunctionDecl *FD = OE->getDirectCallee(); + examineArguments(FD, std::next(OE->arg_begin()), OE->arg_end(), + /*SkipFirstParam*/ !isa<CXXMethodDecl>(FD)); + break; + } + } + } else { + examineArguments(Exp->getDirectCallee(), Exp->arg_begin(), Exp->arg_end()); + } + + auto *D = dyn_cast_or_null<NamedDecl>(Exp->getCalleeDecl()); + if(!D || !D->hasAttrs()) + return; + handleCall(Exp, D); +} + +void BuildLockset::VisitCXXConstructExpr(const CXXConstructExpr *Exp) { + const CXXConstructorDecl *D = Exp->getConstructor(); + if (D && D->isCopyConstructor()) { + const Expr* Source = Exp->getArg(0); + checkAccess(Source, AK_Read); + } else { + examineArguments(D, Exp->arg_begin(), Exp->arg_end()); + } + if (D && D->hasAttrs()) + handleCall(Exp, D); +} + +static const Expr *UnpackConstruction(const Expr *E) { + if (auto *CE = dyn_cast<CastExpr>(E)) + if (CE->getCastKind() == CK_NoOp) + E = CE->getSubExpr()->IgnoreParens(); + if (auto *CE = dyn_cast<CastExpr>(E)) + if (CE->getCastKind() == CK_ConstructorConversion || + CE->getCastKind() == CK_UserDefinedConversion) + E = CE->getSubExpr(); + if (auto *BTE = dyn_cast<CXXBindTemporaryExpr>(E)) + E = BTE->getSubExpr(); + return E; +} + +void BuildLockset::VisitDeclStmt(const DeclStmt *S) { + // adjust the context + LVarCtx = Analyzer->LocalVarMap.getNextContext(CtxIndex, S, LVarCtx); + + for (auto *D : S->getDeclGroup()) { + if (auto *VD = dyn_cast_or_null<VarDecl>(D)) { + const Expr *E = VD->getInit(); + if (!E) + continue; + E = E->IgnoreParens(); + + // handle constructors that involve temporaries + if (auto *EWC = dyn_cast<ExprWithCleanups>(E)) + E = EWC->getSubExpr()->IgnoreParens(); + E = UnpackConstruction(E); + + if (auto Object = ConstructedObjects.find(E); + Object != ConstructedObjects.end()) { + Object->second->setClangDecl(VD); + ConstructedObjects.erase(Object); + } + } + } +} + +void BuildLockset::VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *Exp) { + if (const ValueDecl *ExtD = Exp->getExtendingDecl()) { + if (auto Object = + ConstructedObjects.find(UnpackConstruction(Exp->getSubExpr())); + Object != ConstructedObjects.end()) { + Object->second->setClangDecl(ExtD); + ConstructedObjects.erase(Object); + } + } +} + +/// Given two facts merging on a join point, possibly warn and decide whether to +/// 
keep the existing fact (\p A) or replace it with the incoming one (\p B).
+///
+/// \param CanModify Whether we can replace \p A by \p B.
+/// \return false if we should keep \p A, true if we should take \p B.
+bool ThreadSafetyAnalyzer::join(const FactEntry &A, const FactEntry &B,
+                                bool CanModify) {
+  if (A.kind() != B.kind()) {
+    // For managed capabilities, the destructor should unlock in the right mode
+    // anyway. For asserted capabilities no unlocking is needed.
+    if ((A.managed() || A.asserted()) && (B.managed() || B.asserted())) {
+      // The shared capability subsumes the exclusive capability, if possible.
+      bool ShouldTakeB = B.kind() == LK_Shared;
+      if (CanModify || !ShouldTakeB)
+        return ShouldTakeB;
+    }
+    Handler.handleExclusiveAndShared(B.getKind(), B.toString(), B.loc(),
+                                     A.loc());
+    // Take the exclusive capability to reduce further warnings.
+    return CanModify && B.kind() == LK_Exclusive;
+  } else {
+    // The non-asserted capability is the one we want to track.
+    return CanModify && A.asserted() && !B.asserted();
+  }
+}
+
+/// Compute the intersection of two locksets and issue warnings for any
+/// locks in the symmetric difference.
+///
+/// This function is used at a merge point in the CFG when comparing the lockset
+/// of each branch being merged. For example, given the following sequence:
+/// A; if () then B; else C; D; we need to check that the locksets after B and C
+/// are the same. In the event of a difference, we use the intersection of these
+/// two locksets at the start of D.
+///
+/// \param EntrySet A lockset for entry into a (possibly new) block.
+/// \param ExitSet The lockset on exiting a preceding block.
+/// \param JoinLoc The location of the join point for error reporting.
+/// \param EntryLEK The warning if a mutex is missing from \p EntrySet.
+/// \param ExitLEK The warning if a mutex is missing from \p ExitSet.
+void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &EntrySet,
+                                            const FactSet &ExitSet,
+                                            SourceLocation JoinLoc,
+                                            LockErrorKind EntryLEK,
+                                            LockErrorKind ExitLEK) {
+  FactSet EntrySetOrig = EntrySet;
+
+  // Find locks in ExitSet that conflict or are not in EntrySet, and warn.
+  for (const auto &Fact : ExitSet) {
+    const FactEntry &ExitFact = FactMan[Fact];
+
+    FactSet::iterator EntryIt = EntrySet.findLockIter(FactMan, ExitFact);
+    if (EntryIt != EntrySet.end()) {
+      if (join(FactMan[*EntryIt], ExitFact,
+               EntryLEK != LEK_LockedSomeLoopIterations))
+        *EntryIt = Fact;
+    } else if (!ExitFact.managed()) {
+      ExitFact.handleRemovalFromIntersection(ExitSet, FactMan, JoinLoc,
+                                             EntryLEK, Handler);
+    }
+  }
+
+  // Find locks in EntrySet that are not in ExitSet, and remove them.
+  for (const auto &Fact : EntrySetOrig) {
+    const FactEntry *EntryFact = &FactMan[Fact];
+    const FactEntry *ExitFact = ExitSet.findLock(FactMan, *EntryFact);
+
+    if (!ExitFact) {
+      if (!EntryFact->managed() || ExitLEK == LEK_LockedSomeLoopIterations)
+        EntryFact->handleRemovalFromIntersection(EntrySetOrig, FactMan, JoinLoc,
+                                                 ExitLEK, Handler);
+      if (ExitLEK == LEK_LockedSomePredecessors)
+        EntrySet.removeLock(FactMan, *EntryFact);
+    }
+  }
+}
+
+// Return true if block B never continues to its successors.
+static bool neverReturns(const CFGBlock *B) {
+  if (B->hasNoReturnElement())
+    return true;
+  if (B->empty())
+    return false;
+
+  CFGElement Last = B->back();
+  if (std::optional<CFGStmt> S = Last.getAs<CFGStmt>()) {
+    if (isa<CXXThrowExpr>(S->getStmt()))
+      return true;
+  }
+  return false;
+}
+
+/// Check a function's CFG for thread-safety violations.
+///
+/// We traverse the blocks in the CFG, compute the set of mutexes that are held
+/// at the end of each block, and issue warnings for thread safety violations.
+/// Each block in the CFG is traversed exactly once.
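+///
+/// As an illustrative sketch (using the annotation macros from the Clang
+/// Thread Safety Analysis documentation; the names here are hypothetical),
+/// this is the kind of code the pass diagnoses:
+/// \code
+///   Mutex Mu;
+///   int Data GUARDED_BY(Mu);
+///   void F() { Data = 42; }
+///   // warning: writing variable 'Data' requires holding mutex 'Mu'
+///   // exclusively
+/// \endcode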
+void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) {
+  // TODO: this whole function needs to be rewritten as a visitor for
+  // CFGWalker. For now, we just use the walker to set things up.
+  threadSafety::CFGWalker walker;
+  if (!walker.init(AC))
+    return;
+
+  // AC.dumpCFG(true);
+  // threadSafety::printSCFG(walker);
+
+  CFG *CFGraph = walker.getGraph();
+  const NamedDecl *D = walker.getDecl();
+  const auto *CurrentFunction = dyn_cast<FunctionDecl>(D);
+  CurrentMethod = dyn_cast<CXXMethodDecl>(D);
+
+  if (D->hasAttr<NoThreadSafetyAnalysisAttr>())
+    return;
+
+  // FIXME: Do something a bit more intelligent inside constructor and
+  // destructor code. Constructors and destructors must assume unique access
+  // to 'this', so checks on member variable access are disabled, but we should
+  // still enable checks on other objects.
+  if (isa<CXXConstructorDecl>(D))
+    return;  // Don't check inside constructors.
+  if (isa<CXXDestructorDecl>(D))
+    return;  // Don't check inside destructors.
+
+  Handler.enterFunction(CurrentFunction);
+
+  BlockInfo.resize(CFGraph->getNumBlockIDs(),
+                   CFGBlockInfo::getEmptyBlockInfo(LocalVarMap));
+
+  // We need to explore the CFG via a "topological" ordering.
+  // That way, we will be guaranteed to have information about required
+  // predecessor locksets when exploring a new block.
+  const PostOrderCFGView *SortedGraph = walker.getSortedGraph();
+  PostOrderCFGView::CFGBlockSet VisitedBlocks(CFGraph);
+
+  // Mark entry block as reachable.
+  BlockInfo[CFGraph->getEntry().getBlockID()].Reachable = true;
+
+  // Compute SSA names for local variables.
+  LocalVarMap.traverseCFG(CFGraph, SortedGraph, BlockInfo);
+
+  // Fill in source locations for all CFGBlocks.
+  findBlockLocations(CFGraph, SortedGraph, BlockInfo);
+
+  CapExprSet ExclusiveLocksAcquired;
+  CapExprSet SharedLocksAcquired;
+  CapExprSet LocksReleased;
+
+  // Add locks from exclusive_locks_required and shared_locks_required
+  // to initial lockset. Also turn off checking for lock and unlock functions.
+  // FIXME: is there a more intelligent way to check lock/unlock functions?
+  if (!SortedGraph->empty() && D->hasAttrs()) {
+    const CFGBlock *FirstBlock = *SortedGraph->begin();
+    FactSet &InitialLockset = BlockInfo[FirstBlock->getBlockID()].EntrySet;
+
+    CapExprSet ExclusiveLocksToAdd;
+    CapExprSet SharedLocksToAdd;
+
+    SourceLocation Loc = D->getLocation();
+    for (const auto *Attr : D->attrs()) {
+      Loc = Attr->getLocation();
+      if (const auto *A = dyn_cast<RequiresCapabilityAttr>(Attr)) {
+        getMutexIDs(A->isShared() ? SharedLocksToAdd : ExclusiveLocksToAdd, A,
+                    nullptr, D);
+      } else if (const auto *A = dyn_cast<ReleaseCapabilityAttr>(Attr)) {
+        // UNLOCK_FUNCTION() is used to hide the underlying lock implementation.
+        // We must ignore such methods.
+        if (A->args_size() == 0)
+          return;
+        getMutexIDs(A->isShared() ? SharedLocksToAdd : ExclusiveLocksToAdd, A,
+                    nullptr, D);
+        getMutexIDs(LocksReleased, A, nullptr, D);
+      } else if (const auto *A = dyn_cast<AcquireCapabilityAttr>(Attr)) {
+        if (A->args_size() == 0)
+          return;
+        getMutexIDs(A->isShared() ? SharedLocksAcquired
+                                  : ExclusiveLocksAcquired,
+                    A, nullptr, D);
+      } else if (isa<ExclusiveTrylockFunctionAttr>(Attr)) {
+        // Don't try to check trylock functions for now.
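+        // (A trylock only holds the capability on the success branch, which a
+        // per-function summary cannot express; call sites are instead handled
+        // on CFG edges by getEdgeLockset().)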
+        return;
+      } else if (isa<SharedTrylockFunctionAttr>(Attr)) {
+        // Don't try to check trylock functions for now.
+        return;
+      } else if (isa<TryAcquireCapabilityAttr>(Attr)) {
+        // Don't try to check trylock functions for now.
+        return;
+      }
+    }
+
+    // FIXME -- Loc can be wrong here.
+    for (const auto &Mu : ExclusiveLocksToAdd) {
+      auto Entry = std::make_unique<LockableFactEntry>(Mu, LK_Exclusive, Loc,
+                                                       FactEntry::Declared);
+      addLock(InitialLockset, std::move(Entry), true);
+    }
+    for (const auto &Mu : SharedLocksToAdd) {
+      auto Entry = std::make_unique<LockableFactEntry>(Mu, LK_Shared, Loc,
+                                                       FactEntry::Declared);
+      addLock(InitialLockset, std::move(Entry), true);
+    }
+  }
+
+  for (const auto *CurrBlock : *SortedGraph) {
+    unsigned CurrBlockID = CurrBlock->getBlockID();
+    CFGBlockInfo *CurrBlockInfo = &BlockInfo[CurrBlockID];
+
+    // Use the default initial lockset in case there are no predecessors.
+    VisitedBlocks.insert(CurrBlock);
+
+    // Iterate through the predecessor blocks and warn if the lockset for all
+    // predecessors is not the same. We take the entry lockset of the current
+    // block to be the intersection of all previous locksets.
+    // FIXME: By keeping the intersection, we may output more errors in the
+    // future for a lock which is not in the intersection, but was in the
+    // union. We may want to also keep the union in the future. As an example,
+    // let's say the intersection contains Mutex L, and the union contains L
+    // and M. Later we unlock M. At this point, we would output an error
+    // because we never locked M; although the real error is probably that we
+    // forgot to lock M on all code paths. Conversely, let's say that later we
+    // lock M. In this case, we should compare against the intersection
+    // instead of the union because the real error is probably that we forgot
+    // to unlock M on all code paths.
+    bool LocksetInitialized = false;
+    for (CFGBlock::const_pred_iterator PI = CurrBlock->pred_begin(),
+         PE = CurrBlock->pred_end(); PI != PE; ++PI) {
+      // if *PI -> CurrBlock is a back edge
+      if (*PI == nullptr || !VisitedBlocks.alreadySet(*PI))
+        continue;
+
+      unsigned PrevBlockID = (*PI)->getBlockID();
+      CFGBlockInfo *PrevBlockInfo = &BlockInfo[PrevBlockID];
+
+      // Ignore edges from blocks that can't return.
+      if (neverReturns(*PI) || !PrevBlockInfo->Reachable)
+        continue;
+
+      // Okay, we can reach this block from the entry.
+      CurrBlockInfo->Reachable = true;
+
+      FactSet PrevLockset;
+      getEdgeLockset(PrevLockset, PrevBlockInfo->ExitSet, *PI, CurrBlock);
+
+      if (!LocksetInitialized) {
+        CurrBlockInfo->EntrySet = PrevLockset;
+        LocksetInitialized = true;
+      } else {
+        // Surprisingly 'continue' doesn't always produce back edges, because
+        // the CFG has empty "transition" blocks where they meet with the end
+        // of the regular loop body. We still want to diagnose them as a loop.
+        intersectAndWarn(
+            CurrBlockInfo->EntrySet, PrevLockset, CurrBlockInfo->EntryLoc,
+            isa_and_nonnull<ContinueStmt>((*PI)->getTerminatorStmt())
+                ? LEK_LockedSomeLoopIterations
+                : LEK_LockedSomePredecessors);
+      }
+    }
+
+    // Skip rest of block if it's not reachable.
+    if (!CurrBlockInfo->Reachable)
+      continue;
+
+    BuildLockset LocksetBuilder(this, *CurrBlockInfo);
+
+    // Visit all the statements in the basic block.
+    for (const auto &BI : *CurrBlock) {
+      switch (BI.getKind()) {
+      case CFGElement::Statement: {
+        CFGStmt CS = BI.castAs<CFGStmt>();
+        LocksetBuilder.Visit(CS.getStmt());
+        break;
+      }
+      // Ignore BaseDtor and MemberDtor for now.
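+      // An automatic object destructor is how a scoped capability (e.g. an
+      // RAII guard of a SCOPED_CAPABILITY class) releases its mutex: the
+      // destructor carries release attributes, which handleCall() below
+      // processes like any other unlock call.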
+ case CFGElement::AutomaticObjectDtor: { + CFGAutomaticObjDtor AD = BI.castAs<CFGAutomaticObjDtor>(); + const auto *DD = AD.getDestructorDecl(AC.getASTContext()); + if (!DD->hasAttrs()) + break; + + LocksetBuilder.handleCall(nullptr, DD, + SxBuilder.createVariable(AD.getVarDecl()), + AD.getTriggerStmt()->getEndLoc()); + break; + } + case CFGElement::TemporaryDtor: { + auto TD = BI.castAs<CFGTemporaryDtor>(); + + // Clean up constructed object even if there are no attributes to + // keep the number of objects in limbo as small as possible. + if (auto Object = LocksetBuilder.ConstructedObjects.find( + TD.getBindTemporaryExpr()->getSubExpr()); + Object != LocksetBuilder.ConstructedObjects.end()) { + const auto *DD = TD.getDestructorDecl(AC.getASTContext()); + if (DD->hasAttrs()) + // TODO: the location here isn't quite correct. + LocksetBuilder.handleCall(nullptr, DD, Object->second, + TD.getBindTemporaryExpr()->getEndLoc()); + LocksetBuilder.ConstructedObjects.erase(Object); + } + break; + } + default: + break; + } + } + CurrBlockInfo->ExitSet = LocksetBuilder.FSet; + + // For every back edge from CurrBlock (the end of the loop) to another block + // (FirstLoopBlock) we need to check that the Lockset of Block is equal to + // the one held at the beginning of FirstLoopBlock. We can look up the + // Lockset held at the beginning of FirstLoopBlock in the EntryLockSets map. + for (CFGBlock::const_succ_iterator SI = CurrBlock->succ_begin(), + SE = CurrBlock->succ_end(); SI != SE; ++SI) { + // if CurrBlock -> *SI is *not* a back edge + if (*SI == nullptr || !VisitedBlocks.alreadySet(*SI)) + continue; + + CFGBlock *FirstLoopBlock = *SI; + CFGBlockInfo *PreLoop = &BlockInfo[FirstLoopBlock->getBlockID()]; + CFGBlockInfo *LoopEnd = &BlockInfo[CurrBlockID]; + intersectAndWarn(PreLoop->EntrySet, LoopEnd->ExitSet, PreLoop->EntryLoc, + LEK_LockedSomeLoopIterations); + } + } + + CFGBlockInfo *Initial = &BlockInfo[CFGraph->getEntry().getBlockID()]; + CFGBlockInfo *Final = &BlockInfo[CFGraph->getExit().getBlockID()]; + + // Skip the final check if the exit block is unreachable. + if (!Final->Reachable) + return; + + // By default, we expect all locks held on entry to be held on exit. + FactSet ExpectedExitSet = Initial->EntrySet; + + // Adjust the expected exit set by adding or removing locks, as declared + // by *-LOCK_FUNCTION and UNLOCK_FUNCTION. The intersect below will then + // issue the appropriate warning. + // FIXME: the location here is not quite right. + for (const auto &Lock : ExclusiveLocksAcquired) + ExpectedExitSet.addLock(FactMan, std::make_unique<LockableFactEntry>( + Lock, LK_Exclusive, D->getLocation())); + for (const auto &Lock : SharedLocksAcquired) + ExpectedExitSet.addLock(FactMan, std::make_unique<LockableFactEntry>( + Lock, LK_Shared, D->getLocation())); + for (const auto &Lock : LocksReleased) + ExpectedExitSet.removeLock(FactMan, Lock); + + // FIXME: Should we call this function for all blocks which exit the function? + intersectAndWarn(ExpectedExitSet, Final->ExitSet, Final->ExitLoc, + LEK_LockedAtEndOfFunction, LEK_NotLockedAtEndOfFunction); + + Handler.leaveFunction(CurrentFunction); +} + +/// Check a function's CFG for thread-safety violations. +/// +/// We traverse the blocks in the CFG, compute the set of mutexes that are held +/// at the end of each block, and issue warnings for thread safety violations. +/// Each block in the CFG is traversed exactly once. 
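+///
+/// A minimal (illustrative) driver sequence, assuming the caller has already
+/// set up an AnalysisDeclContext `AC` and a diagnostic `Handler`:
+/// \code
+///   BeforeSet *BSet = nullptr;
+///   runThreadSafetyAnalysis(AC, Handler, &BSet);  // analyze one function
+///   threadSafetyCleanup(BSet);                    // free the shared cache
+/// \endcode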
+void threadSafety::runThreadSafetyAnalysis(AnalysisDeclContext &AC, + ThreadSafetyHandler &Handler, + BeforeSet **BSet) { + if (!*BSet) + *BSet = new BeforeSet; + ThreadSafetyAnalyzer Analyzer(Handler, *BSet); + Analyzer.runAnalysis(AC); +} + +void threadSafety::threadSafetyCleanup(BeforeSet *Cache) { delete Cache; } + +/// Helper function that returns a LockKind required for the given level +/// of access. +LockKind threadSafety::getLockKindFromAccessKind(AccessKind AK) { + switch (AK) { + case AK_Read : + return LK_Shared; + case AK_Written : + return LK_Exclusive; + } + llvm_unreachable("Unknown AccessKind"); +} diff --git a/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyCommon.cpp b/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyCommon.cpp new file mode 100644 index 000000000000..a771149f1591 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyCommon.cpp @@ -0,0 +1,1011 @@ +//===- ThreadSafetyCommon.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the interfaces declared in ThreadSafetyCommon.h +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/ThreadSafetyCommon.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclGroup.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/ThreadSafetyTIL.h" +#include "clang/Analysis/CFG.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/OperatorKinds.h" +#include "clang/Basic/Specifiers.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> +#include <string> +#include <utility> + +using namespace clang; +using namespace threadSafety; + +// From ThreadSafetyUtil.h +std::string threadSafety::getSourceLiteralString(const Expr *CE) { + switch (CE->getStmtClass()) { + case Stmt::IntegerLiteralClass: + return toString(cast<IntegerLiteral>(CE)->getValue(), 10, true); + case Stmt::StringLiteralClass: { + std::string ret("\""); + ret += cast<StringLiteral>(CE)->getString(); + ret += "\""; + return ret; + } + case Stmt::CharacterLiteralClass: + case Stmt::CXXNullPtrLiteralExprClass: + case Stmt::GNUNullExprClass: + case Stmt::CXXBoolLiteralExprClass: + case Stmt::FloatingLiteralClass: + case Stmt::ImaginaryLiteralClass: + case Stmt::ObjCStringLiteralClass: + default: + return "#lit"; + } +} + +// Return true if E is a variable that points to an incomplete Phi node. 
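+// (Such Phi nodes are created while merging back edges in makePhiNodeVar(),
+// and are completed or simplified later; see simplifyIncompleteArg().)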
+static bool isIncompletePhi(const til::SExpr *E) { + if (const auto *Ph = dyn_cast<til::Phi>(E)) + return Ph->status() == til::Phi::PH_Incomplete; + return false; +} + +using CallingContext = SExprBuilder::CallingContext; + +til::SExpr *SExprBuilder::lookupStmt(const Stmt *S) { + auto It = SMap.find(S); + if (It != SMap.end()) + return It->second; + return nullptr; +} + +til::SCFG *SExprBuilder::buildCFG(CFGWalker &Walker) { + Walker.walk(*this); + return Scfg; +} + +static bool isCalleeArrow(const Expr *E) { + const auto *ME = dyn_cast<MemberExpr>(E->IgnoreParenCasts()); + return ME ? ME->isArrow() : false; +} + +static StringRef ClassifyDiagnostic(const CapabilityAttr *A) { + return A->getName(); +} + +static StringRef ClassifyDiagnostic(QualType VDT) { + // We need to look at the declaration of the type of the value to determine + // which it is. The type should either be a record or a typedef, or a pointer + // or reference thereof. + if (const auto *RT = VDT->getAs<RecordType>()) { + if (const auto *RD = RT->getDecl()) + if (const auto *CA = RD->getAttr<CapabilityAttr>()) + return ClassifyDiagnostic(CA); + } else if (const auto *TT = VDT->getAs<TypedefType>()) { + if (const auto *TD = TT->getDecl()) + if (const auto *CA = TD->getAttr<CapabilityAttr>()) + return ClassifyDiagnostic(CA); + } else if (VDT->isPointerType() || VDT->isReferenceType()) + return ClassifyDiagnostic(VDT->getPointeeType()); + + return "mutex"; +} + +/// Translate a clang expression in an attribute to a til::SExpr. +/// Constructs the context from D, DeclExp, and SelfDecl. +/// +/// \param AttrExp The expression to translate. +/// \param D The declaration to which the attribute is attached. +/// \param DeclExp An expression involving the Decl to which the attribute +/// is attached. E.g. the call to a function. +/// \param Self S-expression to substitute for a \ref CXXThisExpr. +CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, + const NamedDecl *D, + const Expr *DeclExp, + til::SExpr *Self) { + // If we are processing a raw attribute expression, with no substitutions. + if (!DeclExp && !Self) + return translateAttrExpr(AttrExp, nullptr); + + CallingContext Ctx(nullptr, D); + + // Examine DeclExp to find SelfArg and FunArgs, which are used to substitute + // for formal parameters when we call buildMutexID later. + if (!DeclExp) + /* We'll use Self. */; + else if (const auto *ME = dyn_cast<MemberExpr>(DeclExp)) { + Ctx.SelfArg = ME->getBase(); + Ctx.SelfArrow = ME->isArrow(); + } else if (const auto *CE = dyn_cast<CXXMemberCallExpr>(DeclExp)) { + Ctx.SelfArg = CE->getImplicitObjectArgument(); + Ctx.SelfArrow = isCalleeArrow(CE->getCallee()); + Ctx.NumArgs = CE->getNumArgs(); + Ctx.FunArgs = CE->getArgs(); + } else if (const auto *CE = dyn_cast<CallExpr>(DeclExp)) { + Ctx.NumArgs = CE->getNumArgs(); + Ctx.FunArgs = CE->getArgs(); + } else if (const auto *CE = dyn_cast<CXXConstructExpr>(DeclExp)) { + Ctx.SelfArg = nullptr; // Will be set below + Ctx.NumArgs = CE->getNumArgs(); + Ctx.FunArgs = CE->getArgs(); + } + + if (Self) { + assert(!Ctx.SelfArg && "Ambiguous self argument"); + Ctx.SelfArg = Self; + + // If the attribute has no arguments, then assume the argument is "this". + if (!AttrExp) + return CapabilityExpr( + Self, ClassifyDiagnostic(cast<CXXMethodDecl>(D)->getThisObjectType()), + false); + else // For most attributes. + return translateAttrExpr(AttrExp, &Ctx); + } + + // If the attribute has no arguments, then assume the argument is "this". 
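+  // (e.g. a bare RELEASE() on a method of a CAPABILITY-annotated class is
+  // taken to release the object itself.)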
+ if (!AttrExp) + return translateAttrExpr(cast<const Expr *>(Ctx.SelfArg), nullptr); + else // For most attributes. + return translateAttrExpr(AttrExp, &Ctx); +} + +/// Translate a clang expression in an attribute to a til::SExpr. +// This assumes a CallingContext has already been created. +CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, + CallingContext *Ctx) { + if (!AttrExp) + return CapabilityExpr(); + + if (const auto* SLit = dyn_cast<StringLiteral>(AttrExp)) { + if (SLit->getString() == StringRef("*")) + // The "*" expr is a universal lock, which essentially turns off + // checks until it is removed from the lockset. + return CapabilityExpr(new (Arena) til::Wildcard(), StringRef("wildcard"), + false); + else + // Ignore other string literals for now. + return CapabilityExpr(); + } + + bool Neg = false; + if (const auto *OE = dyn_cast<CXXOperatorCallExpr>(AttrExp)) { + if (OE->getOperator() == OO_Exclaim) { + Neg = true; + AttrExp = OE->getArg(0); + } + } + else if (const auto *UO = dyn_cast<UnaryOperator>(AttrExp)) { + if (UO->getOpcode() == UO_LNot) { + Neg = true; + AttrExp = UO->getSubExpr(); + } + } + + til::SExpr *E = translate(AttrExp, Ctx); + + // Trap mutex expressions like nullptr, or 0. + // Any literal value is nonsense. + if (!E || isa<til::Literal>(E)) + return CapabilityExpr(); + + StringRef Kind = ClassifyDiagnostic(AttrExp->getType()); + + // Hack to deal with smart pointers -- strip off top-level pointer casts. + if (const auto *CE = dyn_cast<til::Cast>(E)) { + if (CE->castOpcode() == til::CAST_objToPtr) + return CapabilityExpr(CE->expr(), Kind, Neg); + } + return CapabilityExpr(E, Kind, Neg); +} + +til::LiteralPtr *SExprBuilder::createVariable(const VarDecl *VD) { + return new (Arena) til::LiteralPtr(VD); +} + +std::pair<til::LiteralPtr *, StringRef> +SExprBuilder::createThisPlaceholder(const Expr *Exp) { + return {new (Arena) til::LiteralPtr(nullptr), + ClassifyDiagnostic(Exp->getType())}; +} + +// Translate a clang statement or expression to a TIL expression. +// Also performs substitution of variables; Ctx provides the context. +// Dispatches on the type of S. +til::SExpr *SExprBuilder::translate(const Stmt *S, CallingContext *Ctx) { + if (!S) + return nullptr; + + // Check if S has already been translated and cached. + // This handles the lookup of SSA names for DeclRefExprs here. 
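+  // (The cache is keyed by statement and populated by insertStmt() as blocks
+  // are translated, so a reference to a local variable resolves to its
+  // current SSA value.)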
+ if (til::SExpr *E = lookupStmt(S)) + return E; + + switch (S->getStmtClass()) { + case Stmt::DeclRefExprClass: + return translateDeclRefExpr(cast<DeclRefExpr>(S), Ctx); + case Stmt::CXXThisExprClass: + return translateCXXThisExpr(cast<CXXThisExpr>(S), Ctx); + case Stmt::MemberExprClass: + return translateMemberExpr(cast<MemberExpr>(S), Ctx); + case Stmt::ObjCIvarRefExprClass: + return translateObjCIVarRefExpr(cast<ObjCIvarRefExpr>(S), Ctx); + case Stmt::CallExprClass: + return translateCallExpr(cast<CallExpr>(S), Ctx); + case Stmt::CXXMemberCallExprClass: + return translateCXXMemberCallExpr(cast<CXXMemberCallExpr>(S), Ctx); + case Stmt::CXXOperatorCallExprClass: + return translateCXXOperatorCallExpr(cast<CXXOperatorCallExpr>(S), Ctx); + case Stmt::UnaryOperatorClass: + return translateUnaryOperator(cast<UnaryOperator>(S), Ctx); + case Stmt::BinaryOperatorClass: + case Stmt::CompoundAssignOperatorClass: + return translateBinaryOperator(cast<BinaryOperator>(S), Ctx); + + case Stmt::ArraySubscriptExprClass: + return translateArraySubscriptExpr(cast<ArraySubscriptExpr>(S), Ctx); + case Stmt::ConditionalOperatorClass: + return translateAbstractConditionalOperator( + cast<ConditionalOperator>(S), Ctx); + case Stmt::BinaryConditionalOperatorClass: + return translateAbstractConditionalOperator( + cast<BinaryConditionalOperator>(S), Ctx); + + // We treat these as no-ops + case Stmt::ConstantExprClass: + return translate(cast<ConstantExpr>(S)->getSubExpr(), Ctx); + case Stmt::ParenExprClass: + return translate(cast<ParenExpr>(S)->getSubExpr(), Ctx); + case Stmt::ExprWithCleanupsClass: + return translate(cast<ExprWithCleanups>(S)->getSubExpr(), Ctx); + case Stmt::CXXBindTemporaryExprClass: + return translate(cast<CXXBindTemporaryExpr>(S)->getSubExpr(), Ctx); + case Stmt::MaterializeTemporaryExprClass: + return translate(cast<MaterializeTemporaryExpr>(S)->getSubExpr(), Ctx); + + // Collect all literals + case Stmt::CharacterLiteralClass: + case Stmt::CXXNullPtrLiteralExprClass: + case Stmt::GNUNullExprClass: + case Stmt::CXXBoolLiteralExprClass: + case Stmt::FloatingLiteralClass: + case Stmt::ImaginaryLiteralClass: + case Stmt::IntegerLiteralClass: + case Stmt::StringLiteralClass: + case Stmt::ObjCStringLiteralClass: + return new (Arena) til::Literal(cast<Expr>(S)); + + case Stmt::DeclStmtClass: + return translateDeclStmt(cast<DeclStmt>(S), Ctx); + default: + break; + } + if (const auto *CE = dyn_cast<CastExpr>(S)) + return translateCastExpr(CE, Ctx); + + return new (Arena) til::Undefined(S); +} + +til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE, + CallingContext *Ctx) { + const auto *VD = cast<ValueDecl>(DRE->getDecl()->getCanonicalDecl()); + + // Function parameters require substitution and/or renaming. + if (const auto *PV = dyn_cast<ParmVarDecl>(VD)) { + unsigned I = PV->getFunctionScopeIndex(); + const DeclContext *D = PV->getDeclContext(); + if (Ctx && Ctx->FunArgs) { + const Decl *Canonical = Ctx->AttrDecl->getCanonicalDecl(); + if (isa<FunctionDecl>(D) + ? (cast<FunctionDecl>(D)->getCanonicalDecl() == Canonical) + : (cast<ObjCMethodDecl>(D)->getCanonicalDecl() == Canonical)) { + // Substitute call arguments for references to function parameters + assert(I < Ctx->NumArgs); + return translate(Ctx->FunArgs[I], Ctx->Prev); + } + } + // Map the param back to the param of the original function declaration + // for consistent comparisons. + VD = isa<FunctionDecl>(D) + ? 
cast<FunctionDecl>(D)->getCanonicalDecl()->getParamDecl(I) + : cast<ObjCMethodDecl>(D)->getCanonicalDecl()->getParamDecl(I); + } + + // For non-local variables, treat it as a reference to a named object. + return new (Arena) til::LiteralPtr(VD); +} + +til::SExpr *SExprBuilder::translateCXXThisExpr(const CXXThisExpr *TE, + CallingContext *Ctx) { + // Substitute for 'this' + if (Ctx && Ctx->SelfArg) { + if (const auto *SelfArg = dyn_cast<const Expr *>(Ctx->SelfArg)) + return translate(SelfArg, Ctx->Prev); + else + return cast<til::SExpr *>(Ctx->SelfArg); + } + assert(SelfVar && "We have no variable for 'this'!"); + return SelfVar; +} + +static const ValueDecl *getValueDeclFromSExpr(const til::SExpr *E) { + if (const auto *V = dyn_cast<til::Variable>(E)) + return V->clangDecl(); + if (const auto *Ph = dyn_cast<til::Phi>(E)) + return Ph->clangDecl(); + if (const auto *P = dyn_cast<til::Project>(E)) + return P->clangDecl(); + if (const auto *L = dyn_cast<til::LiteralPtr>(E)) + return L->clangDecl(); + return nullptr; +} + +static bool hasAnyPointerType(const til::SExpr *E) { + auto *VD = getValueDeclFromSExpr(E); + if (VD && VD->getType()->isAnyPointerType()) + return true; + if (const auto *C = dyn_cast<til::Cast>(E)) + return C->castOpcode() == til::CAST_objToPtr; + + return false; +} + +// Grab the very first declaration of virtual method D +static const CXXMethodDecl *getFirstVirtualDecl(const CXXMethodDecl *D) { + while (true) { + D = D->getCanonicalDecl(); + auto OverriddenMethods = D->overridden_methods(); + if (OverriddenMethods.begin() == OverriddenMethods.end()) + return D; // Method does not override anything + // FIXME: this does not work with multiple inheritance. + D = *OverriddenMethods.begin(); + } + return nullptr; +} + +til::SExpr *SExprBuilder::translateMemberExpr(const MemberExpr *ME, + CallingContext *Ctx) { + til::SExpr *BE = translate(ME->getBase(), Ctx); + til::SExpr *E = new (Arena) til::SApply(BE); + + const auto *D = cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl()); + if (const auto *VD = dyn_cast<CXXMethodDecl>(D)) + D = getFirstVirtualDecl(VD); + + til::Project *P = new (Arena) til::Project(E, D); + if (hasAnyPointerType(BE)) + P->setArrow(true); + return P; +} + +til::SExpr *SExprBuilder::translateObjCIVarRefExpr(const ObjCIvarRefExpr *IVRE, + CallingContext *Ctx) { + til::SExpr *BE = translate(IVRE->getBase(), Ctx); + til::SExpr *E = new (Arena) til::SApply(BE); + + const auto *D = cast<ObjCIvarDecl>(IVRE->getDecl()->getCanonicalDecl()); + + til::Project *P = new (Arena) til::Project(E, D); + if (hasAnyPointerType(BE)) + P->setArrow(true); + return P; +} + +til::SExpr *SExprBuilder::translateCallExpr(const CallExpr *CE, + CallingContext *Ctx, + const Expr *SelfE) { + if (CapabilityExprMode) { + // Handle LOCK_RETURNED + if (const FunctionDecl *FD = CE->getDirectCallee()) { + FD = FD->getMostRecentDecl(); + if (LockReturnedAttr *At = FD->getAttr<LockReturnedAttr>()) { + CallingContext LRCallCtx(Ctx); + LRCallCtx.AttrDecl = CE->getDirectCallee(); + LRCallCtx.SelfArg = SelfE; + LRCallCtx.NumArgs = CE->getNumArgs(); + LRCallCtx.FunArgs = CE->getArgs(); + return const_cast<til::SExpr *>( + translateAttrExpr(At->getArg(), &LRCallCtx).sexpr()); + } + } + } + + til::SExpr *E = translate(CE->getCallee(), Ctx); + for (const auto *Arg : CE->arguments()) { + til::SExpr *A = translate(Arg, Ctx); + E = new (Arena) til::Apply(E, A); + } + return new (Arena) til::Call(E, CE); +} + +til::SExpr *SExprBuilder::translateCXXMemberCallExpr( + const CXXMemberCallExpr *ME, 
CallingContext *Ctx) { + if (CapabilityExprMode) { + // Ignore calls to get() on smart pointers. + if (ME->getMethodDecl()->getNameAsString() == "get" && + ME->getNumArgs() == 0) { + auto *E = translate(ME->getImplicitObjectArgument(), Ctx); + return new (Arena) til::Cast(til::CAST_objToPtr, E); + // return E; + } + } + return translateCallExpr(cast<CallExpr>(ME), Ctx, + ME->getImplicitObjectArgument()); +} + +til::SExpr *SExprBuilder::translateCXXOperatorCallExpr( + const CXXOperatorCallExpr *OCE, CallingContext *Ctx) { + if (CapabilityExprMode) { + // Ignore operator * and operator -> on smart pointers. + OverloadedOperatorKind k = OCE->getOperator(); + if (k == OO_Star || k == OO_Arrow) { + auto *E = translate(OCE->getArg(0), Ctx); + return new (Arena) til::Cast(til::CAST_objToPtr, E); + // return E; + } + } + return translateCallExpr(cast<CallExpr>(OCE), Ctx); +} + +til::SExpr *SExprBuilder::translateUnaryOperator(const UnaryOperator *UO, + CallingContext *Ctx) { + switch (UO->getOpcode()) { + case UO_PostInc: + case UO_PostDec: + case UO_PreInc: + case UO_PreDec: + return new (Arena) til::Undefined(UO); + + case UO_AddrOf: + if (CapabilityExprMode) { + // interpret &Graph::mu_ as an existential. + if (const auto *DRE = dyn_cast<DeclRefExpr>(UO->getSubExpr())) { + if (DRE->getDecl()->isCXXInstanceMember()) { + // This is a pointer-to-member expression, e.g. &MyClass::mu_. + // We interpret this syntax specially, as a wildcard. + auto *W = new (Arena) til::Wildcard(); + return new (Arena) til::Project(W, DRE->getDecl()); + } + } + } + // otherwise, & is a no-op + return translate(UO->getSubExpr(), Ctx); + + // We treat these as no-ops + case UO_Deref: + case UO_Plus: + return translate(UO->getSubExpr(), Ctx); + + case UO_Minus: + return new (Arena) + til::UnaryOp(til::UOP_Minus, translate(UO->getSubExpr(), Ctx)); + case UO_Not: + return new (Arena) + til::UnaryOp(til::UOP_BitNot, translate(UO->getSubExpr(), Ctx)); + case UO_LNot: + return new (Arena) + til::UnaryOp(til::UOP_LogicNot, translate(UO->getSubExpr(), Ctx)); + + // Currently unsupported + case UO_Real: + case UO_Imag: + case UO_Extension: + case UO_Coawait: + return new (Arena) til::Undefined(UO); + } + return new (Arena) til::Undefined(UO); +} + +til::SExpr *SExprBuilder::translateBinOp(til::TIL_BinaryOpcode Op, + const BinaryOperator *BO, + CallingContext *Ctx, bool Reverse) { + til::SExpr *E0 = translate(BO->getLHS(), Ctx); + til::SExpr *E1 = translate(BO->getRHS(), Ctx); + if (Reverse) + return new (Arena) til::BinaryOp(Op, E1, E0); + else + return new (Arena) til::BinaryOp(Op, E0, E1); +} + +til::SExpr *SExprBuilder::translateBinAssign(til::TIL_BinaryOpcode Op, + const BinaryOperator *BO, + CallingContext *Ctx, + bool Assign) { + const Expr *LHS = BO->getLHS(); + const Expr *RHS = BO->getRHS(); + til::SExpr *E0 = translate(LHS, Ctx); + til::SExpr *E1 = translate(RHS, Ctx); + + const ValueDecl *VD = nullptr; + til::SExpr *CV = nullptr; + if (const auto *DRE = dyn_cast<DeclRefExpr>(LHS)) { + VD = DRE->getDecl(); + CV = lookupVarDecl(VD); + } + + if (!Assign) { + til::SExpr *Arg = CV ? 
CV : new (Arena) til::Load(E0); + E1 = new (Arena) til::BinaryOp(Op, Arg, E1); + E1 = addStatement(E1, nullptr, VD); + } + if (VD && CV) + return updateVarDecl(VD, E1); + return new (Arena) til::Store(E0, E1); +} + +til::SExpr *SExprBuilder::translateBinaryOperator(const BinaryOperator *BO, + CallingContext *Ctx) { + switch (BO->getOpcode()) { + case BO_PtrMemD: + case BO_PtrMemI: + return new (Arena) til::Undefined(BO); + + case BO_Mul: return translateBinOp(til::BOP_Mul, BO, Ctx); + case BO_Div: return translateBinOp(til::BOP_Div, BO, Ctx); + case BO_Rem: return translateBinOp(til::BOP_Rem, BO, Ctx); + case BO_Add: return translateBinOp(til::BOP_Add, BO, Ctx); + case BO_Sub: return translateBinOp(til::BOP_Sub, BO, Ctx); + case BO_Shl: return translateBinOp(til::BOP_Shl, BO, Ctx); + case BO_Shr: return translateBinOp(til::BOP_Shr, BO, Ctx); + case BO_LT: return translateBinOp(til::BOP_Lt, BO, Ctx); + case BO_GT: return translateBinOp(til::BOP_Lt, BO, Ctx, true); + case BO_LE: return translateBinOp(til::BOP_Leq, BO, Ctx); + case BO_GE: return translateBinOp(til::BOP_Leq, BO, Ctx, true); + case BO_EQ: return translateBinOp(til::BOP_Eq, BO, Ctx); + case BO_NE: return translateBinOp(til::BOP_Neq, BO, Ctx); + case BO_Cmp: return translateBinOp(til::BOP_Cmp, BO, Ctx); + case BO_And: return translateBinOp(til::BOP_BitAnd, BO, Ctx); + case BO_Xor: return translateBinOp(til::BOP_BitXor, BO, Ctx); + case BO_Or: return translateBinOp(til::BOP_BitOr, BO, Ctx); + case BO_LAnd: return translateBinOp(til::BOP_LogicAnd, BO, Ctx); + case BO_LOr: return translateBinOp(til::BOP_LogicOr, BO, Ctx); + + case BO_Assign: return translateBinAssign(til::BOP_Eq, BO, Ctx, true); + case BO_MulAssign: return translateBinAssign(til::BOP_Mul, BO, Ctx); + case BO_DivAssign: return translateBinAssign(til::BOP_Div, BO, Ctx); + case BO_RemAssign: return translateBinAssign(til::BOP_Rem, BO, Ctx); + case BO_AddAssign: return translateBinAssign(til::BOP_Add, BO, Ctx); + case BO_SubAssign: return translateBinAssign(til::BOP_Sub, BO, Ctx); + case BO_ShlAssign: return translateBinAssign(til::BOP_Shl, BO, Ctx); + case BO_ShrAssign: return translateBinAssign(til::BOP_Shr, BO, Ctx); + case BO_AndAssign: return translateBinAssign(til::BOP_BitAnd, BO, Ctx); + case BO_XorAssign: return translateBinAssign(til::BOP_BitXor, BO, Ctx); + case BO_OrAssign: return translateBinAssign(til::BOP_BitOr, BO, Ctx); + + case BO_Comma: + // The clang CFG should have already processed both sides. + return translate(BO->getRHS(), Ctx); + } + return new (Arena) til::Undefined(BO); +} + +til::SExpr *SExprBuilder::translateCastExpr(const CastExpr *CE, + CallingContext *Ctx) { + CastKind K = CE->getCastKind(); + switch (K) { + case CK_LValueToRValue: { + if (const auto *DRE = dyn_cast<DeclRefExpr>(CE->getSubExpr())) { + til::SExpr *E0 = lookupVarDecl(DRE->getDecl()); + if (E0) + return E0; + } + til::SExpr *E0 = translate(CE->getSubExpr(), Ctx); + return E0; + // FIXME!! -- get Load working properly + // return new (Arena) til::Load(E0); + } + case CK_NoOp: + case CK_DerivedToBase: + case CK_UncheckedDerivedToBase: + case CK_ArrayToPointerDecay: + case CK_FunctionToPointerDecay: { + til::SExpr *E0 = translate(CE->getSubExpr(), Ctx); + return E0; + } + default: { + // FIXME: handle different kinds of casts. 
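+    // In capability-expression mode the cast is dropped altogether, so e.g. a
+    // casted lock expression still denotes the same underlying lock.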
+ til::SExpr *E0 = translate(CE->getSubExpr(), Ctx); + if (CapabilityExprMode) + return E0; + return new (Arena) til::Cast(til::CAST_none, E0); + } + } +} + +til::SExpr * +SExprBuilder::translateArraySubscriptExpr(const ArraySubscriptExpr *E, + CallingContext *Ctx) { + til::SExpr *E0 = translate(E->getBase(), Ctx); + til::SExpr *E1 = translate(E->getIdx(), Ctx); + return new (Arena) til::ArrayIndex(E0, E1); +} + +til::SExpr * +SExprBuilder::translateAbstractConditionalOperator( + const AbstractConditionalOperator *CO, CallingContext *Ctx) { + auto *C = translate(CO->getCond(), Ctx); + auto *T = translate(CO->getTrueExpr(), Ctx); + auto *E = translate(CO->getFalseExpr(), Ctx); + return new (Arena) til::IfThenElse(C, T, E); +} + +til::SExpr * +SExprBuilder::translateDeclStmt(const DeclStmt *S, CallingContext *Ctx) { + DeclGroupRef DGrp = S->getDeclGroup(); + for (auto *I : DGrp) { + if (auto *VD = dyn_cast_or_null<VarDecl>(I)) { + Expr *E = VD->getInit(); + til::SExpr* SE = translate(E, Ctx); + + // Add local variables with trivial type to the variable map + QualType T = VD->getType(); + if (T.isTrivialType(VD->getASTContext())) + return addVarDecl(VD, SE); + else { + // TODO: add alloca + } + } + } + return nullptr; +} + +// If (E) is non-trivial, then add it to the current basic block, and +// update the statement map so that S refers to E. Returns a new variable +// that refers to E. +// If E is trivial returns E. +til::SExpr *SExprBuilder::addStatement(til::SExpr* E, const Stmt *S, + const ValueDecl *VD) { + if (!E || !CurrentBB || E->block() || til::ThreadSafetyTIL::isTrivial(E)) + return E; + if (VD) + E = new (Arena) til::Variable(E, VD); + CurrentInstructions.push_back(E); + if (S) + insertStmt(S, E); + return E; +} + +// Returns the current value of VD, if known, and nullptr otherwise. +til::SExpr *SExprBuilder::lookupVarDecl(const ValueDecl *VD) { + auto It = LVarIdxMap.find(VD); + if (It != LVarIdxMap.end()) { + assert(CurrentLVarMap[It->second].first == VD); + return CurrentLVarMap[It->second].second; + } + return nullptr; +} + +// if E is a til::Variable, update its clangDecl. +static void maybeUpdateVD(til::SExpr *E, const ValueDecl *VD) { + if (!E) + return; + if (auto *V = dyn_cast<til::Variable>(E)) { + if (!V->clangDecl()) + V->setClangDecl(VD); + } +} + +// Adds a new variable declaration. +til::SExpr *SExprBuilder::addVarDecl(const ValueDecl *VD, til::SExpr *E) { + maybeUpdateVD(E, VD); + LVarIdxMap.insert(std::make_pair(VD, CurrentLVarMap.size())); + CurrentLVarMap.makeWritable(); + CurrentLVarMap.push_back(std::make_pair(VD, E)); + return E; +} + +// Updates a current variable declaration. (E.g. by assignment) +til::SExpr *SExprBuilder::updateVarDecl(const ValueDecl *VD, til::SExpr *E) { + maybeUpdateVD(E, VD); + auto It = LVarIdxMap.find(VD); + if (It == LVarIdxMap.end()) { + til::SExpr *Ptr = new (Arena) til::LiteralPtr(VD); + til::SExpr *St = new (Arena) til::Store(Ptr, E); + return St; + } + CurrentLVarMap.makeWritable(); + CurrentLVarMap.elem(It->second).second = E; + return E; +} + +// Make a Phi node in the current block for the i^th variable in CurrentVarMap. +// If E != null, sets Phi[CurrentBlockInfo->ArgIndex] = E. +// If E == null, this is a backedge and will be set later. 
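+// For example, at a loop head with one forward edge and one back edge, the
+// merge produces Phi(x0, null); the null slot is filled in later by
+// mergePhiNodesBackEdge() once the loop body has been translated.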
+void SExprBuilder::makePhiNodeVar(unsigned i, unsigned NPreds, til::SExpr *E) {
+  unsigned ArgIndex = CurrentBlockInfo->ProcessedPredecessors;
+  assert(ArgIndex > 0 && ArgIndex < NPreds);
+
+  til::SExpr *CurrE = CurrentLVarMap[i].second;
+  if (CurrE->block() == CurrentBB) {
+    // We already have a Phi node in the current block,
+    // so just add the new variable to the Phi node.
+    auto *Ph = dyn_cast<til::Phi>(CurrE);
+    assert(Ph && "Expecting Phi node.");
+    if (E)
+      Ph->values()[ArgIndex] = E;
+    return;
+  }
+
+  // Make a new phi node: phi(..., E)
+  // All phi args up to the current index are set to the current value.
+  til::Phi *Ph = new (Arena) til::Phi(Arena, NPreds);
+  Ph->values().setValues(NPreds, nullptr);
+  for (unsigned PIdx = 0; PIdx < ArgIndex; ++PIdx)
+    Ph->values()[PIdx] = CurrE;
+  if (E)
+    Ph->values()[ArgIndex] = E;
+  Ph->setClangDecl(CurrentLVarMap[i].first);
+  // If E is from a back-edge, or either E or CurrE are incomplete, then
+  // mark this node as incomplete; we may need to remove it later.
+  if (!E || isIncompletePhi(E) || isIncompletePhi(CurrE))
+    Ph->setStatus(til::Phi::PH_Incomplete);
+
+  // Add Phi node to current block, and update CurrentLVarMap[i].
+  CurrentArguments.push_back(Ph);
+  if (Ph->status() == til::Phi::PH_Incomplete)
+    IncompleteArgs.push_back(Ph);
+
+  CurrentLVarMap.makeWritable();
+  CurrentLVarMap.elem(i).second = Ph;
+}
+
+// Merge values from Map into the current variable map.
+// This will construct Phi nodes in the current basic block as necessary.
+void SExprBuilder::mergeEntryMap(LVarDefinitionMap Map) {
+  assert(CurrentBlockInfo && "Not processing a block!");
+
+  if (!CurrentLVarMap.valid()) {
+    // Steal Map, using copy-on-write.
+    CurrentLVarMap = std::move(Map);
+    return;
+  }
+  if (CurrentLVarMap.sameAs(Map))
+    return;  // Easy merge: maps from different predecessors are unchanged.
+
+  unsigned NPreds = CurrentBB->numPredecessors();
+  unsigned ESz = CurrentLVarMap.size();
+  unsigned MSz = Map.size();
+  unsigned Sz = std::min(ESz, MSz);
+
+  for (unsigned i = 0; i < Sz; ++i) {
+    if (CurrentLVarMap[i].first != Map[i].first) {
+      // We've reached the end of variables in common.
+      CurrentLVarMap.makeWritable();
+      CurrentLVarMap.downsize(i);
+      break;
+    }
+    if (CurrentLVarMap[i].second != Map[i].second)
+      makePhiNodeVar(i, NPreds, Map[i].second);
+  }
+  if (ESz > MSz) {
+    CurrentLVarMap.makeWritable();
+    CurrentLVarMap.downsize(Map.size());
+  }
+}
+
+// Merge a back edge into the current variable map.
+// This will create phi nodes for all variables in the variable map.
+void SExprBuilder::mergeEntryMapBackEdge() {
+  // We don't have definitions for variables on the backedge, because we
+  // haven't gotten that far in the CFG. Thus, when encountering a back edge,
+  // we conservatively create Phi nodes for all variables. Unnecessary Phi
+  // nodes will be marked as incomplete, and stripped out at the end.
+  //
+  // A Phi node is unnecessary if it only refers to itself and one other
+  // variable, e.g. x = Phi(y, y, x) can be reduced to x = y.
+
+  assert(CurrentBlockInfo && "Not processing a block!");
+
+  if (CurrentBlockInfo->HasBackEdges)
+    return;
+  CurrentBlockInfo->HasBackEdges = true;
+
+  CurrentLVarMap.makeWritable();
+  unsigned Sz = CurrentLVarMap.size();
+  unsigned NPreds = CurrentBB->numPredecessors();
+
+  for (unsigned i = 0; i < Sz; ++i)
+    makePhiNodeVar(i, NPreds, nullptr);
+}
+
+// Update the phi nodes that were initially created for a back edge
+// once the variable definitions have been computed.
+// I.e., merge the current variable map into the phi nodes for Blk. +void SExprBuilder::mergePhiNodesBackEdge(const CFGBlock *Blk) { + til::BasicBlock *BB = lookupBlock(Blk); + unsigned ArgIndex = BBInfo[Blk->getBlockID()].ProcessedPredecessors; + assert(ArgIndex > 0 && ArgIndex < BB->numPredecessors()); + + for (til::SExpr *PE : BB->arguments()) { + auto *Ph = dyn_cast_or_null<til::Phi>(PE); + assert(Ph && "Expecting Phi Node."); + assert(Ph->values()[ArgIndex] == nullptr && "Wrong index for back edge."); + + til::SExpr *E = lookupVarDecl(Ph->clangDecl()); + assert(E && "Couldn't find local variable for Phi node."); + Ph->values()[ArgIndex] = E; + } +} + +void SExprBuilder::enterCFG(CFG *Cfg, const NamedDecl *D, + const CFGBlock *First) { + // Perform initial setup operations. + unsigned NBlocks = Cfg->getNumBlockIDs(); + Scfg = new (Arena) til::SCFG(Arena, NBlocks); + + // allocate all basic blocks immediately, to handle forward references. + BBInfo.resize(NBlocks); + BlockMap.resize(NBlocks, nullptr); + // create map from clang blockID to til::BasicBlocks + for (auto *B : *Cfg) { + auto *BB = new (Arena) til::BasicBlock(Arena); + BB->reserveInstructions(B->size()); + BlockMap[B->getBlockID()] = BB; + } + + CurrentBB = lookupBlock(&Cfg->getEntry()); + auto Parms = isa<ObjCMethodDecl>(D) ? cast<ObjCMethodDecl>(D)->parameters() + : cast<FunctionDecl>(D)->parameters(); + for (auto *Pm : Parms) { + QualType T = Pm->getType(); + if (!T.isTrivialType(Pm->getASTContext())) + continue; + + // Add parameters to local variable map. + // FIXME: right now we emulate params with loads; that should be fixed. + til::SExpr *Lp = new (Arena) til::LiteralPtr(Pm); + til::SExpr *Ld = new (Arena) til::Load(Lp); + til::SExpr *V = addStatement(Ld, nullptr, Pm); + addVarDecl(Pm, V); + } +} + +void SExprBuilder::enterCFGBlock(const CFGBlock *B) { + // Initialize TIL basic block and add it to the CFG. + CurrentBB = lookupBlock(B); + CurrentBB->reservePredecessors(B->pred_size()); + Scfg->add(CurrentBB); + + CurrentBlockInfo = &BBInfo[B->getBlockID()]; + + // CurrentLVarMap is moved to ExitMap on block exit. + // FIXME: the entry block will hold function parameters. + // assert(!CurrentLVarMap.valid() && "CurrentLVarMap already initialized."); +} + +void SExprBuilder::handlePredecessor(const CFGBlock *Pred) { + // Compute CurrentLVarMap on entry from ExitMaps of predecessors + + CurrentBB->addPredecessor(BlockMap[Pred->getBlockID()]); + BlockInfo *PredInfo = &BBInfo[Pred->getBlockID()]; + assert(PredInfo->UnprocessedSuccessors > 0); + + if (--PredInfo->UnprocessedSuccessors == 0) + mergeEntryMap(std::move(PredInfo->ExitMap)); + else + mergeEntryMap(PredInfo->ExitMap.clone()); + + ++CurrentBlockInfo->ProcessedPredecessors; +} + +void SExprBuilder::handlePredecessorBackEdge(const CFGBlock *Pred) { + mergeEntryMapBackEdge(); +} + +void SExprBuilder::enterCFGBlockBody(const CFGBlock *B) { + // The merge*() methods have created arguments. + // Push those arguments onto the basic block. 
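+  // (CurrentArguments was populated by makePhiNodeVar() while the
+  // predecessors' variable maps were merged above.)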
+ CurrentBB->arguments().reserve( + static_cast<unsigned>(CurrentArguments.size()), Arena); + for (auto *A : CurrentArguments) + CurrentBB->addArgument(A); +} + +void SExprBuilder::handleStatement(const Stmt *S) { + til::SExpr *E = translate(S, nullptr); + addStatement(E, S); +} + +void SExprBuilder::handleDestructorCall(const VarDecl *VD, + const CXXDestructorDecl *DD) { + til::SExpr *Sf = new (Arena) til::LiteralPtr(VD); + til::SExpr *Dr = new (Arena) til::LiteralPtr(DD); + til::SExpr *Ap = new (Arena) til::Apply(Dr, Sf); + til::SExpr *E = new (Arena) til::Call(Ap); + addStatement(E, nullptr); +} + +void SExprBuilder::exitCFGBlockBody(const CFGBlock *B) { + CurrentBB->instructions().reserve( + static_cast<unsigned>(CurrentInstructions.size()), Arena); + for (auto *V : CurrentInstructions) + CurrentBB->addInstruction(V); + + // Create an appropriate terminator + unsigned N = B->succ_size(); + auto It = B->succ_begin(); + if (N == 1) { + til::BasicBlock *BB = *It ? lookupBlock(*It) : nullptr; + // TODO: set index + unsigned Idx = BB ? BB->findPredecessorIndex(CurrentBB) : 0; + auto *Tm = new (Arena) til::Goto(BB, Idx); + CurrentBB->setTerminator(Tm); + } + else if (N == 2) { + til::SExpr *C = translate(B->getTerminatorCondition(true), nullptr); + til::BasicBlock *BB1 = *It ? lookupBlock(*It) : nullptr; + ++It; + til::BasicBlock *BB2 = *It ? lookupBlock(*It) : nullptr; + // FIXME: make sure these aren't critical edges. + auto *Tm = new (Arena) til::Branch(C, BB1, BB2); + CurrentBB->setTerminator(Tm); + } +} + +void SExprBuilder::handleSuccessor(const CFGBlock *Succ) { + ++CurrentBlockInfo->UnprocessedSuccessors; +} + +void SExprBuilder::handleSuccessorBackEdge(const CFGBlock *Succ) { + mergePhiNodesBackEdge(Succ); + ++BBInfo[Succ->getBlockID()].ProcessedPredecessors; +} + +void SExprBuilder::exitCFGBlock(const CFGBlock *B) { + CurrentArguments.clear(); + CurrentInstructions.clear(); + CurrentBlockInfo->ExitMap = std::move(CurrentLVarMap); + CurrentBB = nullptr; + CurrentBlockInfo = nullptr; +} + +void SExprBuilder::exitCFG(const CFGBlock *Last) { + for (auto *Ph : IncompleteArgs) { + if (Ph->status() == til::Phi::PH_Incomplete) + simplifyIncompleteArg(Ph); + } + + CurrentArguments.clear(); + CurrentInstructions.clear(); + IncompleteArgs.clear(); +} + +/* +namespace { + +class TILPrinter : + public til::PrettyPrinter<TILPrinter, llvm::raw_ostream> {}; + +} // namespace + +namespace clang { +namespace threadSafety { + +void printSCFG(CFGWalker &Walker) { + llvm::BumpPtrAllocator Bpa; + til::MemRegionRef Arena(&Bpa); + SExprBuilder SxBuilder(Arena); + til::SCFG *Scfg = SxBuilder.buildCFG(Walker); + TILPrinter::print(Scfg, llvm::errs()); +} + +} // namespace threadSafety +} // namespace clang +*/ diff --git a/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyLogical.cpp b/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyLogical.cpp new file mode 100644 index 000000000000..ac730770093e --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyLogical.cpp @@ -0,0 +1,111 @@ +//===- ThreadSafetyLogical.cpp ---------------------------------*- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file defines a representation for logical expressions with SExpr leaves +// that are used as part of fact-checking capability expressions. +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/ThreadSafetyLogical.h" + +using namespace llvm; +using namespace clang::threadSafety::lexpr; + +// Implication. We implement De Morgan's Laws by maintaining LNeg and RNeg +// to keep track of whether LHS and RHS are negated. +static bool implies(const LExpr *LHS, bool LNeg, const LExpr *RHS, bool RNeg) { + // In comments below, we write => for implication. + + // Calculates the logical AND implication operator. + const auto LeftAndOperator = [=](const BinOp *A) { + return implies(A->left(), LNeg, RHS, RNeg) && + implies(A->right(), LNeg, RHS, RNeg); + }; + const auto RightAndOperator = [=](const BinOp *A) { + return implies(LHS, LNeg, A->left(), RNeg) && + implies(LHS, LNeg, A->right(), RNeg); + }; + + // Calculates the logical OR implication operator. + const auto LeftOrOperator = [=](const BinOp *A) { + return implies(A->left(), LNeg, RHS, RNeg) || + implies(A->right(), LNeg, RHS, RNeg); + }; + const auto RightOrOperator = [=](const BinOp *A) { + return implies(LHS, LNeg, A->left(), RNeg) || + implies(LHS, LNeg, A->right(), RNeg); + }; + + // Recurse on right. + switch (RHS->kind()) { + case LExpr::And: + // When performing right recursion: + // C => A & B [if] C => A and C => B + // When performing right recursion (negated): + // C => !(A & B) [if] C => !A | !B [===] C => !A or C => !B + return RNeg ? RightOrOperator(cast<And>(RHS)) + : RightAndOperator(cast<And>(RHS)); + case LExpr::Or: + // When performing right recursion: + // C => (A | B) [if] C => A or C => B + // When performing right recursion (negated): + // C => !(A | B) [if] C => !A & !B [===] C => !A and C => !B + return RNeg ? RightAndOperator(cast<Or>(RHS)) + : RightOrOperator(cast<Or>(RHS)); + case LExpr::Not: + // Note that C => !A is very different from !(C => A). It would be incorrect + // to return !implies(LHS, RHS). + return implies(LHS, LNeg, cast<Not>(RHS)->exp(), !RNeg); + case LExpr::Terminal: + // After reaching the terminal, it's time to recurse on the left. + break; + } + + // RHS is now a terminal. Recurse on Left. + switch (LHS->kind()) { + case LExpr::And: + // When performing left recursion: + // A & B => C [if] A => C or B => C + // When performing left recursion (negated): + // !(A & B) => C [if] !A | !B => C [===] !A => C and !B => C + return LNeg ? LeftAndOperator(cast<And>(LHS)) + : LeftOrOperator(cast<And>(LHS)); + case LExpr::Or: + // When performing left recursion: + // A | B => C [if] A => C and B => C + // When performing left recursion (negated): + // !(A | B) => C [if] !A & !B => C [===] !A => C or !B => C + return LNeg ? LeftOrOperator(cast<Or>(LHS)) + : LeftAndOperator(cast<Or>(LHS)); + case LExpr::Not: + // Note that A => !C is very different from !(A => C). It would be incorrect + // to return !implies(LHS, RHS). + return implies(cast<Not>(LHS)->exp(), !LNeg, RHS, RNeg); + case LExpr::Terminal: + // After reaching the terminal, it's time to perform identity comparisons. + break; + } + + // A => A + // !A => !A + if (LNeg != RNeg) + return false; + + // FIXME -- this should compare SExprs for equality, not pointer equality. 
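+  // (Two occurrences of the same mutex expression translated separately yield
+  // distinct SExpr objects, so pointer equality can miss implications that do
+  // in fact hold.)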
+ return cast<Terminal>(LHS)->expr() == cast<Terminal>(RHS)->expr(); +} + +namespace clang { +namespace threadSafety { +namespace lexpr { + +bool implies(const LExpr *LHS, const LExpr *RHS) { + // Start out by assuming that LHS and RHS are not negated. + return ::implies(LHS, false, RHS, false); +} +} +} +} diff --git a/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyTIL.cpp b/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyTIL.cpp new file mode 100644 index 000000000000..652f953d2a6d --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/ThreadSafetyTIL.cpp @@ -0,0 +1,332 @@ +//===- ThreadSafetyTIL.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/ThreadSafetyTIL.h" +#include "clang/Basic/LLVM.h" +#include "llvm/Support/Casting.h" +#include <cassert> +#include <cstddef> + +using namespace clang; +using namespace threadSafety; +using namespace til; + +StringRef til::getUnaryOpcodeString(TIL_UnaryOpcode Op) { + switch (Op) { + case UOP_Minus: return "-"; + case UOP_BitNot: return "~"; + case UOP_LogicNot: return "!"; + } + return {}; +} + +StringRef til::getBinaryOpcodeString(TIL_BinaryOpcode Op) { + switch (Op) { + case BOP_Mul: return "*"; + case BOP_Div: return "/"; + case BOP_Rem: return "%"; + case BOP_Add: return "+"; + case BOP_Sub: return "-"; + case BOP_Shl: return "<<"; + case BOP_Shr: return ">>"; + case BOP_BitAnd: return "&"; + case BOP_BitXor: return "^"; + case BOP_BitOr: return "|"; + case BOP_Eq: return "=="; + case BOP_Neq: return "!="; + case BOP_Lt: return "<"; + case BOP_Leq: return "<="; + case BOP_Cmp: return "<=>"; + case BOP_LogicAnd: return "&&"; + case BOP_LogicOr: return "||"; + } + return {}; +} + +SExpr* Future::force() { + Status = FS_evaluating; + Result = compute(); + Status = FS_done; + return Result; +} + +unsigned BasicBlock::addPredecessor(BasicBlock *Pred) { + unsigned Idx = Predecessors.size(); + Predecessors.reserveCheck(1, Arena); + Predecessors.push_back(Pred); + for (auto *E : Args) { + if (auto *Ph = dyn_cast<Phi>(E)) { + Ph->values().reserveCheck(1, Arena); + Ph->values().push_back(nullptr); + } + } + return Idx; +} + +void BasicBlock::reservePredecessors(unsigned NumPreds) { + Predecessors.reserve(NumPreds, Arena); + for (auto *E : Args) { + if (auto *Ph = dyn_cast<Phi>(E)) { + Ph->values().reserve(NumPreds, Arena); + } + } +} + +// If E is a variable, then trace back through any aliases or redundant +// Phi nodes to find the canonical definition. +const SExpr *til::getCanonicalVal(const SExpr *E) { + while (true) { + if (const auto *V = dyn_cast<Variable>(E)) { + if (V->kind() == Variable::VK_Let) { + E = V->definition(); + continue; + } + } + if (const auto *Ph = dyn_cast<Phi>(E)) { + if (Ph->status() == Phi::PH_SingleVal) { + E = Ph->values()[0]; + continue; + } + } + break; + } + return E; +} + +// If E is a variable, then trace back through any aliases or redundant +// Phi nodes to find the canonical definition. +// The non-const version will simplify incomplete Phi nodes. +SExpr *til::simplifyToCanonicalVal(SExpr *E) { + while (true) { + if (auto *V = dyn_cast<Variable>(E)) { + if (V->kind() != Variable::VK_Let) + return V; + // Eliminate redundant variables, e.g. 
x = y, or x = 5,
+      // but keep anything more complicated.
+      if (til::ThreadSafetyTIL::isTrivial(V->definition())) {
+        E = V->definition();
+        continue;
+      }
+      return V;
+    }
+    if (auto *Ph = dyn_cast<Phi>(E)) {
+      if (Ph->status() == Phi::PH_Incomplete)
+        simplifyIncompleteArg(Ph);
+      // Eliminate redundant Phi nodes.
+      if (Ph->status() == Phi::PH_SingleVal) {
+        E = Ph->values()[0];
+        continue;
+      }
+    }
+    return E;
+  }
+}
+
+// Trace the arguments of an incomplete Phi node to see if they have the same
+// canonical definition. If so, mark the Phi node as redundant.
+// simplifyToCanonicalVal() will recursively call simplifyIncompleteArg().
+void til::simplifyIncompleteArg(til::Phi *Ph) {
+  assert(Ph && Ph->status() == Phi::PH_Incomplete);
+
+  // Eliminate infinite recursion -- assume that this node is not redundant.
+  Ph->setStatus(Phi::PH_MultiVal);
+
+  SExpr *E0 = simplifyToCanonicalVal(Ph->values()[0]);
+  for (unsigned i = 1, n = Ph->values().size(); i < n; ++i) {
+    SExpr *Ei = simplifyToCanonicalVal(Ph->values()[i]);
+    if (Ei == Ph)
+      continue;  // Recursive reference to itself. Don't count.
+    if (Ei != E0) {
+      return;    // Status is already set to MultiVal.
+    }
+  }
+  Ph->setStatus(Phi::PH_SingleVal);
+}
+
+// Renumbers the arguments and instructions to have unique, sequential IDs.
+unsigned BasicBlock::renumberInstrs(unsigned ID) {
+  for (auto *Arg : Args)
+    Arg->setID(this, ID++);
+  for (auto *Instr : Instrs)
+    Instr->setID(this, ID++);
+  TermInstr->setID(this, ID++);
+  return ID;
+}
+
+// Sorts the CFG's blocks using a reverse post-order depth-first traversal.
+// Each block will be written into the Blocks array in order, and its BlockID
+// will be set to the index in the array. Sorting should start from the entry
+// block, and ID should be the total number of blocks.
+unsigned BasicBlock::topologicalSort(SimpleArray<BasicBlock *> &Blocks,
+                                     unsigned ID) {
+  if (Visited) return ID;
+  Visited = true;
+  for (auto *Block : successors())
+    ID = Block->topologicalSort(Blocks, ID);
+  // Set ID and update block array in place.
+  // We may lose pointers to unreachable blocks.
+  assert(ID > 0);
+  BlockID = --ID;
+  Blocks[BlockID] = this;
+  return ID;
+}
+
+// Performs a reverse topological traversal, starting from the exit block and
+// following back-edges. The dominator is serialized before any predecessors,
+// which guarantees that all blocks are serialized after their dominator and
+// before their post-dominator (because it's a reverse topological traversal).
+// ID should be initially set to 0.
+//
+// This sort assumes that (1) dominators have been computed, (2) there are no
+// critical edges, and (3) the entry block is reachable from the exit block
+// and no blocks are accessible via traversal of back-edges from the exit that
+// weren't accessible via forward edges from the entry.
+unsigned BasicBlock::topologicalFinalSort(SimpleArray<BasicBlock *> &Blocks,
+                                          unsigned ID) {
+  // Visited is assumed to have been set to true by topologicalSort. This pass
+  // resets it to false, so !Visited here means the block has already been
+  // processed by this pass (or was never reachable).
+  if (!Visited) return ID;
+  Visited = false;
+  if (DominatorNode.Parent)
+    ID = DominatorNode.Parent->topologicalFinalSort(Blocks, ID);
+  for (auto *Pred : Predecessors)
+    ID = Pred->topologicalFinalSort(Blocks, ID);
+  assert(static_cast<size_t>(ID) < Blocks.size());
+  BlockID = ID++;
+  Blocks[BlockID] = this;
+  return ID;
+}
+
+// Computes the immediate dominator of the current block. Assumes that all of
+// its predecessors have already computed their dominators. This is achieved
This is achieved +// by visiting the nodes in topological order. +void BasicBlock::computeDominator() { + BasicBlock *Candidate = nullptr; + // Walk backwards from each predecessor to find the common dominator node. + for (auto *Pred : Predecessors) { + // Skip back-edges + if (Pred->BlockID >= BlockID) continue; + // If we don't yet have a candidate for dominator yet, take this one. + if (Candidate == nullptr) { + Candidate = Pred; + continue; + } + // Walk the alternate and current candidate back to find a common ancestor. + auto *Alternate = Pred; + while (Alternate != Candidate) { + if (Candidate->BlockID > Alternate->BlockID) + Candidate = Candidate->DominatorNode.Parent; + else + Alternate = Alternate->DominatorNode.Parent; + } + } + DominatorNode.Parent = Candidate; + DominatorNode.SizeOfSubTree = 1; +} + +// Computes the immediate post-dominator of the current block. Assumes that all +// of its successors have already computed their post-dominators. This is +// achieved visiting the nodes in reverse topological order. +void BasicBlock::computePostDominator() { + BasicBlock *Candidate = nullptr; + // Walk back from each predecessor to find the common post-dominator node. + for (auto *Succ : successors()) { + // Skip back-edges + if (Succ->BlockID <= BlockID) continue; + // If we don't yet have a candidate for post-dominator yet, take this one. + if (Candidate == nullptr) { + Candidate = Succ; + continue; + } + // Walk the alternate and current candidate back to find a common ancestor. + auto *Alternate = Succ; + while (Alternate != Candidate) { + if (Candidate->BlockID < Alternate->BlockID) + Candidate = Candidate->PostDominatorNode.Parent; + else + Alternate = Alternate->PostDominatorNode.Parent; + } + } + PostDominatorNode.Parent = Candidate; + PostDominatorNode.SizeOfSubTree = 1; +} + +// Renumber instructions in all blocks +void SCFG::renumberInstrs() { + unsigned InstrID = 0; + for (auto *Block : Blocks) + InstrID = Block->renumberInstrs(InstrID); +} + +static inline void computeNodeSize(BasicBlock *B, + BasicBlock::TopologyNode BasicBlock::*TN) { + BasicBlock::TopologyNode *N = &(B->*TN); + if (N->Parent) { + BasicBlock::TopologyNode *P = &(N->Parent->*TN); + // Initially set ID relative to the (as yet uncomputed) parent ID + N->NodeID = P->SizeOfSubTree; + P->SizeOfSubTree += N->SizeOfSubTree; + } +} + +static inline void computeNodeID(BasicBlock *B, + BasicBlock::TopologyNode BasicBlock::*TN) { + BasicBlock::TopologyNode *N = &(B->*TN); + if (N->Parent) { + BasicBlock::TopologyNode *P = &(N->Parent->*TN); + N->NodeID += P->NodeID; // Fix NodeIDs relative to starting node. + } +} + +// Normalizes a CFG. Normalization has a few major components: +// 1) Removing unreachable blocks. +// 2) Computing dominators and post-dominators +// 3) Topologically sorting the blocks into the "Blocks" array. +void SCFG::computeNormalForm() { + // Topologically sort the blocks starting from the entry block. + unsigned NumUnreachableBlocks = Entry->topologicalSort(Blocks, Blocks.size()); + if (NumUnreachableBlocks > 0) { + // If there were unreachable blocks shift everything down, and delete them. + for (unsigned I = NumUnreachableBlocks, E = Blocks.size(); I < E; ++I) { + unsigned NI = I - NumUnreachableBlocks; + Blocks[NI] = Blocks[I]; + Blocks[NI]->BlockID = NI; + // FIXME: clean up predecessor pointers to unreachable blocks? + } + Blocks.drop(NumUnreachableBlocks); + } + + // Compute dominators. 
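+  // The candidate-walking loop in computeDominator() (defined above) is the
+  // classic two-finger intersection: because BlockIDs are topologically
+  // ordered, stepping whichever candidate has the larger ID up its dominator
+  // chain strictly decreases that ID, so the two walks must meet at the
+  // nearest common dominator. A minimal sketch of the same idea over plain
+  // integer IDs (illustrative only; IDom[] is a hypothetical
+  // immediate-dominator table indexed by topological ID, with IDom[0] == 0
+  // for the entry block):
+  //
+  //   unsigned intersect(const unsigned IDom[], unsigned A, unsigned B) {
+  //     while (A != B) {
+  //       if (A > B)
+  //         A = IDom[A]; // A block with a larger ID cannot dominate one
+  //       else           // with a smaller ID, so step it upwards.
+  //         B = IDom[B];
+  //     }
+  //     return A; // Nearest common dominator of the two inputs.
+  //   }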
+ for (auto *Block : Blocks) + Block->computeDominator(); + + // Once dominators have been computed, the final sort may be performed. + unsigned NumBlocks = Exit->topologicalFinalSort(Blocks, 0); + assert(static_cast<size_t>(NumBlocks) == Blocks.size()); + (void) NumBlocks; + + // Renumber the instructions now that we have a final sort. + renumberInstrs(); + + // Compute post-dominators and compute the sizes of each node in the + // dominator tree. + for (auto *Block : Blocks.reverse()) { + Block->computePostDominator(); + computeNodeSize(Block, &BasicBlock::DominatorNode); + } + // Compute the sizes of each node in the post-dominator tree and assign IDs in + // the dominator tree. + for (auto *Block : Blocks) { + computeNodeID(Block, &BasicBlock::DominatorNode); + computeNodeSize(Block, &BasicBlock::PostDominatorNode); + } + // Assign IDs in the post-dominator tree. + for (auto *Block : Blocks.reverse()) { + computeNodeID(Block, &BasicBlock::PostDominatorNode); + } +} diff --git a/contrib/llvm-project/clang/lib/Analysis/UninitializedValues.cpp b/contrib/llvm-project/clang/lib/Analysis/UninitializedValues.cpp new file mode 100644 index 000000000000..2437095a22cf --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/UninitializedValues.cpp @@ -0,0 +1,970 @@ +//===- UninitializedValues.cpp - Find Uninitialized Values ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements uninitialized values analysis for source-level CFGs. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/UninitializedValues.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/Expr.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtObjC.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/DomainSpecific/ObjCNoReturn.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PackedVector.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> +#include <optional> + +using namespace clang; + +#define DEBUG_LOGGING 0 + +static bool isTrackedVar(const VarDecl *vd, const DeclContext *dc) { + if (vd->isLocalVarDecl() && !vd->hasGlobalStorage() && + !vd->isExceptionVariable() && !vd->isInitCapture() && + !vd->isImplicit() && vd->getDeclContext() == dc) { + QualType ty = vd->getType(); + return ty->isScalarType() || ty->isVectorType() || ty->isRecordType() || + ty->isRVVType(); + } + return false; +} + +//------------------------------------------------------------------------====// +// DeclToIndex: a mapping from Decls we track to value indices. 
+//====------------------------------------------------------------------------// + +namespace { + +class DeclToIndex { + llvm::DenseMap<const VarDecl *, unsigned> map; + +public: + DeclToIndex() = default; + + /// Compute the actual mapping from declarations to bits. + void computeMap(const DeclContext &dc); + + /// Return the number of declarations in the map. + unsigned size() const { return map.size(); } + + /// Returns the bit vector index for a given declaration. + std::optional<unsigned> getValueIndex(const VarDecl *d) const; +}; + +} // namespace + +void DeclToIndex::computeMap(const DeclContext &dc) { + unsigned count = 0; + DeclContext::specific_decl_iterator<VarDecl> I(dc.decls_begin()), + E(dc.decls_end()); + for ( ; I != E; ++I) { + const VarDecl *vd = *I; + if (isTrackedVar(vd, &dc)) + map[vd] = count++; + } +} + +std::optional<unsigned> DeclToIndex::getValueIndex(const VarDecl *d) const { + llvm::DenseMap<const VarDecl *, unsigned>::const_iterator I = map.find(d); + if (I == map.end()) + return std::nullopt; + return I->second; +} + +//------------------------------------------------------------------------====// +// CFGBlockValues: dataflow values for CFG blocks. +//====------------------------------------------------------------------------// + +// These values are defined in such a way that a merge can be done using +// a bitwise OR. +enum Value { Unknown = 0x0, /* 00 */ + Initialized = 0x1, /* 01 */ + Uninitialized = 0x2, /* 10 */ + MayUninitialized = 0x3 /* 11 */ }; + +static bool isUninitialized(const Value v) { + return v >= Uninitialized; +} + +static bool isAlwaysUninit(const Value v) { + return v == Uninitialized; +} + +namespace { + +using ValueVector = llvm::PackedVector<Value, 2, llvm::SmallBitVector>; + +class CFGBlockValues { + const CFG &cfg; + SmallVector<ValueVector, 8> vals; + ValueVector scratch; + DeclToIndex declToIndex; + +public: + CFGBlockValues(const CFG &cfg); + + unsigned getNumEntries() const { return declToIndex.size(); } + + void computeSetOfDeclarations(const DeclContext &dc); + + ValueVector &getValueVector(const CFGBlock *block) { + return vals[block->getBlockID()]; + } + + void setAllScratchValues(Value V); + void mergeIntoScratch(ValueVector const &source, bool isFirst); + bool updateValueVectorWithScratch(const CFGBlock *block); + + bool hasNoDeclarations() const { + return declToIndex.size() == 0; + } + + void resetScratch(); + + ValueVector::reference operator[](const VarDecl *vd); + + Value getValue(const CFGBlock *block, const CFGBlock *dstBlock, + const VarDecl *vd) { + std::optional<unsigned> idx = declToIndex.getValueIndex(vd); + return getValueVector(block)[*idx]; + } +}; + +} // namespace + +CFGBlockValues::CFGBlockValues(const CFG &c) : cfg(c), vals(0) {} + +void CFGBlockValues::computeSetOfDeclarations(const DeclContext &dc) { + declToIndex.computeMap(dc); + unsigned decls = declToIndex.size(); + scratch.resize(decls); + unsigned n = cfg.getNumBlockIDs(); + if (!n) + return; + vals.resize(n); + for (auto &val : vals) + val.resize(decls); +} + +#if DEBUG_LOGGING +static void printVector(const CFGBlock *block, ValueVector &bv, + unsigned num) { + llvm::errs() << block->getBlockID() << " :"; + for (const auto &i : bv) + llvm::errs() << ' ' << i; + llvm::errs() << " : " << num << '\n'; +} +#endif + +void CFGBlockValues::setAllScratchValues(Value V) { + for (unsigned I = 0, E = scratch.size(); I != E; ++I) + scratch[I] = V; +} + +void CFGBlockValues::mergeIntoScratch(ValueVector const &source, + bool isFirst) { + if (isFirst) + 
scratch = source; + else + scratch |= source; +} + +bool CFGBlockValues::updateValueVectorWithScratch(const CFGBlock *block) { + ValueVector &dst = getValueVector(block); + bool changed = (dst != scratch); + if (changed) + dst = scratch; +#if DEBUG_LOGGING + printVector(block, scratch, 0); +#endif + return changed; +} + +void CFGBlockValues::resetScratch() { + scratch.reset(); +} + +ValueVector::reference CFGBlockValues::operator[](const VarDecl *vd) { + return scratch[*declToIndex.getValueIndex(vd)]; +} + +//------------------------------------------------------------------------====// +// Classification of DeclRefExprs as use or initialization. +//====------------------------------------------------------------------------// + +namespace { + +class FindVarResult { + const VarDecl *vd; + const DeclRefExpr *dr; + +public: + FindVarResult(const VarDecl *vd, const DeclRefExpr *dr) : vd(vd), dr(dr) {} + + const DeclRefExpr *getDeclRefExpr() const { return dr; } + const VarDecl *getDecl() const { return vd; } +}; + +} // namespace + +static const Expr *stripCasts(ASTContext &C, const Expr *Ex) { + while (Ex) { + Ex = Ex->IgnoreParenNoopCasts(C); + if (const auto *CE = dyn_cast<CastExpr>(Ex)) { + if (CE->getCastKind() == CK_LValueBitCast) { + Ex = CE->getSubExpr(); + continue; + } + } + break; + } + return Ex; +} + +/// If E is an expression comprising a reference to a single variable, find that +/// variable. +static FindVarResult findVar(const Expr *E, const DeclContext *DC) { + if (const auto *DRE = + dyn_cast<DeclRefExpr>(stripCasts(DC->getParentASTContext(), E))) + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) + if (isTrackedVar(VD, DC)) + return FindVarResult(VD, DRE); + return FindVarResult(nullptr, nullptr); +} + +namespace { + +/// Classify each DeclRefExpr as an initialization or a use. Any +/// DeclRefExpr which isn't explicitly classified will be assumed to have +/// escaped the analysis and will be treated as an initialization. +class ClassifyRefs : public StmtVisitor<ClassifyRefs> { +public: + enum Class { + Init, + Use, + SelfInit, + ConstRefUse, + Ignore + }; + +private: + const DeclContext *DC; + llvm::DenseMap<const DeclRefExpr *, Class> Classification; + + bool isTrackedVar(const VarDecl *VD) const { + return ::isTrackedVar(VD, DC); + } + + void classify(const Expr *E, Class C); + +public: + ClassifyRefs(AnalysisDeclContext &AC) : DC(cast<DeclContext>(AC.getDecl())) {} + + void VisitDeclStmt(DeclStmt *DS); + void VisitUnaryOperator(UnaryOperator *UO); + void VisitBinaryOperator(BinaryOperator *BO); + void VisitCallExpr(CallExpr *CE); + void VisitCastExpr(CastExpr *CE); + void VisitOMPExecutableDirective(OMPExecutableDirective *ED); + + void operator()(Stmt *S) { Visit(S); } + + Class get(const DeclRefExpr *DRE) const { + llvm::DenseMap<const DeclRefExpr*, Class>::const_iterator I + = Classification.find(DRE); + if (I != Classification.end()) + return I->second; + + const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()); + if (!VD || !isTrackedVar(VD)) + return Ignore; + + return Init; + } +}; + +} // namespace + +static const DeclRefExpr *getSelfInitExpr(VarDecl *VD) { + if (VD->getType()->isRecordType()) + return nullptr; + if (Expr *Init = VD->getInit()) { + const auto *DRE = + dyn_cast<DeclRefExpr>(stripCasts(VD->getASTContext(), Init)); + if (DRE && DRE->getDecl() == VD) + return DRE; + } + return nullptr; +} + +void ClassifyRefs::classify(const Expr *E, Class C) { + // The result of a ?: could also be an lvalue. 
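+  // For instance (illustrative), in
+  //
+  //   (b ? x : y) = 1;
+  //
+  // the assignment initializes whichever of 'x' or 'y' the condition selects,
+  // so the classification C is propagated into both arms below.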
+ E = E->IgnoreParens(); + if (const auto *CO = dyn_cast<ConditionalOperator>(E)) { + classify(CO->getTrueExpr(), C); + classify(CO->getFalseExpr(), C); + return; + } + + if (const auto *BCO = dyn_cast<BinaryConditionalOperator>(E)) { + classify(BCO->getFalseExpr(), C); + return; + } + + if (const auto *OVE = dyn_cast<OpaqueValueExpr>(E)) { + classify(OVE->getSourceExpr(), C); + return; + } + + if (const auto *ME = dyn_cast<MemberExpr>(E)) { + if (const auto *VD = dyn_cast<VarDecl>(ME->getMemberDecl())) { + if (!VD->isStaticDataMember()) + classify(ME->getBase(), C); + } + return; + } + + if (const auto *BO = dyn_cast<BinaryOperator>(E)) { + switch (BO->getOpcode()) { + case BO_PtrMemD: + case BO_PtrMemI: + classify(BO->getLHS(), C); + return; + case BO_Comma: + classify(BO->getRHS(), C); + return; + default: + return; + } + } + + FindVarResult Var = findVar(E, DC); + if (const DeclRefExpr *DRE = Var.getDeclRefExpr()) + Classification[DRE] = std::max(Classification[DRE], C); +} + +void ClassifyRefs::VisitDeclStmt(DeclStmt *DS) { + for (auto *DI : DS->decls()) { + auto *VD = dyn_cast<VarDecl>(DI); + if (VD && isTrackedVar(VD)) + if (const DeclRefExpr *DRE = getSelfInitExpr(VD)) + Classification[DRE] = SelfInit; + } +} + +void ClassifyRefs::VisitBinaryOperator(BinaryOperator *BO) { + // Ignore the evaluation of a DeclRefExpr on the LHS of an assignment. If this + // is not a compound-assignment, we will treat it as initializing the variable + // when TransferFunctions visits it. A compound-assignment does not affect + // whether a variable is uninitialized, and there's no point counting it as a + // use. + if (BO->isCompoundAssignmentOp()) + classify(BO->getLHS(), Use); + else if (BO->getOpcode() == BO_Assign || BO->getOpcode() == BO_Comma) + classify(BO->getLHS(), Ignore); +} + +void ClassifyRefs::VisitUnaryOperator(UnaryOperator *UO) { + // Increment and decrement are uses despite there being no lvalue-to-rvalue + // conversion. + if (UO->isIncrementDecrementOp()) + classify(UO->getSubExpr(), Use); +} + +void ClassifyRefs::VisitOMPExecutableDirective(OMPExecutableDirective *ED) { + for (Stmt *S : OMPExecutableDirective::used_clauses_children(ED->clauses())) + classify(cast<Expr>(S), Use); +} + +static bool isPointerToConst(const QualType &QT) { + return QT->isAnyPointerType() && QT->getPointeeType().isConstQualified(); +} + +static bool hasTrivialBody(CallExpr *CE) { + if (FunctionDecl *FD = CE->getDirectCallee()) { + if (FunctionTemplateDecl *FTD = FD->getPrimaryTemplate()) + return FTD->getTemplatedDecl()->hasTrivialBody(); + return FD->hasTrivialBody(); + } + return false; +} + +void ClassifyRefs::VisitCallExpr(CallExpr *CE) { + // Classify arguments to std::move as used. + if (CE->isCallToStdMove()) { + // RecordTypes are handled in SemaDeclCXX.cpp. + if (!CE->getArg(0)->getType()->isRecordType()) + classify(CE->getArg(0), Use); + return; + } + bool isTrivialBody = hasTrivialBody(CE); + // If a value is passed by const pointer to a function, + // we should not assume that it is initialized by the call, and we + // conservatively do not assume that it is used. + // If a value is passed by const reference to a function, + // it should already be initialized. + for (CallExpr::arg_iterator I = CE->arg_begin(), E = CE->arg_end(); + I != E; ++I) { + if ((*I)->isGLValue()) { + if ((*I)->getType().isConstQualified()) + classify((*I), isTrivialBody ? 
Ignore : ConstRefUse);
+    } else if (isPointerToConst((*I)->getType())) {
+      const Expr *Ex = stripCasts(DC->getParentASTContext(), *I);
+      const auto *UO = dyn_cast<UnaryOperator>(Ex);
+      if (UO && UO->getOpcode() == UO_AddrOf)
+        Ex = UO->getSubExpr();
+      classify(Ex, Ignore);
+    }
+  }
+}
+
+void ClassifyRefs::VisitCastExpr(CastExpr *CE) {
+  if (CE->getCastKind() == CK_LValueToRValue)
+    classify(CE->getSubExpr(), Use);
+  else if (const auto *CSE = dyn_cast<CStyleCastExpr>(CE)) {
+    if (CSE->getType()->isVoidType()) {
+      // Squelch any detected load of an uninitialized value if
+      // we cast it to void.
+      // e.g. (void) x;
+      classify(CSE->getSubExpr(), Ignore);
+    }
+  }
+}
+
+//------------------------------------------------------------------------====//
+// Transfer function for uninitialized values analysis.
+//====------------------------------------------------------------------------//
+
+namespace {
+
+class TransferFunctions : public StmtVisitor<TransferFunctions> {
+  CFGBlockValues &vals;
+  const CFG &cfg;
+  const CFGBlock *block;
+  AnalysisDeclContext &ac;
+  const ClassifyRefs &classification;
+  ObjCNoReturn objCNoRet;
+  UninitVariablesHandler &handler;
+
+public:
+  TransferFunctions(CFGBlockValues &vals, const CFG &cfg,
+                    const CFGBlock *block, AnalysisDeclContext &ac,
+                    const ClassifyRefs &classification,
+                    UninitVariablesHandler &handler)
+      : vals(vals), cfg(cfg), block(block), ac(ac),
+        classification(classification), objCNoRet(ac.getASTContext()),
+        handler(handler) {}
+
+  void reportUse(const Expr *ex, const VarDecl *vd);
+  void reportConstRefUse(const Expr *ex, const VarDecl *vd);
+
+  void VisitBinaryOperator(BinaryOperator *bo);
+  void VisitBlockExpr(BlockExpr *be);
+  void VisitCallExpr(CallExpr *ce);
+  void VisitDeclRefExpr(DeclRefExpr *dr);
+  void VisitDeclStmt(DeclStmt *ds);
+  void VisitGCCAsmStmt(GCCAsmStmt *as);
+  void VisitObjCForCollectionStmt(ObjCForCollectionStmt *FS);
+  void VisitObjCMessageExpr(ObjCMessageExpr *ME);
+  void VisitOMPExecutableDirective(OMPExecutableDirective *ED);
+
+  bool isTrackedVar(const VarDecl *vd) {
+    return ::isTrackedVar(vd, cast<DeclContext>(ac.getDecl()));
+  }
+
+  FindVarResult findVar(const Expr *ex) {
+    return ::findVar(ex, cast<DeclContext>(ac.getDecl()));
+  }
+
+  UninitUse getUninitUse(const Expr *ex, const VarDecl *vd, Value v) {
+    UninitUse Use(ex, isAlwaysUninit(v));
+
+    assert(isUninitialized(v));
+    if (Use.getKind() == UninitUse::Always)
+      return Use;
+
+    // If an edge which leads unconditionally to this use did not initialize
+    // the variable, we can say something stronger than 'may be uninitialized':
+    // we can say 'either it's used uninitialized or you have dead code'.
+    //
+    // We track the number of successors of a node which have been visited, and
+    // visit a node once we have visited all of its successors. Only edges where
+    // the variable might still be uninitialized are followed. Since a variable
+    // can't transfer from being initialized to being uninitialized, this will
+    // trace out the subgraph which inevitably leads to the use and does not
+    // initialize the variable. We do not want to skip past loops, since their
+    // non-termination might be correlated with the initialization condition.
+ // + // For example: + // + // void f(bool a, bool b) { + // block1: int n; + // if (a) { + // block2: if (b) + // block3: n = 1; + // block4: } else if (b) { + // block5: while (!a) { + // block6: do_work(&a); + // n = 2; + // } + // } + // block7: if (a) + // block8: g(); + // block9: return n; + // } + // + // Starting from the maybe-uninitialized use in block 9: + // * Block 7 is not visited because we have only visited one of its two + // successors. + // * Block 8 is visited because we've visited its only successor. + // From block 8: + // * Block 7 is visited because we've now visited both of its successors. + // From block 7: + // * Blocks 1, 2, 4, 5, and 6 are not visited because we didn't visit all + // of their successors (we didn't visit 4, 3, 5, 6, and 5, respectively). + // * Block 3 is not visited because it initializes 'n'. + // Now the algorithm terminates, having visited blocks 7 and 8, and having + // found the frontier is blocks 2, 4, and 5. + // + // 'n' is definitely uninitialized for two edges into block 7 (from blocks 2 + // and 4), so we report that any time either of those edges is taken (in + // each case when 'b == false'), 'n' is used uninitialized. + SmallVector<const CFGBlock*, 32> Queue; + SmallVector<unsigned, 32> SuccsVisited(cfg.getNumBlockIDs(), 0); + Queue.push_back(block); + // Specify that we've already visited all successors of the starting block. + // This has the dual purpose of ensuring we never add it to the queue, and + // of marking it as not being a candidate element of the frontier. + SuccsVisited[block->getBlockID()] = block->succ_size(); + while (!Queue.empty()) { + const CFGBlock *B = Queue.pop_back_val(); + + // If the use is always reached from the entry block, make a note of that. + if (B == &cfg.getEntry()) + Use.setUninitAfterCall(); + + for (CFGBlock::const_pred_iterator I = B->pred_begin(), E = B->pred_end(); + I != E; ++I) { + const CFGBlock *Pred = *I; + if (!Pred) + continue; + + Value AtPredExit = vals.getValue(Pred, B, vd); + if (AtPredExit == Initialized) + // This block initializes the variable. + continue; + if (AtPredExit == MayUninitialized && + vals.getValue(B, nullptr, vd) == Uninitialized) { + // This block declares the variable (uninitialized), and is reachable + // from a block that initializes the variable. We can't guarantee to + // give an earlier location for the diagnostic (and it appears that + // this code is intended to be reachable) so give a diagnostic here + // and go no further down this path. + Use.setUninitAfterDecl(); + continue; + } + + if (AtPredExit == MayUninitialized) { + // If the predecessor's terminator is an "asm goto" that initializes + // the variable, then don't count it as "initialized" on the indirect + // paths. + CFGTerminator term = Pred->getTerminator(); + if (const auto *as = dyn_cast_or_null<GCCAsmStmt>(term.getStmt())) { + const CFGBlock *fallthrough = *Pred->succ_begin(); + if (as->isAsmGoto() && + llvm::any_of(as->outputs(), [&](const Expr *output) { + return vd == findVar(output).getDecl() && + llvm::any_of(as->labels(), + [&](const AddrLabelExpr *label) { + return label->getLabel()->getStmt() == B->Label && + B != fallthrough; + }); + })) { + Use.setUninitAfterDecl(); + continue; + } + } + } + + unsigned &SV = SuccsVisited[Pred->getBlockID()]; + if (!SV) { + // When visiting the first successor of a block, mark all NULL + // successors as having been visited. 
+ for (CFGBlock::const_succ_iterator SI = Pred->succ_begin(), + SE = Pred->succ_end(); + SI != SE; ++SI) + if (!*SI) + ++SV; + } + + if (++SV == Pred->succ_size()) + // All paths from this block lead to the use and don't initialize the + // variable. + Queue.push_back(Pred); + } + } + + // Scan the frontier, looking for blocks where the variable was + // uninitialized. + for (const auto *Block : cfg) { + unsigned BlockID = Block->getBlockID(); + const Stmt *Term = Block->getTerminatorStmt(); + if (SuccsVisited[BlockID] && SuccsVisited[BlockID] < Block->succ_size() && + Term) { + // This block inevitably leads to the use. If we have an edge from here + // to a post-dominator block, and the variable is uninitialized on that + // edge, we have found a bug. + for (CFGBlock::const_succ_iterator I = Block->succ_begin(), + E = Block->succ_end(); I != E; ++I) { + const CFGBlock *Succ = *I; + if (Succ && SuccsVisited[Succ->getBlockID()] >= Succ->succ_size() && + vals.getValue(Block, Succ, vd) == Uninitialized) { + // Switch cases are a special case: report the label to the caller + // as the 'terminator', not the switch statement itself. Suppress + // situations where no label matched: we can't be sure that's + // possible. + if (isa<SwitchStmt>(Term)) { + const Stmt *Label = Succ->getLabel(); + if (!Label || !isa<SwitchCase>(Label)) + // Might not be possible. + continue; + UninitUse::Branch Branch; + Branch.Terminator = Label; + Branch.Output = 0; // Ignored. + Use.addUninitBranch(Branch); + } else { + UninitUse::Branch Branch; + Branch.Terminator = Term; + Branch.Output = I - Block->succ_begin(); + Use.addUninitBranch(Branch); + } + } + } + } + } + + return Use; + } +}; + +} // namespace + +void TransferFunctions::reportUse(const Expr *ex, const VarDecl *vd) { + Value v = vals[vd]; + if (isUninitialized(v)) + handler.handleUseOfUninitVariable(vd, getUninitUse(ex, vd, v)); +} + +void TransferFunctions::reportConstRefUse(const Expr *ex, const VarDecl *vd) { + Value v = vals[vd]; + if (isAlwaysUninit(v)) + handler.handleConstRefUseOfUninitVariable(vd, getUninitUse(ex, vd, v)); +} + +void TransferFunctions::VisitObjCForCollectionStmt(ObjCForCollectionStmt *FS) { + // This represents an initialization of the 'element' value. + if (const auto *DS = dyn_cast<DeclStmt>(FS->getElement())) { + const auto *VD = cast<VarDecl>(DS->getSingleDecl()); + if (isTrackedVar(VD)) + vals[VD] = Initialized; + } +} + +void TransferFunctions::VisitOMPExecutableDirective( + OMPExecutableDirective *ED) { + for (Stmt *S : OMPExecutableDirective::used_clauses_children(ED->clauses())) { + assert(S && "Expected non-null used-in-clause child."); + Visit(S); + } + if (!ED->isStandaloneDirective()) + Visit(ED->getStructuredBlock()); +} + +void TransferFunctions::VisitBlockExpr(BlockExpr *be) { + const BlockDecl *bd = be->getBlockDecl(); + for (const auto &I : bd->captures()) { + const VarDecl *vd = I.getVariable(); + if (!isTrackedVar(vd)) + continue; + if (I.isByRef()) { + vals[vd] = Initialized; + continue; + } + reportUse(be, vd); + } +} + +void TransferFunctions::VisitCallExpr(CallExpr *ce) { + if (Decl *Callee = ce->getCalleeDecl()) { + if (Callee->hasAttr<ReturnsTwiceAttr>()) { + // After a call to a function like setjmp or vfork, any variable which is + // initialized anywhere within this function may now be initialized. For + // now, just assume such a call initializes all variables. FIXME: Only + // mark variables as initialized if they have an initializer which is + // reachable from here. 
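+      // A sketch of the pattern this conservatism covers (illustrative only):
+      //
+      //   jmp_buf env;
+      //   int n;                // Uninitialized at the first return of setjmp.
+      //   if (setjmp(env) == 0) {
+      //     n = 1;
+      //     longjmp(env, 1);    // setjmp "returns" again; n is now initialized.
+      //   }
+      //   use(n);               // Not necessarily an uninitialized use.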
+ vals.setAllScratchValues(Initialized); + } + else if (Callee->hasAttr<AnalyzerNoReturnAttr>()) { + // Functions labeled like "analyzer_noreturn" are often used to denote + // "panic" functions that in special debug situations can still return, + // but for the most part should not be treated as returning. This is a + // useful annotation borrowed from the static analyzer that is useful for + // suppressing branch-specific false positives when we call one of these + // functions but keep pretending the path continues (when in reality the + // user doesn't care). + vals.setAllScratchValues(Unknown); + } + } +} + +void TransferFunctions::VisitDeclRefExpr(DeclRefExpr *dr) { + switch (classification.get(dr)) { + case ClassifyRefs::Ignore: + break; + case ClassifyRefs::Use: + reportUse(dr, cast<VarDecl>(dr->getDecl())); + break; + case ClassifyRefs::Init: + vals[cast<VarDecl>(dr->getDecl())] = Initialized; + break; + case ClassifyRefs::SelfInit: + handler.handleSelfInit(cast<VarDecl>(dr->getDecl())); + break; + case ClassifyRefs::ConstRefUse: + reportConstRefUse(dr, cast<VarDecl>(dr->getDecl())); + break; + } +} + +void TransferFunctions::VisitBinaryOperator(BinaryOperator *BO) { + if (BO->getOpcode() == BO_Assign) { + FindVarResult Var = findVar(BO->getLHS()); + if (const VarDecl *VD = Var.getDecl()) + vals[VD] = Initialized; + } +} + +void TransferFunctions::VisitDeclStmt(DeclStmt *DS) { + for (auto *DI : DS->decls()) { + auto *VD = dyn_cast<VarDecl>(DI); + if (VD && isTrackedVar(VD)) { + if (getSelfInitExpr(VD)) { + // If the initializer consists solely of a reference to itself, we + // explicitly mark the variable as uninitialized. This allows code + // like the following: + // + // int x = x; + // + // to deliberately leave a variable uninitialized. Different analysis + // clients can detect this pattern and adjust their reporting + // appropriately, but we need to continue to analyze subsequent uses + // of the variable. + vals[VD] = Uninitialized; + } else if (VD->getInit()) { + // Treat the new variable as initialized. + vals[VD] = Initialized; + } else { + // No initializer: the variable is now uninitialized. This matters + // for cases like: + // while (...) { + // int n; + // use(n); + // n = 0; + // } + // FIXME: Mark the variable as uninitialized whenever its scope is + // left, since its scope could be re-entered by a jump over the + // declaration. + vals[VD] = Uninitialized; + } + } + } +} + +void TransferFunctions::VisitGCCAsmStmt(GCCAsmStmt *as) { + // An "asm goto" statement is a terminator that may initialize some variables. + if (!as->isAsmGoto()) + return; + + ASTContext &C = ac.getASTContext(); + for (const Expr *O : as->outputs()) { + const Expr *Ex = stripCasts(C, O); + + // Strip away any unary operators. Invalid l-values are reported by other + // semantic analysis passes. + while (const auto *UO = dyn_cast<UnaryOperator>(Ex)) + Ex = stripCasts(C, UO->getSubExpr()); + + // Mark the variable as potentially uninitialized for those cases where + // it's used on an indirect path, where it's not guaranteed to be + // defined. + if (const VarDecl *VD = findVar(Ex).getDecl()) + vals[VD] = MayUninitialized; + } +} + +void TransferFunctions::VisitObjCMessageExpr(ObjCMessageExpr *ME) { + // If the Objective-C message expression is an implicit no-return that + // is not modeled in the CFG, set the tracked dataflow values to Unknown. 
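+  // For example (illustrative), something like
+  //
+  //   [NSException raise:@"Oops" format:@"%d", n];
+  //
+  // never returns, but the CFG still records a fallthrough edge; resetting
+  // the values to Unknown avoids spurious reports along that edge.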
+  if (objCNoRet.isImplicitNoReturn(ME)) {
+    vals.setAllScratchValues(Unknown);
+  }
+}
+
+//------------------------------------------------------------------------====//
+// High-level "driver" logic for uninitialized values analysis.
+//====------------------------------------------------------------------------//
+
+static bool runOnBlock(const CFGBlock *block, const CFG &cfg,
+                       AnalysisDeclContext &ac, CFGBlockValues &vals,
+                       const ClassifyRefs &classification,
+                       llvm::BitVector &wasAnalyzed,
+                       UninitVariablesHandler &handler) {
+  wasAnalyzed[block->getBlockID()] = true;
+  vals.resetScratch();
+  // Merge in values of predecessor blocks.
+  bool isFirst = true;
+  for (CFGBlock::const_pred_iterator I = block->pred_begin(),
+       E = block->pred_end(); I != E; ++I) {
+    const CFGBlock *pred = *I;
+    if (!pred)
+      continue;
+    if (wasAnalyzed[pred->getBlockID()]) {
+      vals.mergeIntoScratch(vals.getValueVector(pred), isFirst);
+      isFirst = false;
+    }
+  }
+  // Apply the transfer function.
+  TransferFunctions tf(vals, cfg, block, ac, classification, handler);
+  for (const auto &I : *block) {
+    if (std::optional<CFGStmt> cs = I.getAs<CFGStmt>())
+      tf.Visit(const_cast<Stmt *>(cs->getStmt()));
+  }
+  CFGTerminator terminator = block->getTerminator();
+  if (auto *as = dyn_cast_or_null<GCCAsmStmt>(terminator.getStmt()))
+    if (as->isAsmGoto())
+      tf.Visit(as);
+  return vals.updateValueVectorWithScratch(block);
+}
+
+namespace {
+
+/// PruneBlocksHandler is a special UninitVariablesHandler that is used
+/// to detect when a CFGBlock has any *potential* use of an uninitialized
+/// variable. It is mainly used to prune out work during the final
+/// reporting pass.
+struct PruneBlocksHandler : public UninitVariablesHandler {
+  /// Records if a CFGBlock had a potential use of an uninitialized variable.
+  llvm::BitVector hadUse;
+
+  /// Records if any CFGBlock had a potential use of an uninitialized variable.
+  bool hadAnyUse = false;
+
+  /// The current block in which to record use information.
+  unsigned currentBlock = 0;
+
+  PruneBlocksHandler(unsigned numBlocks) : hadUse(numBlocks, false) {}
+
+  ~PruneBlocksHandler() override = default;
+
+  void handleUseOfUninitVariable(const VarDecl *vd,
+                                 const UninitUse &use) override {
+    hadUse[currentBlock] = true;
+    hadAnyUse = true;
+  }
+
+  void handleConstRefUseOfUninitVariable(const VarDecl *vd,
+                                         const UninitUse &use) override {
+    hadUse[currentBlock] = true;
+    hadAnyUse = true;
+  }
+
+  /// Called when the uninitialized variable analysis detects the
+  /// idiom 'int x = x'. All other uses of 'x' within the initializer
+  /// are handled by handleUseOfUninitVariable.
+  void handleSelfInit(const VarDecl *vd) override {
+    hadUse[currentBlock] = true;
+    hadAnyUse = true;
+  }
+};
+
+} // namespace
+
+void clang::runUninitializedVariablesAnalysis(
+    const DeclContext &dc,
+    const CFG &cfg,
+    AnalysisDeclContext &ac,
+    UninitVariablesHandler &handler,
+    UninitVariablesAnalysisStats &stats) {
+  CFGBlockValues vals(cfg);
+  vals.computeSetOfDeclarations(dc);
+  if (vals.hasNoDeclarations())
+    return;
+
+  stats.NumVariablesAnalyzed = vals.getNumEntries();
+
+  // Precompute which expressions are uses and which are initializations.
+  ClassifyRefs classification(ac);
+  cfg.VisitBlockStmts(classification);
+
+  // Mark all variables uninitialized at the entry.
+  const CFGBlock &entry = cfg.getEntry();
+  ValueVector &vec = vals.getValueVector(&entry);
+  const unsigned n = vals.getNumEntries();
+  for (unsigned j = 0; j < n; ++j) {
+    vec[j] = Uninitialized;
+  }
+
+  // Proceed with the worklist.
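+  // The loop below runs to a fixed point. Merges use a bitwise OR over the
+  // two-bit value lattice defined earlier, e.g. (illustrative):
+  //
+  //   Unknown(00)     | Initialized(01)   == Initialized(01)
+  //   Initialized(01) | Uninitialized(10) == MayUninitialized(11)
+  //
+  // Since OR only ever sets bits, each tracked value can change at most
+  // twice per block, so the number of re-enqueues is bounded and the
+  // analysis terminates.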
+  ForwardDataflowWorklist worklist(cfg, ac);
+  llvm::BitVector previouslyVisited(cfg.getNumBlockIDs());
+  worklist.enqueueSuccessors(&cfg.getEntry());
+  llvm::BitVector wasAnalyzed(cfg.getNumBlockIDs(), false);
+  wasAnalyzed[cfg.getEntry().getBlockID()] = true;
+  PruneBlocksHandler PBH(cfg.getNumBlockIDs());
+
+  while (const CFGBlock *block = worklist.dequeue()) {
+    PBH.currentBlock = block->getBlockID();
+
+    // Did the block change?
+    bool changed = runOnBlock(block, cfg, ac, vals,
+                              classification, wasAnalyzed, PBH);
+    ++stats.NumBlockVisits;
+    if (changed || !previouslyVisited[block->getBlockID()])
+      worklist.enqueueSuccessors(block);
+    previouslyVisited[block->getBlockID()] = true;
+  }
+
+  if (!PBH.hadAnyUse)
+    return;
+
+  // Run through the blocks one more time, and report uninitialized variables.
+  for (const auto *block : cfg)
+    if (PBH.hadUse[block->getBlockID()]) {
+      runOnBlock(block, cfg, ac, vals, classification, wasAnalyzed, handler);
+      ++stats.NumBlockVisits;
+    }
+}
+
+UninitVariablesHandler::~UninitVariablesHandler() = default;
diff --git a/contrib/llvm-project/clang/lib/Analysis/UnsafeBufferUsage.cpp b/contrib/llvm-project/clang/lib/Analysis/UnsafeBufferUsage.cpp
new file mode 100644
index 000000000000..2f1417487967
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -0,0 +1,695 @@
+//===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "llvm/ADT/SmallVector.h"
+#include <memory>
+#include <optional>
+
+using namespace llvm;
+using namespace clang;
+using namespace ast_matchers;
+
+namespace clang::ast_matchers {
+// A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
+// except for those belonging to a different callable of "n".
+class MatchDescendantVisitor
+    : public RecursiveASTVisitor<MatchDescendantVisitor> {
+public:
+  typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase;
+
+  // Creates an AST visitor that matches `Matcher` on all
+  // descendants of a given node "n" except for the ones
+  // belonging to a different callable of "n".
+  MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher,
+                         internal::ASTMatchFinder *Finder,
+                         internal::BoundNodesTreeBuilder *Builder,
+                         internal::ASTMatchFinder::BindKind Bind)
+      : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind),
+        Matches(false) {}
+
+  // Returns true if a match is found in a subtree of `DynNode`, which belongs
+  // to the same callable of `DynNode`.
+  bool findMatch(const DynTypedNode &DynNode) {
+    Matches = false;
+    if (const Stmt *StmtNode = DynNode.get<Stmt>()) {
+      TraverseStmt(const_cast<Stmt *>(StmtNode));
+      *Builder = ResultBindings;
+      return Matches;
+    }
+    return false;
+  }
+
+  // The following are overriding methods from the base visitor class.
+  // They are public only to allow CRTP to work. They are *not* part
+  // of the public API of this class.
+
+  // For the matchers so far used in safe buffers, we only need to match
+  // `Stmt`s; override more of them as needed.
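+  // For instance (illustrative), when searching the body of 'f' below, the
+  // visitor matches the lambda expression itself but does not descend into
+  // its body, since that body belongs to a different callable:
+  //
+  //   void f(int *p) {
+  //     p[1];                          // visited
+  //     auto g = [](int *q) { q[1]; }; // lambda body not searched
+  //   }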
+
+  bool TraverseDecl(Decl *Node) {
+    if (!Node)
+      return true;
+    if (!match(*Node))
+      return false;
+    // To skip callables:
+    if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node))
+      return true;
+    // Traverse descendants
+    return VisitorBase::TraverseDecl(Node);
+  }
+
+  bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) {
+    if (!Node)
+      return true;
+    if (!match(*Node))
+      return false;
+    // To skip callables:
+    if (isa<LambdaExpr>(Node))
+      return true;
+    return VisitorBase::TraverseStmt(Node);
+  }
+
+  bool shouldVisitTemplateInstantiations() const { return true; }
+  bool shouldVisitImplicitCode() const {
+    // TODO: let's ignore implicit code for now
+    return false;
+  }
+
+private:
+  // Sets 'Matches' to true if 'Matcher' matches 'Node'.
+  //
+  // Returns 'true' if traversal should continue after this function
+  // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
+  template <typename T> bool match(const T &Node) {
+    internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder);
+
+    if (Matcher->matches(DynTypedNode::create(Node), Finder,
+                         &RecursiveBuilder)) {
+      ResultBindings.addMatch(RecursiveBuilder);
+      Matches = true;
+      if (Bind != internal::ASTMatchFinder::BK_All)
+        return false; // Abort as soon as a match is found.
+    }
+    return true;
+  }
+
+  const internal::DynTypedMatcher *const Matcher;
+  internal::ASTMatchFinder *const Finder;
+  internal::BoundNodesTreeBuilder *const Builder;
+  internal::BoundNodesTreeBuilder ResultBindings;
+  const internal::ASTMatchFinder::BindKind Bind;
+  bool Matches;
+};
+
+AST_MATCHER_P(Stmt, forEveryDescendant, internal::Matcher<Stmt>, innerMatcher) {
+  const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
+
+  MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All);
+  return Visitor.findMatch(DynTypedNode::create(Node));
+}
+} // namespace clang::ast_matchers
+
+namespace {
+// Because the analysis revolves around variables and their types, we'll need
+// to track uses of variables (aka DeclRefExprs).
+using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
+
+// Convenience typedef.
+using FixItList = SmallVector<FixItHint, 4>;
+
+// Defined below.
+class Strategy;
+} // namespace
+
+// Because we're dealing with raw pointers, let's define what we mean by that.
+static auto hasPointerType() {
+  return hasType(hasCanonicalType(pointerType()));
+}
+
+static auto hasArrayType() {
+  return hasType(hasCanonicalType(arrayType()));
+}
+
+namespace {
+/// Gadget is an individual operation in the code that may be of interest to
+/// this analysis. Each (non-abstract) subclass corresponds to a specific
+/// rigid AST structure that constitutes an operation on a pointer-type object.
+/// Discovery of a gadget in the code corresponds to claiming that we understand
+/// what this part of code is doing well enough to potentially improve it.
+/// Gadgets can be warning (immediately deserving a warning) or fixable (not
+/// necessarily deserving a warning by itself, but requiring our attention to
+/// decide whether a fixit is warranted).
+class Gadget {
+public:
+  enum class Kind {
+#define GADGET(x) x,
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+  };
+
+  /// Common type of ASTMatchers used for discovering gadgets.
+  /// Useful for implementing the static matcher() methods
+  /// that are expected from all non-abstract subclasses.
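+  /// For example, IncrementGadget below pairs a matcher of the form
+  ///
+  ///   stmt(unaryOperator(hasOperatorName("++"),
+  ///                      hasUnaryOperand(ignoringParenImpCasts(
+  ///                          hasPointerType()))).bind("op"))
+  ///
+  /// with a constructor that retrieves the bound node from the MatchResult.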
+ using Matcher = decltype(stmt()); + + Gadget(Kind K) : K(K) {} + + Kind getKind() const { return K; } + + virtual bool isWarningGadget() const = 0; + virtual const Stmt *getBaseStmt() const = 0; + + /// Returns the list of pointer-type variables on which this gadget performs + /// its operation. Typically, there's only one variable. This isn't a list + /// of all DeclRefExprs in the gadget's AST! + virtual DeclUseList getClaimedVarUseSites() const = 0; + + virtual ~Gadget() = default; + +private: + Kind K; +}; + + +/// Warning gadgets correspond to unsafe code patterns that warrants +/// an immediate warning. +class WarningGadget : public Gadget { +public: + WarningGadget(Kind K) : Gadget(K) {} + + static bool classof(const Gadget *G) { return G->isWarningGadget(); } + bool isWarningGadget() const final { return true; } +}; + +/// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be +/// properly recognized in order to emit fixes. For example, if a raw pointer-type +/// variable is replaced by a safe C++ container, every use of such variable must be +/// carefully considered and possibly updated. +class FixableGadget : public Gadget { +public: + FixableGadget(Kind K) : Gadget(K) {} + + static bool classof(const Gadget *G) { return !G->isWarningGadget(); } + bool isWarningGadget() const final { return false; } + + /// Returns a fixit that would fix the current gadget according to + /// the current strategy. Returns None if the fix cannot be produced; + /// returns an empty list if no fixes are necessary. + virtual std::optional<FixItList> getFixits(const Strategy &) const { + return std::nullopt; + } +}; + +using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>; +using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>; + +/// An increment of a pointer-type value is unsafe as it may run the pointer +/// out of bounds. +class IncrementGadget : public WarningGadget { + static constexpr const char *const OpTag = "op"; + const UnaryOperator *Op; + +public: + IncrementGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::Increment), + Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::Increment; + } + + static Matcher matcher() { + return stmt(unaryOperator( + hasOperatorName("++"), + hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) + ).bind(OpTag)); + } + + const UnaryOperator *getBaseStmt() const override { return Op; } + + DeclUseList getClaimedVarUseSites() const override { + SmallVector<const DeclRefExpr *, 2> Uses; + if (const auto *DRE = + dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { + Uses.push_back(DRE); + } + + return std::move(Uses); + } +}; + +/// A decrement of a pointer-type value is unsafe as it may run the pointer +/// out of bounds. 
+class DecrementGadget : public WarningGadget { + static constexpr const char *const OpTag = "op"; + const UnaryOperator *Op; + +public: + DecrementGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::Decrement), + Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::Decrement; + } + + static Matcher matcher() { + return stmt(unaryOperator( + hasOperatorName("--"), + hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) + ).bind(OpTag)); + } + + const UnaryOperator *getBaseStmt() const override { return Op; } + + DeclUseList getClaimedVarUseSites() const override { + if (const auto *DRE = + dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { + return {DRE}; + } + + return {}; + } +}; + +/// Array subscript expressions on raw pointers as if they're arrays. Unsafe as +/// it doesn't have any bounds checks for the array. +class ArraySubscriptGadget : public WarningGadget { + static constexpr const char *const ArraySubscrTag = "arraySubscr"; + const ArraySubscriptExpr *ASE; + +public: + ArraySubscriptGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::ArraySubscript), + ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::ArraySubscript; + } + + static Matcher matcher() { + // FIXME: What if the index is integer literal 0? Should this be + // a safe gadget in this case? + // clang-format off + return stmt(arraySubscriptExpr( + hasBase(ignoringParenImpCasts( + anyOf(hasPointerType(), hasArrayType()))), + unless(hasIndex(integerLiteral(equals(0))))) + .bind(ArraySubscrTag)); + // clang-format on + } + + const ArraySubscriptExpr *getBaseStmt() const override { return ASE; } + + DeclUseList getClaimedVarUseSites() const override { + if (const auto *DRE = + dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) { + return {DRE}; + } + + return {}; + } +}; + +/// A pointer arithmetic expression of one of the forms: +/// \code +/// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n +/// \endcode +class PointerArithmeticGadget : public WarningGadget { + static constexpr const char *const PointerArithmeticTag = "ptrAdd"; + static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr"; + const BinaryOperator *PA; // pointer arithmetic expression + const Expr * Ptr; // the pointer expression in `PA` + +public: + PointerArithmeticGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::PointerArithmetic), + PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)), + Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::PointerArithmetic; + } + + static Matcher matcher() { + auto HasIntegerType = anyOf( + hasType(isInteger()), hasType(enumType())); + auto PtrAtRight = allOf(hasOperatorName("+"), + hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), + hasLHS(HasIntegerType)); + auto PtrAtLeft = allOf( + anyOf(hasOperatorName("+"), hasOperatorName("-"), + hasOperatorName("+="), hasOperatorName("-=")), + hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), + hasRHS(HasIntegerType)); + + return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight)).bind(PointerArithmeticTag)); + } + + const Stmt *getBaseStmt() const override { return PA; } + + DeclUseList getClaimedVarUseSites() const override { + if (const auto *DRE = + 
dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) {
+      return {DRE};
+    }
+
+    return {};
+  }
+  // FIXME: adding zero to a pointer should be fine
+  // FIXME: this gadget will need a fix-it
+};
+} // namespace
+
+namespace {
+// An auxiliary tracking facility for the fixit analysis. It helps connect
+// declarations to their uses and makes sure we've covered all uses with our
+// analysis before we try to fix the declaration.
+class DeclUseTracker {
+  using UseSetTy = SmallSet<const DeclRefExpr *, 16>;
+  using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>;
+
+  // Allocate on the heap for easier move.
+  std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
+  DefMapTy Defs{};
+
+public:
+  DeclUseTracker() = default;
+  DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
+  DeclUseTracker(DeclUseTracker &&) = default;
+  DeclUseTracker &operator=(DeclUseTracker &&) = default;
+
+  // Start tracking a freshly discovered DRE.
+  void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); }
+
+  // Stop tracking the DRE as it's been fully figured out.
+  void claimUse(const DeclRefExpr *DRE) {
+    assert(Uses->count(DRE) &&
+           "DRE not found or claimed by multiple matchers!");
+    Uses->erase(DRE);
+  }
+
+  // A variable is unclaimed if at least one use is unclaimed.
+  bool hasUnclaimedUses(const VarDecl *VD) const {
+    // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
+    return any_of(*Uses, [VD](const DeclRefExpr *DRE) {
+      return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
+    });
+  }
+
+  void discoverDecl(const DeclStmt *DS) {
+    for (const Decl *D : DS->decls()) {
+      if (const auto *VD = dyn_cast<VarDecl>(D)) {
+        // FIXME: Assertion temporarily disabled due to a bug in
+        // ASTMatcher internal behavior in presence of GNU
+        // statement-expressions. We need to properly investigate this
+        // because it can screw up our algorithm in other ways.
+        // assert(Defs.count(VD) == 0 && "Definition already discovered!");
+        Defs[VD] = DS;
+      }
+    }
+  }
+
+  const DeclStmt *lookupDecl(const VarDecl *VD) const {
+    auto It = Defs.find(VD);
+    assert(It != Defs.end() && "Definition never discovered!");
+    return It->second;
+  }
+};
+} // namespace
+
+namespace {
+// Strategy is a map from variables to the way we plan to emit fixes for
+// these variables. It is figured out gradually by trying different fixes
+// for different variables depending on gadgets in which these variables
+// participate.
+class Strategy {
+public:
+  enum class Kind {
+    Wontfix,  // We don't plan to emit a fixit for this variable.
+    Span,     // We recommend replacing the variable with std::span.
+    Iterator, // We recommend replacing the variable with std::span::iterator.
+    Array,    // We recommend replacing the variable with std::array.
+    Vector    // We recommend replacing the variable with std::vector.
+  };
+
+private:
+  using MapTy = llvm::DenseMap<const VarDecl *, Kind>;
+
+  MapTy Map;
+
+public:
+  Strategy() = default;
+  Strategy(const Strategy &) = delete; // Let's avoid copies.
+  Strategy(Strategy &&) = default;
+
+  void set(const VarDecl *VD, Kind K) {
+    Map[VD] = K;
+  }
+
+  Kind lookup(const VarDecl *VD) const {
+    auto I = Map.find(VD);
+    if (I == Map.end())
+      return Kind::Wontfix;
+
+    return I->second;
+  }
+};
+} // namespace
+
+/// Scan the function and return the fixable and warning gadgets found,
+/// together with a tracker of the variable uses discovered.
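+/// All gadget matchers, plus catch-all matchers for interesting DeclRefExprs
+/// and DeclStmts, run in a single MatchFinder pass; the callback dispatches
+/// on the .bind() tag to construct the corresponding gadget (see the NEXT
+/// macro below).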
+static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker>
+findGadgets(const Decl *D) {
+
+  struct GadgetFinderCallback : MatchFinder::MatchCallback {
+    FixableGadgetList FixableGadgets;
+    WarningGadgetList WarningGadgets;
+    DeclUseTracker Tracker;
+
+    void run(const MatchFinder::MatchResult &Result) override {
+      // In debug mode, assert that we've found exactly one gadget.
+      // This helps us avoid conflicts in .bind() tags.
+#if NDEBUG
+#define NEXT return
+#else
+      [[maybe_unused]] int numFound = 0;
+#define NEXT ++numFound
+#endif
+
+      if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) {
+        Tracker.discoverUse(DRE);
+        NEXT;
+      }
+
+      if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) {
+        Tracker.discoverDecl(DS);
+        NEXT;
+      }
+
+      // Figure out which matcher we've found, and call the appropriate
+      // subclass constructor.
+      // FIXME: Can we do this more logarithmically?
+#define FIXABLE_GADGET(name) \
+      if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
+        FixableGadgets.push_back(std::make_unique<name ## Gadget>(Result)); \
+        NEXT; \
+      }
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+#define WARNING_GADGET(name) \
+      if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
+        WarningGadgets.push_back(std::make_unique<name ## Gadget>(Result)); \
+        NEXT; \
+      }
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+
+      assert(numFound >= 1 && "Gadgets not found in match result!");
+      assert(numFound <= 1 && "Conflicting bind tags in gadgets!");
+    }
+  };
+
+  MatchFinder M;
+  GadgetFinderCallback CB;
+
+  // clang-format off
+  M.addMatcher(
+    stmt(forEveryDescendant(
+      stmt(anyOf(
+        // Add Gadget::matcher() for every gadget in the registry.
+#define GADGET(x) \
+        x ## Gadget::matcher().bind(#x),
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+        // In parallel, match all DeclRefExprs to find out whether any of
+        // them are not covered by any gadget.
+        declRefExpr(anyOf(hasPointerType(), hasArrayType()),
+                    to(varDecl())).bind("any_dre"),
+        // Also match DeclStmts because we'll need them when fixing
+        // their underlying VarDecls that otherwise don't have
+        // any backreferences to DeclStmts.
+        declStmt().bind("any_ds")
+      ))
+      // FIXME: Idiomatically there should be a forCallable(equalsNode(D))
+      // here, to make sure that the statement actually belongs to the
+      // function and not to a nested function. However, forCallable uses
+      // ParentMap which can't be used before the AST is fully constructed.
+      // The original problem doesn't sound like it needs ParentMap though,
+      // maybe there's a more direct solution?
+    )),
+    &CB
+  );
+  // clang-format on
+
+  M.match(*D->getBody(), D->getASTContext());
+
+  // Gadgets "claim" variables they're responsible for. Once this loop finishes,
+  // the tracker will only track DREs that weren't claimed by any gadgets,
+  // i.e. not understood by the analysis.
+  for (const auto &G : CB.FixableGadgets) {
+    for (const auto *DRE : G->getClaimedVarUseSites()) {
+      CB.Tracker.claimUse(DRE);
+    }
+  }
+
+  return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets),
+          std::move(CB.Tracker)};
+}
+
+struct WarningGadgetSets {
+  std::map<const VarDecl *, std::set<std::unique_ptr<WarningGadget>>> byVar;
+  // These Gadgets are not related to pointer variables (e.g. temporaries).
+ llvm::SmallVector<std::unique_ptr<WarningGadget>, 16> noVar; +}; + +static WarningGadgetSets +groupWarningGadgetsByVar(WarningGadgetList &&AllUnsafeOperations) { + WarningGadgetSets result; + // If some gadgets cover more than one + // variable, they'll appear more than once in the map. + for (auto &G : AllUnsafeOperations) { + DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites(); + + bool AssociatedWithVarDecl = false; + for (const DeclRefExpr *DRE : ClaimedVarUseSites) { + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { + result.byVar[VD].emplace(std::move(G)); + AssociatedWithVarDecl = true; + } + } + + if (!AssociatedWithVarDecl) { + result.noVar.emplace_back(std::move(G)); + continue; + } + } + return result; +} + +struct FixableGadgetSets { + std::map<const VarDecl *, std::set<std::unique_ptr<FixableGadget>>> byVar; +}; + +static FixableGadgetSets +groupFixablesByVar(FixableGadgetList &&AllFixableOperations) { + FixableGadgetSets FixablesForUnsafeVars; + for (auto &F : AllFixableOperations) { + DeclUseList DREs = F->getClaimedVarUseSites(); + + for (const DeclRefExpr *DRE : DREs) { + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { + FixablesForUnsafeVars.byVar[VD].emplace(std::move(F)); + } + } + } + return FixablesForUnsafeVars; +} + +static std::map<const VarDecl *, FixItList> +getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S) { + std::map<const VarDecl *, FixItList> FixItsForVariable; + for (const auto &[VD, Fixables] : FixablesForUnsafeVars.byVar) { + // TODO fixVariable - fixit for the variable itself + bool ImpossibleToFix = false; + llvm::SmallVector<FixItHint, 16> FixItsForVD; + for (const auto &F : Fixables) { + llvm::Optional<FixItList> Fixits = F->getFixits(S); + if (!Fixits) { + ImpossibleToFix = true; + break; + } else { + const FixItList CorrectFixes = Fixits.value(); + FixItsForVD.insert(FixItsForVD.end(), CorrectFixes.begin(), + CorrectFixes.end()); + } + } + if (ImpossibleToFix) + FixItsForVariable.erase(VD); + else + FixItsForVariable[VD].insert(FixItsForVariable[VD].end(), + FixItsForVD.begin(), FixItsForVD.end()); + } + return FixItsForVariable; +} + +static Strategy +getNaiveStrategy(const llvm::SmallVectorImpl<const VarDecl *> &UnsafeVars) { + Strategy S; + for (const VarDecl *VD : UnsafeVars) { + S.set(VD, Strategy::Kind::Span); + } + return S; +} + +void clang::checkUnsafeBufferUsage(const Decl *D, + UnsafeBufferUsageHandler &Handler) { + assert(D && D->getBody()); + + WarningGadgetSets UnsafeOps; + FixableGadgetSets FixablesForUnsafeVars; + DeclUseTracker Tracker; + + { + auto [FixableGadgets, WarningGadgets, TrackerRes] = findGadgets(D); + UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets)); + FixablesForUnsafeVars = groupFixablesByVar(std::move(FixableGadgets)); + Tracker = std::move(TrackerRes); + } + + // Filter out non-local vars and vars with unclaimed DeclRefExpr-s. + for (auto it = FixablesForUnsafeVars.byVar.cbegin(); + it != FixablesForUnsafeVars.byVar.cend();) { + // FIXME: Support ParmVarDecl as well. 
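+    // A sketch of why unclaimed uses block fixes (illustrative only;
+    // 'opaque' stands for any call the gadget matchers don't understand):
+    //
+    //   int *p = new int[10];
+    //   p[5] = 1;   // claimed by ArraySubscriptGadget, rewritable
+    //   opaque(p);  // unclaimed use: rewriting 'p' as std::span could
+    //               // break this call, so 'p' is dropped here.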
+    if (!it->first->isLocalVarDecl() || Tracker.hasUnclaimedUses(it->first)) {
+      it = FixablesForUnsafeVars.byVar.erase(it);
+    } else {
+      ++it;
+    }
+  }
+
+  llvm::SmallVector<const VarDecl *, 16> UnsafeVars;
+  for (const auto &[VD, ignore] : FixablesForUnsafeVars.byVar)
+    UnsafeVars.push_back(VD);
+
+  Strategy NaiveStrategy = getNaiveStrategy(UnsafeVars);
+  std::map<const VarDecl *, FixItList> FixItsForVariable =
+      getFixIts(FixablesForUnsafeVars, NaiveStrategy);
+
+  // FIXME: Detect overlapping FixIts.
+
+  for (const auto &G : UnsafeOps.noVar) {
+    Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false);
+  }
+
+  for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
+    auto FixItsIt = FixItsForVariable.find(VD);
+    Handler.handleFixableVariable(VD, FixItsIt != FixItsForVariable.end()
+                                          ? std::move(FixItsIt->second)
+                                          : FixItList{});
+    for (const auto &G : WarningGadgets) {
+      Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true);
+    }
+  }
+}
diff --git a/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp b/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
new file mode 100644
index 000000000000..be8e1200d0bf
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
@@ -0,0 +1,28 @@
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+struct Dependency : public Checker<check::BeginFunction> {
+  void checkBeginFunction(CheckerContext &Ctx) const {}
+};
+struct DependendentChecker : public Checker<check::BeginFunction> {
+  void checkBeginFunction(CheckerContext &Ctx) const {}
+};
+} // end anonymous namespace
+
+// Register plugin!
+extern "C" void clang_registerCheckers(CheckerRegistry ®istry) { + registry.addChecker<Dependency>("example.Dependency", "", ""); + registry.addChecker<DependendentChecker>("example.DependendentChecker", "", + ""); + + registry.addDependency("example.DependendentChecker", "example.Dependency"); +} + +extern "C" const char clang_analyzerAPIVersionString[] = + CLANG_ANALYZER_API_VERSION_STRING; diff --git a/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports b/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports new file mode 100644 index 000000000000..8d9ff882cfb1 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports @@ -0,0 +1,2 @@ +clang_registerCheckers +clang_analyzerAPIVersionString diff --git a/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp b/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp new file mode 100644 index 000000000000..32fba9c93752 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp @@ -0,0 +1,44 @@ +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" + +using namespace clang; +using namespace ento; + +namespace { +struct MyChecker : public Checker<check::BeginFunction> { + void checkBeginFunction(CheckerContext &Ctx) const {} +}; + +void registerMyChecker(CheckerManager &Mgr) { + MyChecker *Checker = Mgr.registerChecker<MyChecker>(); + llvm::outs() << "Example option is set to " + << (Mgr.getAnalyzerOptions().getCheckerBooleanOption( + Checker, "ExampleOption") + ? "true" + : "false") + << '\n'; +} + +bool shouldRegisterMyChecker(const CheckerManager &mgr) { return true; } + +} // end anonymous namespace + +// Register plugin! 
+extern "C" void clang_registerCheckers(CheckerRegistry ®istry) { + registry.addChecker(registerMyChecker, shouldRegisterMyChecker, + "example.MyChecker", "Example Description", + "example.mychecker.documentation.nonexistent.html", + /*isHidden*/false); + + registry.addCheckerOption(/*OptionType*/ "bool", + /*CheckerFullName*/ "example.MyChecker", + /*OptionName*/ "ExampleOption", + /*DefaultValStr*/ "false", + /*Description*/ "This is an example checker opt.", + /*DevelopmentStage*/ "released"); +} + +extern "C" const char clang_analyzerAPIVersionString[] = + CLANG_ANALYZER_API_VERSION_STRING; diff --git a/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports b/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports new file mode 100644 index 000000000000..8d9ff882cfb1 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports @@ -0,0 +1,2 @@ +clang_registerCheckers +clang_analyzerAPIVersionString diff --git a/contrib/llvm-project/clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp b/contrib/llvm-project/clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp new file mode 100644 index 000000000000..fd210d733fd0 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp @@ -0,0 +1,54 @@ +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" + +using namespace clang; +using namespace ento; + +namespace { +class MainCallChecker : public Checker<check::PreStmt<CallExpr>> { + mutable std::unique_ptr<BugType> BT; + +public: + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; +}; +} // end anonymous namespace + +void MainCallChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + const Expr *Callee = CE->getCallee(); + const FunctionDecl *FD = C.getSVal(Callee).getAsFunctionDecl(); + + if (!FD) + return; + + // Get the name of the callee. + IdentifierInfo *II = FD->getIdentifier(); + if (!II) // if no identifier, not a simple C function + return; + + if (II->isStr("main")) { + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + + if (!BT) + BT.reset(new BugType(this, "call to main", "example analyzer plugin")); + + auto report = + std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N); + report->addRange(Callee->getSourceRange()); + C.emitReport(std::move(report)); + } +} + +// Register plugin! +extern "C" void clang_registerCheckers(CheckerRegistry ®istry) { + registry.addChecker<MainCallChecker>( + "example.MainCallChecker", "Disallows calls to functions called main", + ""); +} + +extern "C" const char clang_analyzerAPIVersionString[] = + CLANG_ANALYZER_API_VERSION_STRING; diff --git a/contrib/llvm-project/clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports b/contrib/llvm-project/clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports new file mode 100644 index 000000000000..8d9ff882cfb1 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports @@ -0,0 +1,2 @@ +clang_registerCheckers +clang_analyzerAPIVersionString |