diff options
Diffstat (limited to 'clang/lib/StaticAnalyzer/Checkers')
117 files changed, 44225 insertions, 0 deletions
diff --git a/clang/lib/StaticAnalyzer/Checkers/AllocationState.h b/clang/lib/StaticAnalyzer/Checkers/AllocationState.h
new file mode 100644
index 000000000000..25de37003319
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/AllocationState.h
@@ -0,0 +1,38 @@
+//===--- AllocationState.h ------------------------------------- *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_ALLOCATIONSTATE_H
+#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_ALLOCATIONSTATE_H
+
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+
+namespace clang {
+namespace ento {
+
+namespace allocation_state {
+
+/// NOTE(review): the definition is not visible from this header. Presumably
+/// this returns a program state derived from \p State in which \p Sym is
+/// recorded as released, with \p Origin being the expression responsible for
+/// the release -- confirm against the defining checker.
+ProgramStateRef markReleased(ProgramStateRef State, SymbolRef Sym,
+                             const Expr *Origin);
+
+/// This function provides an additional visitor that augments the bug report
+/// with information relevant to memory errors caused by the misuse of
+/// AF_InnerBuffer symbols.
+std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym);
+
+/// 'Sym' represents a pointer to the inner buffer of a container object.
+/// This function looks up the memory region of that object in
+/// DanglingInternalBufferChecker's program state map.
+const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym);
+
+} // end namespace allocation_state
+
+} // end namespace ento
+} // end namespace clang
+
+#endif
diff --git a/clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp
new file mode 100644
index 000000000000..d0def6918932
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp
@@ -0,0 +1,181 @@
+//===- AnalysisOrderChecker - Print callbacks called ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This checker prints callbacks that are called during analysis.
+// This is required to ensure that callbacks are fired in order
+// and do not duplicate or get lost.
+// Feel free to extend this checker with any callback you need to check.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/Analysis/CFGStmtMap.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+
+/// Debugging checker that writes one line to llvm::errs() for each callback
+/// it receives. Every callback is gated by a boolean checker option named
+/// after it; the option "*" turns all of them on at once.
+class AnalysisOrderChecker
+    : public Checker<check::PreStmt<CastExpr>,
+                     check::PostStmt<CastExpr>,
+                     check::PreStmt<ArraySubscriptExpr>,
+                     check::PostStmt<ArraySubscriptExpr>,
+                     check::PreStmt<CXXNewExpr>,
+                     check::PostStmt<CXXNewExpr>,
+                     check::PreStmt<OffsetOfExpr>,
+                     check::PostStmt<OffsetOfExpr>,
+                     check::PreCall,
+                     check::PostCall,
+                     check::EndFunction,
+                     check::NewAllocator,
+                     check::Bind,
+                     check::RegionChanges,
+                     check::LiveSymbols> {
+
+  /// True when the wildcard option "*" or the option \p CallbackName is set
+  /// for this checker. The wildcard is consulted first.
+  bool isCallbackEnabled(AnalyzerOptions &Opts, StringRef CallbackName) const {
+    if (Opts.getCheckerBooleanOption(this, "*"))
+      return true;
+    return Opts.getCheckerBooleanOption(this, CallbackName);
+  }
+
+  /// Same query, with the options fetched through the checker context.
+  bool isCallbackEnabled(CheckerContext &C, StringRef CallbackName) const {
+    return isCallbackEnabled(C.getAnalysisManager().getAnalyzerOptions(),
+                             CallbackName);
+  }
+
+  /// Same query, for callbacks that only receive a program state: the
+  /// options are reached through the state's owning engine.
+  bool isCallbackEnabled(ProgramStateRef State, StringRef CallbackName) const {
+    auto &Engine = State->getStateManager().getOwningEngine();
+    return isCallbackEnabled(Engine.getAnalysisManager().getAnalyzerOptions(),
+                             CallbackName);
+  }
+
+public:
+  /// Prints the cast kind before a cast expression is evaluated.
+  void checkPreStmt(const CastExpr *CE, CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "PreStmtCastExpr"))
+      return;
+    llvm::errs() << "PreStmt<CastExpr> (Kind : " << CE->getCastKindName()
+                 << ")\n";
+  }
+
+  /// Prints the cast kind after a cast expression is evaluated.
+  void checkPostStmt(const CastExpr *CE, CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "PostStmtCastExpr"))
+      return;
+    llvm::errs() << "PostStmt<CastExpr> (Kind : " << CE->getCastKindName()
+                 << ")\n";
+  }
+
+  void checkPreStmt(const ArraySubscriptExpr *SubExpr,
+                    CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "PreStmtArraySubscriptExpr"))
+      return;
+    llvm::errs() << "PreStmt<ArraySubscriptExpr>\n";
+  }
+
+  void checkPostStmt(const ArraySubscriptExpr *SubExpr,
+                     CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "PostStmtArraySubscriptExpr"))
+      return;
+    llvm::errs() << "PostStmt<ArraySubscriptExpr>\n";
+  }
+
+  void checkPreStmt(const CXXNewExpr *NE, CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "PreStmtCXXNewExpr"))
+      return;
+    llvm::errs() << "PreStmt<CXXNewExpr>\n";
+  }
+
+  void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "PostStmtCXXNewExpr"))
+      return;
+    llvm::errs() << "PostStmt<CXXNewExpr>\n";
+  }
+
+  void checkPreStmt(const OffsetOfExpr *OOE, CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "PreStmtOffsetOfExpr"))
+      return;
+    llvm::errs() << "PreStmt<OffsetOfExpr>\n";
+  }
+
+  void checkPostStmt(const OffsetOfExpr *OOE, CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "PostStmtOffsetOfExpr"))
+      return;
+    llvm::errs() << "PostStmt<OffsetOfExpr>\n";
+  }
+
+  /// Prints "PreCall", followed by the callee's qualified name in
+  /// parentheses when the call's declaration is a NamedDecl.
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "PreCall"))
+      return;
+
+    llvm::raw_ostream &Err = llvm::errs();
+    Err << "PreCall";
+    if (const auto *Callee = dyn_cast_or_null<NamedDecl>(Call.getDecl()))
+      Err << " (" << Callee->getQualifiedNameAsString() << ')';
+    Err << '\n';
+  }
+
+  /// Prints "PostCall", followed by the callee's qualified name in
+  /// parentheses when the call's declaration is a NamedDecl.
+  void checkPostCall(const CallEvent &Call, CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "PostCall"))
+      return;
+
+    llvm::raw_ostream &Err = llvm::errs();
+    Err << "PostCall";
+    if (const auto *Callee = dyn_cast_or_null<NamedDecl>(Call.getDecl()))
+      Err << " (" << Callee->getQualifiedNameAsString() << ')';
+    Err << '\n';
+  }
+
+  /// Prints whether the function ended through a return statement and, if
+  /// it did, the kind of the last CFG element of the block holding it.
+  void checkEndFunction(const ReturnStmt *S, CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "EndFunction"))
+      return;
+
+    llvm::errs() << "EndFunction\nReturnStmt: " << (S ? "yes" : "no") << "\n";
+    if (!S)
+      return;
+
+    llvm::errs() << "CFGElement: ";
+    CFGStmtMap *StmtMap = C.getCurrentAnalysisDeclContext()->getCFGStmtMap();
+    CFGElement Tail = StmtMap->getBlock(S)->back();
+
+    // Only these two element kinds are reported; anything else leaves the
+    // "CFGElement: " prefix without a trailing newline.
+    if (Tail.getAs<CFGStmt>())
+      llvm::errs() << "CFGStmt\n";
+    else if (Tail.getAs<CFGAutomaticObjDtor>())
+      llvm::errs() << "CFGAutomaticObjDtor\n";
+  }
+
+  void checkNewAllocator(const CXXNewExpr *CNE, SVal Target,
+                         CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "NewAllocator"))
+      return;
+    llvm::errs() << "NewAllocator\n";
+  }
+
+  void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const {
+    if (!isCallbackEnabled(C, "Bind"))
+      return;
+    llvm::errs() << "Bind\n";
+  }
+
+  void checkLiveSymbols(ProgramStateRef State, SymbolReaper &SymReaper) const {
+    if (!isCallbackEnabled(State, "LiveSymbols"))
+      return;
+    llvm::errs() << "LiveSymbols\n";
+  }
+
+  /// Announces the callback and hands back \p State unchanged.
+  ProgramStateRef
+  checkRegionChanges(ProgramStateRef State,
+                     const InvalidatedSymbols *Invalidated,
+                     ArrayRef<const MemRegion *> ExplicitRegions,
+                     ArrayRef<const MemRegion *> Regions,
+                     const LocationContext *LCtx, const CallEvent *Call) const {
+    if (isCallbackEnabled(State, "RegionChanges"))
+      llvm::errs() << "RegionChanges\n";
+    return State;
+  }
+};
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Registration.
+//===----------------------------------------------------------------------===//
+
+/// Registers AnalysisOrderChecker with the checker manager.
+void ento::registerAnalysisOrderChecker(CheckerManager &Mgr) {
+  Mgr.registerChecker<AnalysisOrderChecker>();
+}
+
+/// The checker is registered regardless of the language options.
+bool ento::shouldRegisterAnalysisOrderChecker(const LangOptions &) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp
new file mode 100644
index 000000000000..20f3008b4a4b
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp
@@ -0,0 +1,145 @@
+//==--AnalyzerStatsChecker.cpp - Analyzer visitation statistics --*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file reports various statistics about analyzer visitation.
+//===----------------------------------------------------------------------===//
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace ento;
+
+#define DEBUG_TYPE "StatsChecker"
+
+STATISTIC(NumBlocks,
+          "The # of blocks in top level functions");
+STATISTIC(NumBlocksUnreachable,
+          "The # of unreachable blocks in analyzing top level functions");
+
+namespace {
+/// Emits internal statistics about how much of the top-level function's CFG
+/// the exploded graph visited, once analysis of that function has ended.
+class AnalyzerStatsChecker : public Checker<check::EndAnalysis> {
+public:
+  void checkEndAnalysis(ExplodedGraph &G, BugReporter &B, ExprEngine &Eng) const;
+};
+}
+
+/// Counts the CFG blocks of the top-level function that have no
+/// BlockEntrance node in \p G, adds the totals to the NumBlocks and
+/// NumBlocksUnreachable statistics, and emits an "Analyzer Statistics"
+/// report plus one "Sink Point" report per exhausted block whose destination
+/// starts with a statement.
+///
+/// \param G   The exploded graph of the finished analysis; its first root
+///            supplies the top-level location context.
+/// \param B   Bug reporter used to emit the basic reports.
+/// \param Eng Engine queried for exhausted blocks and work-list state.
+void AnalyzerStatsChecker::checkEndAnalysis(ExplodedGraph &G,
+                                            BugReporter &B,
+                                            ExprEngine &Eng) const {
+  const SourceManager &SM = B.getSourceManager();
+  llvm::SmallPtrSet<const CFGBlock*, 32> reachable;
+
+  // Root node should have the location context of the top most function.
+  const ExplodedNode *GraphRoot = *G.roots_begin();
+  const LocationContext *LC = GraphRoot->getLocation().getLocationContext();
+
+  const Decl *D = LC->getDecl();
+
+  // Iterate over the exploded graph.
+  for (ExplodedGraph::node_iterator I = G.nodes_begin();
+      I != G.nodes_end(); ++I) {
+    const ProgramPoint &P = I->getLocation();
+
+    // Only check the coverage in the top level function (optimization).
+    if (D != P.getLocationContext()->getDecl())
+      continue;
+
+    if (Optional<BlockEntrance> BE = P.getAs<BlockEntrance>())
+      reachable.insert(BE->getBlock());
+  }
+
+  // Get the CFG of the top-level function.
+  const CFG *C = LC->getCFG();
+
+  unsigned total = 0, unreachable = 0;
+
+  // Find CFGBlocks that were not covered by any node.
+  for (const CFGBlock *CB : *C) {
+    ++total;
+    if (!reachable.count(CB))
+      ++unreachable;
+  }
+
+  // We never 'reach' the entry block, and there is no BlockEntrance
+  // corresponding to the exit block either, so both were counted above as
+  // unreachable. Assume they are reached and correct the count.
+  unreachable -= 2;
+
+  // Generate the warning string
+  SmallString<128> buf;
+  llvm::raw_svector_ostream output(buf);
+  PresumedLoc Loc = SM.getPresumedLoc(D->getLocation());
+  if (!Loc.isValid())
+    return;
+
+  if (isa<FunctionDecl>(D) || isa<ObjCMethodDecl>(D)) {
+    const NamedDecl *ND = cast<NamedDecl>(D);
+    output << *ND;
+  }
+  else if (isa<BlockDecl>(D)) {
+    // Close the parenthesis opened by "block(" so the description is
+    // balanced; it is also embedded in the per-sink messages below.
+    output << "block(line:" << Loc.getLine() << ":col:" << Loc.getColumn()
+           << ")";
+  }
+
+  NumBlocksUnreachable += unreachable;
+  NumBlocks += total;
+  // Snapshot the name now: 'output' keeps growing below, and the snapshot
+  // must own its storage.
+  std::string NameOfRootFunction = output.str().str();
+
+  output << " -> Total CFGBlocks: " << total << " | Unreachable CFGBlocks: "
+      << unreachable << " | Exhausted Block: "
+      << (Eng.wasBlocksExhausted() ? "yes" : "no")
+      << " | Empty WorkList: "
+      << (Eng.hasEmptyWorkList() ? "yes" : "no");
+
+  B.EmitBasicReport(D, this, "Analyzer Statistics", "Internal Statistics",
+                    output.str(), PathDiagnosticLocation(D, SM));
+
+  // Emit warning for each block we bailed out on.
+  using ExhaustedIterator = CoreEngine::BlocksExhausted::const_iterator;
+  const CoreEngine &CE = Eng.getCoreEngine();
+  for (ExhaustedIterator I = CE.blocks_exhausted_begin(),
+      E = CE.blocks_exhausted_end(); I != E; ++I) {
+    const BlockEdge &BE =  I->first;
+    const CFGBlock *Exit = BE.getDst();
+    if (Exit->empty())
+      continue;
+    // Named distinctly from the CoreEngine reference 'CE' above, which the
+    // loop header still reads.
+    const CFGElement &FirstElem = Exit->front();
+    if (Optional<CFGStmt> CS = FirstElem.getAs<CFGStmt>()) {
+      SmallString<128> bufI;
+      llvm::raw_svector_ostream outputI(bufI);
+      outputI << "(" << NameOfRootFunction << ")" <<
+                 ": The analyzer generated a sink at this point";
+      B.EmitBasicReport(
+          D, this, "Sink Point", "Internal Statistics", outputI.str(),
+          PathDiagnosticLocation::createBegin(CS->getStmt(), SM, LC));
+    }
+  }
+}
+
+/// Registers AnalyzerStatsChecker with the checker manager.
+void ento::registerAnalyzerStatsChecker(CheckerManager &mgr) {
+  mgr.registerChecker<AnalyzerStatsChecker>();
+}
+
+/// The checker is registered regardless of the language options.
+bool ento::shouldRegisterAnalyzerStatsChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
new file mode 100644
index 000000000000..8d4793e0802f
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -0,0 +1,97 @@
+//== ArrayBoundChecker.cpp ------------------------------*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ArrayBoundChecker, which is a path-sensitive check
+// which looks for an out-of-bound array element access.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+/// Location checker that reports an element access whose index can only be
+/// outside the element count of the accessed region.
+class ArrayBoundChecker :
+    public Checker<check::Location> {
+  /// Bug type, created on the first report.
+  mutable std::unique_ptr<BuiltinBug> BT;
+
+public:
+  void checkLocation(SVal l, bool isLoad, const Stmt* S,
+                     CheckerContext &C) const;
+};
+}
+
+/// Checks an accessed location for an out-of-bound array element access.
+///
+/// Locations that are not element regions, and element regions whose index
+/// is the constant zero, are ignored. Otherwise the index is assumed both
+/// in bound and out of bound against the element count of the super region:
+/// a report is emitted only when the out-of-bound assumption is feasible and
+/// the in-bound one is not; in every other case the in-bound state is added
+/// as a transition.
+void ArrayBoundChecker::checkLocation(SVal l, bool isLoad, const Stmt* LoadS,
+                                      CheckerContext &C) const {
+  // Only element regions carry an index that can be checked.
+  const auto *ER = dyn_cast_or_null<ElementRegion>(l.getAsRegion());
+  if (!ER)
+    return;
+
+  DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
+
+  // Zero index is always in bound, this also passes ElementRegions created for
+  // pointer casts.
+  if (Idx.isZeroConstant())
+    return;
+
+  ProgramStateRef state = C.getState();
+
+  // Number of elements of the accessed type that fit in the super region.
+  DefinedOrUnknownSVal NumElements
+    = C.getStoreManager().getSizeInElements(state, ER->getSuperRegion(),
+                                            ER->getValueType());
+
+  ProgramStateRef StInBound = state->assumeInBound(Idx, NumElements, true);
+  ProgramStateRef StOutBound = state->assumeInBound(Idx, NumElements, false);
+
+  const bool DefinitelyOutOfBound = StOutBound && !StInBound;
+  if (!DefinitelyOutOfBound) {
+    // Array bound check succeeded.  From this point forward the array bound
+    // should always succeed.
+    C.addTransition(StInBound);
+    return;
+  }
+
+  ExplodedNode *ErrNode = C.generateErrorNode(StOutBound);
+  if (!ErrNode)
+    return;
+
+  if (!BT)
+    BT.reset(new BuiltinBug(
+        this, "Out-of-bound array access",
+        "Access out-of-bound array element (buffer overflow)"));
+
+  // FIXME: It would be nice to eventually make this diagnostic more clear,
+  // e.g., by referencing the original declaration or by saying *why* this
+  // reference is outside the range.
+
+  // Generate a report for this bug.
+  auto Report = std::make_unique<PathSensitiveBugReport>(
+      *BT, BT->getDescription(), ErrNode);
+
+  Report->addRange(LoadS->getSourceRange());
+  C.emitReport(std::move(Report));
+}
+
+/// Registers ArrayBoundChecker with the checker manager.
+void ento::registerArrayBoundChecker(CheckerManager &mgr) {
+  mgr.registerChecker<ArrayBoundChecker>();
+}
+
+/// The checker is registered regardless of the language options.
+bool ento::shouldRegisterArrayBoundChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
new file mode 100644
index 000000000000..8f3bf138cae4
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -0,0 +1,361 @@
+//== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ArrayBoundCheckerV2, which is a path-sensitive check
+// which looks for an out-of-bound array element access.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Taint.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/AST/CharUnits.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace ento;
+using namespace taint;
+
+namespace {
+/// Location checker that compares the raw byte offset of an access against
+/// the extent of its base region and reports accesses that definitely fall
+/// outside it, or whose offset is tainted and may fall outside it.
+class ArrayBoundCheckerV2 :
+    public Checker<check::Location> {
+  // Bug type, created on the first report.
+  mutable std::unique_ptr<BuiltinBug> BT;
+
+  // Which bound was violated; selects the wording used by reportOOB.
+  enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted };
+
+  void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind,
+                 std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
+
+public:
+  void checkLocation(SVal l, bool isLoad, const Stmt*S,
+                     CheckerContext &C) const;
+};
+
+// FIXME: Eventually replace RegionRawOffset with this class.
+/// A base region paired with a byte offset from it. The private default
+/// constructor builds the "no result" value (null region, unknown offset)
+/// that computeOffset returns when it gives up; callers must check
+/// getRegion() before calling getByteOffset(), which casts the offset to
+/// NonLoc.
+class RegionRawOffsetV2 {
+private:
+  const SubRegion *baseRegion;
+  SVal byteOffset;
+
+  RegionRawOffsetV2()
+    : baseRegion(nullptr), byteOffset(UnknownVal()) {}
+
+public:
+  RegionRawOffsetV2(const SubRegion* base, SVal offset)
+    : baseRegion(base), byteOffset(offset) {}
+
+  NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); }
+  const SubRegion *getRegion() const { return baseRegion; }
+
+  static RegionRawOffsetV2 computeOffset(ProgramStateRef state,
+                                         SValBuilder &svalBuilder,
+                                         SVal location);
+
+  void dump() const;
+  void dumpToStream(raw_ostream &os) const;
+};
+}
+
+/// Returns the first valid offset of \p region: a zero array index, or
+/// UnknownVal when the region lives in the unknown memory space (in which
+/// case the caller skips the lower-bound check).
+static SVal computeExtentBegin(SValBuilder &svalBuilder,
+                               const MemRegion *region) {
+  const MemSpaceRegion *SR = region->getMemorySpace();
+  if (SR->getKind() == MemRegion::UnknownSpaceRegionKind)
+    return UnknownVal();
+  else
+    return svalBuilder.makeZeroArrayIndex();
+}
+
+// TODO: once the constraint manager is smart enough to handle non simplified
+// symbolic expressions remove this function. Note that this can not be used in
+// the constraint manager as is, since this does not handle overflows. It is
+// safe to assume, however, that memory offsets will not overflow.
+//
+// Peels "sym * constant" and "sym + constant" off a symbolic offset,
+// applying the inverse operation to the concrete extent, and recurses on the
+// remaining left-hand side. A multiplication is only peeled when the extent
+// is divisible by the constant. Returns the (offset, extent) pair to compare.
+static std::pair<NonLoc, nonloc::ConcreteInt>
+getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
+                     SValBuilder &svalBuilder) {
+  Optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
+  if (SymVal && SymVal->isExpression()) {
+    if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
+      llvm::APSInt constant =
+          APSIntType(extent.getValue()).convert(SIE->getRHS());
+      switch (SIE->getOpcode()) {
+      case BO_Mul:
+        // The constant should never be 0 here, since it is the result of
+        // scaling based on the size of a type which is never 0.
+        if ((extent.getValue() % constant) != 0)
+          return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
+        else
+          return getSimplifiedOffsets(
+              nonloc::SymbolVal(SIE->getLHS()),
+              svalBuilder.makeIntVal(extent.getValue() / constant),
+              svalBuilder);
+      case BO_Add:
+        return getSimplifiedOffsets(
+            nonloc::SymbolVal(SIE->getLHS()),
+            svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
+      default:
+        break;
+      }
+    }
+  }
+
+  return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
+}
+
+void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
+                                        const Stmt* LoadS,
+                                        CheckerContext &checkerContext) const {
+
+  // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
+  // some new logic here that reasons directly about memory region extents.
+  // Once that logic is more mature, we can bring it back to assumeInBound()
+  // for all clients to use.
+  //
+  // The algorithm we are using here for bounds checking is to see if the
+  // memory access is within the extent of the base region.  Since we
+  // have some flexibility in defining the base region, we can achieve
+  // various levels of conservatism in our buffer overflow checking.
+  ProgramStateRef state = checkerContext.getState();
+
+  SValBuilder &svalBuilder = checkerContext.getSValBuilder();
+  const RegionRawOffsetV2 &rawOffset =
+    RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
+
+  // A null region means no offset could be computed; nothing to check.
+  if (!rawOffset.getRegion())
+    return;
+
+  NonLoc rawOffsetVal = rawOffset.getByteOffset();
+
+  // CHECK LOWER BOUND: Is byteOffset < extent begin?
+  //  If so, we are doing a load/store
+  //  before the first valid offset in the memory region.
+
+  SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion());
+
+  if (Optional<NonLoc> NV = extentBegin.getAs<NonLoc>()) {
+    if (NV->getAs<nonloc::ConcreteInt>()) {
+      std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
+          getSimplifiedOffsets(rawOffset.getByteOffset(),
+                               NV->castAs<nonloc::ConcreteInt>(),
+                               svalBuilder);
+      rawOffsetVal = simplifiedOffsets.first;
+      *NV = simplifiedOffsets.second;
+    }
+
+    SVal lowerBound = svalBuilder.evalBinOpNN(state, BO_LT, rawOffsetVal, *NV,
+                                              svalBuilder.getConditionType());
+
+    Optional<NonLoc> lowerBoundToCheck = lowerBound.getAs<NonLoc>();
+    if (!lowerBoundToCheck)
+      return;
+
+    ProgramStateRef state_precedesLowerBound, state_withinLowerBound;
+    std::tie(state_precedesLowerBound, state_withinLowerBound) =
+      state->assume(*lowerBoundToCheck);
+
+    // Are we constrained enough to definitely precede the lower bound?
+    if (state_precedesLowerBound && !state_withinLowerBound) {
+      reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
+      return;
+    }
+
+    // Otherwise, assume the constraint of the lower bound.
+    assert(state_withinLowerBound);
+    state = state_withinLowerBound;
+  }
+
+  // Single-pass loop: 'break' skips the rest of the upper-bound check while
+  // still reaching the addTransition below.
+  do {
+    // CHECK UPPER BOUND: Is byteOffset >= extent(baseRegion)?  If so,
+    // we are doing a load/store after the last valid offset.
+    DefinedOrUnknownSVal extentVal =
+      rawOffset.getRegion()->getExtent(svalBuilder);
+    if (!extentVal.getAs<NonLoc>())
+      break;
+
+    if (extentVal.getAs<nonloc::ConcreteInt>()) {
+      std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
+          getSimplifiedOffsets(rawOffset.getByteOffset(),
+                               extentVal.castAs<nonloc::ConcreteInt>(),
+                               svalBuilder);
+      rawOffsetVal = simplifiedOffsets.first;
+      extentVal = simplifiedOffsets.second;
+    }
+
+    SVal upperbound = svalBuilder.evalBinOpNN(state, BO_GE, rawOffsetVal,
+                                              extentVal.castAs<NonLoc>(),
+                                              svalBuilder.getConditionType());
+
+    Optional<NonLoc> upperboundToCheck = upperbound.getAs<NonLoc>();
+    if (!upperboundToCheck)
+      break;
+
+    ProgramStateRef state_exceedsUpperBound, state_withinUpperBound;
+    std::tie(state_exceedsUpperBound, state_withinUpperBound) =
+      state->assume(*upperboundToCheck);
+
+    // If we are under constrained and the index variables are tainted, report.
+    if (state_exceedsUpperBound && state_withinUpperBound) {
+      SVal ByteOffset = rawOffset.getByteOffset();
+      if (isTainted(state, ByteOffset)) {
+        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted,
+                  std::make_unique<TaintBugVisitor>(ByteOffset));
+        return;
+      }
+    } else if (state_exceedsUpperBound) {
+      // If we are constrained enough to definitely exceed the upper bound,
+      // report.
+      assert(!state_withinUpperBound);
+      reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
+      return;
+    }
+
+    assert(state_withinUpperBound);
+    state = state_withinUpperBound;
+  }
+  while (false);
+
+  checkerContext.addTransition(state);
+}
+
+/// Generates an error node for \p errorState and emits an out-of-bound
+/// report whose message depends on \p kind. \p Visitor, when supplied, is
+/// attached to the report. Does nothing if no error node can be generated.
+void ArrayBoundCheckerV2::reportOOB(
+    CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind,
+    std::unique_ptr<BugReporterVisitor> Visitor) const {
+
+  ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
+  if (!errorNode)
+    return;
+
+  if (!BT)
+    BT.reset(new BuiltinBug(this, "Out-of-bound access"));
+
+  // FIXME: These diagnostics are preliminary.  We should get far better
+  // diagnostics for explaining buffer overruns.
+
+  SmallString<256> buf;
+  llvm::raw_svector_ostream os(buf);
+  os << "Out of bound memory access ";
+  switch (kind) {
+  case OOB_Precedes:
+    os << "(accessed memory precedes memory block)";
+    break;
+  case OOB_Excedes:
+    os << "(access exceeds upper limit of memory block)";
+    break;
+  case OOB_Tainted:
+    os << "(index is tainted)";
+    break;
+  }
+
+  auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
+  // NOTE(review): Visitor is null for the non-tainted kinds; this relies on
+  // addVisitor accepting a null visitor -- confirm.
+  BR->addVisitor(std::move(Visitor));
+  checkerContext.emitReport(std::move(BR));
+}
+
+// The dump helpers are only defined in builds with assertions enabled.
+#ifndef NDEBUG
+LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
+  dumpToStream(llvm::errs());
+}
+
+void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
+  os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
+}
+#endif
+
+// Returns the value to be used by 'computeOffset'. If 'val' is undefined,
+// a zero array index is substituted. Any other value is returned as is;
+// note that this includes UnknownVal, since only UndefinedVal is tested
+// for here.
+static inline SVal getValue(SVal val, SValBuilder &svalBuilder) {
+  return val.getAs<UndefinedVal>() ? svalBuilder.makeArrayIndex(0) : val;
+}
+
+// Scale a base value by a scaling factor, and return the scaled
+// value as an SVal.  Used by 'computeOffset'.
+static inline SVal scaleValue(ProgramStateRef state,
+                              NonLoc baseVal, CharUnits scaling,
+                              SValBuilder &sb) {
+  return sb.evalBinOpNN(state, BO_Mul, baseVal,
+                        sb.makeArrayIndex(scaling.getQuantity()),
+                        sb.getArrayIndexType());
+}
+
+// Add an SVal to another, treating unknown and undefined values as
+// summing to UnknownVal.  Used by 'computeOffset'.
+static SVal addValue(ProgramStateRef state, SVal x, SVal y,
+                     SValBuilder &svalBuilder) {
+  // We treat UnknownVals and UndefinedVals the same here because we
+  // only care about computing offsets.
+  if (x.isUnknownOrUndef() || y.isUnknownOrUndef())
+    return UnknownVal();
+
+  return svalBuilder.evalBinOpNN(state, BO_Add, x.castAs<NonLoc>(),
+                                 y.castAs<NonLoc>(),
+                                 svalBuilder.getArrayIndexType());
+}
+
+/// Compute a raw byte offset from a base region.  Used for array bounds
+/// checking.
+///
+/// Walks outwards through nested ElementRegions, accumulating
+/// index * sizeof(element) into the offset, and stops at the first region of
+/// any other kind, which becomes the base if it is a SubRegion. Returns the
+/// default (null-region) value when the location has no region, an index is
+/// not a NonLoc, an element type is incomplete, or the offset becomes
+/// unknown or undefined.
+RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state,
+                                                   SValBuilder &svalBuilder,
+                                                   SVal location)
+{
+  const MemRegion *region = location.getAsRegion();
+  // Undefined marks "no element seen yet"; getValue turns it into zero.
+  SVal offset = UndefinedVal();
+
+  while (region) {
+    switch (region->getKind()) {
+      default: {
+        if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) {
+          offset = getValue(offset, svalBuilder);
+          if (!offset.isUnknownOrUndef())
+            return RegionRawOffsetV2(subReg, offset);
+        }
+        return RegionRawOffsetV2();
+      }
+      case MemRegion::ElementRegionKind: {
+        const ElementRegion *elemReg = cast<ElementRegion>(region);
+        SVal index = elemReg->getIndex();
+        if (!index.getAs<NonLoc>())
+          return RegionRawOffsetV2();
+        QualType elemType = elemReg->getElementType();
+        // If the element is an incomplete type, go no further.
+        ASTContext &astContext = svalBuilder.getContext();
+        if (elemType->isIncompleteType())
+          return RegionRawOffsetV2();
+
+        // Update the offset.
+        offset = addValue(state,
+                          getValue(offset, svalBuilder),
+                          scaleValue(state,
+                          index.castAs<NonLoc>(),
+                          astContext.getTypeSizeInChars(elemType),
+                          svalBuilder),
+                          svalBuilder);
+
+        if (offset.isUnknownOrUndef())
+          return RegionRawOffsetV2();
+
+        region = elemReg->getSuperRegion();
+        continue;
+      }
+    }
+  }
+  return RegionRawOffsetV2();
+}
+
+// Registers ArrayBoundCheckerV2 with the checker manager.
+void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
+  mgr.registerChecker<ArrayBoundCheckerV2>();
+}
+
+// The checker is registered regardless of the language options.
+bool ento::shouldRegisterArrayBoundCheckerV2(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
new file mode 100644
index 000000000000..325952fe4ed4
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
@@ -0,0 +1,1296 @@
+//== BasicObjCFoundationChecks.cpp - Simple Apple-Foundation checks -*- C++ -*--
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines BasicObjCFoundationChecks, a class that encapsulates
+//  a set of simple checks to run on Objective-C code using Apple's Foundation
+//  classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprObjC.h"
+#include "clang/AST/StmtObjC.h"
+#include "clang/Analysis/DomainSpecific/CocoaConventions.h"
+#include "clang/Analysis/SelectorExtras.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace ento;
+using namespace llvm;
+
+namespace {
+/// A BugType whose category is fixed to "API Misuse (Apple)"; only the
+/// owning checker and the bug name vary.
+class APIMisuse : public BugType {
+public:
+  APIMisuse(const CheckerBase *Owner, const char *BugName)
+      : BugType(Owner, BugName, "API Misuse (Apple)") {}
+};
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Utility functions.
+//===----------------------------------------------------------------------===//
+
+/// Returns the name of the interface the message is sent to, or an empty
+/// StringRef when the receiver interface is not known.
+static StringRef GetReceiverInterfaceName(const ObjCMethodCall &msg) {
+  const ObjCInterfaceDecl *Receiver = msg.getReceiverInterface();
+  return Receiver ? Receiver->getIdentifier()->getName() : StringRef();
+}
+
+/// The Foundation classes recognised by findKnownClass. FC_None is the
+/// value produced for any class name that is not in its table.
+enum FoundationClass {
+  FC_None,
+  FC_NSArray,
+  FC_NSDictionary,
+  FC_NSEnumerator,
+  FC_NSNull,
+  FC_NSOrderedSet,
+  FC_NSSet,
+  FC_NSString
+};
+
+/// Maps an Objective-C interface to the FoundationClass it names.
+///
+/// \param ID                  Interface to classify; must not be null.
+/// \param IncludeSuperclasses When true and \p ID itself is not a known
+///                            class, the superclass chain is searched and
+///                            the first known ancestor wins.
+/// \returns The matching FoundationClass, or FC_None if nothing matched.
+static FoundationClass findKnownClass(const ObjCInterfaceDecl *ID,
+                                      bool IncludeSuperclasses = true) {
+  // Name -> class table, filled on the first call.
+  static llvm::StringMap<FoundationClass> Classes;
+  if (Classes.empty()) {
+    static const struct {
+      const char *Name;
+      FoundationClass Kind;
+    } Table[] = {
+        {"NSArray", FC_NSArray},
+        {"NSDictionary", FC_NSDictionary},
+        {"NSEnumerator", FC_NSEnumerator},
+        {"NSNull", FC_NSNull},
+        {"NSOrderedSet", FC_NSOrderedSet},
+        {"NSSet", FC_NSSet},
+        {"NSString", FC_NSString},
+    };
+    for (const auto &Entry : Table)
+      Classes[Entry.Name] = Entry.Kind;
+  }
+
+  // FIXME: Should we cache this at all?
+  const ObjCInterfaceDecl *Cur = ID;
+  for (;;) {
+    // lookup() yields FC_None (the zero enumerator) for unknown names.
+    FoundationClass Found = Classes.lookup(Cur->getIdentifier()->getName());
+    if (Found != FC_None || !IncludeSuperclasses)
+      return Found;
+
+    Cur = Cur->getSuperClass();
+    if (!Cur)
+      return FC_None;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// NilArgChecker - Check for prohibited nil arguments to ObjC method calls.
+//===----------------------------------------------------------------------===// + +namespace { + class NilArgChecker : public Checker<check::PreObjCMessage, + check::PostStmt<ObjCDictionaryLiteral>, + check::PostStmt<ObjCArrayLiteral> > { + mutable std::unique_ptr<APIMisuse> BT; + + mutable llvm::SmallDenseMap<Selector, unsigned, 16> StringSelectors; + mutable Selector ArrayWithObjectSel; + mutable Selector AddObjectSel; + mutable Selector InsertObjectAtIndexSel; + mutable Selector ReplaceObjectAtIndexWithObjectSel; + mutable Selector SetObjectAtIndexedSubscriptSel; + mutable Selector ArrayByAddingObjectSel; + mutable Selector DictionaryWithObjectForKeySel; + mutable Selector SetObjectForKeySel; + mutable Selector SetObjectForKeyedSubscriptSel; + mutable Selector RemoveObjectForKeySel; + + void warnIfNilExpr(const Expr *E, + const char *Msg, + CheckerContext &C) const; + + void warnIfNilArg(CheckerContext &C, + const ObjCMethodCall &msg, unsigned Arg, + FoundationClass Class, + bool CanBeSubscript = false) const; + + void generateBugReport(ExplodedNode *N, + StringRef Msg, + SourceRange Range, + const Expr *Expr, + CheckerContext &C) const; + + public: + void checkPreObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const; + void checkPostStmt(const ObjCDictionaryLiteral *DL, + CheckerContext &C) const; + void checkPostStmt(const ObjCArrayLiteral *AL, + CheckerContext &C) const; + }; +} // end anonymous namespace + +void NilArgChecker::warnIfNilExpr(const Expr *E, + const char *Msg, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + if (State->isNull(C.getSVal(E)).isConstrainedTrue()) { + + if (ExplodedNode *N = C.generateErrorNode()) { + generateBugReport(N, Msg, E->getSourceRange(), E, C); + } + } +} + +void NilArgChecker::warnIfNilArg(CheckerContext &C, + const ObjCMethodCall &msg, + unsigned int Arg, + FoundationClass Class, + bool CanBeSubscript) const { + // Check if the argument is nil. 
+ ProgramStateRef State = C.getState(); + if (!State->isNull(msg.getArgSVal(Arg)).isConstrainedTrue()) + return; + + // NOTE: We cannot throw non-fatal errors from warnIfNilExpr, + // because it's called multiple times from some callers, so it'd cause + // an unwanted state split if two or more non-fatal errors are thrown + // within the same checker callback. For now we don't want to, but + // it'll need to be fixed if we ever want to. + if (ExplodedNode *N = C.generateErrorNode()) { + SmallString<128> sbuf; + llvm::raw_svector_ostream os(sbuf); + + if (CanBeSubscript && msg.getMessageKind() == OCM_Subscript) { + + if (Class == FC_NSArray) { + os << "Array element cannot be nil"; + } else if (Class == FC_NSDictionary) { + if (Arg == 0) { + os << "Value stored into '"; + os << GetReceiverInterfaceName(msg) << "' cannot be nil"; + } else { + assert(Arg == 1); + os << "'"<< GetReceiverInterfaceName(msg) << "' key cannot be nil"; + } + } else + llvm_unreachable("Missing foundation class for the subscript expr"); + + } else { + if (Class == FC_NSDictionary) { + if (Arg == 0) + os << "Value argument "; + else { + assert(Arg == 1); + os << "Key argument "; + } + os << "to '"; + msg.getSelector().print(os); + os << "' cannot be nil"; + } else { + os << "Argument to '" << GetReceiverInterfaceName(msg) << "' method '"; + msg.getSelector().print(os); + os << "' cannot be nil"; + } + } + + generateBugReport(N, os.str(), msg.getArgSourceRange(Arg), + msg.getArgExpr(Arg), C); + } +} + +void NilArgChecker::generateBugReport(ExplodedNode *N, + StringRef Msg, + SourceRange Range, + const Expr *E, + CheckerContext &C) const { + if (!BT) + BT.reset(new APIMisuse(this, "nil argument")); + + auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + R->addRange(Range); + bugreporter::trackExpressionValue(N, E, *R); + C.emitReport(std::move(R)); +} + +void NilArgChecker::checkPreObjCMessage(const ObjCMethodCall &msg, + CheckerContext &C) const { + const ObjCInterfaceDecl *ID = 
msg.getReceiverInterface(); + if (!ID) + return; + + FoundationClass Class = findKnownClass(ID); + + static const unsigned InvalidArgIndex = UINT_MAX; + unsigned Arg = InvalidArgIndex; + bool CanBeSubscript = false; + + if (Class == FC_NSString) { + Selector S = msg.getSelector(); + + if (S.isUnarySelector()) + return; + + if (StringSelectors.empty()) { + ASTContext &Ctx = C.getASTContext(); + Selector Sels[] = { + getKeywordSelector(Ctx, "caseInsensitiveCompare"), + getKeywordSelector(Ctx, "compare"), + getKeywordSelector(Ctx, "compare", "options"), + getKeywordSelector(Ctx, "compare", "options", "range"), + getKeywordSelector(Ctx, "compare", "options", "range", "locale"), + getKeywordSelector(Ctx, "componentsSeparatedByCharactersInSet"), + getKeywordSelector(Ctx, "initWithFormat"), + getKeywordSelector(Ctx, "localizedCaseInsensitiveCompare"), + getKeywordSelector(Ctx, "localizedCompare"), + getKeywordSelector(Ctx, "localizedStandardCompare"), + }; + for (Selector KnownSel : Sels) + StringSelectors[KnownSel] = 0; + } + auto I = StringSelectors.find(S); + if (I == StringSelectors.end()) + return; + Arg = I->second; + } else if (Class == FC_NSArray) { + Selector S = msg.getSelector(); + + if (S.isUnarySelector()) + return; + + if (ArrayWithObjectSel.isNull()) { + ASTContext &Ctx = C.getASTContext(); + ArrayWithObjectSel = getKeywordSelector(Ctx, "arrayWithObject"); + AddObjectSel = getKeywordSelector(Ctx, "addObject"); + InsertObjectAtIndexSel = + getKeywordSelector(Ctx, "insertObject", "atIndex"); + ReplaceObjectAtIndexWithObjectSel = + getKeywordSelector(Ctx, "replaceObjectAtIndex", "withObject"); + SetObjectAtIndexedSubscriptSel = + getKeywordSelector(Ctx, "setObject", "atIndexedSubscript"); + ArrayByAddingObjectSel = getKeywordSelector(Ctx, "arrayByAddingObject"); + } + + if (S == ArrayWithObjectSel || S == AddObjectSel || + S == InsertObjectAtIndexSel || S == ArrayByAddingObjectSel) { + Arg = 0; + } else if (S == SetObjectAtIndexedSubscriptSel) { + Arg = 0; + 
CanBeSubscript = true; + } else if (S == ReplaceObjectAtIndexWithObjectSel) { + Arg = 1; + } + } else if (Class == FC_NSDictionary) { + Selector S = msg.getSelector(); + + if (S.isUnarySelector()) + return; + + if (DictionaryWithObjectForKeySel.isNull()) { + ASTContext &Ctx = C.getASTContext(); + DictionaryWithObjectForKeySel = + getKeywordSelector(Ctx, "dictionaryWithObject", "forKey"); + SetObjectForKeySel = getKeywordSelector(Ctx, "setObject", "forKey"); + SetObjectForKeyedSubscriptSel = + getKeywordSelector(Ctx, "setObject", "forKeyedSubscript"); + RemoveObjectForKeySel = getKeywordSelector(Ctx, "removeObjectForKey"); + } + + if (S == DictionaryWithObjectForKeySel || S == SetObjectForKeySel) { + Arg = 0; + warnIfNilArg(C, msg, /* Arg */1, Class); + } else if (S == SetObjectForKeyedSubscriptSel) { + CanBeSubscript = true; + Arg = 1; + } else if (S == RemoveObjectForKeySel) { + Arg = 0; + } + } + + // If argument is '0', report a warning. + if ((Arg != InvalidArgIndex)) + warnIfNilArg(C, msg, Arg, Class, CanBeSubscript); +} + +void NilArgChecker::checkPostStmt(const ObjCArrayLiteral *AL, + CheckerContext &C) const { + unsigned NumOfElements = AL->getNumElements(); + for (unsigned i = 0; i < NumOfElements; ++i) { + warnIfNilExpr(AL->getElement(i), "Array element cannot be nil", C); + } +} + +void NilArgChecker::checkPostStmt(const ObjCDictionaryLiteral *DL, + CheckerContext &C) const { + unsigned NumOfElements = DL->getNumElements(); + for (unsigned i = 0; i < NumOfElements; ++i) { + ObjCDictionaryElement Element = DL->getKeyValueElement(i); + warnIfNilExpr(Element.Key, "Dictionary key cannot be nil", C); + warnIfNilExpr(Element.Value, "Dictionary value cannot be nil", C); + } +} + +//===----------------------------------------------------------------------===// +// Checking for mismatched types passed to CFNumberCreate/CFNumberGetValue. 
+//===----------------------------------------------------------------------===// + +namespace { +class CFNumberChecker : public Checker< check::PreStmt<CallExpr> > { + mutable std::unique_ptr<APIMisuse> BT; + mutable IdentifierInfo *ICreate, *IGetValue; +public: + CFNumberChecker() : ICreate(nullptr), IGetValue(nullptr) {} + + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + +private: + void EmitError(const TypedRegion* R, const Expr *Ex, + uint64_t SourceSize, uint64_t TargetSize, uint64_t NumberKind); +}; +} // end anonymous namespace + +enum CFNumberType { + kCFNumberSInt8Type = 1, + kCFNumberSInt16Type = 2, + kCFNumberSInt32Type = 3, + kCFNumberSInt64Type = 4, + kCFNumberFloat32Type = 5, + kCFNumberFloat64Type = 6, + kCFNumberCharType = 7, + kCFNumberShortType = 8, + kCFNumberIntType = 9, + kCFNumberLongType = 10, + kCFNumberLongLongType = 11, + kCFNumberFloatType = 12, + kCFNumberDoubleType = 13, + kCFNumberCFIndexType = 14, + kCFNumberNSIntegerType = 15, + kCFNumberCGFloatType = 16 +}; + +static Optional<uint64_t> GetCFNumberSize(ASTContext &Ctx, uint64_t i) { + static const unsigned char FixedSize[] = { 8, 16, 32, 64, 32, 64 }; + + if (i < kCFNumberCharType) + return FixedSize[i-1]; + + QualType T; + + switch (i) { + case kCFNumberCharType: T = Ctx.CharTy; break; + case kCFNumberShortType: T = Ctx.ShortTy; break; + case kCFNumberIntType: T = Ctx.IntTy; break; + case kCFNumberLongType: T = Ctx.LongTy; break; + case kCFNumberLongLongType: T = Ctx.LongLongTy; break; + case kCFNumberFloatType: T = Ctx.FloatTy; break; + case kCFNumberDoubleType: T = Ctx.DoubleTy; break; + case kCFNumberCFIndexType: + case kCFNumberNSIntegerType: + case kCFNumberCGFloatType: + // FIXME: We need a way to map from names to Type*. 
+ default: + return None; + } + + return Ctx.getTypeSize(T); +} + +#if 0 +static const char* GetCFNumberTypeStr(uint64_t i) { + static const char* Names[] = { + "kCFNumberSInt8Type", + "kCFNumberSInt16Type", + "kCFNumberSInt32Type", + "kCFNumberSInt64Type", + "kCFNumberFloat32Type", + "kCFNumberFloat64Type", + "kCFNumberCharType", + "kCFNumberShortType", + "kCFNumberIntType", + "kCFNumberLongType", + "kCFNumberLongLongType", + "kCFNumberFloatType", + "kCFNumberDoubleType", + "kCFNumberCFIndexType", + "kCFNumberNSIntegerType", + "kCFNumberCGFloatType" + }; + + return i <= kCFNumberCGFloatType ? Names[i-1] : "Invalid CFNumberType"; +} +#endif + +void CFNumberChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD) + return; + + ASTContext &Ctx = C.getASTContext(); + if (!ICreate) { + ICreate = &Ctx.Idents.get("CFNumberCreate"); + IGetValue = &Ctx.Idents.get("CFNumberGetValue"); + } + if (!(FD->getIdentifier() == ICreate || FD->getIdentifier() == IGetValue) || + CE->getNumArgs() != 3) + return; + + // Get the value of the "theType" argument. + SVal TheTypeVal = C.getSVal(CE->getArg(1)); + + // FIXME: We really should allow ranges of valid theType values, and + // bifurcate the state appropriately. + Optional<nonloc::ConcreteInt> V = TheTypeVal.getAs<nonloc::ConcreteInt>(); + if (!V) + return; + + uint64_t NumberKind = V->getValue().getLimitedValue(); + Optional<uint64_t> OptCFNumberSize = GetCFNumberSize(Ctx, NumberKind); + + // FIXME: In some cases we can emit an error. + if (!OptCFNumberSize) + return; + + uint64_t CFNumberSize = *OptCFNumberSize; + + // Look at the value of the integer being passed by reference. Essentially + // we want to catch cases where the value passed in is not equal to the + // size of the type being created. + SVal TheValueExpr = C.getSVal(CE->getArg(2)); + + // FIXME: Eventually we should handle arbitrary locations. 
We can do this + // by having an enhanced memory model that does low-level typing. + Optional<loc::MemRegionVal> LV = TheValueExpr.getAs<loc::MemRegionVal>(); + if (!LV) + return; + + const TypedValueRegion* R = dyn_cast<TypedValueRegion>(LV->stripCasts()); + if (!R) + return; + + QualType T = Ctx.getCanonicalType(R->getValueType()); + + // FIXME: If the pointee isn't an integer type, should we flag a warning? + // People can do weird stuff with pointers. + + if (!T->isIntegralOrEnumerationType()) + return; + + uint64_t PrimitiveTypeSize = Ctx.getTypeSize(T); + + if (PrimitiveTypeSize == CFNumberSize) + return; + + // FIXME: We can actually create an abstract "CFNumber" object that has + // the bits initialized to the provided values. + ExplodedNode *N = C.generateNonFatalErrorNode(); + if (N) { + SmallString<128> sbuf; + llvm::raw_svector_ostream os(sbuf); + bool isCreate = (FD->getIdentifier() == ICreate); + + if (isCreate) { + os << (PrimitiveTypeSize == 8 ? "An " : "A ") + << PrimitiveTypeSize << "-bit integer is used to initialize a " + << "CFNumber object that represents " + << (CFNumberSize == 8 ? "an " : "a ") + << CFNumberSize << "-bit integer; "; + } else { + os << "A CFNumber object that represents " + << (CFNumberSize == 8 ? "an " : "a ") + << CFNumberSize << "-bit integer is used to initialize " + << (PrimitiveTypeSize == 8 ? "an " : "a ") + << PrimitiveTypeSize << "-bit integer; "; + } + + if (PrimitiveTypeSize < CFNumberSize) + os << (CFNumberSize - PrimitiveTypeSize) + << " bits of the CFNumber value will " + << (isCreate ? "be garbage." : "overwrite adjacent storage."); + else + os << (PrimitiveTypeSize - CFNumberSize) + << " bits of the integer value will be " + << (isCreate ? "lost." 
: "garbage."); + + if (!BT) + BT.reset(new APIMisuse(this, "Bad use of CFNumber APIs")); + + auto report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + report->addRange(CE->getArg(2)->getSourceRange()); + C.emitReport(std::move(report)); + } +} + +//===----------------------------------------------------------------------===// +// CFRetain/CFRelease/CFMakeCollectable/CFAutorelease checking for null arguments. +//===----------------------------------------------------------------------===// + +namespace { +class CFRetainReleaseChecker : public Checker<check::PreCall> { + mutable APIMisuse BT{this, "null passed to CF memory management function"}; + CallDescription CFRetain{"CFRetain", 1}, + CFRelease{"CFRelease", 1}, + CFMakeCollectable{"CFMakeCollectable", 1}, + CFAutorelease{"CFAutorelease", 1}; + +public: + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; +}; +} // end anonymous namespace + +void CFRetainReleaseChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + // TODO: Make this check part of CallDescription. + if (!Call.isGlobalCFunction()) + return; + + // Check if we called CFRetain/CFRelease/CFMakeCollectable/CFAutorelease. + if (!(Call.isCalled(CFRetain) || Call.isCalled(CFRelease) || + Call.isCalled(CFMakeCollectable) || Call.isCalled(CFAutorelease))) + return; + + // Get the argument's value. + SVal ArgVal = Call.getArgSVal(0); + Optional<DefinedSVal> DefArgVal = ArgVal.getAs<DefinedSVal>(); + if (!DefArgVal) + return; + + // Is it null? 
+ ProgramStateRef state = C.getState(); + ProgramStateRef stateNonNull, stateNull; + std::tie(stateNonNull, stateNull) = state->assume(*DefArgVal); + + if (!stateNonNull) { + ExplodedNode *N = C.generateErrorNode(stateNull); + if (!N) + return; + + SmallString<64> Str; + raw_svector_ostream OS(Str); + OS << "Null pointer argument in call to " + << cast<FunctionDecl>(Call.getDecl())->getName(); + + auto report = std::make_unique<PathSensitiveBugReport>(BT, OS.str(), N); + report->addRange(Call.getArgSourceRange(0)); + bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *report); + C.emitReport(std::move(report)); + return; + } + + // From here on, we know the argument is non-null. + C.addTransition(stateNonNull); +} + +//===----------------------------------------------------------------------===// +// Check for sending 'retain', 'release', or 'autorelease' directly to a Class. +//===----------------------------------------------------------------------===// + +namespace { +class ClassReleaseChecker : public Checker<check::PreObjCMessage> { + mutable Selector releaseS; + mutable Selector retainS; + mutable Selector autoreleaseS; + mutable Selector drainS; + mutable std::unique_ptr<BugType> BT; + +public: + void checkPreObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const; +}; +} // end anonymous namespace + +void ClassReleaseChecker::checkPreObjCMessage(const ObjCMethodCall &msg, + CheckerContext &C) const { + if (!BT) { + BT.reset(new APIMisuse( + this, "message incorrectly sent to class instead of class instance")); + + ASTContext &Ctx = C.getASTContext(); + releaseS = GetNullarySelector("release", Ctx); + retainS = GetNullarySelector("retain", Ctx); + autoreleaseS = GetNullarySelector("autorelease", Ctx); + drainS = GetNullarySelector("drain", Ctx); + } + + if (msg.isInstanceMessage()) + return; + const ObjCInterfaceDecl *Class = msg.getReceiverInterface(); + assert(Class); + + Selector S = msg.getSelector(); + if (!(S == releaseS || S == retainS 
|| S == autoreleaseS || S == drainS)) + return; + + if (ExplodedNode *N = C.generateNonFatalErrorNode()) { + SmallString<200> buf; + llvm::raw_svector_ostream os(buf); + + os << "The '"; + S.print(os); + os << "' message should be sent to instances " + "of class '" << Class->getName() + << "' and not the class directly"; + + auto report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + report->addRange(msg.getSourceRange()); + C.emitReport(std::move(report)); + } +} + +//===----------------------------------------------------------------------===// +// Check for passing non-Objective-C types to variadic methods that expect +// only Objective-C types. +//===----------------------------------------------------------------------===// + +namespace { +class VariadicMethodTypeChecker : public Checker<check::PreObjCMessage> { + mutable Selector arrayWithObjectsS; + mutable Selector dictionaryWithObjectsAndKeysS; + mutable Selector setWithObjectsS; + mutable Selector orderedSetWithObjectsS; + mutable Selector initWithObjectsS; + mutable Selector initWithObjectsAndKeysS; + mutable std::unique_ptr<BugType> BT; + + bool isVariadicMessage(const ObjCMethodCall &msg) const; + +public: + void checkPreObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const; +}; +} // end anonymous namespace + +/// isVariadicMessage - Returns whether the given message is a variadic message, +/// where all arguments must be Objective-C types. +bool +VariadicMethodTypeChecker::isVariadicMessage(const ObjCMethodCall &msg) const { + const ObjCMethodDecl *MD = msg.getDecl(); + + if (!MD || !MD->isVariadic() || isa<ObjCProtocolDecl>(MD->getDeclContext())) + return false; + + Selector S = msg.getSelector(); + + if (msg.isInstanceMessage()) { + // FIXME: Ideally we'd look at the receiver interface here, but that's not + // useful for init, because alloc returns 'id'. 
In theory, this could lead + // to false positives, for example if there existed a class that had an + // initWithObjects: implementation that does accept non-Objective-C pointer + // types, but the chance of that happening is pretty small compared to the + // gains that this analysis gives. + const ObjCInterfaceDecl *Class = MD->getClassInterface(); + + switch (findKnownClass(Class)) { + case FC_NSArray: + case FC_NSOrderedSet: + case FC_NSSet: + return S == initWithObjectsS; + case FC_NSDictionary: + return S == initWithObjectsAndKeysS; + default: + return false; + } + } else { + const ObjCInterfaceDecl *Class = msg.getReceiverInterface(); + + switch (findKnownClass(Class)) { + case FC_NSArray: + return S == arrayWithObjectsS; + case FC_NSOrderedSet: + return S == orderedSetWithObjectsS; + case FC_NSSet: + return S == setWithObjectsS; + case FC_NSDictionary: + return S == dictionaryWithObjectsAndKeysS; + default: + return false; + } + } +} + +void VariadicMethodTypeChecker::checkPreObjCMessage(const ObjCMethodCall &msg, + CheckerContext &C) const { + if (!BT) { + BT.reset(new APIMisuse(this, + "Arguments passed to variadic method aren't all " + "Objective-C pointer types")); + + ASTContext &Ctx = C.getASTContext(); + arrayWithObjectsS = GetUnarySelector("arrayWithObjects", Ctx); + dictionaryWithObjectsAndKeysS = + GetUnarySelector("dictionaryWithObjectsAndKeys", Ctx); + setWithObjectsS = GetUnarySelector("setWithObjects", Ctx); + orderedSetWithObjectsS = GetUnarySelector("orderedSetWithObjects", Ctx); + + initWithObjectsS = GetUnarySelector("initWithObjects", Ctx); + initWithObjectsAndKeysS = GetUnarySelector("initWithObjectsAndKeys", Ctx); + } + + if (!isVariadicMessage(msg)) + return; + + // We are not interested in the selector arguments since they have + // well-defined types, so the compiler will issue a warning for them. 
+ unsigned variadicArgsBegin = msg.getSelector().getNumArgs(); + + // We're not interested in the last argument since it has to be nil or the + // compiler would have issued a warning for it elsewhere. + unsigned variadicArgsEnd = msg.getNumArgs() - 1; + + if (variadicArgsEnd <= variadicArgsBegin) + return; + + // Verify that all arguments have Objective-C types. + Optional<ExplodedNode*> errorNode; + + for (unsigned I = variadicArgsBegin; I != variadicArgsEnd; ++I) { + QualType ArgTy = msg.getArgExpr(I)->getType(); + if (ArgTy->isObjCObjectPointerType()) + continue; + + // Block pointers are treaded as Objective-C pointers. + if (ArgTy->isBlockPointerType()) + continue; + + // Ignore pointer constants. + if (msg.getArgSVal(I).getAs<loc::ConcreteInt>()) + continue; + + // Ignore pointer types annotated with 'NSObject' attribute. + if (C.getASTContext().isObjCNSObjectType(ArgTy)) + continue; + + // Ignore CF references, which can be toll-free bridged. + if (coreFoundation::isCFObjectRef(ArgTy)) + continue; + + // Generate only one error node to use for all bug reports. + if (!errorNode.hasValue()) + errorNode = C.generateNonFatalErrorNode(); + + if (!errorNode.getValue()) + continue; + + SmallString<128> sbuf; + llvm::raw_svector_ostream os(sbuf); + + StringRef TypeName = GetReceiverInterfaceName(msg); + if (!TypeName.empty()) + os << "Argument to '" << TypeName << "' method '"; + else + os << "Argument to method '"; + + msg.getSelector().print(os); + os << "' should be an Objective-C pointer type, not '"; + ArgTy.print(os, C.getLangOpts()); + os << "'"; + + auto R = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), + errorNode.getValue()); + R->addRange(msg.getArgSourceRange(I)); + C.emitReport(std::move(R)); + } +} + +//===----------------------------------------------------------------------===// +// Improves the modeling of loops over Cocoa collections. 
+//===----------------------------------------------------------------------===// + +// The map from container symbol to the container count symbol. +// We currently will remember the last container count symbol encountered. +REGISTER_MAP_WITH_PROGRAMSTATE(ContainerCountMap, SymbolRef, SymbolRef) +REGISTER_MAP_WITH_PROGRAMSTATE(ContainerNonEmptyMap, SymbolRef, bool) + +namespace { +class ObjCLoopChecker + : public Checker<check::PostStmt<ObjCForCollectionStmt>, + check::PostObjCMessage, + check::DeadSymbols, + check::PointerEscape > { + mutable IdentifierInfo *CountSelectorII; + + bool isCollectionCountMethod(const ObjCMethodCall &M, + CheckerContext &C) const; + +public: + ObjCLoopChecker() : CountSelectorII(nullptr) {} + void checkPostStmt(const ObjCForCollectionStmt *FCS, CheckerContext &C) const; + void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; + ProgramStateRef checkPointerEscape(ProgramStateRef State, + const InvalidatedSymbols &Escaped, + const CallEvent *Call, + PointerEscapeKind Kind) const; +}; +} // end anonymous namespace + +static bool isKnownNonNilCollectionType(QualType T) { + const ObjCObjectPointerType *PT = T->getAs<ObjCObjectPointerType>(); + if (!PT) + return false; + + const ObjCInterfaceDecl *ID = PT->getInterfaceDecl(); + if (!ID) + return false; + + switch (findKnownClass(ID)) { + case FC_NSArray: + case FC_NSDictionary: + case FC_NSEnumerator: + case FC_NSOrderedSet: + case FC_NSSet: + return true; + default: + return false; + } +} + +/// Assumes that the collection is non-nil. +/// +/// If the collection is known to be nil, returns NULL to indicate an infeasible +/// path. 
+static ProgramStateRef checkCollectionNonNil(CheckerContext &C, + ProgramStateRef State, + const ObjCForCollectionStmt *FCS) { + if (!State) + return nullptr; + + SVal CollectionVal = C.getSVal(FCS->getCollection()); + Optional<DefinedSVal> KnownCollection = CollectionVal.getAs<DefinedSVal>(); + if (!KnownCollection) + return State; + + ProgramStateRef StNonNil, StNil; + std::tie(StNonNil, StNil) = State->assume(*KnownCollection); + if (StNil && !StNonNil) { + // The collection is nil. This path is infeasible. + return nullptr; + } + + return StNonNil; +} + +/// Assumes that the collection elements are non-nil. +/// +/// This only applies if the collection is one of those known not to contain +/// nil values. +static ProgramStateRef checkElementNonNil(CheckerContext &C, + ProgramStateRef State, + const ObjCForCollectionStmt *FCS) { + if (!State) + return nullptr; + + // See if the collection is one where we /know/ the elements are non-nil. + if (!isKnownNonNilCollectionType(FCS->getCollection()->getType())) + return State; + + const LocationContext *LCtx = C.getLocationContext(); + const Stmt *Element = FCS->getElement(); + + // FIXME: Copied from ExprEngineObjC. + Optional<Loc> ElementLoc; + if (const DeclStmt *DS = dyn_cast<DeclStmt>(Element)) { + const VarDecl *ElemDecl = cast<VarDecl>(DS->getSingleDecl()); + assert(ElemDecl->getInit() == nullptr); + ElementLoc = State->getLValue(ElemDecl, LCtx); + } else { + ElementLoc = State->getSVal(Element, LCtx).getAs<Loc>(); + } + + if (!ElementLoc) + return State; + + // Go ahead and assume the value is non-nil. + SVal Val = State->getSVal(*ElementLoc); + return State->assume(Val.castAs<DefinedOrUnknownSVal>(), true); +} + +/// Returns NULL state if the collection is known to contain elements +/// (or is known not to contain elements if the Assumption parameter is false.) 
+static ProgramStateRef +assumeCollectionNonEmpty(CheckerContext &C, ProgramStateRef State, + SymbolRef CollectionS, bool Assumption) { + if (!State || !CollectionS) + return State; + + const SymbolRef *CountS = State->get<ContainerCountMap>(CollectionS); + if (!CountS) { + const bool *KnownNonEmpty = State->get<ContainerNonEmptyMap>(CollectionS); + if (!KnownNonEmpty) + return State->set<ContainerNonEmptyMap>(CollectionS, Assumption); + return (Assumption == *KnownNonEmpty) ? State : nullptr; + } + + SValBuilder &SvalBuilder = C.getSValBuilder(); + SVal CountGreaterThanZeroVal = + SvalBuilder.evalBinOp(State, BO_GT, + nonloc::SymbolVal(*CountS), + SvalBuilder.makeIntVal(0, (*CountS)->getType()), + SvalBuilder.getConditionType()); + Optional<DefinedSVal> CountGreaterThanZero = + CountGreaterThanZeroVal.getAs<DefinedSVal>(); + if (!CountGreaterThanZero) { + // The SValBuilder cannot construct a valid SVal for this condition. + // This means we cannot properly reason about it. + return State; + } + + return State->assume(*CountGreaterThanZero, Assumption); +} + +static ProgramStateRef +assumeCollectionNonEmpty(CheckerContext &C, ProgramStateRef State, + const ObjCForCollectionStmt *FCS, + bool Assumption) { + if (!State) + return nullptr; + + SymbolRef CollectionS = C.getSVal(FCS->getCollection()).getAsSymbol(); + return assumeCollectionNonEmpty(C, State, CollectionS, Assumption); +} + +/// If the fist block edge is a back edge, we are reentering the loop. +static bool alreadyExecutedAtLeastOneLoopIteration(const ExplodedNode *N, + const ObjCForCollectionStmt *FCS) { + if (!N) + return false; + + ProgramPoint P = N->getLocation(); + if (Optional<BlockEdge> BE = P.getAs<BlockEdge>()) { + return BE->getSrc()->getLoopTarget() == FCS; + } + + // Keep looking for a block edge. 
+ for (ExplodedNode::const_pred_iterator I = N->pred_begin(), + E = N->pred_end(); I != E; ++I) { + if (alreadyExecutedAtLeastOneLoopIteration(*I, FCS)) + return true; + } + + return false; +} + +void ObjCLoopChecker::checkPostStmt(const ObjCForCollectionStmt *FCS, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // Check if this is the branch for the end of the loop. + SVal CollectionSentinel = C.getSVal(FCS); + if (CollectionSentinel.isZeroConstant()) { + if (!alreadyExecutedAtLeastOneLoopIteration(C.getPredecessor(), FCS)) + State = assumeCollectionNonEmpty(C, State, FCS, /*Assumption*/false); + + // Otherwise, this is a branch that goes through the loop body. + } else { + State = checkCollectionNonNil(C, State, FCS); + State = checkElementNonNil(C, State, FCS); + State = assumeCollectionNonEmpty(C, State, FCS, /*Assumption*/true); + } + + if (!State) + C.generateSink(C.getState(), C.getPredecessor()); + else if (State != C.getState()) + C.addTransition(State); +} + +bool ObjCLoopChecker::isCollectionCountMethod(const ObjCMethodCall &M, + CheckerContext &C) const { + Selector S = M.getSelector(); + // Initialize the identifiers on first use. + if (!CountSelectorII) + CountSelectorII = &C.getASTContext().Idents.get("count"); + + // If the method returns collection count, record the value. 
+  return S.isUnarySelector() &&
+         (S.getIdentifierInfoForSlot(0) == CountSelectorII);
+}
+
+/// After an instance message to a receiver whose interface is a known
+/// NSDictionary, NSArray, NSSet or NSOrderedSet, record the symbol returned by
+/// the collection count method as the count of the receiver symbol.
+void ObjCLoopChecker::checkPostObjCMessage(const ObjCMethodCall &M,
+                                           CheckerContext &C) const {
+  if (!M.isInstanceMessage())
+    return;
+
+  const ObjCInterfaceDecl *ClassID = M.getReceiverInterface();
+  if (!ClassID)
+    return;
+
+  FoundationClass Class = findKnownClass(ClassID);
+  if (Class != FC_NSDictionary &&
+      Class != FC_NSArray &&
+      Class != FC_NSSet &&
+      Class != FC_NSOrderedSet)
+    return;
+
+  SymbolRef ContainerS = M.getReceiverSVal().getAsSymbol();
+  if (!ContainerS)
+    return;
+
+  // If we are processing a call to "count", get the symbolic value returned by
+  // a call to "count" and add it to the map.
+  if (!isCollectionCountMethod(M, C))
+    return;
+
+  const Expr *MsgExpr = M.getOriginExpr();
+  SymbolRef CountS = C.getSVal(MsgExpr).getAsSymbol();
+  if (CountS) {
+    ProgramStateRef State = C.getState();
+
+    C.getSymbolManager().addSymbolDependency(ContainerS, CountS);
+    State = State->set<ContainerCountMap>(ContainerS, CountS);
+
+    // A non-empty flag recorded for this container before its count symbol was
+    // known is removed from the map and re-applied through
+    // assumeCollectionNonEmpty now that the count is tracked.
+    if (const bool *NonEmpty = State->get<ContainerNonEmptyMap>(ContainerS)) {
+      State = State->remove<ContainerNonEmptyMap>(ContainerS);
+      State = assumeCollectionNonEmpty(C, State, ContainerS, *NonEmpty);
+    }
+
+    C.addTransition(State);
+  }
+}
+
+/// Returns the receiver symbol of \p Call when it is an Objective-C message
+/// whose method is declared on (or, for protocol methods, whose receiver is
+/// statically typed as) one of the Foundation classes accepted by the switch
+/// below. Returns nullptr in every other case, including a null \p Call.
+static SymbolRef getMethodReceiverIfKnownImmutable(const CallEvent *Call) {
+  const ObjCMethodCall *Message = dyn_cast_or_null<ObjCMethodCall>(Call);
+  if (!Message)
+    return nullptr;
+
+  const ObjCMethodDecl *MD = Message->getDecl();
+  if (!MD)
+    return nullptr;
+
+  const ObjCInterfaceDecl *StaticClass;
+  if (isa<ObjCProtocolDecl>(MD->getDeclContext())) {
+    // We can't find out where the method was declared without doing more work.
+    // Instead, see if the receiver is statically typed as a known immutable
+    // collection.
+    StaticClass = Message->getOriginExpr()->getReceiverInterface();
+  } else {
+    StaticClass = MD->getClassInterface();
+  }
+
+  if (!StaticClass)
+    return nullptr;
+
+  switch (findKnownClass(StaticClass, /*IncludeSuper=*/false)) {
+  case FC_None:
+    return nullptr;
+  case FC_NSArray:
+  case FC_NSDictionary:
+  case FC_NSEnumerator:
+  case FC_NSNull:
+  case FC_NSOrderedSet:
+  case FC_NSSet:
+  case FC_NSString:
+    break;
+  }
+
+  return Message->getReceiverSVal().getAsSymbol();
+}
+
+/// Drops the tracked count and non-empty information of every escaped symbol,
+/// except for the receiver of a method known to be declared on an immutable
+/// class. Returns the updated state.
+ProgramStateRef
+ObjCLoopChecker::checkPointerEscape(ProgramStateRef State,
+                                    const InvalidatedSymbols &Escaped,
+                                    const CallEvent *Call,
+                                    PointerEscapeKind Kind) const {
+  SymbolRef ImmutableReceiver = getMethodReceiverIfKnownImmutable(Call);
+
+  // Remove the invalidated symbols from the collection count map.
+  for (InvalidatedSymbols::const_iterator I = Escaped.begin(),
+       E = Escaped.end();
+       I != E; ++I) {
+    SymbolRef Sym = *I;
+
+    // Don't invalidate this symbol's count if we know the method being called
+    // is declared on an immutable class. This isn't completely correct if the
+    // receiver is also passed as an argument, but in most uses of NSArray,
+    // NSDictionary, etc. this isn't likely to happen in a dangerous way.
+    if (Sym == ImmutableReceiver)
+      continue;
+
+    // The symbol escaped. Pessimistically, assume that the count could have
+    // changed.
+    State = State->remove<ContainerCountMap>(Sym);
+    State = State->remove<ContainerNonEmptyMap>(Sym);
+  }
+  return State;
+}
+
+/// Removes the entries of dead container symbols from both program state maps
+/// and adds a transition to the resulting state.
+void ObjCLoopChecker::checkDeadSymbols(SymbolReaper &SymReaper,
+                                       CheckerContext &C) const {
+  ProgramStateRef State = C.getState();
+
+  // Remove the dead symbols from the collection count map.
+  ContainerCountMapTy Tracked = State->get<ContainerCountMap>();
+  for (ContainerCountMapTy::iterator I = Tracked.begin(),
+                                     E = Tracked.end(); I != E; ++I) {
+    SymbolRef Sym = I->first;
+    if (SymReaper.isDead(Sym)) {
+      State = State->remove<ContainerCountMap>(Sym);
+      State = State->remove<ContainerNonEmptyMap>(Sym);
+    }
+  }
+
+  C.addTransition(State);
+}
+
+namespace {
+/// \class ObjCNonNilReturnValueChecker
+/// The checker restricts the return values of APIs known to
+/// never (or almost never) return 'nil'.
+class ObjCNonNilReturnValueChecker
+    : public Checker<check::PostObjCMessage,
+                     check::PostStmt<ObjCArrayLiteral>,
+                     check::PostStmt<ObjCDictionaryLiteral>,
+                     check::PostStmt<ObjCBoxedExpr> > {
+    // Selectors compared against in checkPostObjCMessage. They are filled in
+    // lazily there, which is why they are mutable; Initialized records
+    // whether that has already happened.
+    mutable bool Initialized;
+    mutable Selector ObjectAtIndex;
+    mutable Selector ObjectAtIndexedSubscript;
+    mutable Selector NullSelector;
+
+public:
+  ObjCNonNilReturnValueChecker() : Initialized(false) {}
+
+  /// Returns \p State with the value of \p NonNullExpr assumed to be true
+  /// (non-nil) when that value is defined or unknown; otherwise returns
+  /// \p State unchanged.
+  ProgramStateRef assumeExprIsNonNull(const Expr *NonNullExpr,
+                                      ProgramStateRef State,
+                                      CheckerContext &C) const;
+  /// Convenience overload: applies the assumption to the current state and
+  /// adds the transition.
+  void assumeExprIsNonNull(const Expr *E, CheckerContext &C) const {
+    C.addTransition(assumeExprIsNonNull(E, C.getState(), C));
+  }
+
+  void checkPostStmt(const ObjCArrayLiteral *E, CheckerContext &C) const {
+    assumeExprIsNonNull(E, C);
+  }
+  void checkPostStmt(const ObjCDictionaryLiteral *E, CheckerContext &C) const {
+    assumeExprIsNonNull(E, C);
+  }
+  void checkPostStmt(const ObjCBoxedExpr *E, CheckerContext &C) const {
+    assumeExprIsNonNull(E, C);
+  }
+
+  void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const;
+};
+} // end anonymous namespace
+
+ProgramStateRef
+ObjCNonNilReturnValueChecker::assumeExprIsNonNull(const Expr *NonNullExpr,
+                                                  ProgramStateRef State,
+                                                  CheckerContext &C) const {
+  SVal Val = C.getSVal(NonNullExpr);
+  if (Optional<DefinedOrUnknownSVal> DV = Val.getAs<DefinedOrUnknownSVal>())
+    return State->assume(*DV, true);
+  return State;
+}
+
+/// Assumes a non-nil result for '[self init]' / '[super init]' in inlined
+/// callees, for objectAtIndex: / objectAtIndexedSubscript: on NSArray and
+/// NSOrderedSet, and for '[NSNull null]', then adds the transition.
+void ObjCNonNilReturnValueChecker::checkPostObjCMessage(const ObjCMethodCall &M,
+                                                        CheckerContext &C)
+                                                        const {
+  ProgramStateRef State = C.getState();
+
+  if (!Initialized) {
+    ASTContext &Ctx = C.getASTContext();
+    ObjectAtIndex = GetUnarySelector("objectAtIndex", Ctx);
+    ObjectAtIndexedSubscript = GetUnarySelector("objectAtIndexedSubscript", Ctx);
+    NullSelector = GetNullarySelector("null", Ctx);
+    // Record that the selectors are cached; without this the guard above never
+    // becomes false and the lookups are repeated for every message.
+    Initialized = true;
+  }
+
+  // Check the receiver type.
+  if (const ObjCInterfaceDecl *Interface = M.getReceiverInterface()) {
+
+    // Assume that object returned from '[self init]' or '[super init]' is not
+    // 'nil' if we are processing an inlined function/method.
+    //
+    // A defensive callee will (and should) check if the object returned by
+    // '[super init]' is 'nil' before doing its own initialization. However,
+    // since 'nil' is rarely returned in practice, we should not warn when the
+    // caller to the defensive constructor uses the object in contexts where
+    // 'nil' is not accepted.
+    if (!C.inTopFrame() && M.getDecl() &&
+        M.getDecl()->getMethodFamily() == OMF_init &&
+        M.isReceiverSelfOrSuper()) {
+      State = assumeExprIsNonNull(M.getOriginExpr(), State, C);
+    }
+
+    FoundationClass Cl = findKnownClass(Interface);
+
+    // Objects returned from
+    // [NSArray|NSOrderedSet]::[ObjectAtIndex|ObjectAtIndexedSubscript]
+    // are never 'nil'.
+    if (Cl == FC_NSArray || Cl == FC_NSOrderedSet) {
+      Selector Sel = M.getSelector();
+      if (Sel == ObjectAtIndex || Sel == ObjectAtIndexedSubscript) {
+        // Go ahead and assume the value is non-nil.
+        State = assumeExprIsNonNull(M.getOriginExpr(), State, C);
+      }
+    }
+
+    // Objects returned from [NSNull null] are not nil.
+    if (Cl == FC_NSNull) {
+      if (M.getSelector() == NullSelector) {
+        // Go ahead and assume the value is non-nil.
+        State = assumeExprIsNonNull(M.getOriginExpr(), State, C);
+      }
+    }
+  }
+  C.addTransition(State);
+}
+
+//===----------------------------------------------------------------------===//
+// Check registration.
+//===----------------------------------------------------------------------===//
+
+// Each checker below gets a register function, which instantiates it through
+// the CheckerManager, and a shouldRegister predicate. Every predicate here
+// returns true without inspecting the language options.
+
+void ento::registerNilArgChecker(CheckerManager &mgr) {
+  mgr.registerChecker<NilArgChecker>();
+}
+
+bool ento::shouldRegisterNilArgChecker(const LangOptions &LO) {
+  return true;
+}
+
+void ento::registerCFNumberChecker(CheckerManager &mgr) {
+  mgr.registerChecker<CFNumberChecker>();
+}
+
+bool ento::shouldRegisterCFNumberChecker(const LangOptions &LO) {
+  return true;
+}
+
+void ento::registerCFRetainReleaseChecker(CheckerManager &mgr) {
+  mgr.registerChecker<CFRetainReleaseChecker>();
+}
+
+bool ento::shouldRegisterCFRetainReleaseChecker(const LangOptions &LO) {
+  return true;
+}
+
+void ento::registerClassReleaseChecker(CheckerManager &mgr) {
+  mgr.registerChecker<ClassReleaseChecker>();
+}
+
+bool ento::shouldRegisterClassReleaseChecker(const LangOptions &LO) {
+  return true;
+}
+
+void ento::registerVariadicMethodTypeChecker(CheckerManager &mgr) {
+  mgr.registerChecker<VariadicMethodTypeChecker>();
+}
+
+bool ento::shouldRegisterVariadicMethodTypeChecker(const LangOptions &LO) {
+  return true;
+}
+
+void ento::registerObjCLoopChecker(CheckerManager &mgr) {
+  mgr.registerChecker<ObjCLoopChecker>();
+}
+
+bool ento::shouldRegisterObjCLoopChecker(const LangOptions &LO) {
+  return true;
+}
+
+void ento::registerObjCNonNilReturnValueChecker(CheckerManager &mgr) {
+  mgr.registerChecker<ObjCNonNilReturnValueChecker>();
+}
+
+bool ento::shouldRegisterObjCNonNilReturnValueChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
new file mode 100644
index 000000000000..0eb3c3d1d0e6
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
@@ -0,0 +1,189 @@
+//===-- BlockInCriticalSectionChecker.cpp -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines a checker for blocks in critical sections. This checker should find
+// the calls to blocking functions (for example: sleep, getc, fgets, read,
+// recv etc.) inside a critical section. When sleep(x) is called while a mutex
+// is held, other threads cannot lock the same mutex. This might take some
+// time, leading to bad performance or even deadlock.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+
+class BlockInCriticalSectionChecker : public Checker<check::PostCall> {
+
+  // Identifiers of the lock_guard and unique_lock classes; filled in lazily by
+  // initIdentifierInfo().
+  mutable IdentifierInfo *IILockGuard, *IIUniqueLock;
+
+  // Descriptions of the lock, unlock and blocking functions recognized by the
+  // checker; the names are set in the constructor.
+  CallDescription LockFn, UnlockFn, SleepFn, GetcFn, FgetsFn, ReadFn, RecvFn,
+                  PthreadLockFn, PthreadTryLockFn, PthreadUnlockFn,
+                  MtxLock, MtxTimedLock, MtxTryLock, MtxUnlock;
+
+  StringRef ClassLockGuard, ClassUniqueLock;
+
+  mutable bool IdentifierInfoInitialized;
+
+  std::unique_ptr<BugType> BlockInCritSectionBugType;
+
+  void initIdentifierInfo(ASTContext &Ctx) const;
+
+  void reportBlockInCritSection(SymbolRef FileDescSym,
+                                const CallEvent &call,
+                                CheckerContext &C) const;
+
+public:
+  BlockInCriticalSectionChecker();
+
+  bool isBlockingFunction(const CallEvent &Call) const;
+  bool isLockFunction(const CallEvent &Call) const;
+  bool isUnlockFunction(const CallEvent &Call) const;
+
+  /// Process unlock.
+  /// Process lock.
+  /// Process blocking functions (sleep, getc, fgets, read, recv)
+  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
+};
+
+} // end anonymous namespace
+
+// Program state trait holding an unsigned counter; checkPostCall increments it
+// on lock calls and decrements it on unlock calls.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(MutexCounter, unsigned)
+
+BlockInCriticalSectionChecker::BlockInCriticalSectionChecker()
+    : IILockGuard(nullptr), IIUniqueLock(nullptr),
+      LockFn("lock"), UnlockFn("unlock"), SleepFn("sleep"), GetcFn("getc"),
+      FgetsFn("fgets"), ReadFn("read"), RecvFn("recv"),
+      PthreadLockFn("pthread_mutex_lock"),
+      PthreadTryLockFn("pthread_mutex_trylock"),
+      PthreadUnlockFn("pthread_mutex_unlock"),
+      MtxLock("mtx_lock"),
+      MtxTimedLock("mtx_timedlock"),
+      MtxTryLock("mtx_trylock"),
+      MtxUnlock("mtx_unlock"),
+      ClassLockGuard("lock_guard"),
+      ClassUniqueLock("unique_lock"),
+      IdentifierInfoInitialized(false) {
+  // Initialize the bug type.
+  BlockInCritSectionBugType.reset(
+      new BugType(this, "Call to blocking function in critical section",
+                        "Blocking Error"));
+}
+
+// Looks up the identifiers for the lock_guard and unique_lock class names the
+// first time it is called; later calls do nothing.
+void BlockInCriticalSectionChecker::initIdentifierInfo(ASTContext &Ctx) const {
+  if (!IdentifierInfoInitialized) {
+    /* In case of checking C code, or when the corresponding headers are not
+     * included, we might end up querying the identifier table every time this
+     * function is called instead of returning early. To avoid this, a bool
+     * variable (IdentifierInfoInitialized) is used and the function will be run
+     * only once. */
+    IILockGuard = &Ctx.Idents.get(ClassLockGuard);
+    IIUniqueLock = &Ctx.Idents.get(ClassUniqueLock);
+    IdentifierInfoInitialized = true;
+  }
+}
+
+// Returns true when the call matches sleep, getc, fgets, read or recv.
+bool BlockInCriticalSectionChecker::isBlockingFunction(const CallEvent &Call) const {
+  if (Call.isCalled(SleepFn)
+      || Call.isCalled(GetcFn)
+      || Call.isCalled(FgetsFn)
+      || Call.isCalled(ReadFn)
+      || Call.isCalled(RecvFn)) {
+    return true;
+  }
+  return false;
+}
+
+// Returns true for a constructor call of a class whose identifier is
+// lock_guard or unique_lock, and for calls matching one of the lock function
+// descriptions (including the trylock and timedlock variants).
+bool BlockInCriticalSectionChecker::isLockFunction(const CallEvent &Call) const {
+  if (const auto *Ctor = dyn_cast<CXXConstructorCall>(&Call)) {
+    auto IdentifierInfo = Ctor->getDecl()->getParent()->getIdentifier();
+    if (IdentifierInfo == IILockGuard || IdentifierInfo == IIUniqueLock)
+      return true;
+  }
+
+  if (Call.isCalled(LockFn)
+      || Call.isCalled(PthreadLockFn)
+      || Call.isCalled(PthreadTryLockFn)
+      || Call.isCalled(MtxLock)
+      || Call.isCalled(MtxTimedLock)
+      || Call.isCalled(MtxTryLock)) {
+    return true;
+  }
+  return false;
+}
+
+// Returns true for a destructor call of a class whose identifier is
+// lock_guard or unique_lock, and for calls matching one of the unlock function
+// descriptions.
+bool BlockInCriticalSectionChecker::isUnlockFunction(const CallEvent &Call) const {
+  if (const auto *Dtor = dyn_cast<CXXDestructorCall>(&Call)) {
+    const auto *DRecordDecl = cast<CXXRecordDecl>(Dtor->getDecl()->getParent());
+    auto IdentifierInfo = DRecordDecl->getIdentifier();
+    if (IdentifierInfo == IILockGuard || IdentifierInfo == IIUniqueLock)
+      return true;
+  }
+
+  if (Call.isCalled(UnlockFn)
+      || Call.isCalled(PthreadUnlockFn)
+      || Call.isCalled(MtxUnlock)) {
+    return true;
+  }
+  return false;
+}
+
+void BlockInCriticalSectionChecker::checkPostCall(const CallEvent &Call,
+                                                  CheckerContext &C) const {
+  initIdentifierInfo(C.getASTContext());
+
+  if (!isBlockingFunction(Call)
+      && !isLockFunction(Call)
+      && !isUnlockFunction(Call))
+    return;
+
+  ProgramStateRef State = C.getState();
+  unsigned mutexCount = State->get<MutexCounter>();
+  // An unlock only decrements a positive counter; an unlock seen while the
+  // counter is zero falls through every branch and changes nothing.
+  if (isUnlockFunction(Call) && mutexCount > 0) {
+    State = State->set<MutexCounter>(--mutexCount);
+    C.addTransition(State);
+  } else if (isLockFunction(Call)) {
+    State = State->set<MutexCounter>(++mutexCount);
+    C.addTransition(State);
+  } else if (mutexCount > 0) {
+    // Reached for the remaining calls while the counter is positive; the
+    // call's return value symbol is marked interesting in the report.
+    SymbolRef BlockDesc = Call.getReturnValue().getAsSymbol();
+    reportBlockInCritSection(BlockDesc, Call, C);
+  }
+}
+
+// Emits a non-fatal report naming the called function; does nothing when no
+// error node can be generated.
+void BlockInCriticalSectionChecker::reportBlockInCritSection(
+    SymbolRef BlockDescSym, const CallEvent &Call, CheckerContext &C) const {
+  ExplodedNode *ErrNode = C.generateNonFatalErrorNode();
+  if (!ErrNode)
+    return;
+
+  std::string msg;
+  llvm::raw_string_ostream os(msg);
+  os << "Call to blocking function '" << Call.getCalleeIdentifier()->getName()
+     << "' inside of critical section";
+  auto R = std::make_unique<PathSensitiveBugReport>(*BlockInCritSectionBugType,
+                                                    os.str(), ErrNode);
+  R->addRange(Call.getSourceRange());
+  R->markInteresting(BlockDescSym);
+  C.emitReport(std::move(R));
+}
+
+void ento::registerBlockInCriticalSectionChecker(CheckerManager &mgr) {
+  mgr.registerChecker<BlockInCriticalSectionChecker>();
+}
+
+bool ento::shouldRegisterBlockInCriticalSectionChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp
new file mode 100644
index 000000000000..1423b9c39b26
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp
@@ -0,0 +1,162 @@
+//== BoolAssignmentChecker.cpp - Boolean assignment checker -----*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines BoolAssignmentChecker, a builtin check in ExprEngine that
+// performs checks for assignment of non-Boolean values to Boolean variables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+  class BoolAssignmentChecker : public Checker< check::Bind > {
+    // Bug type, created on the first report in emitReport().
+    mutable std::unique_ptr<BuiltinBug> BT;
+    void emitReport(ProgramStateRef state, CheckerContext &C) const;
+  public:
+    void checkBind(SVal loc, SVal val, const Stmt *S, CheckerContext &C) const;
+  };
+} // end anonymous namespace
+
+// Emits a non-fatal "Assignment of a non-Boolean value" report on the given
+// state; does nothing when no error node can be generated.
+void BoolAssignmentChecker::emitReport(ProgramStateRef state,
+                                       CheckerContext &C) const {
+  if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) {
+    if (!BT)
+      BT.reset(new BuiltinBug(this, "Assignment of a non-Boolean value"));
+
+    C.emitReport(
+        std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N));
+  }
+}
+
+// Returns true for the builtin Boolean type and for typedefs named BOOL,
+// _Bool or Boolean.
+static bool isBooleanType(QualType Ty) {
+  if (Ty->isBooleanType()) // C++ or C99
+    return true;
+
+  if (const TypedefType *TT = Ty->getAs<TypedefType>())
+    return TT->getDecl()->getName() == "BOOL"   || // Objective-C
+           TT->getDecl()->getName() == "_Bool"  || // stdbool.h < C99
+           TT->getDecl()->getName() == "Boolean";  // MacTypes.h
+
+  return false;
+}
+
+void BoolAssignmentChecker::checkBind(SVal loc, SVal val, const Stmt *S,
+                                      CheckerContext &C) const {
+
+  // We are only interested in stores into Booleans.
+  const TypedValueRegion *TR =
+    dyn_cast_or_null<TypedValueRegion>(loc.getAsRegion());
+
+  if (!TR)
+    return;
+
+  QualType valTy = TR->getValueType();
+
+  if (!isBooleanType(valTy))
+    return;
+
+  // Get the value of the right-hand side.  We only care about values
+  // that are defined (UnknownVals and UndefinedVals are handled by other
+  // checkers).
+  Optional<DefinedSVal> DV = val.getAs<DefinedSVal>();
+  if (!DV)
+    return;
+
+  // Check if the assigned value meets our criteria for correctness.  It must
+  // be a value that is either 0 or 1.  One way to check this is to see if
+  // the value is possibly < 0 (for a negative value) or greater than 1.
+  ProgramStateRef state = C.getState();
+  SValBuilder &svalBuilder = C.getSValBuilder();
+  ConstraintManager &CM = C.getConstraintManager();
+
+  // First, ensure that the value is >= 0.
+  DefinedSVal zeroVal = svalBuilder.makeIntVal(0, valTy);
+  SVal greaterThanOrEqualToZeroVal =
+    svalBuilder.evalBinOp(state, BO_GE, *DV, zeroVal,
+                          svalBuilder.getConditionType());
+
+  Optional<DefinedSVal> greaterThanEqualToZero =
+      greaterThanOrEqualToZeroVal.getAs<DefinedSVal>();
+
+  if (!greaterThanEqualToZero) {
+    // The SValBuilder cannot construct a valid SVal for this condition.
+    // This means we cannot properly reason about it.
+    return;
+  }
+
+  ProgramStateRef stateLT, stateGE;
+  std::tie(stateGE, stateLT) = CM.assumeDual(state, *greaterThanEqualToZero);
+
+  // Is it possible for the value to be less than zero?
+  if (stateLT) {
+    // It is possible for the value to be less than zero.  We only
+    // want to emit a warning, however, if that value is fully constrained.
+    // If it is possible for the value to be >= 0, then essentially the
+    // value is underconstrained and there is nothing left to be done.
+    if (!stateGE)
+      emitReport(stateLT, C);
+
+    // In either case, we are done.
+    return;
+  }
+
+  // If we reach here, it must be the case that the value is constrained
+  // to only be >= 0.
+  assert(stateGE == state);
+
+  // At this point we know that the value is >= 0.
+  // Now check to ensure that the value is <= 1.
+  DefinedSVal OneVal = svalBuilder.makeIntVal(1, valTy);
+  SVal lessThanEqToOneVal =
+    svalBuilder.evalBinOp(state, BO_LE, *DV, OneVal,
+                          svalBuilder.getConditionType());
+
+  Optional<DefinedSVal> lessThanEqToOne =
+      lessThanEqToOneVal.getAs<DefinedSVal>();
+
+  if (!lessThanEqToOne) {
+    // The SValBuilder cannot construct a valid SVal for this condition.
+    // This means we cannot properly reason about it.
+    return;
+  }
+
+  ProgramStateRef stateGT, stateLE;
+  std::tie(stateLE, stateGT) = CM.assumeDual(state, *lessThanEqToOne);
+
+  // Is it possible for the value to be greater than one?
+  if (stateGT) {
+    // It is possible for the value to be greater than one.  We only
+    // want to emit a warning, however, if that value is fully constrained.
+    // If it is possible for the value to be <= 1, then essentially the
+    // value is underconstrained and there is nothing left to be done.
+    if (!stateLE)
+      emitReport(stateGT, C);
+
+    // In either case, we are done.
+    return;
+  }
+
+  // If we reach here, it must be the case that the value is constrained
+  // to only be <= 1.
+  assert(stateLE == state);
+}
+
+void ento::registerBoolAssignmentChecker(CheckerManager &mgr) {
+    mgr.registerChecker<BoolAssignmentChecker>();
+}
+
+bool ento::shouldRegisterBoolAssignmentChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
new file mode 100644
index 000000000000..10594e331cbe
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
@@ -0,0 +1,139 @@
+//=== BuiltinFunctionChecker.cpp --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This checker evaluates clang builtin functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+
+class BuiltinFunctionChecker : public Checker<eval::Call> {
+public:
+  bool evalCall(const CallEvent &Call, CheckerContext &C) const;
+};
+
+}
+
+// Models the builtins listed in the switch below. Returns false when the
+// callee is not a FunctionDecl or its builtin ID is not handled here, and true
+// once the call has been evaluated.
+bool BuiltinFunctionChecker::evalCall(const CallEvent &Call,
+                                      CheckerContext &C) const {
+  ProgramStateRef state = C.getState();
+  const auto *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
+  if (!FD)
+    return false;
+
+  const LocationContext *LCtx = C.getLocationContext();
+  const Expr *CE = Call.getOriginExpr();
+
+  switch (FD->getBuiltinID()) {
+  default:
+    return false;
+
+  case Builtin::BI__builtin_assume: {
+    assert (Call.getNumArgs() > 0);
+    SVal Arg = Call.getArgSVal(0);
+    if (Arg.isUndef())
+      return true; // Return true to model purity.
+
+    state = state->assume(Arg.castAs<DefinedOrUnknownSVal>(), true);
+    // FIXME: do we want to warn here? Not right now. The most reports might
+    // come from infeasible paths, thus being false positives.
+    if (!state) {
+      // The assumption cannot hold on this path, so a sink node is generated
+      // from the original state.
+      C.generateSink(C.getState(), C.getPredecessor());
+      return true;
+    }
+
+    C.addTransition(state);
+    return true;
+  }
+
+  case Builtin::BI__builtin_unpredictable:
+  case Builtin::BI__builtin_expect:
+  case Builtin::BI__builtin_assume_aligned:
+  case Builtin::BI__builtin_addressof: {
+    // For __builtin_unpredictable, __builtin_expect, and
+    // __builtin_assume_aligned, just return the value of the subexpression.
+    // __builtin_addressof is going from a reference to a pointer, but those
+    // are represented the same way in the analyzer.
+    assert (Call.getNumArgs() > 0);
+    SVal Arg = Call.getArgSVal(0);
+    C.addTransition(state->BindExpr(CE, LCtx, Arg));
+    return true;
+  }
+
+  case Builtin::BI__builtin_alloca_with_align:
+  case Builtin::BI__builtin_alloca: {
+    // FIXME: Refactor into StoreManager itself?
+    MemRegionManager& RM = C.getStoreManager().getRegionManager();
+    const AllocaRegion* R =
+      RM.getAllocaRegion(CE, C.blockCount(), C.getLocationContext());
+
+    // Set the extent of the region in bytes. This enables us to use the
+    // SVal of the argument directly. If we save the extent in bits, we
+    // cannot represent values like symbol*8.
+    auto Size = Call.getArgSVal(0);
+    if (Size.isUndef())
+      return true; // Return true to model purity.
+
+    SValBuilder& svalBuilder = C.getSValBuilder();
+    DefinedOrUnknownSVal Extent = R->getExtent(svalBuilder);
+    DefinedOrUnknownSVal extentMatchesSizeArg =
+      svalBuilder.evalEQ(state, Extent, Size.castAs<DefinedOrUnknownSVal>());
+    state = state->assume(extentMatchesSizeArg, true);
+    assert(state && "The region should not have any previous constraints");
+
+    C.addTransition(state->BindExpr(CE, LCtx, loc::MemRegionVal(R)));
+    return true;
+  }
+
+  case Builtin::BI__builtin_dynamic_object_size:
+  case Builtin::BI__builtin_object_size:
+  case Builtin::BI__builtin_constant_p: {
+    // This must be resolvable at compile time, so we defer to the constant
+    // evaluator for a value.
+    SValBuilder &SVB = C.getSValBuilder();
+    SVal V = UnknownVal();
+    Expr::EvalResult EVResult;
+    if (CE->EvaluateAsInt(EVResult, C.getASTContext(), Expr::SE_NoSideEffects)) {
+      // Make sure the result has the correct type.
+      llvm::APSInt Result = EVResult.Val.getInt();
+      BasicValueFactory &BVF = SVB.getBasicValueFactory();
+      BVF.getAPSIntType(CE->getType()).apply(Result);
+      V = SVB.makeIntVal(Result);
+    }
+
+    if (FD->getBuiltinID() == Builtin::BI__builtin_constant_p) {
+      // If we didn't manage to figure out if the value is constant or not,
+      // it is safe to assume that it's not constant and unsafe to assume
+      // that it's constant.
+      if (V.isUnknown())
+        V = SVB.makeIntVal(0, CE->getType());
+    }
+
+    C.addTransition(state->BindExpr(CE, LCtx, V));
+    return true;
+  }
+  }
+}
+
+void ento::registerBuiltinFunctionChecker(CheckerManager &mgr) {
+  mgr.registerChecker<BuiltinFunctionChecker>();
+}
+
+bool ento::shouldRegisterBuiltinFunctionChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
new file mode 100644
index 000000000000..503c451670b8
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -0,0 +1,2420 @@
+//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines CStringChecker, which is an assortment of checks on calls
+// to functions in <string.h>.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "InterCheckerAPI.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CStringChecker : public Checker< eval::Call,
+                                         check::PreStmt<DeclStmt>,
+                                         check::LiveSymbols,
+                                         check::DeadSymbols,
+                                         check::RegionChanges
+                                         > {
+  mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
+      BT_NotCString, BT_AdditionOverflow;
+
+  // Text inserted into diagnostics (see checkNonNull, which asserts it is
+  // set before building its message).
+  mutable const char *CurrentFunctionDescription;
+
+public:
+  /// The filter is used to filter out the diagnostics which are not enabled by
+  /// the user.
+  struct CStringChecksFilter {
+    DefaultBool CheckCStringNullArg;
+    DefaultBool CheckCStringOutOfBounds;
+    DefaultBool CheckCStringBufferOverlap;
+    DefaultBool CheckCStringNotNullTerm;
+
+    CheckerNameRef CheckNameCStringNullArg;
+    CheckerNameRef CheckNameCStringOutOfBounds;
+    CheckerNameRef CheckNameCStringBufferOverlap;
+    CheckerNameRef CheckNameCStringNotNullTerm;
+  };
+
+  CStringChecksFilter Filter;
+
+  static void *getTag() { static int tag; return &tag; }
+
+  bool evalCall(const CallEvent &Call, CheckerContext &C) const;
+  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
+  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
+  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
+
+  ProgramStateRef
+    checkRegionChanges(ProgramStateRef state,
+                       const InvalidatedSymbols *,
+                       ArrayRef<const MemRegion *> ExplicitRegions,
+                       ArrayRef<const MemRegion *> Regions,
+                       const LocationContext *LCtx,
+                       const CallEvent *Call) const;
+
+  // Pointer to a member function that models one library call.
+  typedef void (CStringChecker::*FnCheck)(CheckerContext &,
+                                          const CallExpr *) const;
+  // Maps each recognized function to the member that models it. Several
+  // names share a handler (explicit_memset with memset, bcmp with memcmp,
+  // explicit_bzero with bzero).
+  // NOTE(review): the trailing integer is presumably the required argument
+  // count of the described call -- confirm against CallDescription.
+  CallDescriptionMap<FnCheck> Callbacks = {
+      {{CDF_MaybeBuiltin, "memcpy", 3}, &CStringChecker::evalMemcpy},
+      {{CDF_MaybeBuiltin, "mempcpy", 3}, &CStringChecker::evalMempcpy},
+      {{CDF_MaybeBuiltin, "memcmp", 3}, &CStringChecker::evalMemcmp},
+      {{CDF_MaybeBuiltin, "memmove", 3}, &CStringChecker::evalMemmove},
+      {{CDF_MaybeBuiltin, "memset", 3}, &CStringChecker::evalMemset},
+      {{CDF_MaybeBuiltin, "explicit_memset", 3}, &CStringChecker::evalMemset},
+      {{CDF_MaybeBuiltin, "strcpy", 2}, &CStringChecker::evalStrcpy},
+      {{CDF_MaybeBuiltin, "strncpy", 3}, &CStringChecker::evalStrncpy},
+      {{CDF_MaybeBuiltin, "stpcpy", 2}, &CStringChecker::evalStpcpy},
+      {{CDF_MaybeBuiltin, "strlcpy", 3}, &CStringChecker::evalStrlcpy},
+      {{CDF_MaybeBuiltin, "strcat", 2}, &CStringChecker::evalStrcat},
+      {{CDF_MaybeBuiltin, "strncat", 3}, &CStringChecker::evalStrncat},
+      {{CDF_MaybeBuiltin, "strlcat", 3}, &CStringChecker::evalStrlcat},
+      {{CDF_MaybeBuiltin, "strlen", 1}, &CStringChecker::evalstrLength},
+      {{CDF_MaybeBuiltin, "strnlen", 2}, &CStringChecker::evalstrnLength},
+      {{CDF_MaybeBuiltin, "strcmp", 2}, &CStringChecker::evalStrcmp},
+      {{CDF_MaybeBuiltin, "strncmp", 3}, &CStringChecker::evalStrncmp},
+      {{CDF_MaybeBuiltin, "strcasecmp", 2}, &CStringChecker::evalStrcasecmp},
+      {{CDF_MaybeBuiltin, "strncasecmp", 3}, &CStringChecker::evalStrncasecmp},
+      {{CDF_MaybeBuiltin, "strsep", 2}, &CStringChecker::evalStrsep},
+      {{CDF_MaybeBuiltin, "bcopy", 3}, &CStringChecker::evalBcopy},
+      {{CDF_MaybeBuiltin, "bcmp", 3}, &CStringChecker::evalMemcmp},
+      {{CDF_MaybeBuiltin, "bzero", 2}, &CStringChecker::evalBzero},
+      {{CDF_MaybeBuiltin, "explicit_bzero", 2}, &CStringChecker::evalBzero},
+  };
+
+  // These require a bit of special handling.
+  CallDescription StdCopy{{"std", "copy"}, 3},
+      StdCopyBackward{{"std", "copy_backward"}, 3};
+
+  FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
+  void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
+  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
+  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
+                      ProgramStateRef state,
+                      const Expr *Size,
+                      const Expr *Source,
+                      const Expr *Dest,
+                      bool Restricted = false,
+                      bool IsMempcpy = false) const;
+
+  void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
+
+  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
+  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
+  void evalstrLengthCommon(CheckerContext &C,
+                           const CallExpr *CE,
+                           bool IsStrnlen = false) const;
+
+  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrcpyCommon(CheckerContext &C,
+                        const CallExpr *CE,
+                        bool returnEnd,
+                        bool isBounded,
+                        bool isAppending,
+                        bool returnPtr = true) const;
+
+  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrlcat(CheckerContext &C, const CallExpr *CE) const;
+
+  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrcmpCommon(CheckerContext &C,
+                        const CallExpr *CE,
+                        bool isBounded = false,
+                        bool ignoreCase = false) const;
+
+  void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
+
+  void evalStdCopy(CheckerContext &C, const CallExpr *CE) const;
+  void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const;
+  void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const;
+  void evalMemset(CheckerContext &C, const CallExpr *CE) const;
+  void evalBzero(CheckerContext &C, const CallExpr *CE) const;
+
+  // Utility methods
+  std::pair<ProgramStateRef , ProgramStateRef >
+  static assumeZero(CheckerContext &C,
+                    ProgramStateRef state, SVal V, QualType Ty);
+
+  static ProgramStateRef setCStringLength(ProgramStateRef state,
+                                              const MemRegion *MR,
+                                              SVal strLength);
+  static SVal getCStringLengthForRegion(CheckerContext &C,
+                                        ProgramStateRef &state,
+                                        const Expr *Ex,
+                                        const MemRegion *MR,
+                                        bool hypothetical);
+  SVal getCStringLength(CheckerContext &C,
+                        ProgramStateRef &state,
+                        const Expr *Ex,
+                        SVal Buf,
+                        bool hypothetical = false) const;
+
+  const StringLiteral *getCStringLiteral(CheckerContext &C,
+                                         ProgramStateRef &state,
+                                         const Expr *expr,
+                                         SVal val) const;
+
+  static ProgramStateRef InvalidateBuffer(CheckerContext &C,
+                                          ProgramStateRef state,
+                                          const Expr *Ex, SVal V,
+                                          bool IsSourceBuffer,
+                                          const Expr *Size);
+
+  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
+                              const MemRegion *MR);
+
+  static bool memsetAux(const Expr *DstBuffer, SVal CharE,
+                        const Expr *Size, CheckerContext &C,
+                        ProgramStateRef &State);
+
+  // Re-usable checks
+  ProgramStateRef checkNonNull(CheckerContext &C,
+                               ProgramStateRef state,
+                               const Expr *S,
+                               SVal l,
+                               unsigned IdxOfArg) const;
+  ProgramStateRef CheckLocation(CheckerContext &C,
+                                ProgramStateRef state,
+                                const Expr *S,
+                                SVal l,
+                                const char *message = nullptr) const;
+  ProgramStateRef CheckBufferAccess(CheckerContext &C,
+                                    ProgramStateRef state,
+                                    const Expr *Size,
+                                    const Expr *FirstBuf,
+                                    const Expr *SecondBuf,
+                                    const char *firstMessage = nullptr,
+                                    const char *secondMessage = nullptr,
+                                    bool WarnAboutSize = false) const;
+
+  ProgramStateRef CheckBufferAccess(CheckerContext &C,
+                                    ProgramStateRef state,
+                                    const Expr *Size,
+                                    const Expr *Buf,
+                                    const char *message = nullptr,
+                                    bool WarnAboutSize = false) const {
+    // This is a convenience overload.
+    return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr,
+                             WarnAboutSize);
+  }
+  ProgramStateRef CheckOverlap(CheckerContext &C,
+                               ProgramStateRef state,
+                               const Expr *Size,
+                               const Expr *First,
+                               const Expr *Second) const;
+  void emitOverlapBug(CheckerContext &C,
+                      ProgramStateRef state,
+                      const Stmt *First,
+                      const Stmt *Second) const;
+
+  void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
+                      StringRef WarningMsg) const;
+  void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
+                          const Stmt *S, StringRef WarningMsg) const;
+  void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
+                         const Stmt *S, StringRef WarningMsg) const;
+  void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
+
+  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
+                                            ProgramStateRef state,
+                                            NonLoc left,
+                                            NonLoc right) const;
+
+  // Return true if the destination buffer of the copy function may be in bound.
+  // Expects SVal of Size to be positive and unsigned.
+  // Expects SVal of FirstBuf to be a FieldRegion.
+  static bool IsFirstBufInBound(CheckerContext &C,
+                                ProgramStateRef state,
+                                const Expr *FirstBuf,
+                                const Expr *Size);
+};
+
+} //end anonymous namespace
+
+// Program state map from a memory region to an SVal, registered under the
+// name CStringLength.
+REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
+
+//===----------------------------------------------------------------------===//
+// Individual checks and utility methods.
+//===----------------------------------------------------------------------===//
+
+// Splits \p state on whether \p V equals the zero value of type \p Ty and
+// returns the resulting pair of states. When \p V is not a DefinedSVal, both
+// members of the pair are the unchanged input state.
+std::pair<ProgramStateRef , ProgramStateRef >
+CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
+                           QualType Ty) {
+  Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
+  if (!val)
+    return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
+
+  SValBuilder &svalBuilder = C.getSValBuilder();
+  DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
+  return state->assume(svalBuilder.evalEQ(state, *val, zero));
+}
+
+// Returns the state in which \p l is non-null. Returns nullptr when \p state
+// is already null or when \p l can only be null; in the latter case a null
+// argument bug is emitted if that check is enabled in the filter.
+ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
+                                            ProgramStateRef state,
+                                            const Expr *S, SVal l,
+                                            unsigned IdxOfArg) const {
+  // If a previous check has failed, propagate the failure.
+  if (!state)
+    return nullptr;
+
+  ProgramStateRef stateNull, stateNonNull;
+  std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
+
+  if (stateNull && !stateNonNull) {
+    if (Filter.CheckCStringNullArg) {
+      SmallString<80> buf;
+      llvm::raw_svector_ostream OS(buf);
+      assert(CurrentFunctionDescription);
+      OS << "Null pointer argument in call to " << CurrentFunctionDescription
+         << ' ' << IdxOfArg << llvm::getOrdinalSuffix(IdxOfArg)
+         << " parameter";
+
+      emitNullArgBug(C, stateNull, S, OS.str());
+    }
+    return nullptr;
+  }
+
+  // From here on, assume that the value is non-null.
+  assert(stateNonNull);
+  return stateNonNull;
+}
+
+// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
+ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, + ProgramStateRef state, + const Expr *S, SVal l, + const char *warningMsg) const { + // If a previous check has failed, propagate the failure. + if (!state) + return nullptr; + + // Check for out of bound array element access. + const MemRegion *R = l.getAsRegion(); + if (!R) + return state; + + const ElementRegion *ER = dyn_cast<ElementRegion>(R); + if (!ER) + return state; + + if (ER->getValueType() != C.getASTContext().CharTy) + return state; + + // Get the size of the array. + const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); + SValBuilder &svalBuilder = C.getSValBuilder(); + SVal Extent = + svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); + DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>(); + + // Get the index of the accessed element. + DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); + + ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true); + ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false); + if (StOutBound && !StInBound) { + // These checks are either enabled by the CString out-of-bounds checker + // explicitly or implicitly by the Malloc checker. + // In the latter case we only do modeling but do not emit warning. + if (!Filter.CheckCStringOutOfBounds) + return nullptr; + // Emit a bug report. + if (warningMsg) { + emitOutOfBoundsBug(C, StOutBound, S, warningMsg); + } else { + assert(CurrentFunctionDescription); + assert(CurrentFunctionDescription[0] != '\0'); + + SmallString<80> buf; + llvm::raw_svector_ostream os(buf); + os << toUppercase(CurrentFunctionDescription[0]) + << &CurrentFunctionDescription[1] + << " accesses out-of-bound array element"; + emitOutOfBoundsBug(C, StOutBound, S, os.str()); + } + return nullptr; + } + + // Array bound check succeeded. From this point forward the array bound + // should always succeed. 
+ return StInBound; +} + +ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C, + ProgramStateRef state, + const Expr *Size, + const Expr *FirstBuf, + const Expr *SecondBuf, + const char *firstMessage, + const char *secondMessage, + bool WarnAboutSize) const { + // If a previous check has failed, propagate the failure. + if (!state) + return nullptr; + + SValBuilder &svalBuilder = C.getSValBuilder(); + ASTContext &Ctx = svalBuilder.getContext(); + const LocationContext *LCtx = C.getLocationContext(); + + QualType sizeTy = Size->getType(); + QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); + + // Check that the first buffer is non-null. + SVal BufVal = C.getSVal(FirstBuf); + state = checkNonNull(C, state, FirstBuf, BufVal, 1); + if (!state) + return nullptr; + + // If out-of-bounds checking is turned off, skip the rest. + if (!Filter.CheckCStringOutOfBounds) + return state; + + // Get the access length and make sure it is known. + // FIXME: This assumes the caller has already checked that the access length + // is positive. And that it's unsigned. + SVal LengthVal = C.getSVal(Size); + Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); + if (!Length) + return state; + + // Compute the offset of the last element to be accessed: size-1. + NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); + SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy); + if (Offset.isUnknown()) + return nullptr; + NonLoc LastOffset = Offset.castAs<NonLoc>(); + + // Check that the first buffer is sufficiently long. + SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); + if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) { + const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf); + + SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, + LastOffset, PtrTy); + state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage); + + // If the buffer isn't large enough, abort. 
+ if (!state) + return nullptr; + } + + // If there's a second buffer, check it as well. + if (SecondBuf) { + BufVal = state->getSVal(SecondBuf, LCtx); + state = checkNonNull(C, state, SecondBuf, BufVal, 2); + if (!state) + return nullptr; + + BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); + if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) { + const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf); + + SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, + LastOffset, PtrTy); + state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage); + } + } + + // Large enough or not, return this state! + return state; +} + +ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, + ProgramStateRef state, + const Expr *Size, + const Expr *First, + const Expr *Second) const { + if (!Filter.CheckCStringBufferOverlap) + return state; + + // Do a simple check for overlap: if the two arguments are from the same + // buffer, see if the end of the first is greater than the start of the second + // or vice versa. + + // If a previous check has failed, propagate the failure. + if (!state) + return nullptr; + + ProgramStateRef stateTrue, stateFalse; + + // Get the buffer values and make sure they're known locations. + const LocationContext *LCtx = C.getLocationContext(); + SVal firstVal = state->getSVal(First, LCtx); + SVal secondVal = state->getSVal(Second, LCtx); + + Optional<Loc> firstLoc = firstVal.getAs<Loc>(); + if (!firstLoc) + return state; + + Optional<Loc> secondLoc = secondVal.getAs<Loc>(); + if (!secondLoc) + return state; + + // Are the two values the same? + SValBuilder &svalBuilder = C.getSValBuilder(); + std::tie(stateTrue, stateFalse) = + state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); + + if (stateTrue && !stateFalse) { + // If the values are known to be equal, that's automatically an overlap. 
+ emitOverlapBug(C, stateTrue, First, Second); + return nullptr; + } + + // assume the two expressions are not equal. + assert(stateFalse); + state = stateFalse; + + // Which value comes first? + QualType cmpTy = svalBuilder.getConditionType(); + SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, + *firstLoc, *secondLoc, cmpTy); + Optional<DefinedOrUnknownSVal> reverseTest = + reverse.getAs<DefinedOrUnknownSVal>(); + if (!reverseTest) + return state; + + std::tie(stateTrue, stateFalse) = state->assume(*reverseTest); + if (stateTrue) { + if (stateFalse) { + // If we don't know which one comes first, we can't perform this test. + return state; + } else { + // Switch the values so that firstVal is before secondVal. + std::swap(firstLoc, secondLoc); + + // Switch the Exprs as well, so that they still correspond. + std::swap(First, Second); + } + } + + // Get the length, and make sure it too is known. + SVal LengthVal = state->getSVal(Size, LCtx); + Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); + if (!Length) + return state; + + // Convert the first buffer's start address to char*. + // Bail out if the cast fails. + ASTContext &Ctx = svalBuilder.getContext(); + QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); + SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, + First->getType()); + Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>(); + if (!FirstStartLoc) + return state; + + // Compute the end of the first buffer. Bail out if THAT fails. + SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, + *FirstStartLoc, *Length, CharPtrTy); + Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>(); + if (!FirstEndLoc) + return state; + + // Is the end of the first buffer past the start of the second buffer? 
+ SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, + *FirstEndLoc, *secondLoc, cmpTy); + Optional<DefinedOrUnknownSVal> OverlapTest = + Overlap.getAs<DefinedOrUnknownSVal>(); + if (!OverlapTest) + return state; + + std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); + + if (stateTrue && !stateFalse) { + // Overlap! + emitOverlapBug(C, stateTrue, First, Second); + return nullptr; + } + + // assume the two expressions don't overlap. + assert(stateFalse); + return stateFalse; +} + +void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, + const Stmt *First, const Stmt *Second) const { + ExplodedNode *N = C.generateErrorNode(state); + if (!N) + return; + + if (!BT_Overlap) + BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap, + categories::UnixAPI, "Improper arguments")); + + // Generate a report for this bug. + auto report = std::make_unique<PathSensitiveBugReport>( + *BT_Overlap, "Arguments must not be overlapping buffers", N); + report->addRange(First->getSourceRange()); + report->addRange(Second->getSourceRange()); + + C.emitReport(std::move(report)); +} + +void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State, + const Stmt *S, StringRef WarningMsg) const { + if (ExplodedNode *N = C.generateErrorNode(State)) { + if (!BT_Null) + BT_Null.reset(new BuiltinBug( + Filter.CheckNameCStringNullArg, categories::UnixAPI, + "Null pointer argument in call to byte string function")); + + BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Null.get()); + auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N); + Report->addRange(S->getSourceRange()); + if (const auto *Ex = dyn_cast<Expr>(S)) + bugreporter::trackExpressionValue(N, Ex, *Report); + C.emitReport(std::move(Report)); + } +} + +void CStringChecker::emitOutOfBoundsBug(CheckerContext &C, + ProgramStateRef State, const Stmt *S, + StringRef WarningMsg) const { + if (ExplodedNode *N = C.generateErrorNode(State)) { + if (!BT_Bounds) + 
BT_Bounds.reset(new BuiltinBug( + Filter.CheckCStringOutOfBounds ? Filter.CheckNameCStringOutOfBounds + : Filter.CheckNameCStringNullArg, + "Out-of-bound array access", + "Byte string function accesses out-of-bound array element")); + + BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Bounds.get()); + + // FIXME: It would be nice to eventually make this diagnostic more clear, + // e.g., by referencing the original declaration or by saying *why* this + // reference is outside the range. + auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N); + Report->addRange(S->getSourceRange()); + C.emitReport(std::move(Report)); + } +} + +// Reports, on a non-fatal error node, that the argument S is not a +// null-terminated string. The bug type is created lazily on first use. +void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State, + const Stmt *S, + StringRef WarningMsg) const { + if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) { + if (!BT_NotCString) + BT_NotCString.reset(new BuiltinBug( + Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, + "Argument is not a null-terminated string.")); + + auto Report = + std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N); + + Report->addRange(S->getSourceRange()); + C.emitReport(std::move(Report)); + } +} + +// Reports, on an error node, that adding two length expressions overflows. +void CStringChecker::emitAdditionOverflowBug(CheckerContext &C, + ProgramStateRef State) const { + if (ExplodedNode *N = C.generateErrorNode(State)) { + // FIXME: This lazily fills the same BT_NotCString slot that + // emitNotCStringBug uses, so whichever of the two functions runs first + // determines the bug type (check name and description) seen by both. + // A dedicated bug-type member would keep the two reports apart. + if (!BT_NotCString) + BT_NotCString.reset( + new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API", + "Sum of expressions causes overflow.")); + + // This isn't a great error message, but this should never occur in real + // code anyway -- you'd have to create a buffer longer than a size_t can + // represent, which is sort of a contradiction. 
+ const char *WarningMsg = + "This expression will create a string whose length is too big to " + "be represented as a size_t"; + + auto Report = + std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N); + C.emitReport(std::move(Report)); + } +} + +ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, + ProgramStateRef state, + NonLoc left, + NonLoc right) const { + // If out-of-bounds checking is turned off, skip the rest. + if (!Filter.CheckCStringOutOfBounds) + return state; + + // If a previous check has failed, propagate the failure. + if (!state) + return nullptr; + + SValBuilder &svalBuilder = C.getSValBuilder(); + BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); + + QualType sizeTy = svalBuilder.getContext().getSizeType(); + const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); + NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); + + SVal maxMinusRight; + if (right.getAs<nonloc::ConcreteInt>()) { + maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, + sizeTy); + } else { + // Try switching the operands. (The order of these two assignments is + // important!) + maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, + sizeTy); + left = right; + } + + if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) { + QualType cmpTy = svalBuilder.getConditionType(); + // If left > max - right, we have an overflow. + SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, + *maxMinusRightNL, cmpTy); + + ProgramStateRef stateOverflow, stateOkay; + std::tie(stateOverflow, stateOkay) = + state->assume(willOverflow.castAs<DefinedOrUnknownSVal>()); + + if (stateOverflow && !stateOkay) { + // We have an overflow. Emit a bug report. + emitAdditionOverflowBug(C, stateOverflow); + return nullptr; + } + + // From now on, assume an overflow didn't occur. 
+ assert(stateOkay); + state = stateOkay; + } + + return state; +} + +ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, + const MemRegion *MR, + SVal strLength) { + assert(!strLength.isUndef() && "Attempt to set an undefined string length"); + + MR = MR->StripCasts(); + + switch (MR->getKind()) { + case MemRegion::StringRegionKind: + // FIXME: This can happen if we strcpy() into a string region. This is + // undefined [C99 6.4.5p6], but we should still warn about it. + return state; + + case MemRegion::SymbolicRegionKind: + case MemRegion::AllocaRegionKind: + case MemRegion::VarRegionKind: + case MemRegion::FieldRegionKind: + case MemRegion::ObjCIvarRegionKind: + // These are the types we can currently track string lengths for. + break; + + case MemRegion::ElementRegionKind: + // FIXME: Handle element regions by upper-bounding the parent region's + // string length. + return state; + + default: + // Other regions (mostly non-data) can't have a reliable C string length. + // For now, just ignore the change. + // FIXME: These are rare but not impossible. We should output some kind of + // warning for things like strcpy((char[]){'a', 0}, "b"); + return state; + } + + if (strLength.isUnknown()) + return state->remove<CStringLength>(MR); + + return state->set<CStringLength>(MR, strLength); +} + +SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, + ProgramStateRef &state, + const Expr *Ex, + const MemRegion *MR, + bool hypothetical) { + if (!hypothetical) { + // If there's a recorded length, go ahead and return it. + const SVal *Recorded = state->get<CStringLength>(MR); + if (Recorded) + return *Recorded; + } + + // Otherwise, get a new symbol and update the state. 
+ SValBuilder &svalBuilder = C.getSValBuilder(); + QualType sizeTy = svalBuilder.getContext().getSizeType(); + SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), + MR, Ex, sizeTy, + C.getLocationContext(), + C.blockCount()); + + if (!hypothetical) { + if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) { + // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4 + BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); + const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); + llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4); + const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt, + fourInt); + NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt); + // The result of a comparison has the condition type, not size_t; this + // matches the other comparisons built in this checker. + SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, + maxLength, + svalBuilder.getConditionType()); + state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true); + } + state = state->set<CStringLength>(MR, strLength); + } + + return strLength; +} + +SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, + const Expr *Ex, SVal Buf, + bool hypothetical) const { + const MemRegion *MR = Buf.getAsRegion(); + if (!MR) { + // If we can't get a region, see if it's something we /know/ isn't a + // C string. In the context of locations, the only time we can issue such + // a warning is for labels. + if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) { + if (Filter.CheckCStringNotNullTerm) { + SmallString<120> buf; + llvm::raw_svector_ostream os(buf); + assert(CurrentFunctionDescription); + os << "Argument to " << CurrentFunctionDescription + << " is the address of the label '" << Label->getLabel()->getName() + << "', which is not a null-terminated string"; + + emitNotCStringBug(C, state, Ex, os.str()); + } + return UndefinedVal(); + } + + // If it's not a region and not a label, give up. + return UnknownVal(); + } + + // If we have a region, strip casts from it and see if we can figure out + // its length. 
For anything we can't figure out, just return UnknownVal. + MR = MR->StripCasts(); + + switch (MR->getKind()) { + case MemRegion::StringRegionKind: { + // Modifying the contents of string regions is undefined [C99 6.4.5p6], + // so we can assume that the byte length is the correct C string length. + SValBuilder &svalBuilder = C.getSValBuilder(); + QualType sizeTy = svalBuilder.getContext().getSizeType(); + const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); + return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); + } + case MemRegion::SymbolicRegionKind: + case MemRegion::AllocaRegionKind: + case MemRegion::VarRegionKind: + case MemRegion::FieldRegionKind: + case MemRegion::ObjCIvarRegionKind: + return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); + case MemRegion::CompoundLiteralRegionKind: + // FIXME: Can we track this? Is it necessary? + return UnknownVal(); + case MemRegion::ElementRegionKind: + // FIXME: How can we handle this? It's not good enough to subtract the + // offset from the base string length; consider "123\x00567" and &a[5]. + return UnknownVal(); + default: + // Other regions (mostly non-data) can't have a reliable C string length. + // In this case, an error is emitted and UndefinedVal is returned. + // The caller should always be prepared to handle this case. + if (Filter.CheckCStringNotNullTerm) { + SmallString<120> buf; + llvm::raw_svector_ostream os(buf); + + assert(CurrentFunctionDescription); + os << "Argument to " << CurrentFunctionDescription << " is "; + + if (SummarizeRegion(os, C.getASTContext(), MR)) + os << ", which is not a null-terminated string"; + else + os << "not a null-terminated string"; + + emitNotCStringBug(C, state, Ex, os.str()); + } + return UndefinedVal(); + } +} + +const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, + ProgramStateRef &state, const Expr *expr, SVal val) const { + + // Get the memory region pointed to by the val. 
+ const MemRegion *bufRegion = val.getAsRegion(); + if (!bufRegion) + return nullptr; + + // Strip casts off the memory region. + bufRegion = bufRegion->StripCasts(); + + // Cast the memory region to a string region. + const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); + if (!strRegion) + return nullptr; + + // Return the actual string in the string region. + return strRegion->getStringLiteral(); +} + +bool CStringChecker::IsFirstBufInBound(CheckerContext &C, + ProgramStateRef state, + const Expr *FirstBuf, + const Expr *Size) { + // If we do not know that the buffer is long enough we return 'true'. + // Otherwise the parent region of this field region would also get + // invalidated, which would lead to warnings based on an unknown state. + + // Originally copied from CheckBufferAccess and CheckLocation. + SValBuilder &svalBuilder = C.getSValBuilder(); + ASTContext &Ctx = svalBuilder.getContext(); + const LocationContext *LCtx = C.getLocationContext(); + + QualType sizeTy = Size->getType(); + QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); + SVal BufVal = state->getSVal(FirstBuf, LCtx); + + SVal LengthVal = state->getSVal(Size, LCtx); + Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); + if (!Length) + return true; // cf top comment. + + // Compute the offset of the last element to be accessed: size-1. + NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); + SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy); + if (Offset.isUnknown()) + return true; // cf top comment + NonLoc LastOffset = Offset.castAs<NonLoc>(); + + // Check that the first buffer is sufficiently long. + SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); + Optional<Loc> BufLoc = BufStart.getAs<Loc>(); + if (!BufLoc) + return true; // cf top comment. + + SVal BufEnd = + svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy); + + // Check for out of bound array element access. 
+ const MemRegion *R = BufEnd.getAsRegion(); + if (!R) + return true; // cf top comment. + + const ElementRegion *ER = dyn_cast<ElementRegion>(R); + if (!ER) + return true; // cf top comment. + + // FIXME: Does this crash when a non-standard definition + // of a library function is encountered? + assert(ER->getValueType() == C.getASTContext().CharTy && + "IsFirstBufInBound should only be called with char* ElementRegions"); + + // Get the size of the array. + const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); + SVal Extent = + svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); + DefinedOrUnknownSVal ExtentSize = Extent.castAs<DefinedOrUnknownSVal>(); + + // Get the index of the accessed element. + DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); + + ProgramStateRef StInBound = state->assumeInBound(Idx, ExtentSize, true); + + return static_cast<bool>(StInBound); +} + +ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, + ProgramStateRef state, + const Expr *E, SVal V, + bool IsSourceBuffer, + const Expr *Size) { + Optional<Loc> L = V.getAs<Loc>(); + if (!L) + return state; + + // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes + // some assumptions about the value that CFRefCount can't. Even so, it should + // probably be refactored. + if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) { + const MemRegion *R = MR->getRegion()->StripCasts(); + + // Are we dealing with an ElementRegion? If so, we should be invalidating + // the super-region. + if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { + R = ER->getSuperRegion(); + // FIXME: What about layers of ElementRegions? + } + + // Invalidate this region. 
+ const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); + + bool CausesPointerEscape = false; + RegionAndSymbolInvalidationTraits ITraits; + // Invalidate and escape only indirect regions accessible through the source + // buffer. + if (IsSourceBuffer) { + ITraits.setTrait(R->getBaseRegion(), + RegionAndSymbolInvalidationTraits::TK_PreserveContents); + ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape); + CausesPointerEscape = true; + } else { + const MemRegion::Kind& K = R->getKind(); + if (K == MemRegion::FieldRegionKind) + if (Size && IsFirstBufInBound(C, state, E, Size)) { + // If destination buffer is a field region and access is in bound, + // do not invalidate its super region. + ITraits.setTrait( + R, + RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); + } + } + + return state->invalidateRegions(R, E, C.blockCount(), LCtx, + CausesPointerEscape, nullptr, nullptr, + &ITraits); + } + + // If we have a non-region value by chance, just remove the binding. + // FIXME: is this necessary or correct? This handles the non-Region + // cases. Is it ever valid to store to these? 
+ return state->killBinding(*L); +} + +// Writes a short description of the region MR to os for use in diagnostics. +// Returns true when a description was written and false for region kinds +// that are not handled here. +bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, + const MemRegion *MR) { + const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR); + + switch (MR->getKind()) { + case MemRegion::FunctionCodeRegionKind: { + const NamedDecl *FD = cast<FunctionCodeRegion>(MR)->getDecl(); + if (FD) + os << "the address of the function '" << *FD << '\''; + else + os << "the address of a function"; + return true; + } + case MemRegion::BlockCodeRegionKind: + os << "block text"; + return true; + case MemRegion::BlockDataRegionKind: + os << "a block"; + return true; + case MemRegion::CXXThisRegionKind: + case MemRegion::CXXTempObjectRegionKind: + os << "a C++ temp object of type " << TVR->getValueType().getAsString(); + return true; + case MemRegion::VarRegionKind: + // Keep the trailing space so the type name is not glued to "type". + os << "a variable of type " << TVR->getValueType().getAsString(); + return true; + case MemRegion::FieldRegionKind: + os << "a field of type " << TVR->getValueType().getAsString(); + return true; + case MemRegion::ObjCIvarRegionKind: + os << "an instance variable of type " << TVR->getValueType().getAsString(); + return true; + default: + return false; + } +} + +bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal, + const Expr *Size, CheckerContext &C, + ProgramStateRef &State) { + SVal MemVal = C.getSVal(DstBuffer); + SVal SizeVal = C.getSVal(Size); + const MemRegion *MR = MemVal.getAsRegion(); + if (!MR) + return false; + + // We're about to model memset by producing a "default binding" in the Store. + // Our current implementation - RegionStore - doesn't support default bindings + // that don't cover the whole base region. So we should first get the offset + // and the base region to figure out whether the offset of buffer is 0. 
+ RegionOffset Offset = MR->getAsOffset(); + const MemRegion *BR = Offset.getRegion(); + + Optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>(); + if (!SizeNL) + return false; + + SValBuilder &svalBuilder = C.getSValBuilder(); + ASTContext &Ctx = C.getASTContext(); + + // void *memset(void *dest, int ch, size_t count); + // For now we can only handle the case of offset is 0 and concrete char value. + if (Offset.isValid() && !Offset.hasSymbolicOffset() && + Offset.getOffset() == 0) { + // Get the base region's extent. + auto *SubReg = cast<SubRegion>(BR); + DefinedOrUnknownSVal Extent = SubReg->getExtent(svalBuilder); + + ProgramStateRef StateWholeReg, StateNotWholeReg; + std::tie(StateWholeReg, StateNotWholeReg) = + State->assume(svalBuilder.evalEQ(State, Extent, *SizeNL)); + + // With the semantic of 'memset()', we should convert the CharVal to + // unsigned char. + CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy); + + ProgramStateRef StateNullChar, StateNonNullChar; + std::tie(StateNullChar, StateNonNullChar) = + assumeZero(C, State, CharVal, Ctx.UnsignedCharTy); + + if (StateWholeReg && !StateNotWholeReg && StateNullChar && + !StateNonNullChar) { + // If the 'memset()' acts on the whole region of destination buffer and + // the value of the second argument of 'memset()' is zero, bind the second + // argument's value to the destination buffer with 'default binding'. + // FIXME: Since there is no perfect way to bind the non-zero character, we + // can only deal with zero value here. In the future, we need to deal with + // the binding of non-zero value in the case of whole region. + State = State->bindDefaultZero(svalBuilder.makeLoc(BR), + C.getLocationContext()); + } else { + // If the destination buffer's extent is not equal to the value of + // third argument, just invalidate buffer. 
+ State = InvalidateBuffer(C, State, DstBuffer, MemVal, + /*IsSourceBuffer*/ false, Size); + } + + if (StateNullChar && !StateNonNullChar) { + // If the value of the second argument of 'memset()' is zero, set the + // string length of destination buffer to 0 directly. + State = setCStringLength(State, MR, + svalBuilder.makeZeroVal(Ctx.getSizeType())); + } else if (!StateNullChar && StateNonNullChar) { + SVal NewStrLen = svalBuilder.getMetadataSymbolVal( + CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(), + C.getLocationContext(), C.blockCount()); + + // If the value of second argument is not zero, then the string length + // is at least the size argument. + SVal NewStrLenGESize = svalBuilder.evalBinOp( + State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType()); + + State = setCStringLength( + State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true), + MR, NewStrLen); + } + } else { + // If the offset is not zero and char value is not concrete, we can do + // nothing but invalidate the buffer. + State = InvalidateBuffer(C, State, DstBuffer, MemVal, + /*IsSourceBuffer*/ false, Size); + } + return true; +} + +//===----------------------------------------------------------------------===// +// evaluation of individual function calls. +//===----------------------------------------------------------------------===// + +void CStringChecker::evalCopyCommon(CheckerContext &C, + const CallExpr *CE, + ProgramStateRef state, + const Expr *Size, const Expr *Dest, + const Expr *Source, bool Restricted, + bool IsMempcpy) const { + CurrentFunctionDescription = "memory copy function"; + + // See if the size argument is zero. + const LocationContext *LCtx = C.getLocationContext(); + SVal sizeVal = state->getSVal(Size, LCtx); + QualType sizeTy = Size->getType(); + + ProgramStateRef stateZeroSize, stateNonZeroSize; + std::tie(stateZeroSize, stateNonZeroSize) = + assumeZero(C, state, sizeVal, sizeTy); + + // Get the value of the Dest. 
+ SVal destVal = state->getSVal(Dest, LCtx); + + // If the size is zero, there won't be any actual memory access, so + // just bind the return value to the destination buffer and return. + if (stateZeroSize && !stateNonZeroSize) { + stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); + C.addTransition(stateZeroSize); + return; + } + + // If the size can be nonzero, we have to check the other arguments. + if (stateNonZeroSize) { + state = stateNonZeroSize; + + // Ensure the destination is not null. If it is NULL there will be a + // NULL pointer dereference. + state = checkNonNull(C, state, Dest, destVal, 1); + if (!state) + return; + + // Get the value of the Src. + SVal srcVal = state->getSVal(Source, LCtx); + + // Ensure the source is not null. If it is NULL there will be a + // NULL pointer dereference. + state = checkNonNull(C, state, Source, srcVal, 2); + if (!state) + return; + + // Ensure the accesses are valid and that the buffers do not overlap. + const char * const writeWarning = + "Memory copy function overflows destination buffer"; + state = CheckBufferAccess(C, state, Size, Dest, Source, + writeWarning, /* sourceWarning = */ nullptr); + if (Restricted) + state = CheckOverlap(C, state, Size, Dest, Source); + + if (!state) + return; + + // If this is mempcpy, get the byte after the last byte copied and + // bind the expr. + if (IsMempcpy) { + // Get the byte after the last byte copied. + SValBuilder &SvalBuilder = C.getSValBuilder(); + ASTContext &Ctx = SvalBuilder.getContext(); + QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); + SVal DestRegCharVal = + SvalBuilder.evalCast(destVal, CharPtrTy, Dest->getType()); + SVal lastElement = C.getSValBuilder().evalBinOp( + state, BO_Add, DestRegCharVal, sizeVal, Dest->getType()); + // If we don't know how much we copied, we can at least + // conjure a return value for later. 
+ if (lastElement.isUnknown()) + lastElement = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, + C.blockCount()); + + // The byte after the last byte copied is the return value. + state = state->BindExpr(CE, LCtx, lastElement); + } else { + // All other copies return the destination buffer. + // (Well, bcopy() has a void return type, but this won't hurt.) + state = state->BindExpr(CE, LCtx, destVal); + } + + // Invalidate the destination (regular invalidation without pointer-escaping + // the address of the top-level region). + // FIXME: Even if we can't perfectly model the copy, we should see if we + // can use LazyCompoundVals to copy the source values into the destination. + // This would probably remove any existing bindings past the end of the + // copied region, but that's still an improvement over blank invalidation. + state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest), + /*IsSourceBuffer*/false, Size); + + // Invalidate the source (const-invalidation without const-pointer-escaping + // the address of the top-level region). + state = InvalidateBuffer(C, state, Source, C.getSVal(Source), + /*IsSourceBuffer*/true, nullptr); + + C.addTransition(state); + } +} + + +void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { + // void *memcpy(void *restrict dst, const void *restrict src, size_t n); + // The return value is the address of the destination buffer. + const Expr *Dest = CE->getArg(0); + ProgramStateRef state = C.getState(); + + evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true); +} + +void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const { + // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); + // The return value is a pointer to the byte following the last written byte. 
+ const Expr *Dest = CE->getArg(0); + ProgramStateRef state = C.getState(); + + evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true); +} + +void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { + // void *memmove(void *dst, const void *src, size_t n); + // The return value is the address of the destination buffer. + const Expr *Dest = CE->getArg(0); + ProgramStateRef state = C.getState(); + + evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1)); +} + +void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { + // void bcopy(const void *src, void *dst, size_t n); + evalCopyCommon(C, CE, C.getState(), + CE->getArg(2), CE->getArg(1), CE->getArg(0)); +} + +void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { + // int memcmp(const void *s1, const void *s2, size_t n); + CurrentFunctionDescription = "memory comparison function"; + + const Expr *Left = CE->getArg(0); + const Expr *Right = CE->getArg(1); + const Expr *Size = CE->getArg(2); + + ProgramStateRef state = C.getState(); + SValBuilder &svalBuilder = C.getSValBuilder(); + + // See if the size argument is zero. + const LocationContext *LCtx = C.getLocationContext(); + SVal sizeVal = state->getSVal(Size, LCtx); + QualType sizeTy = Size->getType(); + + ProgramStateRef stateZeroSize, stateNonZeroSize; + std::tie(stateZeroSize, stateNonZeroSize) = + assumeZero(C, state, sizeVal, sizeTy); + + // If the size can be zero, the result will be 0 in that case, and we don't + // have to check either of the buffers. + if (stateZeroSize) { + state = stateZeroSize; + state = state->BindExpr(CE, LCtx, + svalBuilder.makeZeroVal(CE->getType())); + C.addTransition(state); + } + + // If the size can be nonzero, we have to check the other arguments. + if (stateNonZeroSize) { + state = stateNonZeroSize; + // If we know the two buffers are the same, we know the result is 0. + // First, get the two buffers' addresses. 
Another checker will have already + // made sure they're not undefined. + DefinedOrUnknownSVal LV = + state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>(); + DefinedOrUnknownSVal RV = + state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>(); + + // See if they are the same. + DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); + ProgramStateRef StSameBuf, StNotSameBuf; + std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); + + // If the two arguments might be the same buffer, we know the result is 0, + // and we only need to check one size. + if (StSameBuf) { + state = StSameBuf; + state = CheckBufferAccess(C, state, Size, Left); + if (state) { + state = StSameBuf->BindExpr(CE, LCtx, + svalBuilder.makeZeroVal(CE->getType())); + C.addTransition(state); + } + } + + // If the two arguments might be different buffers, we have to check the + // size of both of them. + if (StNotSameBuf) { + state = StNotSameBuf; + state = CheckBufferAccess(C, state, Size, Left, Right); + if (state) { + // The return value is the comparison result, which we don't know. 
+ SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, + C.blockCount()); + state = state->BindExpr(CE, LCtx, CmpV); + C.addTransition(state); + } + } + } +} + +void CStringChecker::evalstrLength(CheckerContext &C, + const CallExpr *CE) const { + // size_t strlen(const char *s); + evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); +} + +void CStringChecker::evalstrnLength(CheckerContext &C, + const CallExpr *CE) const { + // size_t strnlen(const char *s, size_t maxlen); + evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); +} + +void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, + bool IsStrnlen) const { + CurrentFunctionDescription = "string length function"; + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + + if (IsStrnlen) { + const Expr *maxlenExpr = CE->getArg(1); + SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); + + ProgramStateRef stateZeroSize, stateNonZeroSize; + std::tie(stateZeroSize, stateNonZeroSize) = + assumeZero(C, state, maxlenVal, maxlenExpr->getType()); + + // If the size can be zero, the result will be 0 in that case, and we don't + // have to check the string itself. + if (stateZeroSize) { + SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); + stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); + C.addTransition(stateZeroSize); + } + + // If the size is GUARANTEED to be zero, we're done! + if (!stateNonZeroSize) + return; + + // Otherwise, record the assumption that the size is nonzero. + state = stateNonZeroSize; + } + + // Check that the string argument is non-null. + const Expr *Arg = CE->getArg(0); + SVal ArgVal = state->getSVal(Arg, LCtx); + + state = checkNonNull(C, state, Arg, ArgVal, 1); + + if (!state) + return; + + SVal strLength = getCStringLength(C, state, Arg, ArgVal); + + // If the argument isn't a valid C string, there's no valid state to + // transition to. 
+ if (strLength.isUndef()) + return; + + DefinedOrUnknownSVal result = UnknownVal(); + + // If the check is for strnlen() then bind the return value to no more than + // the maxlen value. + if (IsStrnlen) { + QualType cmpTy = C.getSValBuilder().getConditionType(); + + // It's a little unfortunate to be getting this again, + // but it's not that expensive... + const Expr *maxlenExpr = CE->getArg(1); + SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); + + Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>(); + Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>(); + + if (strLengthNL && maxlenValNL) { + ProgramStateRef stateStringTooLong, stateStringNotTooLong; + + // Check if the strLength is greater than the maxlen. + std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume( + C.getSValBuilder() + .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy) + .castAs<DefinedOrUnknownSVal>()); + + if (stateStringTooLong && !stateStringNotTooLong) { + // If the string is longer than maxlen, return maxlen. + result = *maxlenValNL; + } else if (stateStringNotTooLong && !stateStringTooLong) { + // If the string is shorter than maxlen, return its length. + result = *strLengthNL; + } + } + + if (result.isUnknown()) { + // If we don't have enough information for a comparison, there's + // no guarantee the full string length will actually be returned. + // All we know is the return value is the min of the string length + // and the limit. This is better than nothing. 
+ result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, + C.blockCount()); + NonLoc resultNL = result.castAs<NonLoc>(); + + if (strLengthNL) { + state = state->assume(C.getSValBuilder().evalBinOpNN( + state, BO_LE, resultNL, *strLengthNL, cmpTy) + .castAs<DefinedOrUnknownSVal>(), true); + } + + if (maxlenValNL) { + state = state->assume(C.getSValBuilder().evalBinOpNN( + state, BO_LE, resultNL, *maxlenValNL, cmpTy) + .castAs<DefinedOrUnknownSVal>(), true); + } + } + + } else { + // This is a plain strlen(), not strnlen(). + result = strLength.castAs<DefinedOrUnknownSVal>(); + + // If we don't know the length of the string, conjure a return + // value, so it can be used in constraints, at least. + if (result.isUnknown()) { + result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, + C.blockCount()); + } + } + + // Bind the return value. + assert(!result.isUnknown() && "Should have conjured a value by now"); + state = state->BindExpr(CE, LCtx, result); + C.addTransition(state); +} + +void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { + // char *strcpy(char *restrict dst, const char *restrict src); + evalStrcpyCommon(C, CE, + /* returnEnd = */ false, + /* isBounded = */ false, + /* isAppending = */ false); +} + +void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { + // char *strncpy(char *restrict dst, const char *restrict src, size_t n); + evalStrcpyCommon(C, CE, + /* returnEnd = */ false, + /* isBounded = */ true, + /* isAppending = */ false); +} + +void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { + // char *stpcpy(char *restrict dst, const char *restrict src); + evalStrcpyCommon(C, CE, + /* returnEnd = */ true, + /* isBounded = */ false, + /* isAppending = */ false); +} + +void CStringChecker::evalStrlcpy(CheckerContext &C, const CallExpr *CE) const { + // char *strlcpy(char *dst, const char *src, size_t n); + evalStrcpyCommon(C, CE, + /* returnEnd = */ true, + /* 
isBounded = */ true, + /* isAppending = */ false, + /* returnPtr = */ false); +} + +void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const { + //char *strcat(char *restrict s1, const char *restrict s2); + evalStrcpyCommon(C, CE, + /* returnEnd = */ false, + /* isBounded = */ false, + /* isAppending = */ true); +} + +void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { + //char *strncat(char *restrict s1, const char *restrict s2, size_t n); + evalStrcpyCommon(C, CE, + /* returnEnd = */ false, + /* isBounded = */ true, + /* isAppending = */ true); +} + +void CStringChecker::evalStrlcat(CheckerContext &C, const CallExpr *CE) const { + // FIXME: strlcat() uses a different rule for bound checking, i.e. 'n' means + // a different thing as compared to strncat(). This currently causes + // false positives in the alpha string bound checker. + + //char *strlcat(char *s1, const char *s2, size_t n); + evalStrcpyCommon(C, CE, + /* returnEnd = */ false, + /* isBounded = */ true, + /* isAppending = */ true, + /* returnPtr = */ false); +} + +void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, + bool returnEnd, bool isBounded, + bool isAppending, bool returnPtr) const { + CurrentFunctionDescription = "string copy function"; + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + + // Check that the destination is non-null. + const Expr *Dst = CE->getArg(0); + SVal DstVal = state->getSVal(Dst, LCtx); + + state = checkNonNull(C, state, Dst, DstVal, 1); + if (!state) + return; + + // Check that the source is non-null. + const Expr *srcExpr = CE->getArg(1); + SVal srcVal = state->getSVal(srcExpr, LCtx); + state = checkNonNull(C, state, srcExpr, srcVal, 2); + if (!state) + return; + + // Get the string length of the source. + SVal strLength = getCStringLength(C, state, srcExpr, srcVal); + + // If the source isn't a valid C string, give up. 
+ if (strLength.isUndef()) + return; + + SValBuilder &svalBuilder = C.getSValBuilder(); + QualType cmpTy = svalBuilder.getConditionType(); + QualType sizeTy = svalBuilder.getContext().getSizeType(); + + // These two values allow checking two kinds of errors: + // - actual overflows caused by a source that doesn't fit in the destination + // - potential overflows caused by a bound that could exceed the destination + SVal amountCopied = UnknownVal(); + SVal maxLastElementIndex = UnknownVal(); + const char *boundWarning = nullptr; + + state = CheckOverlap(C, state, isBounded ? CE->getArg(2) : CE->getArg(1), Dst, srcExpr); + + if (!state) + return; + + // If the function is strncpy, strncat, etc... it is bounded. + if (isBounded) { + // Get the max number of characters to copy. + const Expr *lenExpr = CE->getArg(2); + SVal lenVal = state->getSVal(lenExpr, LCtx); + + // Protect against misdeclared strncpy(). + lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType()); + + Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>(); + Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>(); + + // If we know both values, we might be able to figure out how much + // we're copying. + if (strLengthNL && lenValNL) { + ProgramStateRef stateSourceTooLong, stateSourceNotTooLong; + + // Check if the max number to copy is less than the length of the src. + // If the bound is equal to the source length, strncpy won't null- + // terminate the result! + std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume( + svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy) + .castAs<DefinedOrUnknownSVal>()); + + if (stateSourceTooLong && !stateSourceNotTooLong) { + // Max number to copy is less than the length of the src, so the actual + // strLength copied is the max number arg. + state = stateSourceTooLong; + amountCopied = lenVal; + + } else if (!stateSourceTooLong && stateSourceNotTooLong) { + // The source buffer entirely fits in the bound. 
+ state = stateSourceNotTooLong; + amountCopied = strLength; + } + } + + // We still want to know if the bound is known to be too large. + if (lenValNL) { + if (isAppending) { + // For strncat, the check is strlen(dst) + lenVal < sizeof(dst) + + // Get the string length of the destination. If the destination is + // memory that can't have a string length, we shouldn't be copying + // into it anyway. + SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); + if (dstStrLength.isUndef()) + return; + + if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) { + maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add, + *lenValNL, + *dstStrLengthNL, + sizeTy); + boundWarning = "Size argument is greater than the free space in the " + "destination buffer"; + } + + } else { + // For strncpy, this is just checking that lenVal <= sizeof(dst) + // (Yes, strncpy and strncat differ in how they treat termination. + // strncat ALWAYS terminates, but strncpy doesn't.) + + // We need a special case for when the copy size is zero, in which + // case strncpy will do no work at all. Our bounds check uses n-1 + // as the last element accessed, so n == 0 is problematic. + ProgramStateRef StateZeroSize, StateNonZeroSize; + std::tie(StateZeroSize, StateNonZeroSize) = + assumeZero(C, state, *lenValNL, sizeTy); + + // If the size is known to be zero, we're done. + if (StateZeroSize && !StateNonZeroSize) { + if (returnPtr) { + StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal); + } else { + StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, *lenValNL); + } + C.addTransition(StateZeroSize); + return; + } + + // Otherwise, go ahead and figure out the last element we'll touch. + // We don't record the non-zero assumption here because we can't + // be sure. We won't warn on a possible zero. 
+ NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); + maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, + one, sizeTy); + boundWarning = "Size argument is greater than the length of the " + "destination buffer"; + } + } + + // If we couldn't pin down the copy length, at least bound it. + // FIXME: We should actually run this code path for append as well, but + // right now it creates problems with constraints (since we can end up + // trying to pass constraints from symbol to symbol). + if (amountCopied.isUnknown() && !isAppending) { + // Try to get a "hypothetical" string length symbol, which we can later + // set as a real value if that turns out to be the case. + amountCopied = getCStringLength(C, state, lenExpr, srcVal, true); + assert(!amountCopied.isUndef()); + + if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) { + if (lenValNL) { + // amountCopied <= lenVal + SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE, + *amountCopiedNL, + *lenValNL, + cmpTy); + state = state->assume( + copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true); + if (!state) + return; + } + + if (strLengthNL) { + // amountCopied <= strlen(source) + SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE, + *amountCopiedNL, + *strLengthNL, + cmpTy); + state = state->assume( + copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true); + if (!state) + return; + } + } + } + + } else { + // The function isn't bounded. The amount copied should match the length + // of the source buffer. + amountCopied = strLength; + } + + assert(state); + + // This represents the number of characters copied into the destination + // buffer. (It may not actually be the strlen if the destination buffer + // is not terminated.) + SVal finalStrLength = UnknownVal(); + + // If this is an appending function (strcat, strncat...) then set the + // string length to strlen(src) + strlen(dst) since the buffer will + // ultimately contain both. 
+ if (isAppending) { + // Get the string length of the destination. If the destination is memory + // that can't have a string length, we shouldn't be copying into it anyway. + SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); + if (dstStrLength.isUndef()) + return; + + Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>(); + Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>(); + + // If we know both string lengths, we might know the final string length. + if (srcStrLengthNL && dstStrLengthNL) { + // Make sure the two lengths together don't overflow a size_t. + state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL); + if (!state) + return; + + finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, + *dstStrLengthNL, sizeTy); + } + + // If we couldn't get a single value for the final string length, + // we can at least bound it by the individual lengths. + if (finalStrLength.isUnknown()) { + // Try to get a "hypothetical" string length symbol, which we can later + // set as a real value if that turns out to be the case. + finalStrLength = getCStringLength(C, state, CE, DstVal, true); + assert(!finalStrLength.isUndef()); + + if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) { + if (srcStrLengthNL) { + // finalStrLength >= srcStrLength + SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE, + *finalStrLengthNL, + *srcStrLengthNL, + cmpTy); + state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(), + true); + if (!state) + return; + } + + if (dstStrLengthNL) { + // finalStrLength >= dstStrLength + SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE, + *finalStrLengthNL, + *dstStrLengthNL, + cmpTy); + state = + state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true); + if (!state) + return; + } + } + } + + } else { + // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and + // the final string length will match the input string length. 
+ finalStrLength = amountCopied; + } + + SVal Result; + + if (returnPtr) { + // The final result of the function will either be a pointer past the last + // copied element, or a pointer to the start of the destination buffer. + Result = (returnEnd ? UnknownVal() : DstVal); + } else { + Result = finalStrLength; + } + + assert(state); + + // If the destination is a MemRegion, try to check for a buffer overflow and + // record the new string length. + if (Optional<loc::MemRegionVal> dstRegVal = + DstVal.getAs<loc::MemRegionVal>()) { + QualType ptrTy = Dst->getType(); + + // If we have an exact value on a bounded copy, use that to check for + // overflows, rather than our estimate about how much is actually copied. + if (boundWarning) { + if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) { + SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, + *maxLastNL, ptrTy); + state = CheckLocation(C, state, CE->getArg(2), maxLastElement, + boundWarning); + if (!state) + return; + } + } + + // Then, if the final length is known... + if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) { + SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, + *knownStrLength, ptrTy); + + // ...and we haven't checked the bound, we'll check the actual copy. + if (!boundWarning) { + const char * const warningMsg = + "String copy function overflows destination buffer"; + state = CheckLocation(C, state, Dst, lastElement, warningMsg); + if (!state) + return; + } + + // If this is a stpcpy-style copy, the last element is the return value. + if (returnPtr && returnEnd) + Result = lastElement; + } + + // Invalidate the destination (regular invalidation without pointer-escaping + // the address of the top-level region). This must happen before we set the + // C string length because invalidation will clear the length. 
+ // FIXME: Even if we can't perfectly model the copy, we should see if we + // can use LazyCompoundVals to copy the source values into the destination. + // This would probably remove any existing bindings past the end of the + // string, but that's still an improvement over blank invalidation. + state = InvalidateBuffer(C, state, Dst, *dstRegVal, + /*IsSourceBuffer*/false, nullptr); + + // Invalidate the source (const-invalidation without const-pointer-escaping + // the address of the top-level region). + state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true, + nullptr); + + // Set the C string length of the destination, if we know it. + if (isBounded && !isAppending) { + // strncpy is annoying in that it doesn't guarantee to null-terminate + // the result string. If the original string didn't fit entirely inside + // the bound (including the null-terminator), we don't know how long the + // result is. + if (amountCopied != strLength) + finalStrLength = UnknownVal(); + } + state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); + } + + assert(state); + + if (returnPtr) { + // If this is a stpcpy-style copy, but we were unable to check for a buffer + // overflow, we still need a result. Conjure a return value. + if (returnEnd && Result.isUnknown()) { + Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); + } + } + // Set the return value. 
+ state = state->BindExpr(CE, LCtx, Result); + C.addTransition(state); +} + +void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { + //int strcmp(const char *s1, const char *s2); + evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false); +} + +void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { + //int strncmp(const char *s1, const char *s2, size_t n); + evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false); +} + +void CStringChecker::evalStrcasecmp(CheckerContext &C, + const CallExpr *CE) const { + //int strcasecmp(const char *s1, const char *s2); + evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true); +} + +void CStringChecker::evalStrncasecmp(CheckerContext &C, + const CallExpr *CE) const { + //int strncasecmp(const char *s1, const char *s2, size_t n); + evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true); +} + +void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, + bool isBounded, bool ignoreCase) const { + CurrentFunctionDescription = "string comparison function"; + ProgramStateRef state = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + + // Check that the first string is non-null + const Expr *s1 = CE->getArg(0); + SVal s1Val = state->getSVal(s1, LCtx); + state = checkNonNull(C, state, s1, s1Val, 1); + if (!state) + return; + + // Check that the second string is non-null. + const Expr *s2 = CE->getArg(1); + SVal s2Val = state->getSVal(s2, LCtx); + state = checkNonNull(C, state, s2, s2Val, 2); + if (!state) + return; + + // Get the string length of the first string or give up. + SVal s1Length = getCStringLength(C, state, s1, s1Val); + if (s1Length.isUndef()) + return; + + // Get the string length of the second string or give up. 
+ SVal s2Length = getCStringLength(C, state, s2, s2Val); + if (s2Length.isUndef()) + return; + + // If we know the two buffers are the same, we know the result is 0. + // First, get the two buffers' addresses. Another checker will have already + // made sure they're not undefined. + DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>(); + DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>(); + + // See if they are the same. + SValBuilder &svalBuilder = C.getSValBuilder(); + DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); + ProgramStateRef StSameBuf, StNotSameBuf; + std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); + + // If the two arguments might be the same buffer, we know the result is 0, + // and we only need to check one size. + if (StSameBuf) { + StSameBuf = StSameBuf->BindExpr(CE, LCtx, + svalBuilder.makeZeroVal(CE->getType())); + C.addTransition(StSameBuf); + + // If the two arguments are GUARANTEED to be the same, we're done! + if (!StNotSameBuf) + return; + } + + assert(StNotSameBuf); + state = StNotSameBuf; + + // At this point we can go about comparing the two buffers. + // For now, we only do this if they're both known string literals. + + // Attempt to extract string literals from both expressions. + const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); + const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val); + bool canComputeResult = false; + SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, + C.blockCount()); + + if (s1StrLiteral && s2StrLiteral) { + StringRef s1StrRef = s1StrLiteral->getString(); + StringRef s2StrRef = s2StrLiteral->getString(); + + if (isBounded) { + // Get the max number of characters to compare. + const Expr *lenExpr = CE->getArg(2); + SVal lenVal = state->getSVal(lenExpr, LCtx); + + // If the length is known, we can get the right substrings. 
+ if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) { + // Create substrings of each to compare the prefix. + s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue()); + s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue()); + canComputeResult = true; + } + } else { + // This is a normal, unbounded strcmp. + canComputeResult = true; + } + + if (canComputeResult) { + // Real strcmp stops at null characters. + size_t s1Term = s1StrRef.find('\0'); + if (s1Term != StringRef::npos) + s1StrRef = s1StrRef.substr(0, s1Term); + + size_t s2Term = s2StrRef.find('\0'); + if (s2Term != StringRef::npos) + s2StrRef = s2StrRef.substr(0, s2Term); + + // Use StringRef's comparison methods to compute the actual result. + int compareRes = ignoreCase ? s1StrRef.compare_lower(s2StrRef) + : s1StrRef.compare(s2StrRef); + + // The strcmp function returns an integer greater than, equal to, or less + // than zero, [c11, p7.24.4.2]. + if (compareRes == 0) { + resultVal = svalBuilder.makeIntVal(compareRes, CE->getType()); + } + else { + DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType()); + // Constrain strcmp's result range based on the result of StringRef's + // comparison methods. + BinaryOperatorKind op = (compareRes == 1) ? BO_GT : BO_LT; + SVal compareWithZero = + svalBuilder.evalBinOp(state, op, resultVal, zeroVal, + svalBuilder.getConditionType()); + DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>(); + state = state->assume(compareWithZeroVal, true); + } + } + } + + state = state->BindExpr(CE, LCtx, resultVal); + + // Record this as a possible path. + C.addTransition(state); +} + +void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const { + //char *strsep(char **stringp, const char *delim); + // Sanity: does the search string parameter match the return type? 
+ const Expr *SearchStrPtr = CE->getArg(0); + QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType(); + if (CharPtrTy.isNull() || + CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType()) + return; + + CurrentFunctionDescription = "strsep()"; + ProgramStateRef State = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + + // Check that the search string pointer is non-null (though it may point to + // a null string). + SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx); + State = checkNonNull(C, State, SearchStrPtr, SearchStrVal, 1); + if (!State) + return; + + // Check that the delimiter string is non-null. + const Expr *DelimStr = CE->getArg(1); + SVal DelimStrVal = State->getSVal(DelimStr, LCtx); + State = checkNonNull(C, State, DelimStr, DelimStrVal, 2); + if (!State) + return; + + SValBuilder &SVB = C.getSValBuilder(); + SVal Result; + if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) { + // Get the current value of the search string pointer, as a char*. + Result = State->getSVal(*SearchStrLoc, CharPtrTy); + + // Invalidate the search string, representing the change of one delimiter + // character to NUL. + State = InvalidateBuffer(C, State, SearchStrPtr, Result, + /*IsSourceBuffer*/false, nullptr); + + // Overwrite the search string pointer. The new value is either an address + // further along in the same string, or NULL if there are no more tokens. + State = State->bindLoc(*SearchStrLoc, + SVB.conjureSymbolVal(getTag(), + CE, + LCtx, + CharPtrTy, + C.blockCount()), + LCtx); + } else { + assert(SearchStrVal.isUnknown()); + // Conjure a symbolic value. It's the best we can do. + Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); + } + + // Set the return value, and finish. + State = State->BindExpr(CE, LCtx, Result); + C.addTransition(State); +} + +// These should probably be moved into a C++ standard library checker. 
+void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const { + evalStdCopyCommon(C, CE); +} + +void CStringChecker::evalStdCopyBackward(CheckerContext &C, + const CallExpr *CE) const { + evalStdCopyCommon(C, CE); +} + +void CStringChecker::evalStdCopyCommon(CheckerContext &C, + const CallExpr *CE) const { + if (!CE->getArg(2)->getType()->isPointerType()) + return; + + ProgramStateRef State = C.getState(); + + const LocationContext *LCtx = C.getLocationContext(); + + // template <class _InputIterator, class _OutputIterator> + // _OutputIterator + // copy(_InputIterator __first, _InputIterator __last, + // _OutputIterator __result) + + // Invalidate the destination buffer + const Expr *Dst = CE->getArg(2); + SVal DstVal = State->getSVal(Dst, LCtx); + State = InvalidateBuffer(C, State, Dst, DstVal, /*IsSource=*/false, + /*Size=*/nullptr); + + SValBuilder &SVB = C.getSValBuilder(); + + SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); + State = State->BindExpr(CE, LCtx, ResultVal); + + C.addTransition(State); +} + +void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const { + CurrentFunctionDescription = "memory set function"; + + const Expr *Mem = CE->getArg(0); + const Expr *CharE = CE->getArg(1); + const Expr *Size = CE->getArg(2); + ProgramStateRef State = C.getState(); + + // See if the size argument is zero. + const LocationContext *LCtx = C.getLocationContext(); + SVal SizeVal = State->getSVal(Size, LCtx); + QualType SizeTy = Size->getType(); + + ProgramStateRef StateZeroSize, StateNonZeroSize; + std::tie(StateZeroSize, StateNonZeroSize) = + assumeZero(C, State, SizeVal, SizeTy); + + // Get the value of the memory area. + SVal MemVal = State->getSVal(Mem, LCtx); + + // If the size is zero, there won't be any actual memory access, so + // just bind the return value to the Mem buffer and return. 
+ if (StateZeroSize && !StateNonZeroSize) { + StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, MemVal); + C.addTransition(StateZeroSize); + return; + } + + // Ensure the memory area is not null. + // If it is NULL there will be a NULL pointer dereference. + State = checkNonNull(C, StateNonZeroSize, Mem, MemVal, 1); + if (!State) + return; + + State = CheckBufferAccess(C, State, Size, Mem); + if (!State) + return; + + // According to the values of the arguments, bind the value of the second + // argument to the destination buffer and set string length, or just + // invalidate the destination buffer. + if (!memsetAux(Mem, C.getSVal(CharE), Size, C, State)) + return; + + State = State->BindExpr(CE, LCtx, MemVal); + C.addTransition(State); +} + +void CStringChecker::evalBzero(CheckerContext &C, const CallExpr *CE) const { + CurrentFunctionDescription = "memory clearance function"; + + const Expr *Mem = CE->getArg(0); + const Expr *Size = CE->getArg(1); + SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy); + + ProgramStateRef State = C.getState(); + + // See if the size argument is zero. + SVal SizeVal = C.getSVal(Size); + QualType SizeTy = Size->getType(); + + ProgramStateRef StateZeroSize, StateNonZeroSize; + std::tie(StateZeroSize, StateNonZeroSize) = + assumeZero(C, State, SizeVal, SizeTy); + + // If the size is zero, there won't be any actual memory access, + // In this case we just return. + if (StateZeroSize && !StateNonZeroSize) { + C.addTransition(StateZeroSize); + return; + } + + // Get the value of the memory area. + SVal MemVal = C.getSVal(Mem); + + // Ensure the memory area is not null. + // If it is NULL there will be a NULL pointer dereference. 
+ State = checkNonNull(C, StateNonZeroSize, Mem, MemVal, 1); + if (!State) + return; + + State = CheckBufferAccess(C, State, Size, Mem); + if (!State) + return; + + if (!memsetAux(Mem, Zero, Size, C, State)) + return; + + C.addTransition(State); +} + +//===----------------------------------------------------------------------===// +// The driver method, and other Checker callbacks. +//===----------------------------------------------------------------------===// + +CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call, + CheckerContext &C) const { + const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); + if (!CE) + return nullptr; + + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); + if (!FD) + return nullptr; + + if (Call.isCalled(StdCopy)) { + return &CStringChecker::evalStdCopy; + } else if (Call.isCalled(StdCopyBackward)) { + return &CStringChecker::evalStdCopyBackward; + } + + // Pro-actively check that argument types are safe to do arithmetic upon. + // We do not want to crash if someone accidentally passes a structure + // into, say, a C++ overload of any of these functions. We could not check + // that for std::copy because they may have arguments of other types. + for (auto I : CE->arguments()) { + QualType T = I->getType(); + if (!T->isIntegralOrEnumerationType() && !T->isPointerType()) + return nullptr; + } + + const FnCheck *Callback = Callbacks.lookup(Call); + if (Callback) + return *Callback; + + return nullptr; +} + +bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { + FnCheck Callback = identifyCall(Call, C); + + // If the callee isn't a string function, let another checker handle it. + if (!Callback) + return false; + + // Check and evaluate the call. + const auto *CE = cast<CallExpr>(Call.getOriginExpr()); + (this->*Callback)(C, CE); + + // If the evaluate call resulted in no change, chain to the next eval call + // handler. 
+ // Note, the custom CString evaluation calls assume that basic safety + // properties are held. However, if the user chooses to turn off some of these + // checks, we ignore the issues and leave the call evaluation to a generic + // handler. + return C.isDifferent(); +} + +void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { + // Record string length for char a[] = "abc"; + ProgramStateRef state = C.getState(); + + for (const auto *I : DS->decls()) { + const VarDecl *D = dyn_cast<VarDecl>(I); + if (!D) + continue; + + // FIXME: Handle array fields of structs. + if (!D->getType()->isArrayType()) + continue; + + const Expr *Init = D->getInit(); + if (!Init) + continue; + if (!isa<StringLiteral>(Init)) + continue; + + Loc VarLoc = state->getLValue(D, C.getLocationContext()); + const MemRegion *MR = VarLoc.getAsRegion(); + if (!MR) + continue; + + SVal StrVal = C.getSVal(Init); + assert(StrVal.isValid() && "Initializer string is unknown or undefined"); + DefinedOrUnknownSVal strLength = + getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>(); + + state = state->set<CStringLength>(MR, strLength); + } + + C.addTransition(state); +} + +ProgramStateRef +CStringChecker::checkRegionChanges(ProgramStateRef state, + const InvalidatedSymbols *, + ArrayRef<const MemRegion *> ExplicitRegions, + ArrayRef<const MemRegion *> Regions, + const LocationContext *LCtx, + const CallEvent *Call) const { + CStringLengthTy Entries = state->get<CStringLength>(); + if (Entries.isEmpty()) + return state; + + llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; + llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; + + // First build sets for the changed regions and their super-regions. 
+ for (ArrayRef<const MemRegion *>::iterator + I = Regions.begin(), E = Regions.end(); I != E; ++I) { + const MemRegion *MR = *I; + Invalidated.insert(MR); + + SuperRegions.insert(MR); + while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { + MR = SR->getSuperRegion(); + SuperRegions.insert(MR); + } + } + + CStringLengthTy::Factory &F = state->get_context<CStringLength>(); + + // Then loop over the entries in the current state. + for (CStringLengthTy::iterator I = Entries.begin(), + E = Entries.end(); I != E; ++I) { + const MemRegion *MR = I.getKey(); + + // Is this entry for a super-region of a changed region? + if (SuperRegions.count(MR)) { + Entries = F.remove(Entries, MR); + continue; + } + + // Is this entry for a sub-region of a changed region? + const MemRegion *Super = MR; + while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { + Super = SR->getSuperRegion(); + if (Invalidated.count(Super)) { + Entries = F.remove(Entries, MR); + break; + } + } + } + + return state->set<CStringLength>(Entries); +} + +void CStringChecker::checkLiveSymbols(ProgramStateRef state, + SymbolReaper &SR) const { + // Mark all symbols in our string length map as valid. 
+ CStringLengthTy Entries = state->get<CStringLength>(); + + for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + SVal Len = I.getData(); + + for (SymExpr::symbol_iterator si = Len.symbol_begin(), + se = Len.symbol_end(); si != se; ++si) + SR.markInUse(*si); + } +} + +void CStringChecker::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + CStringLengthTy Entries = state->get<CStringLength>(); + if (Entries.isEmpty()) + return; + + CStringLengthTy::Factory &F = state->get_context<CStringLength>(); + for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + SVal Len = I.getData(); + if (SymbolRef Sym = Len.getAsSymbol()) { + if (SR.isDead(Sym)) + Entries = F.remove(Entries, I.getKey()); + } + } + + state = state->set<CStringLength>(Entries); + C.addTransition(state); +} + +void ento::registerCStringModeling(CheckerManager &Mgr) { + Mgr.registerChecker<CStringChecker>(); +} + +bool ento::shouldRegisterCStringModeling(const LangOptions &LO) { + return true; +} + +#define REGISTER_CHECKER(name) \ + void ento::register##name(CheckerManager &mgr) { \ + CStringChecker *checker = mgr.getChecker<CStringChecker>(); \ + checker->Filter.Check##name = true; \ + checker->Filter.CheckName##name = mgr.getCurrentCheckerName(); \ + } \ + \ + bool ento::shouldRegister##name(const LangOptions &LO) { return true; } + +REGISTER_CHECKER(CStringNullArg) +REGISTER_CHECKER(CStringOutOfBounds) +REGISTER_CHECKER(CStringBufferOverlap) +REGISTER_CHECKER(CStringNotNullTerm) diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp new file mode 100644 index 000000000000..d84fcc69a492 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp @@ -0,0 +1,296 @@ +//== CStringSyntaxChecker.cpp - CoreFoundation containers API *- C++ -*-==// +// +// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// An AST checker that looks for common pitfalls when using C string APIs. +// - Identifies erroneous patterns in the last argument to strncat - the number +// of bytes to copy. +// +//===----------------------------------------------------------------------===// +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/Expr.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TypeTraits.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +class WalkAST: public StmtVisitor<WalkAST> { + const CheckerBase *Checker; + BugReporter &BR; + AnalysisDeclContext* AC; + + /// Check if two expressions refer to the same declaration. + bool sameDecl(const Expr *A1, const Expr *A2) { + if (const auto *D1 = dyn_cast<DeclRefExpr>(A1->IgnoreParenCasts())) + if (const auto *D2 = dyn_cast<DeclRefExpr>(A2->IgnoreParenCasts())) + return D1->getDecl() == D2->getDecl(); + return false; + } + + /// Check if the expression E is a sizeof(WithArg). 
+ bool isSizeof(const Expr *E, const Expr *WithArg) { + if (const auto *UE = dyn_cast<UnaryExprOrTypeTraitExpr>(E)) + if (UE->getKind() == UETT_SizeOf && !UE->isArgumentType()) + return sameDecl(UE->getArgumentExpr(), WithArg); + return false; + } + + /// Check if the expression E is a strlen(WithArg). + bool isStrlen(const Expr *E, const Expr *WithArg) { + if (const auto *CE = dyn_cast<CallExpr>(E)) { + const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return false; + return (CheckerContext::isCLibraryFunction(FD, "strlen") && + sameDecl(CE->getArg(0), WithArg)); + } + return false; + } + + /// Check if the expression is an integer literal with value 1. + bool isOne(const Expr *E) { + if (const auto *IL = dyn_cast<IntegerLiteral>(E)) + return (IL->getValue().isIntN(1)); + return false; + } + + StringRef getPrintableName(const Expr *E) { + if (const auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts())) + return D->getDecl()->getName(); + return StringRef(); + } + + /// Identify erroneous patterns in the last argument to strncat - the number + /// of bytes to copy. + bool containsBadStrncatPattern(const CallExpr *CE); + + /// Identify erroneous patterns in the last argument to strlcpy - the number + /// of bytes to copy. + /// The bad pattern checked is when the size is known + /// to be larger than the destination can handle. + /// char dst[2]; + /// size_t cpy = 4; + /// strlcpy(dst, "abcd", sizeof("abcd") - 1); + /// strlcpy(dst, "abcd", 4); + /// strlcpy(dst + 3, "abcd", 2); + /// strlcpy(dst, "abcd", cpy); + /// Identify erroneous patterns in the last argument to strlcat - the number + /// of bytes to copy. + /// The bad pattern checked is when the last argument is basically + /// pointing to the destination buffer size or argument larger or + /// equal to. 
+ /// char dst[2]; + /// strlcat(dst, src2, sizeof(dst)); + /// strlcat(dst, src2, 2); + /// strlcat(dst, src2, 10); + bool containsBadStrlcpyStrlcatPattern(const CallExpr *CE); + +public: + WalkAST(const CheckerBase *Checker, BugReporter &BR, AnalysisDeclContext *AC) + : Checker(Checker), BR(BR), AC(AC) {} + + // Statement visitor methods. + void VisitChildren(Stmt *S); + void VisitStmt(Stmt *S) { + VisitChildren(S); + } + void VisitCallExpr(CallExpr *CE); +}; +} // end anonymous namespace + +// The correct size argument should look like following: +// strncat(dst, src, sizeof(dst) - strlen(dest) - 1); +// We look for the following anti-patterns: +// - strncat(dst, src, sizeof(dst) - strlen(dst)); +// - strncat(dst, src, sizeof(dst) - 1); +// - strncat(dst, src, sizeof(dst)); +bool WalkAST::containsBadStrncatPattern(const CallExpr *CE) { + if (CE->getNumArgs() != 3) + return false; + const Expr *DstArg = CE->getArg(0); + const Expr *SrcArg = CE->getArg(1); + const Expr *LenArg = CE->getArg(2); + + // Identify wrong size expressions, which are commonly used instead. 
+ if (const auto *BE = dyn_cast<BinaryOperator>(LenArg->IgnoreParenCasts())) { + // - sizeof(dst) - strlen(dst) + if (BE->getOpcode() == BO_Sub) { + const Expr *L = BE->getLHS(); + const Expr *R = BE->getRHS(); + if (isSizeof(L, DstArg) && isStrlen(R, DstArg)) + return true; + + // - sizeof(dst) - 1 + if (isSizeof(L, DstArg) && isOne(R->IgnoreParenCasts())) + return true; + } + } + // - sizeof(dst) + if (isSizeof(LenArg, DstArg)) + return true; + + // - sizeof(src) + if (isSizeof(LenArg, SrcArg)) + return true; + return false; +} + +bool WalkAST::containsBadStrlcpyStrlcatPattern(const CallExpr *CE) { + if (CE->getNumArgs() != 3) + return false; + const Expr *DstArg = CE->getArg(0); + const Expr *LenArg = CE->getArg(2); + + const auto *DstArgDRE = dyn_cast<DeclRefExpr>(DstArg->IgnoreParenImpCasts()); + const auto *LenArgDRE = + dyn_cast<DeclRefExpr>(LenArg->IgnoreParenLValueCasts()); + uint64_t DstOff = 0; + if (isSizeof(LenArg, DstArg)) + return false; + + // - size_t dstlen = sizeof(dst) + if (LenArgDRE) { + const auto *LenArgVal = dyn_cast<VarDecl>(LenArgDRE->getDecl()); + // If it's an EnumConstantDecl instead, then we're missing out on something. + if (!LenArgVal) { + assert(isa<EnumConstantDecl>(LenArgDRE->getDecl())); + return false; + } + if (LenArgVal->getInit()) + LenArg = LenArgVal->getInit(); + } + + // - integral value + // We try to figure out if the last argument is possibly longer + // than the destination can possibly handle if its size can be defined. + if (const auto *IL = dyn_cast<IntegerLiteral>(LenArg->IgnoreParenImpCasts())) { + uint64_t ILRawVal = IL->getValue().getZExtValue(); + + // Case when there is pointer arithmetic on the destination buffer + // especially when we offset from the base decreasing the + // buffer length accordingly. 
+ if (!DstArgDRE) { + if (const auto *BE = + dyn_cast<BinaryOperator>(DstArg->IgnoreParenImpCasts())) { + DstArgDRE = dyn_cast<DeclRefExpr>(BE->getLHS()->IgnoreParenImpCasts()); + if (BE->getOpcode() == BO_Add) { + if ((IL = dyn_cast<IntegerLiteral>(BE->getRHS()->IgnoreParenImpCasts()))) { + DstOff = IL->getValue().getZExtValue(); + } + } + } + } + if (DstArgDRE) { + if (const auto *Buffer = + dyn_cast<ConstantArrayType>(DstArgDRE->getType())) { + ASTContext &C = BR.getContext(); + uint64_t BufferLen = C.getTypeSize(Buffer) / 8; + auto RemainingBufferLen = BufferLen - DstOff; + if (RemainingBufferLen < ILRawVal) + return true; + } + } + } + + return false; +} + +void WalkAST::VisitCallExpr(CallExpr *CE) { + const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return; + + if (CheckerContext::isCLibraryFunction(FD, "strncat")) { + if (containsBadStrncatPattern(CE)) { + const Expr *DstArg = CE->getArg(0); + const Expr *LenArg = CE->getArg(2); + PathDiagnosticLocation Loc = + PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC); + + StringRef DstName = getPrintableName(DstArg); + + SmallString<256> S; + llvm::raw_svector_ostream os(S); + os << "Potential buffer overflow. 
"; + if (!DstName.empty()) { + os << "Replace with 'sizeof(" << DstName << ") " + "- strlen(" << DstName <<") - 1'"; + os << " or u"; + } else + os << "U"; + os << "se a safer 'strlcat' API"; + + BR.EmitBasicReport(FD, Checker, "Anti-pattern in the argument", + "C String API", os.str(), Loc, + LenArg->getSourceRange()); + } + } else if (CheckerContext::isCLibraryFunction(FD, "strlcpy") || + CheckerContext::isCLibraryFunction(FD, "strlcat")) { + if (containsBadStrlcpyStrlcatPattern(CE)) { + const Expr *DstArg = CE->getArg(0); + const Expr *LenArg = CE->getArg(2); + PathDiagnosticLocation Loc = + PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC); + + StringRef DstName = getPrintableName(DstArg); + + SmallString<256> S; + llvm::raw_svector_ostream os(S); + os << "The third argument allows to potentially copy more bytes than it should. "; + os << "Replace with the value "; + if (!DstName.empty()) + os << "sizeof(" << DstName << ")"; + else + os << "sizeof(<destination buffer>)"; + os << " or lower"; + + BR.EmitBasicReport(FD, Checker, "Anti-pattern in the argument", + "C String API", os.str(), Loc, + LenArg->getSourceRange()); + } + } + + // Recurse and check children. 
+ VisitChildren(CE); +} + +void WalkAST::VisitChildren(Stmt *S) { + for (Stmt *Child : S->children()) + if (Child) + Visit(Child); +} + +namespace { +class CStringSyntaxChecker: public Checker<check::ASTCodeBody> { +public: + + void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr, + BugReporter &BR) const { + WalkAST walker(this, BR, Mgr.getAnalysisDeclContext(D)); + walker.Visit(D->getBody()); + } +}; +} + +void ento::registerCStringSyntaxChecker(CheckerManager &mgr) { + mgr.registerChecker<CStringSyntaxChecker>(); +} + +bool ento::shouldRegisterCStringSyntaxChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CXXSelfAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CXXSelfAssignmentChecker.cpp new file mode 100644 index 000000000000..01f5b9c889e3 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CXXSelfAssignmentChecker.cpp @@ -0,0 +1,81 @@ +//=== CXXSelfAssignmentChecker.cpp -----------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines CXXSelfAssignmentChecker, which tests all custom defined +// copy and move assignment operators for the case of self assignment, thus +// where the parameter refers to the same location where the this pointer +// points to. The checker itself does not do any checks at all, but it +// causes the analyzer to check every copy and move assignment operator twice: +// once for when 'this' aliases with the parameter and once for when it may not. +// It is the task of the other enabled checkers to find the bugs in these two +// different cases. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { + +class CXXSelfAssignmentChecker : public Checker<check::BeginFunction> { +public: + CXXSelfAssignmentChecker(); + void checkBeginFunction(CheckerContext &C) const; +}; +} + +CXXSelfAssignmentChecker::CXXSelfAssignmentChecker() {} + +void CXXSelfAssignmentChecker::checkBeginFunction(CheckerContext &C) const { + if (!C.inTopFrame()) + return; + const auto *LCtx = C.getLocationContext(); + const auto *MD = dyn_cast<CXXMethodDecl>(LCtx->getDecl()); + if (!MD) + return; + if (!MD->isCopyAssignmentOperator() && !MD->isMoveAssignmentOperator()) + return; + auto &State = C.getState(); + auto &SVB = C.getSValBuilder(); + auto ThisVal = + State->getSVal(SVB.getCXXThis(MD, LCtx->getStackFrame())); + auto Param = SVB.makeLoc(State->getRegion(MD->getParamDecl(0), LCtx)); + auto ParamVal = State->getSVal(Param); + + ProgramStateRef SelfAssignState = State->bindLoc(Param, ThisVal, LCtx); + const NoteTag *SelfAssignTag = + C.getNoteTag([MD](BugReport &BR) -> std::string { + SmallString<256> Msg; + llvm::raw_svector_ostream Out(Msg); + Out << "Assuming " << MD->getParamDecl(0)->getName() << " == *this"; + return Out.str(); + }); + C.addTransition(SelfAssignState, SelfAssignTag); + + ProgramStateRef NonSelfAssignState = State->bindLoc(Param, ParamVal, LCtx); + const NoteTag *NonSelfAssignTag = + C.getNoteTag([MD](BugReport &BR) -> std::string { + SmallString<256> Msg; + llvm::raw_svector_ostream Out(Msg); + Out << "Assuming " << MD->getParamDecl(0)->getName() << " != *this"; + return Out.str(); + }); + C.addTransition(NonSelfAssignState, NonSelfAssignTag); +} + +void ento::registerCXXSelfAssignmentChecker(CheckerManager &Mgr) { + 
namespace {

/// Checker for errors in call and Objective-C message expressions. It hooks
/// call expressions, C++ delete expressions, Objective-C messages (including
/// messages to nil) and generic pre-call events.
class CallAndMessageChecker
  : public Checker< check::PreStmt<CallExpr>,
                    check::PreStmt<CXXDeleteExpr>,
                    check::PreObjCMessage,
                    check::ObjCMessageNil,
                    check::PreCall > {
  // Bug types are created on first use from const callbacks, hence 'mutable'.
  mutable std::unique_ptr<BugType> BT_call_null;
  mutable std::unique_ptr<BugType> BT_call_undef;
  mutable std::unique_ptr<BugType> BT_cxx_call_null;
  mutable std::unique_ptr<BugType> BT_cxx_call_undef;
  mutable std::unique_ptr<BugType> BT_call_arg;
  mutable std::unique_ptr<BugType> BT_cxx_delete_undef;
  mutable std::unique_ptr<BugType> BT_msg_undef;
  mutable std::unique_ptr<BugType> BT_objc_prop_undef;
  mutable std::unique_ptr<BugType> BT_objc_subscript_undef;
  mutable std::unique_ptr<BugType> BT_msg_arg;
  mutable std::unique_ptr<BugType> BT_msg_ret;
  mutable std::unique_ptr<BugType> BT_call_few_args;

public:
  // When false, uninitRefOrPointer() returns without performing its check.
  DefaultBool Check_CallAndMessageUnInitRefArg;
  CheckerNameRef CheckName_CallAndMessageUnInitRefArg;

  /// Report a callee value that is undefined or known to be null.
  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
  /// Report an undefined argument to 'delete' / 'delete[]'.
  void checkPreStmt(const CXXDeleteExpr *DE, CheckerContext &C) const;
  void checkPreObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const;

  /// Fill in the return value that results from messaging nil based on the
  /// return type and architecture and diagnose if the return value will be
  /// garbage.
  void checkObjCMessageNil(const ObjCMethodCall &msg, CheckerContext &C) const;

  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;

private:
  /// Examine one argument value. Returns true when the argument was found to
  /// be problematic: an undefined value, a pointer/reference-to-const
  /// argument whose pointee is undefined, or (with \p CheckUninitFields) a
  /// by-value struct containing an undefined field.
  bool PreVisitProcessArg(CheckerContext &C, SVal V, SourceRange ArgRange,
                          const Expr *ArgEx, int ArgumentNumber,
                          bool CheckUninitFields, const CallEvent &Call,
                          std::unique_ptr<BugType> &BT,
                          const ParmVarDecl *ParamDecl) const;

  /// Emit a report of type \p BT, using the bug type's description as the
  /// message and tracking \p BadE when it is non-null.
  static void emitBadCall(BugType *BT, CheckerContext &C, const Expr *BadE);
  void emitNilReceiverBug(CheckerContext &C, const ObjCMethodCall &msg,
                          ExplodedNode *N) const;

  void HandleNilReceiver(CheckerContext &C,
                         ProgramStateRef state,
                         const ObjCMethodCall &msg) const;

  /// Create \p BT as a BuiltinBug with description \p desc unless it already
  /// exists.
  void LazyInit_BT(const char *desc, std::unique_ptr<BugType> &BT) const {
    if (!BT)
      BT.reset(new BuiltinBug(this, desc));
  }
  /// Check an argument passed for a pointer-to-const or reference-to-const
  /// parameter; returns true when the value stored in the referenced region
  /// is undefined. Does nothing unless Check_CallAndMessageUnInitRefArg is
  /// set and a parameter declaration is available.
  bool uninitRefOrPointer(CheckerContext &C, const SVal &V,
                          SourceRange ArgRange, const Expr *ArgEx,
                          std::unique_ptr<BugType> &BT,
                          const ParmVarDecl *ParamDecl, const char *BD,
                          int ArgumentNumber) const;
};
} // end anonymous namespace
variadic function argument. + if(!ParamDecl) + return false; + + // If parameter is declared as pointer to const in function declaration, + // then check if corresponding argument in function call is + // pointing to undefined symbol value (uninitialized memory). + SmallString<200> Buf; + llvm::raw_svector_ostream Os(Buf); + + if (ParamDecl->getType()->isPointerType()) { + Os << (ArgumentNumber + 1) << llvm::getOrdinalSuffix(ArgumentNumber + 1) + << " function call argument is a pointer to uninitialized value"; + } else if (ParamDecl->getType()->isReferenceType()) { + Os << (ArgumentNumber + 1) << llvm::getOrdinalSuffix(ArgumentNumber + 1) + << " function call argument is an uninitialized value"; + } else + return false; + + if(!ParamDecl->getType()->getPointeeType().isConstQualified()) + return false; + + if (const MemRegion *SValMemRegion = V.getAsRegion()) { + const ProgramStateRef State = C.getState(); + const SVal PSV = State->getSVal(SValMemRegion, C.getASTContext().CharTy); + if (PSV.isUndef()) { + if (ExplodedNode *N = C.generateErrorNode()) { + LazyInit_BT(BD, BT); + auto R = std::make_unique<PathSensitiveBugReport>(*BT, Os.str(), N); + R->addRange(ArgRange); + if (ArgEx) + bugreporter::trackExpressionValue(N, ArgEx, *R); + + C.emitReport(std::move(R)); + } + return true; + } + } + return false; +} + +namespace { +class FindUninitializedField { +public: + SmallVector<const FieldDecl *, 10> FieldChain; + +private: + StoreManager &StoreMgr; + MemRegionManager &MrMgr; + Store store; + +public: + FindUninitializedField(StoreManager &storeMgr, MemRegionManager &mrMgr, + Store s) + : StoreMgr(storeMgr), MrMgr(mrMgr), store(s) {} + + bool Find(const TypedValueRegion *R) { + QualType T = R->getValueType(); + if (const RecordType *RT = T->getAsStructureType()) { + const RecordDecl *RD = RT->getDecl()->getDefinition(); + assert(RD && "Referred record has no definition"); + for (const auto *I : RD->fields()) { + const FieldRegion *FR = MrMgr.getFieldRegion(I, R); + 
FieldChain.push_back(I); + T = I->getType(); + if (T->getAsStructureType()) { + if (Find(FR)) + return true; + } else { + const SVal &V = StoreMgr.getBinding(store, loc::MemRegionVal(FR)); + if (V.isUndef()) + return true; + } + FieldChain.pop_back(); + } + } + + return false; + } +}; +} // namespace + +bool CallAndMessageChecker::PreVisitProcessArg(CheckerContext &C, + SVal V, + SourceRange ArgRange, + const Expr *ArgEx, + int ArgumentNumber, + bool CheckUninitFields, + const CallEvent &Call, + std::unique_ptr<BugType> &BT, + const ParmVarDecl *ParamDecl + ) const { + const char *BD = "Uninitialized argument value"; + + if (uninitRefOrPointer(C, V, ArgRange, ArgEx, BT, ParamDecl, BD, + ArgumentNumber)) + return true; + + if (V.isUndef()) { + if (ExplodedNode *N = C.generateErrorNode()) { + LazyInit_BT(BD, BT); + // Generate a report for this bug. + SmallString<200> Buf; + llvm::raw_svector_ostream Os(Buf); + describeUninitializedArgumentInCall(Call, ArgumentNumber, Os); + auto R = std::make_unique<PathSensitiveBugReport>(*BT, Os.str(), N); + + R->addRange(ArgRange); + if (ArgEx) + bugreporter::trackExpressionValue(N, ArgEx, *R); + C.emitReport(std::move(R)); + } + return true; + } + + if (!CheckUninitFields) + return false; + + if (auto LV = V.getAs<nonloc::LazyCompoundVal>()) { + const LazyCompoundValData *D = LV->getCVData(); + FindUninitializedField F(C.getState()->getStateManager().getStoreManager(), + C.getSValBuilder().getRegionManager(), + D->getStore()); + + if (F.Find(D->getRegion())) { + if (ExplodedNode *N = C.generateErrorNode()) { + LazyInit_BT(BD, BT); + SmallString<512> Str; + llvm::raw_svector_ostream os(Str); + os << "Passed-by-value struct argument contains uninitialized data"; + + if (F.FieldChain.size() == 1) + os << " (e.g., field: '" << *F.FieldChain[0] << "')"; + else { + os << " (e.g., via the field chain: '"; + bool first = true; + for (SmallVectorImpl<const FieldDecl *>::iterator + DI = F.FieldChain.begin(), DE = F.FieldChain.end(); 
DI!=DE;++DI){ + if (first) + first = false; + else + os << '.'; + os << **DI; + } + os << "')"; + } + + // Generate a report for this bug. + auto R = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + R->addRange(ArgRange); + + if (ArgEx) + bugreporter::trackExpressionValue(N, ArgEx, *R); + // FIXME: enhance track back for uninitialized value for arbitrary + // memregions + C.emitReport(std::move(R)); + } + return true; + } + } + + return false; +} + +void CallAndMessageChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const{ + + const Expr *Callee = CE->getCallee()->IgnoreParens(); + ProgramStateRef State = C.getState(); + const LocationContext *LCtx = C.getLocationContext(); + SVal L = State->getSVal(Callee, LCtx); + + if (L.isUndef()) { + if (!BT_call_undef) + BT_call_undef.reset(new BuiltinBug( + this, "Called function pointer is an uninitialized pointer value")); + emitBadCall(BT_call_undef.get(), C, Callee); + return; + } + + ProgramStateRef StNonNull, StNull; + std::tie(StNonNull, StNull) = State->assume(L.castAs<DefinedOrUnknownSVal>()); + + if (StNull && !StNonNull) { + if (!BT_call_null) + BT_call_null.reset(new BuiltinBug( + this, "Called function pointer is null (null dereference)")); + emitBadCall(BT_call_null.get(), C, Callee); + return; + } + + C.addTransition(StNonNull); +} + +void CallAndMessageChecker::checkPreStmt(const CXXDeleteExpr *DE, + CheckerContext &C) const { + + SVal Arg = C.getSVal(DE->getArgument()); + if (Arg.isUndef()) { + StringRef Desc; + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + if (!BT_cxx_delete_undef) + BT_cxx_delete_undef.reset( + new BuiltinBug(this, "Uninitialized argument value")); + if (DE->isArrayFormAsWritten()) + Desc = "Argument to 'delete[]' is uninitialized"; + else + Desc = "Argument to 'delete' is uninitialized"; + BugType *BT = BT_cxx_delete_undef.get(); + auto R = std::make_unique<PathSensitiveBugReport>(*BT, Desc, N); + bugreporter::trackExpressionValue(N, DE, *R); + 
C.emitReport(std::move(R)); + return; + } +} + +void CallAndMessageChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // If this is a call to a C++ method, check if the callee is null or + // undefined. + if (const CXXInstanceCall *CC = dyn_cast<CXXInstanceCall>(&Call)) { + SVal V = CC->getCXXThisVal(); + if (V.isUndef()) { + if (!BT_cxx_call_undef) + BT_cxx_call_undef.reset( + new BuiltinBug(this, "Called C++ object pointer is uninitialized")); + emitBadCall(BT_cxx_call_undef.get(), C, CC->getCXXThisExpr()); + return; + } + + ProgramStateRef StNonNull, StNull; + std::tie(StNonNull, StNull) = + State->assume(V.castAs<DefinedOrUnknownSVal>()); + + if (StNull && !StNonNull) { + if (!BT_cxx_call_null) + BT_cxx_call_null.reset( + new BuiltinBug(this, "Called C++ object pointer is null")); + emitBadCall(BT_cxx_call_null.get(), C, CC->getCXXThisExpr()); + return; + } + + State = StNonNull; + } + + const Decl *D = Call.getDecl(); + if (D && (isa<FunctionDecl>(D) || isa<BlockDecl>(D))) { + // If we have a function or block declaration, we can make sure we pass + // enough parameters. + unsigned Params = Call.parameters().size(); + if (Call.getNumArgs() < Params) { + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + + LazyInit_BT("Function call with too few arguments", BT_call_few_args); + + SmallString<512> Str; + llvm::raw_svector_ostream os(Str); + if (isa<FunctionDecl>(D)) { + os << "Function "; + } else { + assert(isa<BlockDecl>(D)); + os << "Block "; + } + os << "taking " << Params << " argument" + << (Params == 1 ? "" : "s") << " is called with fewer (" + << Call.getNumArgs() << ")"; + + C.emitReport(std::make_unique<PathSensitiveBugReport>(*BT_call_few_args, + os.str(), N)); + } + } + + // Don't check for uninitialized field values in arguments if the + // caller has a body that is available and we have the chance to inline it. 
+ // This is a hack, but is a reasonable compromise betweens sometimes warning + // and sometimes not depending on if we decide to inline a function. + const bool checkUninitFields = + !(C.getAnalysisManager().shouldInlineCall() && (D && D->getBody())); + + std::unique_ptr<BugType> *BT; + if (isa<ObjCMethodCall>(Call)) + BT = &BT_msg_arg; + else + BT = &BT_call_arg; + + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); + for (unsigned i = 0, e = Call.getNumArgs(); i != e; ++i) { + const ParmVarDecl *ParamDecl = nullptr; + if(FD && i < FD->getNumParams()) + ParamDecl = FD->getParamDecl(i); + if (PreVisitProcessArg(C, Call.getArgSVal(i), Call.getArgSourceRange(i), + Call.getArgExpr(i), i, + checkUninitFields, Call, *BT, ParamDecl)) + return; + } + + // If we make it here, record our assumptions about the callee. + C.addTransition(State); +} + +void CallAndMessageChecker::checkPreObjCMessage(const ObjCMethodCall &msg, + CheckerContext &C) const { + SVal recVal = msg.getReceiverSVal(); + if (recVal.isUndef()) { + if (ExplodedNode *N = C.generateErrorNode()) { + BugType *BT = nullptr; + switch (msg.getMessageKind()) { + case OCM_Message: + if (!BT_msg_undef) + BT_msg_undef.reset(new BuiltinBug(this, + "Receiver in message expression " + "is an uninitialized value")); + BT = BT_msg_undef.get(); + break; + case OCM_PropertyAccess: + if (!BT_objc_prop_undef) + BT_objc_prop_undef.reset(new BuiltinBug( + this, "Property access on an uninitialized object pointer")); + BT = BT_objc_prop_undef.get(); + break; + case OCM_Subscript: + if (!BT_objc_subscript_undef) + BT_objc_subscript_undef.reset(new BuiltinBug( + this, "Subscript access on an uninitialized object pointer")); + BT = BT_objc_subscript_undef.get(); + break; + } + assert(BT && "Unknown message kind."); + + auto R = std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N); + const ObjCMessageExpr *ME = msg.getOriginExpr(); + R->addRange(ME->getReceiverRange()); + + // FIXME: 
getTrackNullOrUndefValueVisitor can't handle "super" yet. + if (const Expr *ReceiverE = ME->getInstanceReceiver()) + bugreporter::trackExpressionValue(N, ReceiverE, *R); + C.emitReport(std::move(R)); + } + return; + } +} + +void CallAndMessageChecker::checkObjCMessageNil(const ObjCMethodCall &msg, + CheckerContext &C) const { + HandleNilReceiver(C, C.getState(), msg); +} + +void CallAndMessageChecker::emitNilReceiverBug(CheckerContext &C, + const ObjCMethodCall &msg, + ExplodedNode *N) const { + + if (!BT_msg_ret) + BT_msg_ret.reset( + new BuiltinBug(this, "Receiver in message expression is 'nil'")); + + const ObjCMessageExpr *ME = msg.getOriginExpr(); + + QualType ResTy = msg.getResultType(); + + SmallString<200> buf; + llvm::raw_svector_ostream os(buf); + os << "The receiver of message '"; + ME->getSelector().print(os); + os << "' is nil"; + if (ResTy->isReferenceType()) { + os << ", which results in forming a null reference"; + } else { + os << " and returns a value of type '"; + msg.getResultType().print(os, C.getLangOpts()); + os << "' that will be garbage"; + } + + auto report = + std::make_unique<PathSensitiveBugReport>(*BT_msg_ret, os.str(), N); + report->addRange(ME->getReceiverRange()); + // FIXME: This won't track "self" in messages to super. + if (const Expr *receiver = ME->getInstanceReceiver()) { + bugreporter::trackExpressionValue(N, receiver, *report); + } + C.emitReport(std::move(report)); +} + +static bool supportsNilWithFloatRet(const llvm::Triple &triple) { + return (triple.getVendor() == llvm::Triple::Apple && + (triple.isiOS() || triple.isWatchOS() || + !triple.isMacOSXVersionLT(10,5))); +} + +void CallAndMessageChecker::HandleNilReceiver(CheckerContext &C, + ProgramStateRef state, + const ObjCMethodCall &Msg) const { + ASTContext &Ctx = C.getASTContext(); + static CheckerProgramPointTag Tag(this, "NilReceiver"); + + // Check the return type of the message expression. 
A message to nil will + // return different values depending on the return type and the architecture. + QualType RetTy = Msg.getResultType(); + CanQualType CanRetTy = Ctx.getCanonicalType(RetTy); + const LocationContext *LCtx = C.getLocationContext(); + + if (CanRetTy->isStructureOrClassType()) { + // Structure returns are safe since the compiler zeroes them out. + SVal V = C.getSValBuilder().makeZeroVal(RetTy); + C.addTransition(state->BindExpr(Msg.getOriginExpr(), LCtx, V), &Tag); + return; + } + + // Other cases: check if sizeof(return type) > sizeof(void*) + if (CanRetTy != Ctx.VoidTy && C.getLocationContext()->getParentMap() + .isConsumedExpr(Msg.getOriginExpr())) { + // Compute: sizeof(void *) and sizeof(return type) + const uint64_t voidPtrSize = Ctx.getTypeSize(Ctx.VoidPtrTy); + const uint64_t returnTypeSize = Ctx.getTypeSize(CanRetTy); + + if (CanRetTy.getTypePtr()->isReferenceType()|| + (voidPtrSize < returnTypeSize && + !(supportsNilWithFloatRet(Ctx.getTargetInfo().getTriple()) && + (Ctx.FloatTy == CanRetTy || + Ctx.DoubleTy == CanRetTy || + Ctx.LongDoubleTy == CanRetTy || + Ctx.LongLongTy == CanRetTy || + Ctx.UnsignedLongLongTy == CanRetTy)))) { + if (ExplodedNode *N = C.generateErrorNode(state, &Tag)) + emitNilReceiverBug(C, Msg, N); + return; + } + + // Handle the safe cases where the return value is 0 if the + // receiver is nil. + // + // FIXME: For now take the conservative approach that we only + // return null values if we *know* that the receiver is nil. + // This is because we can have surprises like: + // + // ... = [[NSScreens screens] objectAtIndex:0]; + // + // What can happen is that [... screens] could return nil, but + // it most likely isn't nil. We should assume the semantics + // of this case unless we have *a lot* more knowledge. 
+ // + SVal V = C.getSValBuilder().makeZeroVal(RetTy); + C.addTransition(state->BindExpr(Msg.getOriginExpr(), LCtx, V), &Tag); + return; + } + + C.addTransition(state); +} + +void ento::registerCallAndMessageChecker(CheckerManager &mgr) { + mgr.registerChecker<CallAndMessageChecker>(); +} + +bool ento::shouldRegisterCallAndMessageChecker(const LangOptions &LO) { + return true; +} + +void ento::registerCallAndMessageUnInitRefArg(CheckerManager &mgr) { + CallAndMessageChecker *Checker = mgr.getChecker<CallAndMessageChecker>(); + Checker->Check_CallAndMessageUnInitRefArg = true; + Checker->CheckName_CallAndMessageUnInitRefArg = mgr.getCurrentCheckerName(); +} + +bool ento::shouldRegisterCallAndMessageUnInitRefArg(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp new file mode 100644 index 000000000000..51c1d4409929 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp @@ -0,0 +1,152 @@ +//=== CastSizeChecker.cpp ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// CastSizeChecker checks when casting a malloc'ed symbolic region to type T, +// whether the size of the symbolic region is a multiple of the size of T. 
+// +//===----------------------------------------------------------------------===// +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/CharUnits.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CastSizeChecker : public Checker< check::PreStmt<CastExpr> > { + mutable std::unique_ptr<BuiltinBug> BT; + +public: + void checkPreStmt(const CastExpr *CE, CheckerContext &C) const; +}; +} + +/// Check if we are casting to a struct with a flexible array at the end. +/// \code +/// struct foo { +/// size_t len; +/// struct bar data[]; +/// }; +/// \endcode +/// or +/// \code +/// struct foo { +/// size_t len; +/// struct bar data[0]; +/// } +/// \endcode +/// In these cases it is also valid to allocate size of struct foo + a multiple +/// of struct bar. 
+static bool evenFlexibleArraySize(ASTContext &Ctx, CharUnits RegionSize, + CharUnits TypeSize, QualType ToPointeeTy) { + const RecordType *RT = ToPointeeTy->getAs<RecordType>(); + if (!RT) + return false; + + const RecordDecl *RD = RT->getDecl(); + RecordDecl::field_iterator Iter(RD->field_begin()); + RecordDecl::field_iterator End(RD->field_end()); + const FieldDecl *Last = nullptr; + for (; Iter != End; ++Iter) + Last = *Iter; + assert(Last && "empty structs should already be handled"); + + const Type *ElemType = Last->getType()->getArrayElementTypeNoTypeQual(); + CharUnits FlexSize; + if (const ConstantArrayType *ArrayTy = + Ctx.getAsConstantArrayType(Last->getType())) { + FlexSize = Ctx.getTypeSizeInChars(ElemType); + if (ArrayTy->getSize() == 1 && TypeSize > FlexSize) + TypeSize -= FlexSize; + else if (ArrayTy->getSize() != 0) + return false; + } else if (RD->hasFlexibleArrayMember()) { + FlexSize = Ctx.getTypeSizeInChars(ElemType); + } else { + return false; + } + + if (FlexSize.isZero()) + return false; + + CharUnits Left = RegionSize - TypeSize; + if (Left.isNegative()) + return false; + + return Left % FlexSize == 0; +} + +void CastSizeChecker::checkPreStmt(const CastExpr *CE,CheckerContext &C) const { + const Expr *E = CE->getSubExpr(); + ASTContext &Ctx = C.getASTContext(); + QualType ToTy = Ctx.getCanonicalType(CE->getType()); + const PointerType *ToPTy = dyn_cast<PointerType>(ToTy.getTypePtr()); + + if (!ToPTy) + return; + + QualType ToPointeeTy = ToPTy->getPointeeType(); + + // Only perform the check if 'ToPointeeTy' is a complete type. 
+ if (ToPointeeTy->isIncompleteType()) + return; + + ProgramStateRef state = C.getState(); + const MemRegion *R = C.getSVal(E).getAsRegion(); + if (!R) + return; + + const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R); + if (!SR) + return; + + SValBuilder &svalBuilder = C.getSValBuilder(); + SVal extent = SR->getExtent(svalBuilder); + const llvm::APSInt *extentInt = svalBuilder.getKnownValue(state, extent); + if (!extentInt) + return; + + CharUnits regionSize = CharUnits::fromQuantity(extentInt->getSExtValue()); + CharUnits typeSize = C.getASTContext().getTypeSizeInChars(ToPointeeTy); + + // Ignore void, and a few other un-sizeable types. + if (typeSize.isZero()) + return; + + if (regionSize % typeSize == 0) + return; + + if (evenFlexibleArraySize(Ctx, regionSize, typeSize, ToPointeeTy)) + return; + + if (ExplodedNode *errorNode = C.generateErrorNode()) { + if (!BT) + BT.reset(new BuiltinBug(this, "Cast region with wrong size.", + "Cast a region whose size is not a multiple" + " of the destination type size.")); + auto R = std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), + errorNode); + R->addRange(CE->getSourceRange()); + C.emitReport(std::move(R)); + } +} + +void ento::registerCastSizeChecker(CheckerManager &mgr) { + mgr.registerChecker<CastSizeChecker>(); +} + +bool ento::shouldRegisterCastSizeChecker(const LangOptions &LO) { + // PR31226: C++ is more complicated than what this checker currently supports. + // There are derived-to-base casts, there are different rules for 0-size + // structures, no flexible arrays, etc. + // FIXME: Disabled on C++ for now. 
+ return !LO.CPlusPlus; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp new file mode 100644 index 000000000000..93665596be29 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp @@ -0,0 +1,125 @@ +//=== CastToStructChecker.cpp ----------------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files defines CastToStructChecker, a builtin checker that checks for +// cast from non-struct pointer to struct pointer and widening struct data cast. +// This check corresponds to CWE-588. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CastToStructVisitor : public RecursiveASTVisitor<CastToStructVisitor> { + BugReporter &BR; + const CheckerBase *Checker; + AnalysisDeclContext *AC; + +public: + explicit CastToStructVisitor(BugReporter &B, const CheckerBase *Checker, + AnalysisDeclContext *A) + : BR(B), Checker(Checker), AC(A) {} + bool VisitCastExpr(const CastExpr *CE); +}; +} + +bool CastToStructVisitor::VisitCastExpr(const CastExpr *CE) { + const Expr *E = CE->getSubExpr(); + ASTContext &Ctx = AC->getASTContext(); + QualType OrigTy = Ctx.getCanonicalType(E->getType()); + QualType ToTy = Ctx.getCanonicalType(CE->getType()); + + 
const PointerType *OrigPTy = dyn_cast<PointerType>(OrigTy.getTypePtr()); + const PointerType *ToPTy = dyn_cast<PointerType>(ToTy.getTypePtr()); + + if (!ToPTy || !OrigPTy) + return true; + + QualType OrigPointeeTy = OrigPTy->getPointeeType(); + QualType ToPointeeTy = ToPTy->getPointeeType(); + + if (!ToPointeeTy->isStructureOrClassType()) + return true; + + // We allow cast from void*. + if (OrigPointeeTy->isVoidType()) + return true; + + // Now the cast-to-type is struct pointer, the original type is not void*. + if (!OrigPointeeTy->isRecordType()) { + SourceRange Sr[1] = {CE->getSourceRange()}; + PathDiagnosticLocation Loc(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport( + AC->getDecl(), Checker, "Cast from non-struct type to struct type", + categories::LogicError, "Casting a non-structure type to a structure " + "type and accessing a field can lead to memory " + "access errors or data corruption.", + Loc, Sr); + } else { + // Don't warn when size of data is unknown. + const auto *U = dyn_cast<UnaryOperator>(E); + if (!U || U->getOpcode() != UO_AddrOf) + return true; + + // Don't warn for references + const ValueDecl *VD = nullptr; + if (const auto *SE = dyn_cast<DeclRefExpr>(U->getSubExpr())) + VD = SE->getDecl(); + else if (const auto *SE = dyn_cast<MemberExpr>(U->getSubExpr())) + VD = SE->getMemberDecl(); + if (!VD || VD->getType()->isReferenceType()) + return true; + + if (ToPointeeTy->isIncompleteType() || + OrigPointeeTy->isIncompleteType()) + return true; + + // Warn when there is widening cast. 
+ unsigned ToWidth = Ctx.getTypeInfo(ToPointeeTy).Width; + unsigned OrigWidth = Ctx.getTypeInfo(OrigPointeeTy).Width; + if (ToWidth <= OrigWidth) + return true; + + PathDiagnosticLocation Loc(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), Checker, "Widening cast to struct type", + categories::LogicError, + "Casting data to a larger structure type and accessing " + "a field can lead to memory access errors or data " + "corruption.", + Loc, CE->getSourceRange()); + } + + return true; +} + +namespace { +class CastToStructChecker : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr, + BugReporter &BR) const { + CastToStructVisitor Visitor(BR, this, Mgr.getAnalysisDeclContext(D)); + Visitor.TraverseDecl(const_cast<Decl *>(D)); + } +}; +} // end anonymous namespace + +void ento::registerCastToStructChecker(CheckerManager &mgr) { + mgr.registerChecker<CastToStructChecker>(); +} + +bool ento::shouldRegisterCastToStructChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp new file mode 100644 index 000000000000..cc1c9a66b90e --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp @@ -0,0 +1,441 @@ +//===- CastValueChecker - Model implementation of custom RTTIs --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines CastValueChecker which models casts of custom RTTIs. +// +// TODO list: +// - It only allows one succesful cast between two types however in the wild +// the object could be casted to multiple types. 
+// - It needs to check the most likely type information from the dynamic type +// map to increase precision of dynamic casting. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/DeclTemplate.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h" +#include "llvm/ADT/Optional.h" +#include <utility> + +using namespace clang; +using namespace ento; + +namespace { +class CastValueChecker : public Checker<eval::Call> { + enum class CallKind { Function, Method, InstanceOf }; + + using CastCheck = + std::function<void(const CastValueChecker *, const CallEvent &Call, + DefinedOrUnknownSVal, CheckerContext &)>; + +public: + // We have five cases to evaluate a cast: + // 1) The parameter is non-null, the return value is non-null. + // 2) The parameter is non-null, the return value is null. + // 3) The parameter is null, the return value is null. + // cast: 1; dyn_cast: 1, 2; cast_or_null: 1, 3; dyn_cast_or_null: 1, 2, 3. + // + // 4) castAs: Has no parameter, the return value is non-null. + // 5) getAs: Has no parameter, the return value is null or non-null. + // + // We have two cases to check the parameter is an instance of the given type. + // 1) isa: The parameter is non-null, returns boolean. + // 2) isa_and_nonnull: The parameter is null or non-null, returns boolean. + bool evalCall(const CallEvent &Call, CheckerContext &C) const; + +private: + // These are known in the LLVM project. 
The pairs are in the following form: + // {{{namespace, call}, argument-count}, {callback, kind}} + const CallDescriptionMap<std::pair<CastCheck, CallKind>> CDM = { + {{{"llvm", "cast"}, 1}, + {&CastValueChecker::evalCast, CallKind::Function}}, + {{{"llvm", "dyn_cast"}, 1}, + {&CastValueChecker::evalDynCast, CallKind::Function}}, + {{{"llvm", "cast_or_null"}, 1}, + {&CastValueChecker::evalCastOrNull, CallKind::Function}}, + {{{"llvm", "dyn_cast_or_null"}, 1}, + {&CastValueChecker::evalDynCastOrNull, CallKind::Function}}, + {{{"clang", "castAs"}, 0}, + {&CastValueChecker::evalCastAs, CallKind::Method}}, + {{{"clang", "getAs"}, 0}, + {&CastValueChecker::evalGetAs, CallKind::Method}}, + {{{"llvm", "isa"}, 1}, + {&CastValueChecker::evalIsa, CallKind::InstanceOf}}, + {{{"llvm", "isa_and_nonnull"}, 1}, + {&CastValueChecker::evalIsaAndNonNull, CallKind::InstanceOf}}}; + + void evalCast(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C) const; + void evalDynCast(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C) const; + void evalCastOrNull(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C) const; + void evalDynCastOrNull(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C) const; + void evalCastAs(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C) const; + void evalGetAs(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C) const; + void evalIsa(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C) const; + void evalIsaAndNonNull(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C) const; +}; +} // namespace + +static bool isInfeasibleCast(const DynamicCastInfo *CastInfo, + bool CastSucceeds) { + if (!CastInfo) + return false; + + return CastSucceeds ? 
CastInfo->fails() : CastInfo->succeeds(); +} + +static const NoteTag *getNoteTag(CheckerContext &C, + const DynamicCastInfo *CastInfo, + QualType CastToTy, const Expr *Object, + bool CastSucceeds, bool IsKnownCast) { + std::string CastToName = + CastInfo ? CastInfo->to()->getPointeeCXXRecordDecl()->getNameAsString() + : CastToTy->getPointeeCXXRecordDecl()->getNameAsString(); + Object = Object->IgnoreParenImpCasts(); + + return C.getNoteTag( + [=]() -> std::string { + SmallString<128> Msg; + llvm::raw_svector_ostream Out(Msg); + + if (!IsKnownCast) + Out << "Assuming "; + + if (const auto *DRE = dyn_cast<DeclRefExpr>(Object)) { + Out << '\'' << DRE->getDecl()->getNameAsString() << '\''; + } else if (const auto *ME = dyn_cast<MemberExpr>(Object)) { + Out << (IsKnownCast ? "Field '" : "field '") + << ME->getMemberDecl()->getNameAsString() << '\''; + } else { + Out << (IsKnownCast ? "The object" : "the object"); + } + + Out << ' ' << (CastSucceeds ? "is a" : "is not a") << " '" << CastToName + << '\''; + + return Out.str(); + }, + /*IsPrunable=*/true); +} + +//===----------------------------------------------------------------------===// +// Main logic to evaluate a cast. 
+//===----------------------------------------------------------------------===// + +static QualType alignReferenceTypes(QualType toAlign, QualType alignTowards, + ASTContext &ACtx) { + if (alignTowards->isLValueReferenceType() && + alignTowards.isConstQualified()) { + toAlign.addConst(); + return ACtx.getLValueReferenceType(toAlign); + } else if (alignTowards->isLValueReferenceType()) + return ACtx.getLValueReferenceType(toAlign); + else if (alignTowards->isRValueReferenceType()) + return ACtx.getRValueReferenceType(toAlign); + + llvm_unreachable("Must align towards a reference type!"); +} + +static void addCastTransition(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C, bool IsNonNullParam, + bool IsNonNullReturn, + bool IsCheckedCast = false) { + ProgramStateRef State = C.getState()->assume(DV, IsNonNullParam); + if (!State) + return; + + const Expr *Object; + QualType CastFromTy; + QualType CastToTy = Call.getResultType(); + + if (Call.getNumArgs() > 0) { + Object = Call.getArgExpr(0); + CastFromTy = Call.parameters()[0]->getType(); + } else { + Object = cast<CXXInstanceCall>(&Call)->getCXXThisExpr(); + CastFromTy = Object->getType(); + if (CastToTy->isPointerType()) { + if (!CastFromTy->isPointerType()) + return; + } else { + if (!CastFromTy->isReferenceType()) + return; + + CastFromTy = alignReferenceTypes(CastFromTy, CastToTy, C.getASTContext()); + } + } + + const MemRegion *MR = DV.getAsRegion(); + const DynamicCastInfo *CastInfo = + getDynamicCastInfo(State, MR, CastFromTy, CastToTy); + + // We assume that every checked cast succeeds. + bool CastSucceeds = IsCheckedCast || CastFromTy == CastToTy; + if (!CastSucceeds) { + if (CastInfo) + CastSucceeds = IsNonNullReturn && CastInfo->succeeds(); + else + CastSucceeds = IsNonNullReturn; + } + + // Check for infeasible casts. + if (isInfeasibleCast(CastInfo, CastSucceeds)) { + C.generateSink(State, C.getPredecessor()); + return; + } + + // Store the type and the cast information. 
+ bool IsKnownCast = CastInfo || IsCheckedCast || CastFromTy == CastToTy; + if (!IsKnownCast || IsCheckedCast) + State = setDynamicTypeAndCastInfo(State, MR, CastFromTy, CastToTy, + CastSucceeds); + + SVal V = CastSucceeds ? C.getSValBuilder().evalCast(DV, CastToTy, CastFromTy) + : C.getSValBuilder().makeNull(); + C.addTransition( + State->BindExpr(Call.getOriginExpr(), C.getLocationContext(), V, false), + getNoteTag(C, CastInfo, CastToTy, Object, CastSucceeds, IsKnownCast)); +} + +static void addInstanceOfTransition(const CallEvent &Call, + DefinedOrUnknownSVal DV, + ProgramStateRef State, CheckerContext &C, + bool IsInstanceOf) { + const FunctionDecl *FD = Call.getDecl()->getAsFunction(); + QualType CastFromTy = Call.parameters()[0]->getType(); + QualType CastToTy = FD->getTemplateSpecializationArgs()->get(0).getAsType(); + if (CastFromTy->isPointerType()) + CastToTy = C.getASTContext().getPointerType(CastToTy); + else if (CastFromTy->isReferenceType()) + CastToTy = alignReferenceTypes(CastToTy, CastFromTy, C.getASTContext()); + else + return; + + const MemRegion *MR = DV.getAsRegion(); + const DynamicCastInfo *CastInfo = + getDynamicCastInfo(State, MR, CastFromTy, CastToTy); + + bool CastSucceeds; + if (CastInfo) + CastSucceeds = IsInstanceOf && CastInfo->succeeds(); + else + CastSucceeds = IsInstanceOf || CastFromTy == CastToTy; + + if (isInfeasibleCast(CastInfo, CastSucceeds)) { + C.generateSink(State, C.getPredecessor()); + return; + } + + // Store the type and the cast information. 
+ bool IsKnownCast = CastInfo || CastFromTy == CastToTy; + if (!IsKnownCast) + State = setDynamicTypeAndCastInfo(State, MR, CastFromTy, CastToTy, + IsInstanceOf); + + C.addTransition( + State->BindExpr(Call.getOriginExpr(), C.getLocationContext(), + C.getSValBuilder().makeTruthVal(CastSucceeds)), + getNoteTag(C, CastInfo, CastToTy, Call.getArgExpr(0), CastSucceeds, + IsKnownCast)); +} + +//===----------------------------------------------------------------------===// +// Evaluating cast, dyn_cast, cast_or_null, dyn_cast_or_null. +//===----------------------------------------------------------------------===// + +static void evalNonNullParamNonNullReturn(const CallEvent &Call, + DefinedOrUnknownSVal DV, + CheckerContext &C, + bool IsCheckedCast = false) { + addCastTransition(Call, DV, C, /*IsNonNullParam=*/true, + /*IsNonNullReturn=*/true, IsCheckedCast); +} + +static void evalNonNullParamNullReturn(const CallEvent &Call, + DefinedOrUnknownSVal DV, + CheckerContext &C) { + addCastTransition(Call, DV, C, /*IsNonNullParam=*/true, + /*IsNonNullReturn=*/false); +} + +static void evalNullParamNullReturn(const CallEvent &Call, + DefinedOrUnknownSVal DV, + CheckerContext &C) { + if (ProgramStateRef State = C.getState()->assume(DV, false)) + C.addTransition(State->BindExpr(Call.getOriginExpr(), + C.getLocationContext(), + C.getSValBuilder().makeNull(), false), + C.getNoteTag("Assuming null pointer is passed into cast", + /*IsPrunable=*/true)); +} + +void CastValueChecker::evalCast(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C) const { + evalNonNullParamNonNullReturn(Call, DV, C, /*IsCheckedCast=*/true); +} + +void CastValueChecker::evalDynCast(const CallEvent &Call, + DefinedOrUnknownSVal DV, + CheckerContext &C) const { + evalNonNullParamNonNullReturn(Call, DV, C); + evalNonNullParamNullReturn(Call, DV, C); +} + +void CastValueChecker::evalCastOrNull(const CallEvent &Call, + DefinedOrUnknownSVal DV, + CheckerContext &C) const { + 
evalNonNullParamNonNullReturn(Call, DV, C); + evalNullParamNullReturn(Call, DV, C); +} + +void CastValueChecker::evalDynCastOrNull(const CallEvent &Call, + DefinedOrUnknownSVal DV, + CheckerContext &C) const { + evalNonNullParamNonNullReturn(Call, DV, C); + evalNonNullParamNullReturn(Call, DV, C); + evalNullParamNullReturn(Call, DV, C); +} + +//===----------------------------------------------------------------------===// +// Evaluating castAs, getAs. +//===----------------------------------------------------------------------===// + +static void evalZeroParamNonNullReturn(const CallEvent &Call, + DefinedOrUnknownSVal DV, + CheckerContext &C, + bool IsCheckedCast = false) { + addCastTransition(Call, DV, C, /*IsNonNullParam=*/true, + /*IsNonNullReturn=*/true, IsCheckedCast); +} + +static void evalZeroParamNullReturn(const CallEvent &Call, + DefinedOrUnknownSVal DV, + CheckerContext &C) { + addCastTransition(Call, DV, C, /*IsNonNullParam=*/true, + /*IsNonNullReturn=*/false); +} + +void CastValueChecker::evalCastAs(const CallEvent &Call, + DefinedOrUnknownSVal DV, + CheckerContext &C) const { + evalZeroParamNonNullReturn(Call, DV, C, /*IsCheckedCast=*/true); +} + +void CastValueChecker::evalGetAs(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C) const { + evalZeroParamNonNullReturn(Call, DV, C); + evalZeroParamNullReturn(Call, DV, C); +} + +//===----------------------------------------------------------------------===// +// Evaluating isa, isa_and_nonnull. 
+//===----------------------------------------------------------------------===// + +void CastValueChecker::evalIsa(const CallEvent &Call, DefinedOrUnknownSVal DV, + CheckerContext &C) const { + ProgramStateRef NonNullState, NullState; + std::tie(NonNullState, NullState) = C.getState()->assume(DV); + + if (NonNullState) { + addInstanceOfTransition(Call, DV, NonNullState, C, /*IsInstanceOf=*/true); + addInstanceOfTransition(Call, DV, NonNullState, C, /*IsInstanceOf=*/false); + } + + if (NullState) { + C.generateSink(NullState, C.getPredecessor()); + } +} + +void CastValueChecker::evalIsaAndNonNull(const CallEvent &Call, + DefinedOrUnknownSVal DV, + CheckerContext &C) const { + ProgramStateRef NonNullState, NullState; + std::tie(NonNullState, NullState) = C.getState()->assume(DV); + + if (NonNullState) { + addInstanceOfTransition(Call, DV, NonNullState, C, /*IsInstanceOf=*/true); + addInstanceOfTransition(Call, DV, NonNullState, C, /*IsInstanceOf=*/false); + } + + if (NullState) { + addInstanceOfTransition(Call, DV, NullState, C, /*IsInstanceOf=*/false); + } +} + +//===----------------------------------------------------------------------===// +// Main logic to evaluate a call. +//===----------------------------------------------------------------------===// + +bool CastValueChecker::evalCall(const CallEvent &Call, + CheckerContext &C) const { + const auto *Lookup = CDM.lookup(Call); + if (!Lookup) + return false; + + const CastCheck &Check = Lookup->first; + CallKind Kind = Lookup->second; + + Optional<DefinedOrUnknownSVal> DV; + + switch (Kind) { + case CallKind::Function: { + // We only model casts from pointers to pointers or from references + // to references. Other casts are most likely specialized and we + // cannot model them. 
+ QualType ParamT = Call.parameters()[0]->getType(); + QualType ResultT = Call.getResultType(); + if (!(ParamT->isPointerType() && ResultT->isPointerType()) && + !(ParamT->isReferenceType() && ResultT->isReferenceType())) + return false; + + DV = Call.getArgSVal(0).getAs<DefinedOrUnknownSVal>(); + break; + } + case CallKind::InstanceOf: { + // We need to obtain the only template argument to determinte the type. + const FunctionDecl *FD = Call.getDecl()->getAsFunction(); + if (!FD || !FD->getTemplateSpecializationArgs()) + return false; + + DV = Call.getArgSVal(0).getAs<DefinedOrUnknownSVal>(); + break; + } + case CallKind::Method: + const auto *InstanceCall = dyn_cast<CXXInstanceCall>(&Call); + if (!InstanceCall) + return false; + + DV = InstanceCall->getCXXThisVal().getAs<DefinedOrUnknownSVal>(); + break; + } + + if (!DV) + return false; + + Check(this, Call, *DV, C); + return true; +} + +void ento::registerCastValueChecker(CheckerManager &Mgr) { + Mgr.registerChecker<CastValueChecker>(); +} + +bool ento::shouldRegisterCastValueChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp new file mode 100644 index 000000000000..50b872bd8682 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp @@ -0,0 +1,1094 @@ +//==- CheckObjCDealloc.cpp - Check ObjC -dealloc implementation --*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker analyzes Objective-C -dealloc methods and their callees +// to warn about improper releasing of instance variables that back synthesized +// properties. 
It warns about missing releases in the following cases: +// - When a class has a synthesized instance variable for a 'retain' or 'copy' +// property and lacks a -dealloc method in its implementation. +// - When a class has a synthesized instance variable for a 'retain'/'copy' +// property but the ivar is not released in -dealloc by either -release +// or by nilling out the property. +// +// It warns about extra releases in -dealloc (but not in callees) when a +// synthesized instance variable is released in the following cases: +// - When the property is 'assign' and is not 'readonly'. +// - When the property is 'weak'. +// +// This checker only warns for instance variables synthesized to back +// properties. Handling the more general case would require inferring whether +// an instance variable is stored retained or not. For synthesized properties, +// this is specified in the property declaration itself. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/Analysis/PathDiagnostic.h" +#include "clang/AST/Attr.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprObjC.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +/// 
Indicates whether an instance variable is required to be released in +/// -dealloc. +enum class ReleaseRequirement { + /// The instance variable must be released, either by calling + /// -release on it directly or by nilling it out with a property setter. + MustRelease, + + /// The instance variable must not be directly released with -release. + MustNotReleaseDirectly, + + /// The requirement for the instance variable could not be determined. + Unknown +}; + +/// Returns true if the property implementation is synthesized and the +/// type of the property is retainable. +static bool isSynthesizedRetainableProperty(const ObjCPropertyImplDecl *I, + const ObjCIvarDecl **ID, + const ObjCPropertyDecl **PD) { + + if (I->getPropertyImplementation() != ObjCPropertyImplDecl::Synthesize) + return false; + + (*ID) = I->getPropertyIvarDecl(); + if (!(*ID)) + return false; + + QualType T = (*ID)->getType(); + if (!T->isObjCRetainableType()) + return false; + + (*PD) = I->getPropertyDecl(); + // Shouldn't be able to synthesize a property that doesn't exist. 
+ assert(*PD); + + return true; +} + +namespace { + +class ObjCDeallocChecker + : public Checker<check::ASTDecl<ObjCImplementationDecl>, + check::PreObjCMessage, check::PostObjCMessage, + check::PreCall, + check::BeginFunction, check::EndFunction, + eval::Assume, + check::PointerEscape, + check::PreStmt<ReturnStmt>> { + + mutable IdentifierInfo *NSObjectII, *SenTestCaseII, *XCTestCaseII, + *Block_releaseII, *CIFilterII; + + mutable Selector DeallocSel, ReleaseSel; + + std::unique_ptr<BugType> MissingReleaseBugType; + std::unique_ptr<BugType> ExtraReleaseBugType; + std::unique_ptr<BugType> MistakenDeallocBugType; + +public: + ObjCDeallocChecker(); + + void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager& Mgr, + BugReporter &BR) const; + void checkBeginFunction(CheckerContext &Ctx) const; + void checkPreObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const; + + ProgramStateRef evalAssume(ProgramStateRef State, SVal Cond, + bool Assumption) const; + + ProgramStateRef checkPointerEscape(ProgramStateRef State, + const InvalidatedSymbols &Escaped, + const CallEvent *Call, + PointerEscapeKind Kind) const; + void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const; + void checkEndFunction(const ReturnStmt *RS, CheckerContext &Ctx) const; + +private: + void diagnoseMissingReleases(CheckerContext &C) const; + + bool diagnoseExtraRelease(SymbolRef ReleasedValue, const ObjCMethodCall &M, + CheckerContext &C) const; + + bool diagnoseMistakenDealloc(SymbolRef DeallocedValue, + const ObjCMethodCall &M, + CheckerContext &C) const; + + SymbolRef getValueReleasedByNillingOut(const ObjCMethodCall &M, + CheckerContext &C) const; + + const ObjCIvarRegion *getIvarRegionForIvarSymbol(SymbolRef IvarSym) const; + SymbolRef getInstanceSymbolFromIvarSymbol(SymbolRef IvarSym) const; + + const ObjCPropertyImplDecl* + 
findPropertyOnDeallocatingInstance(SymbolRef IvarSym, + CheckerContext &C) const; + + ReleaseRequirement + getDeallocReleaseRequirement(const ObjCPropertyImplDecl *PropImpl) const; + + bool isInInstanceDealloc(const CheckerContext &C, SVal &SelfValOut) const; + bool isInInstanceDealloc(const CheckerContext &C, const LocationContext *LCtx, + SVal &SelfValOut) const; + bool instanceDeallocIsOnStack(const CheckerContext &C, + SVal &InstanceValOut) const; + + bool isSuperDeallocMessage(const ObjCMethodCall &M) const; + + const ObjCImplDecl *getContainingObjCImpl(const LocationContext *LCtx) const; + + const ObjCPropertyDecl * + findShadowedPropertyDecl(const ObjCPropertyImplDecl *PropImpl) const; + + void transitionToReleaseValue(CheckerContext &C, SymbolRef Value) const; + ProgramStateRef removeValueRequiringRelease(ProgramStateRef State, + SymbolRef InstanceSym, + SymbolRef ValueSym) const; + + void initIdentifierInfoAndSelectors(ASTContext &Ctx) const; + + bool classHasSeparateTeardown(const ObjCInterfaceDecl *ID) const; + + bool isReleasedByCIFilterDealloc(const ObjCPropertyImplDecl *PropImpl) const; + bool isNibLoadedIvarWithoutRetain(const ObjCPropertyImplDecl *PropImpl) const; +}; +} // End anonymous namespace. + + +/// Maps from the symbol for a class instance to the set of +/// symbols remaining that must be released in -dealloc. +REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef) +REGISTER_MAP_WITH_PROGRAMSTATE(UnreleasedIvarMap, SymbolRef, SymbolSet) + + +/// An AST check that diagnose when the class requires a -dealloc method and +/// is missing one. 
+void ObjCDeallocChecker::checkASTDecl(const ObjCImplementationDecl *D, + AnalysisManager &Mgr, + BugReporter &BR) const { + assert(Mgr.getLangOpts().getGC() != LangOptions::GCOnly); + assert(!Mgr.getLangOpts().ObjCAutoRefCount); + initIdentifierInfoAndSelectors(Mgr.getASTContext()); + + const ObjCInterfaceDecl *ID = D->getClassInterface(); + // If the class is known to have a lifecycle with a separate teardown method + // then it may not require a -dealloc method. + if (classHasSeparateTeardown(ID)) + return; + + // Does the class contain any synthesized properties that are retainable? + // If not, skip the check entirely. + const ObjCPropertyImplDecl *PropImplRequiringRelease = nullptr; + bool HasOthers = false; + for (const auto *I : D->property_impls()) { + if (getDeallocReleaseRequirement(I) == ReleaseRequirement::MustRelease) { + if (!PropImplRequiringRelease) + PropImplRequiringRelease = I; + else { + HasOthers = true; + break; + } + } + } + + if (!PropImplRequiringRelease) + return; + + const ObjCMethodDecl *MD = nullptr; + + // Scan the instance methods for "dealloc". + for (const auto *I : D->instance_methods()) { + if (I->getSelector() == DeallocSel) { + MD = I; + break; + } + } + + if (!MD) { // No dealloc found. + const char* Name = "Missing -dealloc"; + + std::string Buf; + llvm::raw_string_ostream OS(Buf); + OS << "'" << *D << "' lacks a 'dealloc' instance method but " + << "must release '" << *PropImplRequiringRelease->getPropertyIvarDecl() + << "'"; + + if (HasOthers) + OS << " and others"; + PathDiagnosticLocation DLoc = + PathDiagnosticLocation::createBegin(D, BR.getSourceManager()); + + BR.EmitBasicReport(D, this, Name, categories::CoreFoundationObjectiveC, + OS.str(), DLoc); + return; + } +} + +/// If this is the beginning of -dealloc, mark the values initially stored in +/// instance variables that must be released by the end of -dealloc +/// as unreleased in the state. 
+void ObjCDeallocChecker::checkBeginFunction( + CheckerContext &C) const { + initIdentifierInfoAndSelectors(C.getASTContext()); + + // Only do this if the current method is -dealloc. + SVal SelfVal; + if (!isInInstanceDealloc(C, SelfVal)) + return; + + SymbolRef SelfSymbol = SelfVal.getAsSymbol(); + + const LocationContext *LCtx = C.getLocationContext(); + ProgramStateRef InitialState = C.getState(); + + ProgramStateRef State = InitialState; + + SymbolSet::Factory &F = State->getStateManager().get_context<SymbolSet>(); + + // Symbols that must be released by the end of the -dealloc; + SymbolSet RequiredReleases = F.getEmptySet(); + + // If we're an inlined -dealloc, we should add our symbols to the existing + // set from our subclass. + if (const SymbolSet *CurrSet = State->get<UnreleasedIvarMap>(SelfSymbol)) + RequiredReleases = *CurrSet; + + for (auto *PropImpl : getContainingObjCImpl(LCtx)->property_impls()) { + ReleaseRequirement Requirement = getDeallocReleaseRequirement(PropImpl); + if (Requirement != ReleaseRequirement::MustRelease) + continue; + + SVal LVal = State->getLValue(PropImpl->getPropertyIvarDecl(), SelfVal); + Optional<Loc> LValLoc = LVal.getAs<Loc>(); + if (!LValLoc) + continue; + + SVal InitialVal = State->getSVal(LValLoc.getValue()); + SymbolRef Symbol = InitialVal.getAsSymbol(); + if (!Symbol || !isa<SymbolRegionValue>(Symbol)) + continue; + + // Mark the value as requiring a release. + RequiredReleases = F.add(RequiredReleases, Symbol); + } + + if (!RequiredReleases.isEmpty()) { + State = State->set<UnreleasedIvarMap>(SelfSymbol, RequiredReleases); + } + + if (State != InitialState) { + C.addTransition(State); + } +} + +/// Given a symbol for an ivar, return the ivar region it was loaded from. +/// Returns nullptr if the instance symbol cannot be found. 
+const ObjCIvarRegion * +ObjCDeallocChecker::getIvarRegionForIvarSymbol(SymbolRef IvarSym) const { + return dyn_cast_or_null<ObjCIvarRegion>(IvarSym->getOriginRegion()); +} + +/// Given a symbol for an ivar, return a symbol for the instance containing +/// the ivar. Returns nullptr if the instance symbol cannot be found. +SymbolRef +ObjCDeallocChecker::getInstanceSymbolFromIvarSymbol(SymbolRef IvarSym) const { + + const ObjCIvarRegion *IvarRegion = getIvarRegionForIvarSymbol(IvarSym); + if (!IvarRegion) + return nullptr; + + return IvarRegion->getSymbolicBase()->getSymbol(); +} + +/// If we are in -dealloc or -dealloc is on the stack, handle the call if it is +/// a release or a nilling-out property setter. +void ObjCDeallocChecker::checkPreObjCMessage( + const ObjCMethodCall &M, CheckerContext &C) const { + // Only run if -dealloc is on the stack. + SVal DeallocedInstance; + if (!instanceDeallocIsOnStack(C, DeallocedInstance)) + return; + + SymbolRef ReleasedValue = nullptr; + + if (M.getSelector() == ReleaseSel) { + ReleasedValue = M.getReceiverSVal().getAsSymbol(); + } else if (M.getSelector() == DeallocSel && !M.isReceiverSelfOrSuper()) { + if (diagnoseMistakenDealloc(M.getReceiverSVal().getAsSymbol(), M, C)) + return; + } + + if (ReleasedValue) { + // An instance variable symbol was released with -release: + // [_property release]; + if (diagnoseExtraRelease(ReleasedValue,M, C)) + return; + } else { + // An instance variable symbol was released nilling out its property: + // self.property = nil; + ReleasedValue = getValueReleasedByNillingOut(M, C); + } + + if (!ReleasedValue) + return; + + transitionToReleaseValue(C, ReleasedValue); +} + +/// If we are in -dealloc or -dealloc is on the stack, handle the call if it is +/// call to Block_release(). 
+void ObjCDeallocChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + const IdentifierInfo *II = Call.getCalleeIdentifier(); + if (II != Block_releaseII) + return; + + if (Call.getNumArgs() != 1) + return; + + SymbolRef ReleasedValue = Call.getArgSVal(0).getAsSymbol(); + if (!ReleasedValue) + return; + + transitionToReleaseValue(C, ReleasedValue); +} +/// If the message was a call to '[super dealloc]', diagnose any missing +/// releases. +void ObjCDeallocChecker::checkPostObjCMessage( + const ObjCMethodCall &M, CheckerContext &C) const { + // We perform this check post-message so that if the super -dealloc + // calls a helper method and that this class overrides, any ivars released in + // the helper method will be recorded before checking. + if (isSuperDeallocMessage(M)) + diagnoseMissingReleases(C); +} + +/// Check for missing releases even when -dealloc does not call +/// '[super dealloc]'. +void ObjCDeallocChecker::checkEndFunction( + const ReturnStmt *RS, CheckerContext &C) const { + diagnoseMissingReleases(C); +} + +/// Check for missing releases on early return. +void ObjCDeallocChecker::checkPreStmt( + const ReturnStmt *RS, CheckerContext &C) const { + diagnoseMissingReleases(C); +} + +/// When a symbol is assumed to be nil, remove it from the set of symbols +/// require to be nil. 
+ProgramStateRef ObjCDeallocChecker::evalAssume(ProgramStateRef State, SVal Cond, + bool Assumption) const { + if (State->get<UnreleasedIvarMap>().isEmpty()) + return State; + + auto *CondBSE = dyn_cast_or_null<BinarySymExpr>(Cond.getAsSymExpr()); + if (!CondBSE) + return State; + + BinaryOperator::Opcode OpCode = CondBSE->getOpcode(); + if (Assumption) { + if (OpCode != BO_EQ) + return State; + } else { + if (OpCode != BO_NE) + return State; + } + + SymbolRef NullSymbol = nullptr; + if (auto *SIE = dyn_cast<SymIntExpr>(CondBSE)) { + const llvm::APInt &RHS = SIE->getRHS(); + if (RHS != 0) + return State; + NullSymbol = SIE->getLHS(); + } else if (auto *SIE = dyn_cast<IntSymExpr>(CondBSE)) { + const llvm::APInt &LHS = SIE->getLHS(); + if (LHS != 0) + return State; + NullSymbol = SIE->getRHS(); + } else { + return State; + } + + SymbolRef InstanceSymbol = getInstanceSymbolFromIvarSymbol(NullSymbol); + if (!InstanceSymbol) + return State; + + State = removeValueRequiringRelease(State, InstanceSymbol, NullSymbol); + + return State; +} + +/// If a symbol escapes conservatively assume unseen code released it. +ProgramStateRef ObjCDeallocChecker::checkPointerEscape( + ProgramStateRef State, const InvalidatedSymbols &Escaped, + const CallEvent *Call, PointerEscapeKind Kind) const { + + if (State->get<UnreleasedIvarMap>().isEmpty()) + return State; + + // Don't treat calls to '[super dealloc]' as escaping for the purposes + // of this checker. Because the checker diagnoses missing releases in the + // post-message handler for '[super dealloc], escaping here would cause + // the checker to never warn. 
+ auto *OMC = dyn_cast_or_null<ObjCMethodCall>(Call); + if (OMC && isSuperDeallocMessage(*OMC)) + return State; + + for (const auto &Sym : Escaped) { + if (!Call || (Call && !Call->isInSystemHeader())) { + // If Sym is a symbol for an object with instance variables that + // must be released, remove these obligations when the object escapes + // unless via a call to a system function. System functions are + // very unlikely to release instance variables on objects passed to them, + // and are frequently called on 'self' in -dealloc (e.g., to remove + // observers) -- we want to avoid false negatives from escaping on + // them. + State = State->remove<UnreleasedIvarMap>(Sym); + } + + + SymbolRef InstanceSymbol = getInstanceSymbolFromIvarSymbol(Sym); + if (!InstanceSymbol) + continue; + + State = removeValueRequiringRelease(State, InstanceSymbol, Sym); + } + + return State; +} + +/// Report any unreleased instance variables for the current instance being +/// dealloced. +void ObjCDeallocChecker::diagnoseMissingReleases(CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + SVal SelfVal; + if (!isInInstanceDealloc(C, SelfVal)) + return; + + const MemRegion *SelfRegion = SelfVal.castAs<loc::MemRegionVal>().getRegion(); + const LocationContext *LCtx = C.getLocationContext(); + + ExplodedNode *ErrNode = nullptr; + + SymbolRef SelfSym = SelfVal.getAsSymbol(); + if (!SelfSym) + return; + + const SymbolSet *OldUnreleased = State->get<UnreleasedIvarMap>(SelfSym); + if (!OldUnreleased) + return; + + SymbolSet NewUnreleased = *OldUnreleased; + SymbolSet::Factory &F = State->getStateManager().get_context<SymbolSet>(); + + ProgramStateRef InitialState = State; + + for (auto *IvarSymbol : *OldUnreleased) { + const TypedValueRegion *TVR = + cast<SymbolRegionValue>(IvarSymbol)->getRegion(); + const ObjCIvarRegion *IvarRegion = cast<ObjCIvarRegion>(TVR); + + // Don't warn if the ivar is not for this instance. 
+ if (SelfRegion != IvarRegion->getSuperRegion()) + continue; + + const ObjCIvarDecl *IvarDecl = IvarRegion->getDecl(); + // Prevent an inlined call to -dealloc in a super class from warning + // about the values the subclass's -dealloc should release. + if (IvarDecl->getContainingInterface() != + cast<ObjCMethodDecl>(LCtx->getDecl())->getClassInterface()) + continue; + + // Prevents diagnosing multiple times for the same instance variable + // at, for example, both a return and at the end of the function. + NewUnreleased = F.remove(NewUnreleased, IvarSymbol); + + if (State->getStateManager() + .getConstraintManager() + .isNull(State, IvarSymbol) + .isConstrainedTrue()) { + continue; + } + + // A missing release manifests as a leak, so treat as a non-fatal error. + if (!ErrNode) + ErrNode = C.generateNonFatalErrorNode(); + // If we've already reached this node on another path, return without + // diagnosing. + if (!ErrNode) + return; + + std::string Buf; + llvm::raw_string_ostream OS(Buf); + + const ObjCInterfaceDecl *Interface = IvarDecl->getContainingInterface(); + // If the class is known to have a lifecycle with teardown that is + // separate from -dealloc, do not warn about missing releases. We + // suppress here (rather than not tracking for instance variables in + // such classes) because these classes are rare. 
+ if (classHasSeparateTeardown(Interface)) + return; + + ObjCImplDecl *ImplDecl = Interface->getImplementation(); + + const ObjCPropertyImplDecl *PropImpl = + ImplDecl->FindPropertyImplIvarDecl(IvarDecl->getIdentifier()); + + const ObjCPropertyDecl *PropDecl = PropImpl->getPropertyDecl(); + + assert(PropDecl->getSetterKind() == ObjCPropertyDecl::Copy || + PropDecl->getSetterKind() == ObjCPropertyDecl::Retain); + + OS << "The '" << *IvarDecl << "' ivar in '" << *ImplDecl + << "' was "; + + if (PropDecl->getSetterKind() == ObjCPropertyDecl::Retain) + OS << "retained"; + else + OS << "copied"; + + OS << " by a synthesized property but not released" + " before '[super dealloc]'"; + + auto BR = std::make_unique<PathSensitiveBugReport>(*MissingReleaseBugType, + OS.str(), ErrNode); + C.emitReport(std::move(BR)); + } + + if (NewUnreleased.isEmpty()) { + State = State->remove<UnreleasedIvarMap>(SelfSym); + } else { + State = State->set<UnreleasedIvarMap>(SelfSym, NewUnreleased); + } + + if (ErrNode) { + C.addTransition(State, ErrNode); + } else if (State != InitialState) { + C.addTransition(State); + } + + // Make sure that after checking in the top-most frame the list of + // tracked ivars is empty. This is intended to detect accidental leaks in + // the UnreleasedIvarMap program state. + assert(!LCtx->inTopFrame() || State->get<UnreleasedIvarMap>().isEmpty()); +} + +/// Given a symbol, determine whether the symbol refers to an ivar on +/// the top-most deallocating instance. If so, find the property for that +/// ivar, if one exists. Otherwise return null. +const ObjCPropertyImplDecl * +ObjCDeallocChecker::findPropertyOnDeallocatingInstance( + SymbolRef IvarSym, CheckerContext &C) const { + SVal DeallocedInstance; + if (!isInInstanceDealloc(C, DeallocedInstance)) + return nullptr; + + // Try to get the region from which the ivar value was loaded. 
+ auto *IvarRegion = getIvarRegionForIvarSymbol(IvarSym); + if (!IvarRegion) + return nullptr; + + // Don't try to find the property if the ivar was not loaded from the + // given instance. + if (DeallocedInstance.castAs<loc::MemRegionVal>().getRegion() != + IvarRegion->getSuperRegion()) + return nullptr; + + const LocationContext *LCtx = C.getLocationContext(); + const ObjCIvarDecl *IvarDecl = IvarRegion->getDecl(); + + const ObjCImplDecl *Container = getContainingObjCImpl(LCtx); + const ObjCPropertyImplDecl *PropImpl = + Container->FindPropertyImplIvarDecl(IvarDecl->getIdentifier()); + return PropImpl; +} + +/// Emits a warning if the current context is -dealloc and ReleasedValue +/// must not be directly released in a -dealloc. Returns true if a diagnostic +/// was emitted. +bool ObjCDeallocChecker::diagnoseExtraRelease(SymbolRef ReleasedValue, + const ObjCMethodCall &M, + CheckerContext &C) const { + // Try to get the region from which the released value was loaded. + // Note that, unlike diagnosing for missing releases, here we don't track + // values that must not be released in the state. This is because even if + // these values escape, it is still an error under the rules of MRR to + // release them in -dealloc. + const ObjCPropertyImplDecl *PropImpl = + findPropertyOnDeallocatingInstance(ReleasedValue, C); + + if (!PropImpl) + return false; + + // If the ivar belongs to a property that must not be released directly + // in dealloc, emit a warning. + if (getDeallocReleaseRequirement(PropImpl) != + ReleaseRequirement::MustNotReleaseDirectly) { + return false; + } + + // If the property is readwrite but it shadows a read-only property in its + // external interface, treat the property a read-only. If the outside + // world cannot write to a property then the internal implementation is free + // to make its own convention about whether the value is stored retained + // or not. 
We look up the shadow here rather than in + // getDeallocReleaseRequirement() because doing so can be expensive. + const ObjCPropertyDecl *PropDecl = findShadowedPropertyDecl(PropImpl); + if (PropDecl) { + if (PropDecl->isReadOnly()) + return false; + } else { + PropDecl = PropImpl->getPropertyDecl(); + } + + ExplodedNode *ErrNode = C.generateNonFatalErrorNode(); + if (!ErrNode) + return false; + + std::string Buf; + llvm::raw_string_ostream OS(Buf); + + assert(PropDecl->getSetterKind() == ObjCPropertyDecl::Weak || + (PropDecl->getSetterKind() == ObjCPropertyDecl::Assign && + !PropDecl->isReadOnly()) || + isReleasedByCIFilterDealloc(PropImpl) + ); + + const ObjCImplDecl *Container = getContainingObjCImpl(C.getLocationContext()); + OS << "The '" << *PropImpl->getPropertyIvarDecl() + << "' ivar in '" << *Container; + + + if (isReleasedByCIFilterDealloc(PropImpl)) { + OS << "' will be released by '-[CIFilter dealloc]' but also released here"; + } else { + OS << "' was synthesized for "; + + if (PropDecl->getSetterKind() == ObjCPropertyDecl::Weak) + OS << "a weak"; + else + OS << "an assign, readwrite"; + + OS << " property but was released in 'dealloc'"; + } + + auto BR = std::make_unique<PathSensitiveBugReport>(*ExtraReleaseBugType, + OS.str(), ErrNode); + BR->addRange(M.getOriginExpr()->getSourceRange()); + + C.emitReport(std::move(BR)); + + return true; +} + +/// Emits a warning if the current context is -dealloc and DeallocedValue +/// must not be directly dealloced in a -dealloc. Returns true if a diagnostic +/// was emitted. +bool ObjCDeallocChecker::diagnoseMistakenDealloc(SymbolRef DeallocedValue, + const ObjCMethodCall &M, + CheckerContext &C) const { + // TODO: Apart from unknown/undefined receivers, this may happen when + // dealloc is called as a class method. Should we warn? + if (!DeallocedValue) + return false; + + // Find the property backing the instance variable that M + // is dealloc'ing. 
+ const ObjCPropertyImplDecl *PropImpl = + findPropertyOnDeallocatingInstance(DeallocedValue, C); + if (!PropImpl) + return false; + + if (getDeallocReleaseRequirement(PropImpl) != + ReleaseRequirement::MustRelease) { + return false; + } + + ExplodedNode *ErrNode = C.generateErrorNode(); + if (!ErrNode) + return false; + + std::string Buf; + llvm::raw_string_ostream OS(Buf); + + OS << "'" << *PropImpl->getPropertyIvarDecl() + << "' should be released rather than deallocated"; + + auto BR = std::make_unique<PathSensitiveBugReport>(*MistakenDeallocBugType, + OS.str(), ErrNode); + BR->addRange(M.getOriginExpr()->getSourceRange()); + + C.emitReport(std::move(BR)); + + return true; +} + +ObjCDeallocChecker::ObjCDeallocChecker() + : NSObjectII(nullptr), SenTestCaseII(nullptr), XCTestCaseII(nullptr), + CIFilterII(nullptr) { + + MissingReleaseBugType.reset( + new BugType(this, "Missing ivar release (leak)", + categories::MemoryRefCount)); + + ExtraReleaseBugType.reset( + new BugType(this, "Extra ivar release", + categories::MemoryRefCount)); + + MistakenDeallocBugType.reset( + new BugType(this, "Mistaken dealloc", + categories::MemoryRefCount)); +} + +void ObjCDeallocChecker::initIdentifierInfoAndSelectors( + ASTContext &Ctx) const { + if (NSObjectII) + return; + + NSObjectII = &Ctx.Idents.get("NSObject"); + SenTestCaseII = &Ctx.Idents.get("SenTestCase"); + XCTestCaseII = &Ctx.Idents.get("XCTestCase"); + Block_releaseII = &Ctx.Idents.get("_Block_release"); + CIFilterII = &Ctx.Idents.get("CIFilter"); + + IdentifierInfo *DeallocII = &Ctx.Idents.get("dealloc"); + IdentifierInfo *ReleaseII = &Ctx.Idents.get("release"); + DeallocSel = Ctx.Selectors.getSelector(0, &DeallocII); + ReleaseSel = Ctx.Selectors.getSelector(0, &ReleaseII); +} + +/// Returns true if M is a call to '[super dealloc]'. 
+bool ObjCDeallocChecker::isSuperDeallocMessage( + const ObjCMethodCall &M) const { + if (M.getOriginExpr()->getReceiverKind() != ObjCMessageExpr::SuperInstance) + return false; + + return M.getSelector() == DeallocSel; +} + +/// Returns the ObjCImplDecl containing the method declaration in LCtx. +const ObjCImplDecl * +ObjCDeallocChecker::getContainingObjCImpl(const LocationContext *LCtx) const { + auto *MD = cast<ObjCMethodDecl>(LCtx->getDecl()); + return cast<ObjCImplDecl>(MD->getDeclContext()); +} + +/// Returns the property that shadowed by PropImpl if one exists and +/// nullptr otherwise. +const ObjCPropertyDecl *ObjCDeallocChecker::findShadowedPropertyDecl( + const ObjCPropertyImplDecl *PropImpl) const { + const ObjCPropertyDecl *PropDecl = PropImpl->getPropertyDecl(); + + // Only readwrite properties can shadow. + if (PropDecl->isReadOnly()) + return nullptr; + + auto *CatDecl = dyn_cast<ObjCCategoryDecl>(PropDecl->getDeclContext()); + + // Only class extensions can contain shadowing properties. + if (!CatDecl || !CatDecl->IsClassExtension()) + return nullptr; + + IdentifierInfo *ID = PropDecl->getIdentifier(); + DeclContext::lookup_result R = CatDecl->getClassInterface()->lookup(ID); + for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E; ++I) { + auto *ShadowedPropDecl = dyn_cast<ObjCPropertyDecl>(*I); + if (!ShadowedPropDecl) + continue; + + if (ShadowedPropDecl->isInstanceProperty()) { + assert(ShadowedPropDecl->isReadOnly()); + return ShadowedPropDecl; + } + } + + return nullptr; +} + +/// Add a transition noting the release of the given value. 
+void ObjCDeallocChecker::transitionToReleaseValue(CheckerContext &C, + SymbolRef Value) const { + assert(Value); + SymbolRef InstanceSym = getInstanceSymbolFromIvarSymbol(Value); + if (!InstanceSym) + return; + ProgramStateRef InitialState = C.getState(); + + ProgramStateRef ReleasedState = + removeValueRequiringRelease(InitialState, InstanceSym, Value); + + if (ReleasedState != InitialState) { + C.addTransition(ReleasedState); + } +} + +/// Remove the Value requiring a release from the tracked set for +/// Instance and return the resultant state. +ProgramStateRef ObjCDeallocChecker::removeValueRequiringRelease( + ProgramStateRef State, SymbolRef Instance, SymbolRef Value) const { + assert(Instance); + assert(Value); + const ObjCIvarRegion *RemovedRegion = getIvarRegionForIvarSymbol(Value); + if (!RemovedRegion) + return State; + + const SymbolSet *Unreleased = State->get<UnreleasedIvarMap>(Instance); + if (!Unreleased) + return State; + + // Mark the value as no longer requiring a release. + SymbolSet::Factory &F = State->getStateManager().get_context<SymbolSet>(); + SymbolSet NewUnreleased = *Unreleased; + for (auto &Sym : *Unreleased) { + const ObjCIvarRegion *UnreleasedRegion = getIvarRegionForIvarSymbol(Sym); + assert(UnreleasedRegion); + if (RemovedRegion->getDecl() == UnreleasedRegion->getDecl()) { + NewUnreleased = F.remove(NewUnreleased, Sym); + } + } + + if (NewUnreleased.isEmpty()) { + return State->remove<UnreleasedIvarMap>(Instance); + } + + return State->set<UnreleasedIvarMap>(Instance, NewUnreleased); +} + +/// Determines whether the instance variable for \p PropImpl must or must not be +/// released in -dealloc or whether it cannot be determined. 
+ReleaseRequirement ObjCDeallocChecker::getDeallocReleaseRequirement( + const ObjCPropertyImplDecl *PropImpl) const { + const ObjCIvarDecl *IvarDecl; + const ObjCPropertyDecl *PropDecl; + if (!isSynthesizedRetainableProperty(PropImpl, &IvarDecl, &PropDecl)) + return ReleaseRequirement::Unknown; + + ObjCPropertyDecl::SetterKind SK = PropDecl->getSetterKind(); + + switch (SK) { + // Retain and copy setters retain/copy their values before storing and so + // the value in their instance variables must be released in -dealloc. + case ObjCPropertyDecl::Retain: + case ObjCPropertyDecl::Copy: + if (isReleasedByCIFilterDealloc(PropImpl)) + return ReleaseRequirement::MustNotReleaseDirectly; + + if (isNibLoadedIvarWithoutRetain(PropImpl)) + return ReleaseRequirement::Unknown; + + return ReleaseRequirement::MustRelease; + + case ObjCPropertyDecl::Weak: + return ReleaseRequirement::MustNotReleaseDirectly; + + case ObjCPropertyDecl::Assign: + // It is common for the ivars for read-only assign properties to + // always be stored retained, so their release requirement cannot be + // determined. + if (PropDecl->isReadOnly()) + return ReleaseRequirement::Unknown; + + return ReleaseRequirement::MustNotReleaseDirectly; + } + llvm_unreachable("Unrecognized setter kind"); +} + +/// Returns the released value if M is a call to a setter that releases +/// and nils out its underlying instance variable. +SymbolRef +ObjCDeallocChecker::getValueReleasedByNillingOut(const ObjCMethodCall &M, + CheckerContext &C) const { + SVal ReceiverVal = M.getReceiverSVal(); + if (!ReceiverVal.isValid()) + return nullptr; + + if (M.getNumArgs() == 0) + return nullptr; + + if (!M.getArgExpr(0)->getType()->isObjCRetainableType()) + return nullptr; + + // Is the first argument nil? 
+ SVal Arg = M.getArgSVal(0); + ProgramStateRef notNilState, nilState; + std::tie(notNilState, nilState) = + M.getState()->assume(Arg.castAs<DefinedOrUnknownSVal>()); + if (!(nilState && !notNilState)) + return nullptr; + + const ObjCPropertyDecl *Prop = M.getAccessedProperty(); + if (!Prop) + return nullptr; + + ObjCIvarDecl *PropIvarDecl = Prop->getPropertyIvarDecl(); + if (!PropIvarDecl) + return nullptr; + + ProgramStateRef State = C.getState(); + + SVal LVal = State->getLValue(PropIvarDecl, ReceiverVal); + Optional<Loc> LValLoc = LVal.getAs<Loc>(); + if (!LValLoc) + return nullptr; + + SVal CurrentValInIvar = State->getSVal(LValLoc.getValue()); + return CurrentValInIvar.getAsSymbol(); +} + +/// Returns true if the current context is a call to -dealloc and false +/// otherwise. If true, it also sets SelfValOut to the value of +/// 'self'. +bool ObjCDeallocChecker::isInInstanceDealloc(const CheckerContext &C, + SVal &SelfValOut) const { + return isInInstanceDealloc(C, C.getLocationContext(), SelfValOut); +} + +/// Returns true if LCtx is a call to -dealloc and false +/// otherwise. If true, it also sets SelfValOut to the value of +/// 'self'. +bool ObjCDeallocChecker::isInInstanceDealloc(const CheckerContext &C, + const LocationContext *LCtx, + SVal &SelfValOut) const { + auto *MD = dyn_cast<ObjCMethodDecl>(LCtx->getDecl()); + if (!MD || !MD->isInstanceMethod() || MD->getSelector() != DeallocSel) + return false; + + const ImplicitParamDecl *SelfDecl = LCtx->getSelfDecl(); + assert(SelfDecl && "No self in -dealloc?"); + + ProgramStateRef State = C.getState(); + SelfValOut = State->getSVal(State->getRegion(SelfDecl, LCtx)); + return true; +} + +/// Returns true if there is a call to -dealloc anywhere on the stack and false +/// otherwise. If true, it also sets InstanceValOut to the value of +/// 'self' in the frame for -dealloc. 
+bool ObjCDeallocChecker::instanceDeallocIsOnStack(const CheckerContext &C, + SVal &InstanceValOut) const { + const LocationContext *LCtx = C.getLocationContext(); + + while (LCtx) { + if (isInInstanceDealloc(C, LCtx, InstanceValOut)) + return true; + + LCtx = LCtx->getParent(); + } + + return false; +} + +/// Returns true if the ID is a class which is known to have +/// a separate teardown lifecycle. In this case, -dealloc warnings +/// about missing releases should be suppressed. +bool ObjCDeallocChecker::classHasSeparateTeardown( + const ObjCInterfaceDecl *ID) const { + // Suppress if the class is not a subclass of NSObject. + for ( ; ID ; ID = ID->getSuperClass()) { + IdentifierInfo *II = ID->getIdentifier(); + + if (II == NSObjectII) + return false; + + // FIXME: For now, ignore classes that subclass SenTestCase and XCTestCase, + // as these don't need to implement -dealloc. They implement tear down in + // another way, which we should try and catch later. + // http://llvm.org/bugs/show_bug.cgi?id=3187 + if (II == XCTestCaseII || II == SenTestCaseII) + return true; + } + + return true; +} + +/// The -dealloc method in CIFilter is highly unusual in that it will release +/// instance variables belonging to its *subclasses* if the variable name +/// starts with "input" or backs a property whose name starts with "input". +/// Subclasses should not release these ivars in their own -dealloc method -- +/// doing so could result in an over release. +/// +/// This method returns true if the property will be released by +/// -[CIFilter dealloc]. 
+bool ObjCDeallocChecker::isReleasedByCIFilterDealloc( + const ObjCPropertyImplDecl *PropImpl) const { + assert(PropImpl->getPropertyIvarDecl()); + StringRef PropName = PropImpl->getPropertyDecl()->getName(); + StringRef IvarName = PropImpl->getPropertyIvarDecl()->getName(); + + const char *ReleasePrefix = "input"; + if (!(PropName.startswith(ReleasePrefix) || + IvarName.startswith(ReleasePrefix))) { + return false; + } + + const ObjCInterfaceDecl *ID = + PropImpl->getPropertyIvarDecl()->getContainingInterface(); + for ( ; ID ; ID = ID->getSuperClass()) { + IdentifierInfo *II = ID->getIdentifier(); + if (II == CIFilterII) + return true; + } + + return false; +} + +/// Returns whether the ivar backing the property is an IBOutlet that +/// has its value set by nib loading code without retaining the value. +/// +/// On macOS, if there is no setter, the nib-loading code sets the ivar +/// directly, without retaining the value. +/// +/// On iOS and its derivatives, the nib-loading code will call +/// -setValue:forKey:, which retains the value before directly setting the ivar. +bool ObjCDeallocChecker::isNibLoadedIvarWithoutRetain( + const ObjCPropertyImplDecl *PropImpl) const { + const ObjCIvarDecl *IvarDecl = PropImpl->getPropertyIvarDecl(); + if (!IvarDecl->hasAttr<IBOutletAttr>()) + return false; + + const llvm::Triple &Target = + IvarDecl->getASTContext().getTargetInfo().getTriple(); + + if (!Target.isMacOSX()) + return false; + + if (PropImpl->getPropertyDecl()->getSetterMethodDecl()) + return false; + + return true; +} + +void ento::registerObjCDeallocChecker(CheckerManager &Mgr) { + Mgr.registerChecker<ObjCDeallocChecker>(); +} + +bool ento::shouldRegisterObjCDeallocChecker(const LangOptions &LO) { + // This checker only makes sense under MRR. 
+ return LO.getGC() != LangOptions::GCOnly && !LO.ObjCAutoRefCount; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp new file mode 100644 index 000000000000..1694c237cda4 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp @@ -0,0 +1,143 @@ +//===-- CheckObjCInstMethSignature.cpp - Check ObjC method signatures -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a CheckObjCInstMethSignature, a flow-insensitive check +// that determines if an Objective-C class interface incorrectly redefines +// the method signature in a subclass. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/Analysis/PathDiagnostic.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Type.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +static bool AreTypesCompatible(QualType Derived, QualType Ancestor, + ASTContext &C) { + + // Right now don't compare the compatibility of pointers. That involves + // looking at subtyping relationships. FIXME: Future patch. 
+ if (Derived->isAnyPointerType() && Ancestor->isAnyPointerType()) + return true; + + return C.typesAreCompatible(Derived, Ancestor); +} + +static void CompareReturnTypes(const ObjCMethodDecl *MethDerived, + const ObjCMethodDecl *MethAncestor, + BugReporter &BR, ASTContext &Ctx, + const ObjCImplementationDecl *ID, + const CheckerBase *Checker) { + + QualType ResDerived = MethDerived->getReturnType(); + QualType ResAncestor = MethAncestor->getReturnType(); + + if (!AreTypesCompatible(ResDerived, ResAncestor, Ctx)) { + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + + os << "The Objective-C class '" + << *MethDerived->getClassInterface() + << "', which is derived from class '" + << *MethAncestor->getClassInterface() + << "', defines the instance method '"; + MethDerived->getSelector().print(os); + os << "' whose return type is '" + << ResDerived.getAsString() + << "'. A method with the same name (same selector) is also defined in " + "class '" + << *MethAncestor->getClassInterface() + << "' and has a return type of '" + << ResAncestor.getAsString() + << "'. These two types are incompatible, and may result in undefined " + "behavior for clients of these classes."; + + PathDiagnosticLocation MethDLoc = + PathDiagnosticLocation::createBegin(MethDerived, + BR.getSourceManager()); + + BR.EmitBasicReport( + MethDerived, Checker, "Incompatible instance method return type", + categories::CoreFoundationObjectiveC, os.str(), MethDLoc); + } +} + +static void CheckObjCInstMethSignature(const ObjCImplementationDecl *ID, + BugReporter &BR, + const CheckerBase *Checker) { + + const ObjCInterfaceDecl *D = ID->getClassInterface(); + const ObjCInterfaceDecl *C = D->getSuperClass(); + + if (!C) + return; + + ASTContext &Ctx = BR.getContext(); + + // Build a DenseMap of the methods for quick querying. 
+ typedef llvm::DenseMap<Selector,ObjCMethodDecl*> MapTy; + MapTy IMeths; + unsigned NumMethods = 0; + + for (auto *M : ID->instance_methods()) { + IMeths[M->getSelector()] = M; + ++NumMethods; + } + + // Now recurse the class hierarchy chain looking for methods with the + // same signatures. + while (C && NumMethods) { + for (const auto *M : C->instance_methods()) { + Selector S = M->getSelector(); + + MapTy::iterator MI = IMeths.find(S); + + if (MI == IMeths.end() || MI->second == nullptr) + continue; + + --NumMethods; + ObjCMethodDecl *MethDerived = MI->second; + MI->second = nullptr; + + CompareReturnTypes(MethDerived, M, BR, Ctx, ID, Checker); + } + + C = C->getSuperClass(); + } +} + +//===----------------------------------------------------------------------===// +// ObjCMethSigsChecker +//===----------------------------------------------------------------------===// + +namespace { +class ObjCMethSigsChecker : public Checker< + check::ASTDecl<ObjCImplementationDecl> > { +public: + void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager& mgr, + BugReporter &BR) const { + CheckObjCInstMethSignature(D, BR, this); + } +}; +} + +void ento::registerObjCMethSigsChecker(CheckerManager &mgr) { + mgr.registerChecker<ObjCMethSigsChecker>(); +} + +bool ento::shouldRegisterObjCMethSigsChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp new file mode 100644 index 000000000000..260a2896e78c --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp @@ -0,0 +1,1037 @@ +//==- CheckSecuritySyntaxOnly.cpp - Basic security checks --------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a set of flow-insensitive security checks. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +static bool isArc4RandomAvailable(const ASTContext &Ctx) { + const llvm::Triple &T = Ctx.getTargetInfo().getTriple(); + return T.getVendor() == llvm::Triple::Apple || + T.getOS() == llvm::Triple::CloudABI || + T.isOSFreeBSD() || + T.isOSNetBSD() || + T.isOSOpenBSD() || + T.isOSDragonFly(); +} + +namespace { +struct ChecksFilter { + DefaultBool check_bcmp; + DefaultBool check_bcopy; + DefaultBool check_bzero; + DefaultBool check_gets; + DefaultBool check_getpw; + DefaultBool check_mktemp; + DefaultBool check_mkstemp; + DefaultBool check_strcpy; + DefaultBool check_DeprecatedOrUnsafeBufferHandling; + DefaultBool check_rand; + DefaultBool check_vfork; + DefaultBool check_FloatLoopCounter; + DefaultBool check_UncheckedReturn; + + CheckerNameRef checkName_bcmp; + CheckerNameRef checkName_bcopy; + CheckerNameRef checkName_bzero; + CheckerNameRef checkName_gets; + CheckerNameRef checkName_getpw; + CheckerNameRef checkName_mktemp; + CheckerNameRef checkName_mkstemp; + CheckerNameRef checkName_strcpy; + CheckerNameRef checkName_DeprecatedOrUnsafeBufferHandling; + CheckerNameRef checkName_rand; + CheckerNameRef checkName_vfork; + CheckerNameRef 
checkName_FloatLoopCounter; + CheckerNameRef checkName_UncheckedReturn; +}; + +class WalkAST : public StmtVisitor<WalkAST> { + BugReporter &BR; + AnalysisDeclContext* AC; + enum { num_setids = 6 }; + IdentifierInfo *II_setid[num_setids]; + + const bool CheckRand; + const ChecksFilter &filter; + +public: + WalkAST(BugReporter &br, AnalysisDeclContext* ac, + const ChecksFilter &f) + : BR(br), AC(ac), II_setid(), + CheckRand(isArc4RandomAvailable(BR.getContext())), + filter(f) {} + + // Statement visitor methods. + void VisitCallExpr(CallExpr *CE); + void VisitForStmt(ForStmt *S); + void VisitCompoundStmt (CompoundStmt *S); + void VisitStmt(Stmt *S) { VisitChildren(S); } + + void VisitChildren(Stmt *S); + + // Helpers. + bool checkCall_strCommon(const CallExpr *CE, const FunctionDecl *FD); + + typedef void (WalkAST::*FnCheck)(const CallExpr *, const FunctionDecl *); + + // Checker-specific methods. + void checkLoopConditionForFloat(const ForStmt *FS); + void checkCall_bcmp(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_bcopy(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_bzero(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_gets(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_getpw(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_mktemp(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_mkstemp(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_strcpy(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_strcat(const CallExpr *CE, const FunctionDecl *FD); + void checkDeprecatedOrUnsafeBufferHandling(const CallExpr *CE, + const FunctionDecl *FD); + void checkCall_rand(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_random(const CallExpr *CE, const FunctionDecl *FD); + void checkCall_vfork(const CallExpr *CE, const FunctionDecl *FD); + void checkUncheckedReturnValue(CallExpr *CE); +}; +} // end anonymous namespace + 
+//===----------------------------------------------------------------------===// +// AST walking. +//===----------------------------------------------------------------------===// + +void WalkAST::VisitChildren(Stmt *S) { + for (Stmt *Child : S->children()) + if (Child) + Visit(Child); +} + +void WalkAST::VisitCallExpr(CallExpr *CE) { + // Get the callee. + const FunctionDecl *FD = CE->getDirectCallee(); + + if (!FD) + return; + + // Get the name of the callee. If it's a builtin, strip off the prefix. + IdentifierInfo *II = FD->getIdentifier(); + if (!II) // if no identifier, not a simple C function + return; + StringRef Name = II->getName(); + if (Name.startswith("__builtin_")) + Name = Name.substr(10); + + // Set the evaluation function by switching on the callee name. + FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) + .Case("bcmp", &WalkAST::checkCall_bcmp) + .Case("bcopy", &WalkAST::checkCall_bcopy) + .Case("bzero", &WalkAST::checkCall_bzero) + .Case("gets", &WalkAST::checkCall_gets) + .Case("getpw", &WalkAST::checkCall_getpw) + .Case("mktemp", &WalkAST::checkCall_mktemp) + .Case("mkstemp", &WalkAST::checkCall_mkstemp) + .Case("mkdtemp", &WalkAST::checkCall_mkstemp) + .Case("mkstemps", &WalkAST::checkCall_mkstemp) + .Cases("strcpy", "__strcpy_chk", &WalkAST::checkCall_strcpy) + .Cases("strcat", "__strcat_chk", &WalkAST::checkCall_strcat) + .Cases("sprintf", "vsprintf", "scanf", "wscanf", "fscanf", "fwscanf", + "vscanf", "vwscanf", "vfscanf", "vfwscanf", + &WalkAST::checkDeprecatedOrUnsafeBufferHandling) + .Cases("sscanf", "swscanf", "vsscanf", "vswscanf", "swprintf", + "snprintf", "vswprintf", "vsnprintf", "memcpy", "memmove", + &WalkAST::checkDeprecatedOrUnsafeBufferHandling) + .Cases("strncpy", "strncat", "memset", + &WalkAST::checkDeprecatedOrUnsafeBufferHandling) + .Case("drand48", &WalkAST::checkCall_rand) + .Case("erand48", &WalkAST::checkCall_rand) + .Case("jrand48", &WalkAST::checkCall_rand) + .Case("lrand48", &WalkAST::checkCall_rand) + 
.Case("mrand48", &WalkAST::checkCall_rand) + .Case("nrand48", &WalkAST::checkCall_rand) + .Case("lcong48", &WalkAST::checkCall_rand) + .Case("rand", &WalkAST::checkCall_rand) + .Case("rand_r", &WalkAST::checkCall_rand) + .Case("random", &WalkAST::checkCall_random) + .Case("vfork", &WalkAST::checkCall_vfork) + .Default(nullptr); + + // If the callee isn't defined, it is not of security concern. + // Check and evaluate the call. + if (evalFunction) + (this->*evalFunction)(CE, FD); + + // Recurse and check children. + VisitChildren(CE); +} + +void WalkAST::VisitCompoundStmt(CompoundStmt *S) { + for (Stmt *Child : S->children()) + if (Child) { + if (CallExpr *CE = dyn_cast<CallExpr>(Child)) + checkUncheckedReturnValue(CE); + Visit(Child); + } +} + +void WalkAST::VisitForStmt(ForStmt *FS) { + checkLoopConditionForFloat(FS); + + // Recurse and check children. + VisitChildren(FS); +} + +//===----------------------------------------------------------------------===// +// Check: floating point variable used as loop counter. +// Originally: <rdar://problem/6336718> +// Implements: CERT security coding advisory FLP-30. +//===----------------------------------------------------------------------===// + +// Returns either 'x' or 'y', depending on which one of them is incremented +// in 'expr', or nullptr if none of them is incremented. +static const DeclRefExpr* +getIncrementedVar(const Expr *expr, const VarDecl *x, const VarDecl *y) { + expr = expr->IgnoreParenCasts(); + + if (const BinaryOperator *B = dyn_cast<BinaryOperator>(expr)) { + if (!(B->isAssignmentOp() || B->isCompoundAssignmentOp() || + B->getOpcode() == BO_Comma)) + return nullptr; + + if (const DeclRefExpr *lhs = getIncrementedVar(B->getLHS(), x, y)) + return lhs; + + if (const DeclRefExpr *rhs = getIncrementedVar(B->getRHS(), x, y)) + return rhs; + + return nullptr; + } + + if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(expr)) { + const NamedDecl *ND = DR->getDecl(); + return ND == x || ND == y ? 
DR : nullptr; + } + + if (const UnaryOperator *U = dyn_cast<UnaryOperator>(expr)) + return U->isIncrementDecrementOp() + ? getIncrementedVar(U->getSubExpr(), x, y) : nullptr; + + return nullptr; +} + +/// CheckLoopConditionForFloat - This check looks for 'for' statements that +/// use a floating point variable as a loop counter. +/// CERT: FLP30-C, FLP30-CPP. +/// +void WalkAST::checkLoopConditionForFloat(const ForStmt *FS) { + if (!filter.check_FloatLoopCounter) + return; + + // Does the loop have a condition? + const Expr *condition = FS->getCond(); + + if (!condition) + return; + + // Does the loop have an increment? + const Expr *increment = FS->getInc(); + + if (!increment) + return; + + // Strip away '()' and casts. + condition = condition->IgnoreParenCasts(); + increment = increment->IgnoreParenCasts(); + + // Is the loop condition a comparison? + const BinaryOperator *B = dyn_cast<BinaryOperator>(condition); + + if (!B) + return; + + // Is this a comparison? + if (!(B->isRelationalOp() || B->isEqualityOp())) + return; + + // Are we comparing variables? + const DeclRefExpr *drLHS = + dyn_cast<DeclRefExpr>(B->getLHS()->IgnoreParenLValueCasts()); + const DeclRefExpr *drRHS = + dyn_cast<DeclRefExpr>(B->getRHS()->IgnoreParenLValueCasts()); + + // Does at least one of the variables have a floating point type? + drLHS = drLHS && drLHS->getType()->isRealFloatingType() ? drLHS : nullptr; + drRHS = drRHS && drRHS->getType()->isRealFloatingType() ? drRHS : nullptr; + + if (!drLHS && !drRHS) + return; + + const VarDecl *vdLHS = drLHS ? dyn_cast<VarDecl>(drLHS->getDecl()) : nullptr; + const VarDecl *vdRHS = drRHS ? dyn_cast<VarDecl>(drRHS->getDecl()) : nullptr; + + if (!vdLHS && !vdRHS) + return; + + // Does either variable appear in increment? 
+ const DeclRefExpr *drInc = getIncrementedVar(increment, vdLHS, vdRHS); + if (!drInc) + return; + + const VarDecl *vdInc = cast<VarDecl>(drInc->getDecl()); + assert(vdInc && (vdInc == vdLHS || vdInc == vdRHS)); + + // Emit the error. First figure out which DeclRefExpr in the condition + // referenced the compared variable. + const DeclRefExpr *drCond = vdLHS == vdInc ? drLHS : drRHS; + + SmallVector<SourceRange, 2> ranges; + SmallString<256> sbuf; + llvm::raw_svector_ostream os(sbuf); + + os << "Variable '" << drCond->getDecl()->getName() + << "' with floating point type '" << drCond->getType().getAsString() + << "' should not be used as a loop counter"; + + ranges.push_back(drCond->getSourceRange()); + ranges.push_back(drInc->getSourceRange()); + + const char *bugType = "Floating point variable used as loop counter"; + + PathDiagnosticLocation FSLoc = + PathDiagnosticLocation::createBegin(FS, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_FloatLoopCounter, + bugType, "Security", os.str(), + FSLoc, ranges); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of bcmp. +// CWE-477: Use of Obsolete Functions +// bcmp was deprecated in POSIX.1-2008 +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_bcmp(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_bcmp) + return; + + const FunctionProtoType *FPT = FD->getType()->getAs<FunctionProtoType>(); + if (!FPT) + return; + + // Verify that the function takes three arguments. + if (FPT->getNumParams() != 3) + return; + + for (int i = 0; i < 2; i++) { + // Verify the first and second argument type is void*. + const PointerType *PT = FPT->getParamType(i)->getAs<PointerType>(); + if (!PT) + return; + + if (PT->getPointeeType().getUnqualifiedType() != BR.getContext().VoidTy) + return; + } + + // Verify the third argument type is integer. 
+ if (!FPT->getParamType(2)->isIntegralOrUnscopedEnumerationType()) + return; + + // Issue a warning. + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_bcmp, + "Use of deprecated function in call to 'bcmp()'", + "Security", + "The bcmp() function is obsoleted by memcmp().", + CELoc, CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of bcopy. +// CWE-477: Use of Obsolete Functions +// bcopy was deprecated in POSIX.1-2008 +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_bcopy(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_bcopy) + return; + + const FunctionProtoType *FPT = FD->getType()->getAs<FunctionProtoType>(); + if (!FPT) + return; + + // Verify that the function takes three arguments. + if (FPT->getNumParams() != 3) + return; + + for (int i = 0; i < 2; i++) { + // Verify the first and second argument type is void*. + const PointerType *PT = FPT->getParamType(i)->getAs<PointerType>(); + if (!PT) + return; + + if (PT->getPointeeType().getUnqualifiedType() != BR.getContext().VoidTy) + return; + } + + // Verify the third argument type is integer. + if (!FPT->getParamType(2)->isIntegralOrUnscopedEnumerationType()) + return; + + // Issue a warning. + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_bcopy, + "Use of deprecated function in call to 'bcopy()'", + "Security", + "The bcopy() function is obsoleted by memcpy() " + "or memmove().", + CELoc, CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of bzero. 
+// CWE-477: Use of Obsolete Functions +// bzero was deprecated in POSIX.1-2008 +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_bzero(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_bzero) + return; + + const FunctionProtoType *FPT = FD->getType()->getAs<FunctionProtoType>(); + if (!FPT) + return; + + // Verify that the function takes two arguments. + if (FPT->getNumParams() != 2) + return; + + // Verify the first argument type is void*. + const PointerType *PT = FPT->getParamType(0)->getAs<PointerType>(); + if (!PT) + return; + + if (PT->getPointeeType().getUnqualifiedType() != BR.getContext().VoidTy) + return; + + // Verify the second argument type is integer. + if (!FPT->getParamType(1)->isIntegralOrUnscopedEnumerationType()) + return; + + // Issue a warning. + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_bzero, + "Use of deprecated function in call to 'bzero()'", + "Security", + "The bzero() function is obsoleted by memset().", + CELoc, CE->getCallee()->getSourceRange()); +} + + +//===----------------------------------------------------------------------===// +// Check: Any use of 'gets' is insecure. +// Originally: <rdar://problem/6335715> +// Implements (part of): 300-BSI (buildsecurityin.us-cert.gov) +// CWE-242: Use of Inherently Dangerous Function +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_gets(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_gets) + return; + + const FunctionProtoType *FPT = FD->getType()->getAs<FunctionProtoType>(); + if (!FPT) + return; + + // Verify that the function takes a single argument. + if (FPT->getNumParams() != 1) + return; + + // Is the argument a 'char*'? 
+ const PointerType *PT = FPT->getParamType(0)->getAs<PointerType>(); + if (!PT) + return; + + if (PT->getPointeeType().getUnqualifiedType() != BR.getContext().CharTy) + return; + + // Issue a warning. + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_gets, + "Potential buffer overflow in call to 'gets'", + "Security", + "Call to function 'gets' is extremely insecure as it can " + "always result in a buffer overflow", + CELoc, CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of 'getpw' is insecure. +// CWE-477: Use of Obsolete Functions +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_getpw(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_getpw) + return; + + const FunctionProtoType *FPT = FD->getType()->getAs<FunctionProtoType>(); + if (!FPT) + return; + + // Verify that the function takes two arguments. + if (FPT->getNumParams() != 2) + return; + + // Verify the first argument type is integer. + if (!FPT->getParamType(0)->isIntegralOrUnscopedEnumerationType()) + return; + + // Verify the second argument type is char*. + const PointerType *PT = FPT->getParamType(1)->getAs<PointerType>(); + if (!PT) + return; + + if (PT->getPointeeType().getUnqualifiedType() != BR.getContext().CharTy) + return; + + // Issue a warning. + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_getpw, + "Potential buffer overflow in call to 'getpw'", + "Security", + "The getpw() function is dangerous as it may overflow the " + "provided buffer. 
It is obsoleted by getpwuid().", + CELoc, CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of 'mktemp' is insecure. It is obsoleted by mkstemp(). +// CWE-377: Insecure Temporary File +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_mktemp(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_mktemp) { + // Fall back to the security check of looking for enough 'X's in the + // format string, since that is a less severe warning. + checkCall_mkstemp(CE, FD); + return; + } + + const FunctionProtoType *FPT = FD->getType()->getAs<FunctionProtoType>(); + if(!FPT) + return; + + // Verify that the function takes a single argument. + if (FPT->getNumParams() != 1) + return; + + // Verify that the argument is Pointer Type. + const PointerType *PT = FPT->getParamType(0)->getAs<PointerType>(); + if (!PT) + return; + + // Verify that the argument is a 'char*'. + if (PT->getPointeeType().getUnqualifiedType() != BR.getContext().CharTy) + return; + + // Issue a warning. + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_mktemp, + "Potential insecure temporary file in call 'mktemp'", + "Security", + "Call to function 'mktemp' is insecure as it always " + "creates or uses insecure temporary file. Use 'mkstemp' " + "instead", + CELoc, CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Check: Use of 'mkstemp', 'mktemp', 'mkdtemp' should contain at least 6 X's. 
+//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_mkstemp(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_mkstemp) + return; + + StringRef Name = FD->getIdentifier()->getName(); + std::pair<signed, signed> ArgSuffix = + llvm::StringSwitch<std::pair<signed, signed> >(Name) + .Case("mktemp", std::make_pair(0,-1)) + .Case("mkstemp", std::make_pair(0,-1)) + .Case("mkdtemp", std::make_pair(0,-1)) + .Case("mkstemps", std::make_pair(0,1)) + .Default(std::make_pair(-1, -1)); + + assert(ArgSuffix.first >= 0 && "Unsupported function"); + + // Check if the number of arguments is consistent with out expectations. + unsigned numArgs = CE->getNumArgs(); + if ((signed) numArgs <= ArgSuffix.first) + return; + + const StringLiteral *strArg = + dyn_cast<StringLiteral>(CE->getArg((unsigned)ArgSuffix.first) + ->IgnoreParenImpCasts()); + + // Currently we only handle string literals. It is possible to do better, + // either by looking at references to const variables, or by doing real + // flow analysis. + if (!strArg || strArg->getCharByteWidth() != 1) + return; + + // Count the number of X's, taking into account a possible cutoff suffix. + StringRef str = strArg->getString(); + unsigned numX = 0; + unsigned n = str.size(); + + // Take into account the suffix. + unsigned suffix = 0; + if (ArgSuffix.second >= 0) { + const Expr *suffixEx = CE->getArg((unsigned)ArgSuffix.second); + Expr::EvalResult EVResult; + if (!suffixEx->EvaluateAsInt(EVResult, BR.getContext())) + return; + llvm::APSInt Result = EVResult.Val.getInt(); + // FIXME: Issue a warning. + if (Result.isNegative()) + return; + suffix = (unsigned) Result.getZExtValue(); + n = (n > suffix) ? n - suffix : 0; + } + + for (unsigned i = 0; i < n; ++i) + if (str[i] == 'X') ++numX; + + if (numX >= 6) + return; + + // Issue a warning. 
+ PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + SmallString<512> buf; + llvm::raw_svector_ostream out(buf); + out << "Call to '" << Name << "' should have at least 6 'X's in the" + " format string to be secure (" << numX << " 'X'"; + if (numX != 1) + out << 's'; + out << " seen"; + if (suffix) { + out << ", " << suffix << " character"; + if (suffix > 1) + out << 's'; + out << " used as a suffix"; + } + out << ')'; + BR.EmitBasicReport(AC->getDecl(), filter.checkName_mkstemp, + "Insecure temporary file creation", "Security", + out.str(), CELoc, strArg->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of 'strcpy' is insecure. +// +// CWE-119: Improper Restriction of Operations within +// the Bounds of a Memory Buffer +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_strcpy(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_strcpy) + return; + + if (!checkCall_strCommon(CE, FD)) + return; + + const auto *Target = CE->getArg(0)->IgnoreImpCasts(), + *Source = CE->getArg(1)->IgnoreImpCasts(); + + if (const auto *Array = dyn_cast<ConstantArrayType>(Target->getType())) { + uint64_t ArraySize = BR.getContext().getTypeSize(Array) / 8; + if (const auto *String = dyn_cast<StringLiteral>(Source)) { + if (ArraySize >= String->getLength() + 1) + return; + } + } + + // Issue a warning. + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_strcpy, + "Potential insecure memory buffer bounds restriction in " + "call 'strcpy'", + "Security", + "Call to function 'strcpy' is insecure as it does not " + "provide bounding of the memory buffer. Replace " + "unbounded copy functions with analogous functions that " + "support length arguments such as 'strlcpy'. 
CWE-119.", + CELoc, CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of 'strcat' is insecure. +// +// CWE-119: Improper Restriction of Operations within +// the Bounds of a Memory Buffer +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_strcat(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_strcpy) + return; + + if (!checkCall_strCommon(CE, FD)) + return; + + // Issue a warning. + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_strcpy, + "Potential insecure memory buffer bounds restriction in " + "call 'strcat'", + "Security", + "Call to function 'strcat' is insecure as it does not " + "provide bounding of the memory buffer. Replace " + "unbounded copy functions with analogous functions that " + "support length arguments such as 'strlcat'. CWE-119.", + CELoc, CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Check: Any use of 'sprintf', 'vsprintf', 'scanf', 'wscanf', 'fscanf', +// 'fwscanf', 'vscanf', 'vwscanf', 'vfscanf', 'vfwscanf', 'sscanf', +// 'swscanf', 'vsscanf', 'vswscanf', 'swprintf', 'snprintf', 'vswprintf', +// 'vsnprintf', 'memcpy', 'memmove', 'strncpy', 'strncat', 'memset' +// is deprecated since C11. +// +// Use of 'sprintf', 'vsprintf', 'scanf', 'wscanf','fscanf', +// 'fwscanf', 'vscanf', 'vwscanf', 'vfscanf', 'vfwscanf', 'sscanf', +// 'swscanf', 'vsscanf', 'vswscanf' without buffer limitations +// is insecure. 
+// +// CWE-119: Improper Restriction of Operations within +// the Bounds of a Memory Buffer +//===----------------------------------------------------------------------===// + +void WalkAST::checkDeprecatedOrUnsafeBufferHandling(const CallExpr *CE, + const FunctionDecl *FD) { + if (!filter.check_DeprecatedOrUnsafeBufferHandling) + return; + + if (!BR.getContext().getLangOpts().C11) + return; + + // Issue a warning. ArgIndex == -1: Deprecated but not unsafe (has size + // restrictions). + enum { DEPR_ONLY = -1, UNKNOWN_CALL = -2 }; + + StringRef Name = FD->getIdentifier()->getName(); + if (Name.startswith("__builtin_")) + Name = Name.substr(10); + + int ArgIndex = + llvm::StringSwitch<int>(Name) + .Cases("scanf", "wscanf", "vscanf", "vwscanf", 0) + .Cases("sprintf", "vsprintf", "fscanf", "fwscanf", "vfscanf", + "vfwscanf", "sscanf", "swscanf", "vsscanf", "vswscanf", 1) + .Cases("swprintf", "snprintf", "vswprintf", "vsnprintf", "memcpy", + "memmove", "memset", "strncpy", "strncat", DEPR_ONLY) + .Default(UNKNOWN_CALL); + + assert(ArgIndex != UNKNOWN_CALL && "Unsupported function"); + bool BoundsProvided = ArgIndex == DEPR_ONLY; + + if (!BoundsProvided) { + // Currently we only handle (not wide) string literals. It is possible to do + // better, either by looking at references to const variables, or by doing + // real flow analysis. 
+ auto FormatString = + dyn_cast<StringLiteral>(CE->getArg(ArgIndex)->IgnoreParenImpCasts()); + if (FormatString && + FormatString->getString().find("%s") == StringRef::npos && + FormatString->getString().find("%[") == StringRef::npos) + BoundsProvided = true; + } + + SmallString<128> Buf1; + SmallString<512> Buf2; + llvm::raw_svector_ostream Out1(Buf1); + llvm::raw_svector_ostream Out2(Buf2); + + Out1 << "Potential insecure memory buffer bounds restriction in call '" + << Name << "'"; + Out2 << "Call to function '" << Name + << "' is insecure as it does not provide "; + + if (!BoundsProvided) { + Out2 << "bounding of the memory buffer or "; + } + + Out2 << "security checks introduced " + "in the C11 standard. Replace with analogous functions that " + "support length arguments or provides boundary checks such as '" + << Name << "_s' in case of C11"; + + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), + filter.checkName_DeprecatedOrUnsafeBufferHandling, + Out1.str(), "Security", Out2.str(), CELoc, + CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Common check for str* functions with no bounds parameters. +//===----------------------------------------------------------------------===// + +bool WalkAST::checkCall_strCommon(const CallExpr *CE, const FunctionDecl *FD) { + const FunctionProtoType *FPT = FD->getType()->getAs<FunctionProtoType>(); + if (!FPT) + return false; + + // Verify the function takes two arguments, three in the _chk version. + int numArgs = FPT->getNumParams(); + if (numArgs != 2 && numArgs != 3) + return false; + + // Verify the type for both arguments. + for (int i = 0; i < 2; i++) { + // Verify that the arguments are pointers. + const PointerType *PT = FPT->getParamType(i)->getAs<PointerType>(); + if (!PT) + return false; + + // Verify that the argument is a 'char*'. 
+ if (PT->getPointeeType().getUnqualifiedType() != BR.getContext().CharTy) + return false; + } + + return true; +} + +//===----------------------------------------------------------------------===// +// Check: Linear congruent random number generators should not be used +// Originally: <rdar://problem/63371000> +// CWE-338: Use of cryptographically weak prng +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_rand(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_rand || !CheckRand) + return; + + const FunctionProtoType *FTP = FD->getType()->getAs<FunctionProtoType>(); + if (!FTP) + return; + + if (FTP->getNumParams() == 1) { + // Is the argument an 'unsigned short *'? + // (Actually any integer type is allowed.) + const PointerType *PT = FTP->getParamType(0)->getAs<PointerType>(); + if (!PT) + return; + + if (! PT->getPointeeType()->isIntegralOrUnscopedEnumerationType()) + return; + } else if (FTP->getNumParams() != 0) + return; + + // Issue a warning. + SmallString<256> buf1; + llvm::raw_svector_ostream os1(buf1); + os1 << '\'' << *FD << "' is a poor random number generator"; + + SmallString<256> buf2; + llvm::raw_svector_ostream os2(buf2); + os2 << "Function '" << *FD + << "' is obsolete because it implements a poor random number generator." 
+ << " Use 'arc4random' instead"; + + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_rand, os1.str(), + "Security", os2.str(), CELoc, + CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Check: 'random' should not be used +// Originally: <rdar://problem/63371000> +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_random(const CallExpr *CE, const FunctionDecl *FD) { + if (!CheckRand || !filter.check_rand) + return; + + const FunctionProtoType *FTP = FD->getType()->getAs<FunctionProtoType>(); + if (!FTP) + return; + + // Verify that the function takes no argument. + if (FTP->getNumParams() != 0) + return; + + // Issue a warning. + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_rand, + "'random' is not a secure random number generator", + "Security", + "The 'random' function produces a sequence of values that " + "an adversary may be able to predict. Use 'arc4random' " + "instead", CELoc, CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Check: 'vfork' should not be used. +// POS33-C: Do not use vfork(). +//===----------------------------------------------------------------------===// + +void WalkAST::checkCall_vfork(const CallExpr *CE, const FunctionDecl *FD) { + if (!filter.check_vfork) + return; + + // All calls to vfork() are insecure, issue a warning. 
+ PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_vfork, + "Potential insecure implementation-specific behavior in " + "call 'vfork'", + "Security", + "Call to function 'vfork' is insecure as it can lead to " + "denial of service situations in the parent process. " + "Replace calls to vfork with calls to the safer " + "'posix_spawn' function", + CELoc, CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// Check: Should check whether privileges are dropped successfully. +// Originally: <rdar://problem/6337132> +//===----------------------------------------------------------------------===// + +void WalkAST::checkUncheckedReturnValue(CallExpr *CE) { + if (!filter.check_UncheckedReturn) + return; + + const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return; + + if (II_setid[0] == nullptr) { + static const char * const identifiers[num_setids] = { + "setuid", "setgid", "seteuid", "setegid", + "setreuid", "setregid" + }; + + for (size_t i = 0; i < num_setids; i++) + II_setid[i] = &BR.getContext().Idents.get(identifiers[i]); + } + + const IdentifierInfo *id = FD->getIdentifier(); + size_t identifierid; + + for (identifierid = 0; identifierid < num_setids; identifierid++) + if (id == II_setid[identifierid]) + break; + + if (identifierid >= num_setids) + return; + + const FunctionProtoType *FTP = FD->getType()->getAs<FunctionProtoType>(); + if (!FTP) + return; + + // Verify that the function takes one or two arguments (depending on + // the function). + if (FTP->getNumParams() != (identifierid < 4 ? 1 : 2)) + return; + + // The arguments must be integers. + for (unsigned i = 0; i < FTP->getNumParams(); i++) + if (!FTP->getParamType(i)->isIntegralOrUnscopedEnumerationType()) + return; + + // Issue a warning. 
+ SmallString<256> buf1; + llvm::raw_svector_ostream os1(buf1); + os1 << "Return value is not checked in call to '" << *FD << '\''; + + SmallString<256> buf2; + llvm::raw_svector_ostream os2(buf2); + os2 << "The return value from the call to '" << *FD + << "' is not checked. If an error occurs in '" << *FD + << "', the following code may execute with unexpected privileges"; + + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), filter.checkName_UncheckedReturn, os1.str(), + "Security", os2.str(), CELoc, + CE->getCallee()->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// SecuritySyntaxChecker +//===----------------------------------------------------------------------===// + +namespace { +class SecuritySyntaxChecker : public Checker<check::ASTCodeBody> { +public: + ChecksFilter filter; + + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + WalkAST walker(BR, mgr.getAnalysisDeclContext(D), filter); + walker.Visit(D->getBody()); + } +}; +} + +void ento::registerSecuritySyntaxChecker(CheckerManager &mgr) { + mgr.registerChecker<SecuritySyntaxChecker>(); +} + +bool ento::shouldRegisterSecuritySyntaxChecker(const LangOptions &LO) { + return true; +} + +#define REGISTER_CHECKER(name) \ + void ento::register##name(CheckerManager &mgr) { \ + SecuritySyntaxChecker *checker = mgr.getChecker<SecuritySyntaxChecker>(); \ + checker->filter.check_##name = true; \ + checker->filter.checkName_##name = mgr.getCurrentCheckerName(); \ + } \ + \ + bool ento::shouldRegister##name(const LangOptions &LO) { return true; } + +REGISTER_CHECKER(bcmp) +REGISTER_CHECKER(bcopy) +REGISTER_CHECKER(bzero) +REGISTER_CHECKER(gets) +REGISTER_CHECKER(getpw) +REGISTER_CHECKER(mkstemp) +REGISTER_CHECKER(mktemp) +REGISTER_CHECKER(strcpy) +REGISTER_CHECKER(rand) +REGISTER_CHECKER(vfork) +REGISTER_CHECKER(FloatLoopCounter) 
+REGISTER_CHECKER(UncheckedReturn) +REGISTER_CHECKER(DeprecatedOrUnsafeBufferHandling) diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp new file mode 100644 index 000000000000..ec401cfa8985 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp @@ -0,0 +1,96 @@ +//==- CheckSizeofPointer.cpp - Check for sizeof on pointers ------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a check for unintended use of sizeof() on pointer +// expressions. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" + +using namespace clang; +using namespace ento; + +namespace { +class WalkAST : public StmtVisitor<WalkAST> { + BugReporter &BR; + const CheckerBase *Checker; + AnalysisDeclContext* AC; + +public: + WalkAST(BugReporter &br, const CheckerBase *checker, AnalysisDeclContext *ac) + : BR(br), Checker(checker), AC(ac) {} + void VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E); + void VisitStmt(Stmt *S) { VisitChildren(S); } + void VisitChildren(Stmt *S); +}; +} + +void WalkAST::VisitChildren(Stmt *S) { + for (Stmt *Child : S->children()) + if (Child) + Visit(Child); +} + +// CWE-467: Use of sizeof() on a Pointer Type +void WalkAST::VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E) { + if (E->getKind() != UETT_SizeOf) + return; + + // If an explicit type is used in 
the code, usually the coder knows what they are + // doing. + if (E->isArgumentType()) + return; + + QualType T = E->getTypeOfArgument(); + if (T->isPointerType()) { + + // Many false positives have the form 'sizeof *p'. This is reasonable + // because people know what they are doing when they intentionally + // dereference the pointer. + Expr *ArgEx = E->getArgumentExpr(); + if (!isa<DeclRefExpr>(ArgEx->IgnoreParens())) + return; + + PathDiagnosticLocation ELoc = + PathDiagnosticLocation::createBegin(E, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), Checker, + "Potential unintended use of sizeof() on pointer type", + categories::LogicError, + "The code calls sizeof() on a pointer type. " + "This can produce an unexpected result.", + ELoc, ArgEx->getSourceRange()); + } +} + +//===----------------------------------------------------------------------===// +// SizeofPointerChecker +//===----------------------------------------------------------------------===// + +namespace { +class SizeofPointerChecker : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + WalkAST walker(BR, this, mgr.getAnalysisDeclContext(D)); + walker.Visit(D->getBody()); + } +}; +} + +void ento::registerSizeofPointerChecker(CheckerManager &mgr) { + mgr.registerChecker<SizeofPointerChecker>(); +} + +bool ento::shouldRegisterSizeofPointerChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp new file mode 100644 index 000000000000..3e5e2b913914 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp @@ -0,0 +1,334 @@ +//===- CheckerDocumentation.cpp - Documentation checker ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This checker lists all the checker callbacks and provides documentation for
// checker writers.
//
//===----------------------------------------------------------------------===//

#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"

using namespace clang;
using namespace ento;

// All checkers should be placed into anonymous namespace.
// We place the CheckerDocumentation inside ento namespace to make it
// visible in doxygen.
namespace clang {
namespace ento {

/// This checker documents the callback functions checkers can use to implement
/// the custom handling of the specific events during path exploration as well
/// as reporting bugs. Most of the callbacks are targeted at path-sensitive
/// checking.
///
/// \sa CheckerContext
class CheckerDocumentation : public Checker< check::PreStmt<ReturnStmt>,
                                       check::PostStmt<DeclStmt>,
                                       check::PreObjCMessage,
                                       check::PostObjCMessage,
                                       check::ObjCMessageNil,
                                       check::PreCall,
                                       check::PostCall,
                                       check::BranchCondition,
                                       check::NewAllocator,
                                       check::Location,
                                       check::Bind,
                                       check::DeadSymbols,
                                       check::BeginFunction,
                                       check::EndFunction,
                                       check::EndAnalysis,
                                       check::EndOfTranslationUnit,
                                       eval::Call,
                                       eval::Assume,
                                       check::LiveSymbols,
                                       check::RegionChanges,
                                       check::PointerEscape,
                                       check::ConstPointerEscape,
                                       check::Event<ImplicitNullDerefEvent>,
                                       check::ASTDecl<FunctionDecl> > {
public:
  /// Pre-visit the Statement.
  ///
  /// The method will be called before the analyzer core processes the
  /// statement. The notification is performed for every explored CFGElement,
  /// which does not include the control flow statements such as IfStmt. The
  /// callback can be specialized to be called with any subclass of Stmt.
  ///
  /// See checkBranchCondition() callback for performing custom processing of
  /// the branching statements.
  ///
  /// check::PreStmt<ReturnStmt>
  void checkPreStmt(const ReturnStmt *DS, CheckerContext &C) const {}

  /// Post-visit the Statement.
  ///
  /// The method will be called after the analyzer core processes the
  /// statement. The notification is performed for every explored CFGElement,
  /// which does not include the control flow statements such as IfStmt. The
  /// callback can be specialized to be called with any subclass of Stmt.
  ///
  /// check::PostStmt<DeclStmt>
  void checkPostStmt(const DeclStmt *DS, CheckerContext &C) const;

  /// Pre-visit the Objective C message.
  ///
  /// This will be called before the analyzer core processes the method call.
  /// This is called for any action which produces an Objective-C message send,
  /// including explicit message syntax and property access.
  ///
  /// check::PreObjCMessage
  void checkPreObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const {}

  /// Post-visit the Objective C message.
  /// \sa checkPreObjCMessage()
  ///
  /// check::PostObjCMessage
  void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const {}

  /// Visit an Objective-C message whose receiver is nil.
  ///
  /// This will be called when the analyzer core processes a method call whose
  /// receiver is definitely nil. In this case, check{Pre/Post}ObjCMessage and
  /// check{Pre/Post}Call will not be called.
  ///
  /// check::ObjCMessageNil
  void checkObjCMessageNil(const ObjCMethodCall &M, CheckerContext &C) const {}

  /// Pre-visit an abstract "call" event.
  ///
  /// This is used for checkers that want to check arguments or attributed
  /// behavior for functions and methods no matter how they are being invoked.
  ///
  /// Note that this includes ALL cross-body invocations, so if you want to
  /// limit your checks to, say, function calls, you should test for that at the
  /// beginning of your callback function.
  ///
  /// check::PreCall
  void checkPreCall(const CallEvent &Call, CheckerContext &C) const {}

  /// Post-visit an abstract "call" event.
  /// \sa checkPreCall()
  ///
  /// check::PostCall
  void checkPostCall(const CallEvent &Call, CheckerContext &C) const {}

  /// Pre-visit of the condition statement of a branch (such as IfStmt).
  ///
  /// check::BranchCondition
  void checkBranchCondition(const Stmt *Condition, CheckerContext &Ctx) const {}

  /// Post-visit the C++ operator new's allocation call.
  ///
  /// Execution of C++ operator new consists of the following phases: (1) call
  /// default or overridden operator new() to allocate memory (2) cast the
  /// return value of operator new() from void pointer type to class pointer
  /// type, (3) assuming that the value is non-null, call the object's
  /// constructor over this pointer, (4) declare that the value of the
  /// new-expression is this pointer. This callback is called between steps
  /// (2) and (3). Post-call for the allocator is called after step (1).
  /// Pre-statement for the new-expression is called on step (4) when the value
  /// of the expression is evaluated.
  /// \param NE     The C++ new-expression that triggered the allocation.
  /// \param Target The allocated region, casted to the class type.
  ///
  /// check::NewAllocator
  void checkNewAllocator(const CXXNewExpr *NE, SVal Target,
                         CheckerContext &) const {}

  /// Called on a load from and a store to a location.
  ///
  /// The method will be called each time a location (pointer) value is
  /// accessed.
  /// \param Loc    The value of the location (pointer).
  /// \param IsLoad The flag specifying if the location is a store or a load.
  /// \param S      The load is performed while processing the statement.
  ///
  /// check::Location
  void checkLocation(SVal Loc, bool IsLoad, const Stmt *S,
                     CheckerContext &) const {}

  /// Called on binding of a value to a location.
  ///
  /// \param Loc The value of the location (pointer).
  /// \param Val The value which will be stored at the location Loc.
  /// \param S   The bind is performed while processing the statement S.
  ///
  /// check::Bind
  void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &) const {}

  /// Called whenever a symbol becomes dead.
  ///
  /// This callback should be used by the checkers to aggressively clean
  /// up/reduce the checker state, which is important for reducing the overall
  /// memory usage. Specifically, if a checker keeps symbol specific information
  /// in the state, it can and should be dropped after the symbol becomes dead.
  /// In addition, reporting a bug as soon as the symbol becomes dead leads to
  /// more precise diagnostics. (For example, one should report that a malloced
  /// variable is not freed right after it goes out of scope.)
  ///
  /// \param SR The SymbolReaper object can be queried to determine which
  ///           symbols are dead.
  ///
  /// check::DeadSymbols
  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const {}


  /// Called when the analyzer core starts analyzing a function,
  /// regardless of whether it is analyzed at the top level or is inlined.
  ///
  /// check::BeginFunction
  void checkBeginFunction(CheckerContext &Ctx) const {}

  /// Called when the analyzer core reaches the end of a
  /// function being analyzed regardless of whether it is analyzed at the top
  /// level or is inlined.
  ///
  /// check::EndFunction
  void checkEndFunction(const ReturnStmt *RS, CheckerContext &Ctx) const {}

  /// Called after all the paths in the ExplodedGraph reach end of path
  /// - the symbolic execution graph is fully explored.
  ///
  /// This callback should be used in cases when a checker needs to have a
  /// global view of the information generated on all paths. For example, to
  /// compare execution summary/result across several paths.
  /// See IdempotentOperationChecker for a usage example.
  ///
  /// check::EndAnalysis
  void checkEndAnalysis(ExplodedGraph &G,
                        BugReporter &BR,
                        ExprEngine &Eng) const {}

  /// Called after analysis of a TranslationUnit is complete.
  ///
  /// check::EndOfTranslationUnit
  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
                                 AnalysisManager &Mgr,
                                 BugReporter &BR) const {}

  /// Evaluates function call.
  ///
  /// The analysis core treats all function calls in the same way. However, some
  /// functions have special meaning, which should be reflected in the program
  /// state. This callback allows a checker to provide domain specific knowledge
  /// about the particular functions it knows about.
  ///
  /// \returns true if the call has been successfully evaluated
  /// and false otherwise. Note, that only one checker can evaluate a call. If
  /// more than one checker claims that they can evaluate the same call the
  /// first one wins.
  ///
  /// eval::Call
  // NOTE(review): ChrootChecker in this same patch implements eval::Call as
  // evalCall(const CallEvent &, CheckerContext &); the CallExpr-based
  // signature documented here may be stale - confirm against eval::Call.
  bool evalCall(const CallExpr *CE, CheckerContext &C) const { return true; }

  /// Handles assumptions on symbolic values.
  ///
  /// This method is called when a symbolic expression is assumed to be true or
  /// false. For example, the assumptions are performed when evaluating a
  /// condition at a branch. The callback allows checkers to track the
  /// assumptions performed on the symbols of interest and change the state
  /// accordingly.
  ///
  /// eval::Assume
  ProgramStateRef evalAssume(ProgramStateRef State,
                                 SVal Cond,
                                 bool Assumption) const { return State; }

  /// Allows modifying SymbolReaper object. For example, checkers can explicitly
  /// register symbols of interest as live. These symbols will not be marked
  /// dead and removed.
  ///
  /// check::LiveSymbols
  void checkLiveSymbols(ProgramStateRef State, SymbolReaper &SR) const {}

  /// Called when the contents of one or more regions change.
  ///
  /// This can occur in many different ways: an explicit bind, a blanket
  /// invalidation of the region contents, or by passing a region to a function
  /// call whose behavior the analyzer cannot model perfectly.
  ///
  /// \param State The current program state.
  /// \param Invalidated A set of all symbols potentially touched by the change.
  /// \param ExplicitRegions The regions explicitly requested for invalidation.
  ///        For a function call, this would be the arguments. For a bind, this
  ///        would be the region being bound to.
  /// \param Regions The transitive closure of regions accessible from
  ///        \p ExplicitRegions, i.e. all regions that may have been touched
  ///        by this change. For a simple bind, this list will be the same as
  ///        \p ExplicitRegions, since a bind does not affect the contents of
  ///        anything accessible through the base region.
  /// \param LCtx LocationContext that is useful for getting various contextual
  ///        info, like callstack, CFG etc.
  /// \param Call The opaque call triggering this invalidation. Will be 0 if the
  ///        change was not triggered by a call.
  ///
  /// check::RegionChanges
  ProgramStateRef
    checkRegionChanges(ProgramStateRef State,
                       const InvalidatedSymbols *Invalidated,
                       ArrayRef<const MemRegion *> ExplicitRegions,
                       ArrayRef<const MemRegion *> Regions,
                       const LocationContext *LCtx,
                       const CallEvent *Call) const {
    return State;
  }

  /// Called when pointers escape.
  ///
  /// This notifies the checkers about pointer escape, which occurs whenever
  /// the analyzer cannot track the symbol any more. For example, as a
  /// result of assigning a pointer into a global or when it's passed to a
  /// function call the analyzer cannot model.
  ///
  /// \param State The state at the point of escape.
  /// \param Escaped The list of escaped symbols.
  /// \param Call The corresponding CallEvent, if the symbols escape as
  /// parameters to the given call.
  /// \param Kind How the symbols have escaped.
  /// \returns Checkers can modify the state by returning a new state.
  ///
  /// check::PointerEscape
  ProgramStateRef checkPointerEscape(ProgramStateRef State,
                                     const InvalidatedSymbols &Escaped,
                                     const CallEvent *Call,
                                     PointerEscapeKind Kind) const {
    return State;
  }

  /// Called when const pointers escape.
  ///
  /// Note: in most cases checkPointerEscape callback is sufficient.
  /// \sa checkPointerEscape
  ///
  /// check::ConstPointerEscape
  ProgramStateRef checkConstPointerEscape(ProgramStateRef State,
                                     const InvalidatedSymbols &Escaped,
                                     const CallEvent *Call,
                                     PointerEscapeKind Kind) const {
    return State;
  }

  /// check::Event<ImplicitNullDerefEvent>
  void checkEvent(ImplicitNullDerefEvent Event) const {}

  /// Check every declaration in the AST.
  ///
  /// An AST traversal callback, which should only be used when the checker is
  /// not path sensitive. It will be called for every Declaration in the AST and
  /// can be specialized to only be called on subclasses of Decl, for example,
  /// FunctionDecl.
  ///
  /// check::ASTDecl<FunctionDecl>
  void checkASTDecl(const FunctionDecl *D,
                    AnalysisManager &Mgr,
                    BugReporter &BR) const {}
};

// Out-of-line definition of the one callback that is only declared inside the
// class; like the inline ones, it intentionally does nothing.
void CheckerDocumentation::checkPostStmt(const DeclStmt *DS,
                                         CheckerContext &C) const {
}

} // end namespace ento
} // end namespace clang
diff --git a/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
new file mode 100644
index 000000000000..7a41a7b6b216
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
@@ -0,0 +1,141 @@
//===-- ChrootChecker.cpp - chroot usage checks ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines chroot checker, which checks improper use of chroot.
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" + +using namespace clang; +using namespace ento; + +namespace { + +// enum value that represent the jail state +enum Kind { NO_CHROOT, ROOT_CHANGED, JAIL_ENTERED }; + +bool isRootChanged(intptr_t k) { return k == ROOT_CHANGED; } +//bool isJailEntered(intptr_t k) { return k == JAIL_ENTERED; } + +// This checker checks improper use of chroot. +// The state transition: +// NO_CHROOT ---chroot(path)--> ROOT_CHANGED ---chdir(/) --> JAIL_ENTERED +// | | +// ROOT_CHANGED<--chdir(..)-- JAIL_ENTERED<--chdir(..)-- +// | | +// bug<--foo()-- JAIL_ENTERED<--foo()-- +class ChrootChecker : public Checker<eval::Call, check::PreCall> { + // This bug refers to possibly break out of a chroot() jail. 
+ mutable std::unique_ptr<BuiltinBug> BT_BreakJail; + + const CallDescription Chroot{"chroot", 1}, Chdir{"chdir", 1}; + +public: + ChrootChecker() {} + + static void *getTag() { + static int x; + return &x; + } + + bool evalCall(const CallEvent &Call, CheckerContext &C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + +private: + void evalChroot(const CallEvent &Call, CheckerContext &C) const; + void evalChdir(const CallEvent &Call, CheckerContext &C) const; +}; + +} // end anonymous namespace + +bool ChrootChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { + if (Call.isCalled(Chroot)) { + evalChroot(Call, C); + return true; + } + if (Call.isCalled(Chdir)) { + evalChdir(Call, C); + return true; + } + + return false; +} + +void ChrootChecker::evalChroot(const CallEvent &Call, CheckerContext &C) const { + ProgramStateRef state = C.getState(); + ProgramStateManager &Mgr = state->getStateManager(); + + // Once encouter a chroot(), set the enum value ROOT_CHANGED directly in + // the GDM. + state = Mgr.addGDM(state, ChrootChecker::getTag(), (void*) ROOT_CHANGED); + C.addTransition(state); +} + +void ChrootChecker::evalChdir(const CallEvent &Call, CheckerContext &C) const { + ProgramStateRef state = C.getState(); + ProgramStateManager &Mgr = state->getStateManager(); + + // If there are no jail state in the GDM, just return. + const void *k = state->FindGDM(ChrootChecker::getTag()); + if (!k) + return; + + // After chdir("/"), enter the jail, set the enum value JAIL_ENTERED. 
+ const Expr *ArgExpr = Call.getArgExpr(0); + SVal ArgVal = C.getSVal(ArgExpr); + + if (const MemRegion *R = ArgVal.getAsRegion()) { + R = R->StripCasts(); + if (const StringRegion* StrRegion= dyn_cast<StringRegion>(R)) { + const StringLiteral* Str = StrRegion->getStringLiteral(); + if (Str->getString() == "/") + state = Mgr.addGDM(state, ChrootChecker::getTag(), + (void*) JAIL_ENTERED); + } + } + + C.addTransition(state); +} + +// Check the jail state before any function call except chroot and chdir(). +void ChrootChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + // Ignore chroot and chdir. + if (Call.isCalled(Chroot) || Call.isCalled(Chdir)) + return; + + // If jail state is ROOT_CHANGED, generate BugReport. + void *const* k = C.getState()->FindGDM(ChrootChecker::getTag()); + if (k) + if (isRootChanged((intptr_t) *k)) + if (ExplodedNode *N = C.generateNonFatalErrorNode()) { + if (!BT_BreakJail) + BT_BreakJail.reset(new BuiltinBug( + this, "Break out of jail", "No call of chdir(\"/\") immediately " + "after chroot")); + C.emitReport(std::make_unique<PathSensitiveBugReport>( + *BT_BreakJail, BT_BreakJail->getDescription(), N)); + } +} + +void ento::registerChrootChecker(CheckerManager &mgr) { + mgr.registerChecker<ChrootChecker>(); +} + +bool ento::shouldRegisterChrootChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp new file mode 100644 index 000000000000..ce45b5be34c9 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,213 @@ +//===--- CloneChecker.cpp - Clone detection checker -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// CloneChecker is a checker that reports clones in the current translation +/// unit. +/// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/Analysis/CloneDetection.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CloneChecker + : public Checker<check::ASTCodeBody, check::EndOfTranslationUnit> { +public: + // Checker options. + int MinComplexity; + bool ReportNormalClones; + StringRef IgnoredFilesPattern; + +private: + mutable CloneDetector Detector; + mutable std::unique_ptr<BugType> BT_Exact, BT_Suspicious; + +public: + void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr, + BugReporter &BR) const; + + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, BugReporter &BR) const; + + /// Reports all clones to the user. + void reportClones(BugReporter &BR, AnalysisManager &Mgr, + std::vector<CloneDetector::CloneGroup> &CloneGroups) const; + + /// Reports only suspicious clones to the user along with information + /// that explain why they are suspicious. 
+ void reportSuspiciousClones( + BugReporter &BR, AnalysisManager &Mgr, + std::vector<CloneDetector::CloneGroup> &CloneGroups) const; +}; +} // end anonymous namespace + +void CloneChecker::checkASTCodeBody(const Decl *D, AnalysisManager &Mgr, + BugReporter &BR) const { + // Every statement that should be included in the search for clones needs to + // be passed to the CloneDetector. + Detector.analyzeCodeBody(D); +} + +void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugReporter &BR) const { + // At this point, every statement in the translation unit has been analyzed by + // the CloneDetector. The only thing left to do is to report the found clones. + + // Let the CloneDetector create a list of clones from all the analyzed + // statements. We don't filter for matching variable patterns at this point + // because reportSuspiciousClones() wants to search them for errors. + std::vector<CloneDetector::CloneGroup> AllCloneGroups; + + Detector.findClones( + AllCloneGroups, FilenamePatternConstraint(IgnoredFilesPattern), + RecursiveCloneTypeIIHashConstraint(), MinGroupSizeConstraint(2), + MinComplexityConstraint(MinComplexity), + RecursiveCloneTypeIIVerifyConstraint(), OnlyLargestCloneConstraint()); + + reportSuspiciousClones(BR, Mgr, AllCloneGroups); + + // We are done for this translation unit unless we also need to report normal + // clones. 
+ if (!ReportNormalClones) + return; + + // Now that the suspicious clone detector has checked for pattern errors, + // we also filter all clones who don't have matching patterns + CloneDetector::constrainClones(AllCloneGroups, + MatchingVariablePatternConstraint(), + MinGroupSizeConstraint(2)); + + reportClones(BR, Mgr, AllCloneGroups); +} + +static PathDiagnosticLocation makeLocation(const StmtSequence &S, + AnalysisManager &Mgr) { + ASTContext &ACtx = Mgr.getASTContext(); + return PathDiagnosticLocation::createBegin( + S.front(), ACtx.getSourceManager(), + Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl())); +} + +void CloneChecker::reportClones( + BugReporter &BR, AnalysisManager &Mgr, + std::vector<CloneDetector::CloneGroup> &CloneGroups) const { + + if (!BT_Exact) + BT_Exact.reset(new BugType(this, "Exact code clone", "Code clone")); + + for (const CloneDetector::CloneGroup &Group : CloneGroups) { + // We group the clones by printing the first as a warning and all others + // as a note. 
+ auto R = std::make_unique<BasicBugReport>( + *BT_Exact, "Duplicate code detected", makeLocation(Group.front(), Mgr)); + R->addRange(Group.front().getSourceRange()); + + for (unsigned i = 1; i < Group.size(); ++i) + R->addNote("Similar code here", makeLocation(Group[i], Mgr), + Group[i].getSourceRange()); + BR.emitReport(std::move(R)); + } +} + +void CloneChecker::reportSuspiciousClones( + BugReporter &BR, AnalysisManager &Mgr, + std::vector<CloneDetector::CloneGroup> &CloneGroups) const { + std::vector<VariablePattern::SuspiciousClonePair> Pairs; + + for (const CloneDetector::CloneGroup &Group : CloneGroups) { + for (unsigned i = 0; i < Group.size(); ++i) { + VariablePattern PatternA(Group[i]); + + for (unsigned j = i + 1; j < Group.size(); ++j) { + VariablePattern PatternB(Group[j]); + + VariablePattern::SuspiciousClonePair ClonePair; + // For now, we only report clones which break the variable pattern just + // once because multiple differences in a pattern are an indicator that + // those differences are maybe intended (e.g. because it's actually a + // different algorithm). + // FIXME: In very big clones even multiple variables can be unintended, + // so replacing this number with a percentage could better handle such + // cases. On the other hand it could increase the false-positive rate + // for all clones if the percentage is too high. 
+ if (PatternA.countPatternDifferences(PatternB, &ClonePair) == 1) { + Pairs.push_back(ClonePair); + break; + } + } + } + } + + if (!BT_Suspicious) + BT_Suspicious.reset( + new BugType(this, "Suspicious code clone", "Code clone")); + + ASTContext &ACtx = BR.getContext(); + SourceManager &SM = ACtx.getSourceManager(); + AnalysisDeclContext *ADC = + Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl()); + + for (VariablePattern::SuspiciousClonePair &Pair : Pairs) { + // FIXME: We are ignoring the suggestions currently, because they are + // only 50% accurate (even if the second suggestion is unavailable), + // which may confuse the user. + // Think how to perform more accurate suggestions? + + auto R = std::make_unique<BasicBugReport>( + *BT_Suspicious, + "Potential copy-paste error; did you really mean to use '" + + Pair.FirstCloneInfo.Variable->getNameAsString() + "' here?", + PathDiagnosticLocation::createBegin(Pair.FirstCloneInfo.Mention, SM, + ADC)); + R->addRange(Pair.FirstCloneInfo.Mention->getSourceRange()); + + R->addNote("Similar code using '" + + Pair.SecondCloneInfo.Variable->getNameAsString() + "' here", + PathDiagnosticLocation::createBegin(Pair.SecondCloneInfo.Mention, + SM, ADC), + Pair.SecondCloneInfo.Mention->getSourceRange()); + + BR.emitReport(std::move(R)); + } +} + +//===----------------------------------------------------------------------===// +// Register CloneChecker +//===----------------------------------------------------------------------===// + +void ento::registerCloneChecker(CheckerManager &Mgr) { + auto *Checker = Mgr.registerChecker<CloneChecker>(); + + Checker->MinComplexity = Mgr.getAnalyzerOptions().getCheckerIntegerOption( + Checker, "MinimumCloneComplexity"); + + if (Checker->MinComplexity < 0) + Mgr.reportInvalidCheckerOptionValue( + Checker, "MinimumCloneComplexity", "a non-negative value"); + + Checker->ReportNormalClones = Mgr.getAnalyzerOptions().getCheckerBooleanOption( + Checker, "ReportNormalClones"); + + 
Checker->IgnoredFilesPattern = Mgr.getAnalyzerOptions() + .getCheckerStringOption(Checker, "IgnoredFilesPattern"); +} + +bool ento::shouldRegisterCloneChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp new file mode 100644 index 000000000000..8dd3132f07e2 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp @@ -0,0 +1,201 @@ +//=== ConversionChecker.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Check that there is no loss of sign/precision in assignments, comparisons +// and multiplications. +// +// ConversionChecker uses path sensitive analysis to determine possible values +// of expressions. A warning is reported when: +// * a negative value is implicitly converted to an unsigned value in an +// assignment, comparison or multiplication. +// * assignment / initialization when the source value is greater than the max +// value of the target integer type +// * assignment / initialization when the source integer is above the range +// where the target floating point type can represent all integers +// +// Many compilers and tools have similar checks that are based on semantic +// analysis. Those checks are sound but have poor precision. ConversionChecker +// is an alternative to those checks. 
+// +//===----------------------------------------------------------------------===// +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/ParentMap.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/ADT/APFloat.h" + +#include <climits> + +using namespace clang; +using namespace ento; + +namespace { +class ConversionChecker : public Checker<check::PreStmt<ImplicitCastExpr>> { +public: + void checkPreStmt(const ImplicitCastExpr *Cast, CheckerContext &C) const; + +private: + mutable std::unique_ptr<BuiltinBug> BT; + + bool isLossOfPrecision(const ImplicitCastExpr *Cast, QualType DestType, + CheckerContext &C) const; + + bool isLossOfSign(const ImplicitCastExpr *Cast, CheckerContext &C) const; + + void reportBug(ExplodedNode *N, CheckerContext &C, const char Msg[]) const; +}; +} + +void ConversionChecker::checkPreStmt(const ImplicitCastExpr *Cast, + CheckerContext &C) const { + // TODO: For now we only warn about DeclRefExpr, to avoid noise. Warn for + // calculations also. + if (!isa<DeclRefExpr>(Cast->IgnoreParenImpCasts())) + return; + + // Don't warn for loss of sign/precision in macros. + if (Cast->getExprLoc().isMacroID()) + return; + + // Get Parent. + const ParentMap &PM = C.getLocationContext()->getParentMap(); + const Stmt *Parent = PM.getParent(Cast); + if (!Parent) + return; + + bool LossOfSign = false; + bool LossOfPrecision = false; + + // Loss of sign/precision in binary operation. + if (const auto *B = dyn_cast<BinaryOperator>(Parent)) { + BinaryOperator::Opcode Opc = B->getOpcode(); + if (Opc == BO_Assign) { + LossOfSign = isLossOfSign(Cast, C); + LossOfPrecision = isLossOfPrecision(Cast, Cast->getType(), C); + } else if (Opc == BO_AddAssign || Opc == BO_SubAssign) { + // No loss of sign. 
+ LossOfPrecision = isLossOfPrecision(Cast, B->getLHS()->getType(), C); + } else if (Opc == BO_MulAssign) { + LossOfSign = isLossOfSign(Cast, C); + LossOfPrecision = isLossOfPrecision(Cast, B->getLHS()->getType(), C); + } else if (Opc == BO_DivAssign || Opc == BO_RemAssign) { + LossOfSign = isLossOfSign(Cast, C); + // No loss of precision. + } else if (Opc == BO_AndAssign) { + LossOfSign = isLossOfSign(Cast, C); + // No loss of precision. + } else if (Opc == BO_OrAssign || Opc == BO_XorAssign) { + LossOfSign = isLossOfSign(Cast, C); + LossOfPrecision = isLossOfPrecision(Cast, B->getLHS()->getType(), C); + } else if (B->isRelationalOp() || B->isMultiplicativeOp()) { + LossOfSign = isLossOfSign(Cast, C); + } + } else if (isa<DeclStmt>(Parent)) { + LossOfSign = isLossOfSign(Cast, C); + LossOfPrecision = isLossOfPrecision(Cast, Cast->getType(), C); + } + + if (LossOfSign || LossOfPrecision) { + // Generate an error node. + ExplodedNode *N = C.generateNonFatalErrorNode(C.getState()); + if (!N) + return; + if (LossOfSign) + reportBug(N, C, "Loss of sign in implicit conversion"); + if (LossOfPrecision) + reportBug(N, C, "Loss of precision in implicit conversion"); + } +} + +void ConversionChecker::reportBug(ExplodedNode *N, CheckerContext &C, + const char Msg[]) const { + if (!BT) + BT.reset( + new BuiltinBug(this, "Conversion", "Possible loss of sign/precision.")); + + // Generate a report for this bug. + auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + C.emitReport(std::move(R)); +} + +bool ConversionChecker::isLossOfPrecision(const ImplicitCastExpr *Cast, + QualType DestType, + CheckerContext &C) const { + // Don't warn about explicit loss of precision. 
+ if (Cast->isEvaluatable(C.getASTContext())) + return false; + + QualType SubType = Cast->IgnoreParenImpCasts()->getType(); + + if (!DestType->isRealType() || !SubType->isIntegerType()) + return false; + + const bool isFloat = DestType->isFloatingType(); + + const auto &AC = C.getASTContext(); + + // We will find the largest RepresentsUntilExp value such that the DestType + // can exactly represent all nonnegative integers below 2^RepresentsUntilExp. + unsigned RepresentsUntilExp; + + if (isFloat) { + const llvm::fltSemantics &Sema = AC.getFloatTypeSemantics(DestType); + RepresentsUntilExp = llvm::APFloat::semanticsPrecision(Sema); + } else { + RepresentsUntilExp = AC.getIntWidth(DestType); + if (RepresentsUntilExp == 1) { + // This is just casting a number to bool, probably not a bug. + return false; + } + if (DestType->isSignedIntegerType()) + RepresentsUntilExp--; + } + + if (RepresentsUntilExp >= sizeof(unsigned long long) * CHAR_BIT) { + // Avoid overflow in our later calculations. + return false; + } + + unsigned CorrectedSrcWidth = AC.getIntWidth(SubType); + if (SubType->isSignedIntegerType()) + CorrectedSrcWidth--; + + if (RepresentsUntilExp >= CorrectedSrcWidth) { + // Simple case: the destination can store all values of the source type. + return false; + } + + unsigned long long MaxVal = 1ULL << RepresentsUntilExp; + if (isFloat) { + // If this is a floating point type, it can also represent MaxVal exactly. + MaxVal++; + } + return C.isGreaterOrEqual(Cast->getSubExpr(), MaxVal); + // TODO: maybe also check negative values with too large magnitude. 
+} + +bool ConversionChecker::isLossOfSign(const ImplicitCastExpr *Cast, + CheckerContext &C) const { + QualType CastType = Cast->getType(); + QualType SubType = Cast->IgnoreParenImpCasts()->getType(); + + if (!CastType->isUnsignedIntegerType() || !SubType->isSignedIntegerType()) + return false; + + return C.isNegative(Cast->getSubExpr()); +} + +void ento::registerConversionChecker(CheckerManager &mgr) { + mgr.registerChecker<ConversionChecker>(); +} + +bool ento::shouldRegisterConversionChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp new file mode 100644 index 000000000000..61441889fc64 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp @@ -0,0 +1,545 @@ +//==- DeadStoresChecker.cpp - Check for stores to dead variables -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a DeadStores, a flow-sensitive checker that looks for +// stores to variables that are no longer live. 
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/Lexer.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/ParentMap.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Analysis/Analyses/LiveVariables.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/SaveAndRestore.h"

using namespace clang;
using namespace ento;

namespace {

/// A simple visitor to record what VarDecls occur in EH-handling code.
///
/// While traversing an Objective-C @finally/@catch statement or the handler
/// block of a C++ catch statement, \c inEH is set; every VarDecl referenced
/// through a DeclRefExpr during that time is inserted into \c S.
class EHCodeVisitor : public RecursiveASTVisitor<EHCodeVisitor> {
public:
  /// True while the traversal is inside EH-handling code.
  bool inEH;
  /// Output set, owned by the caller.
  llvm::DenseSet<const VarDecl *> &S;

  bool TraverseObjCAtFinallyStmt(ObjCAtFinallyStmt *S) {
    // inEH is restored to its previous value when the guard goes out of scope.
    SaveAndRestore<bool> inFinally(inEH, true);
    return ::RecursiveASTVisitor<EHCodeVisitor>::TraverseObjCAtFinallyStmt(S);
  }

  bool TraverseObjCAtCatchStmt(ObjCAtCatchStmt *S) {
    SaveAndRestore<bool> inCatch(inEH, true);
    return ::RecursiveASTVisitor<EHCodeVisitor>::TraverseObjCAtCatchStmt(S);
  }

  bool TraverseCXXCatchStmt(CXXCatchStmt *S) {
    // Only the handler block is traversed here.
    SaveAndRestore<bool> inCatch(inEH, true);
    return TraverseStmt(S->getHandlerBlock());
  }

  bool VisitDeclRefExpr(DeclRefExpr *DR) {
    if (inEH)
      if (const VarDecl *D = dyn_cast<VarDecl>(DR->getDecl()))
        S.insert(D);
    return true;
  }

  EHCodeVisitor(llvm::DenseSet<const VarDecl *> &S) :
  inEH(false), S(S) {}
};

// FIXME: Eventually migrate into its own file, and have it managed by
// AnalysisManager.
/// Records, per CFG block ID, whether the block has been reached by
/// computeReachableBlocks(). All blocks start out as not reachable.
class ReachableCode {
  const CFG &cfg;
  llvm::BitVector reachable;
public:
  ReachableCode(const CFG &cfg)
    : cfg(cfg), reachable(cfg.getNumBlockIDs(), false) {}

  void computeReachableBlocks();

  /// Returns the bit recorded for \p block's ID.
  bool isReachable(const CFGBlock *block) const {
    return reachable[block->getBlockID()];
  }
};
}

/// Marks every block reachable from the CFG entry block by following non-null
/// successor edges with a worklist. Does nothing for a CFG with no block IDs.
void ReachableCode::computeReachableBlocks() {
  if (!cfg.getNumBlockIDs())
    return;

  SmallVector<const CFGBlock*, 10> worklist;
  worklist.push_back(&cfg.getEntry());

  while (!worklist.empty()) {
    const CFGBlock *block = worklist.pop_back_val();
    llvm::BitVector::reference isReachable = reachable[block->getBlockID()];
    // Already visited: its successors were queued before.
    if (isReachable)
      continue;
    isReachable = true;
    for (CFGBlock::const_succ_iterator i = block->succ_begin(),
                                       e = block->succ_end(); i != e; ++i)
      if (const CFGBlock *succ = *i)
        worklist.push_back(succ);
  }
}

/// Repeatedly replaces \p Ex by the right-hand side of an enclosing '=' or ','
/// operator (ignoring parentheses and casts when looking for the operator) and
/// returns the first expression that is neither. Returns null if \p Ex is
/// null.
static const Expr *
LookThroughTransitiveAssignmentsAndCommaOperators(const Expr *Ex) {
  while (Ex) {
    const BinaryOperator *BO =
      dyn_cast<BinaryOperator>(Ex->IgnoreParenCasts());
    if (!BO)
      break;
    if (BO->getOpcode() == BO_Assign) {
      Ex = BO->getRHS();
      continue;
    }
    if (BO->getOpcode() == BO_Comma) {
      Ex = BO->getRHS();
      continue;
    }
    break;
  }
  return Ex;
}

namespace {
/// AST-level checker; the options below are set by
/// registerDeadStoresChecker().
class DeadStoresChecker : public Checker<check::ASTCodeBody> {
public:
  /// When set, a removal fix-it is attached to dead-initialization reports
  /// whose initializer has no (possible) side effects.
  bool ShowFixIts = false;
  /// When cleared, "Dead nested assignment" reports are suppressed.
  bool WarnForDeadNestedAssignments = true;

  void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
                        BugReporter &BR) const;
};

/// LiveVariables observer that inspects each observed statement and reports
/// stores to variables that are not live afterwards.
class DeadStoreObs : public LiveVariables::Observer {
  const CFG &cfg;
  ASTContext &Ctx;
  BugReporter& BR;
  const DeadStoresChecker *Checker;
  AnalysisDeclContext* AC;
  ParentMap& Parents;
  // Variables for which no report is ever emitted (copied from the
  // constructor argument).
  llvm::SmallPtrSet<const VarDecl*, 20> Escaped;
  // Built lazily on the first Report() call.
  std::unique_ptr<ReachableCode> reachableCode;
  // Block of the statement most recently passed to observeStmt().
  const CFGBlock *currentBlock;
  // Built lazily on the first isLive() call that needs it.
  std::unique_ptr<llvm::DenseSet<const VarDecl *>> InEH;

  enum DeadStoreKind { Standard, Enclosing, DeadIncrement, DeadInit };

public:
  // NOTE(review): warnForDeadNestedAssignments is not used by this
  // constructor; Report() reads Checker->WarnForDeadNestedAssignments instead.
  DeadStoreObs(const CFG &cfg, ASTContext &ctx, BugReporter &br,
               const DeadStoresChecker *checker, AnalysisDeclContext *ac,
               ParentMap &parents,
               llvm::SmallPtrSet<const VarDecl *, 20> &escaped,
               bool warnForDeadNestedAssignments)
      : cfg(cfg), Ctx(ctx), BR(br), Checker(checker), AC(ac), Parents(parents),
        Escaped(escaped), currentBlock(nullptr) {}

  ~DeadStoreObs() override {}

  /// Returns true if \p D is live according to \p Live, or if it is referenced
  /// anywhere in EH-handling code of the analyzed body.
  bool isLive(const LiveVariables::LivenessValues &Live, const VarDecl *D) {
    if (Live.isLive(D))
      return true;
    // Lazily construct the set that records which VarDecls are in
    // EH code.
    if (!InEH.get()) {
      InEH.reset(new llvm::DenseSet<const VarDecl *>());
      EHCodeVisitor V(*InEH.get());
      V.TraverseStmt(AC->getBody());
    }
    // Treat all VarDecls that occur in EH code as being "always live"
    // when considering to suppress dead stores.  Frequently stores
    // are followed by reads in EH code, but we don't have the ability
    // to analyze that yet.
    return InEH->count(D);
  }

  /// Returns true if reports located at the beginning of \p R should be
  /// dropped because the containing buffer starts with "/* iig".
  bool isSuppressed(SourceRange R) {
    SourceManager &SMgr = Ctx.getSourceManager();
    SourceLocation Loc = R.getBegin();
    if (!Loc.isValid())
      return false;

    FileID FID = SMgr.getFileID(Loc);
    bool Invalid = false;
    StringRef Data = SMgr.getBufferData(FID, &Invalid);
    if (Invalid)
      return false;

    // Files autogenerated by DriverKit IIG contain some dead stores that
    // we don't want to report.
    if (Data.startswith("/* iig"))
      return true;

    return false;
  }

  /// Emits a "Dead store" report of kind \p dsk for \p V at location \p L with
  /// range \p R, unless the variable escaped, the current block is
  /// unreachable, or the location is suppressed.
  void Report(const VarDecl *V, DeadStoreKind dsk,
              PathDiagnosticLocation L, SourceRange R) {
    if (Escaped.count(V))
      return;

    // Compute reachable blocks within the CFG for trivial cases
    // where a bogus dead store can be reported because itself is unreachable.
    if (!reachableCode.get()) {
      reachableCode.reset(new ReachableCode(cfg));
      reachableCode->computeReachableBlocks();
    }

    if (!reachableCode->isReachable(currentBlock))
      return;

    if (isSuppressed(R))
      return;

    SmallString<64> buf;
    llvm::raw_svector_ostream os(buf);
    const char *BugType = nullptr;

    SmallVector<FixItHint, 1> Fixits;

    switch (dsk) {
      case DeadInit: {
        BugType = "Dead initialization";
        os << "Value stored to '" << *V
           << "' during its initialization is never read";

        ASTContext &ACtx = V->getASTContext();
        if (Checker->ShowFixIts) {
          // No fix-it when removing the initializer could drop a side effect.
          if (V->getInit()->HasSideEffects(ACtx,
                                           /*IncludePossibleEffects=*/true)) {
            break;
          }
          SourceManager &SM = ACtx.getSourceManager();
          const LangOptions &LO = ACtx.getLangOpts();
          // L1 is the end of the token following the declared type; L2 is
          // derived from the end of the initializer. The range {L1, L2} is
          // removed.
          // NOTE(review): the result of findNextToken is dereferenced
          // unconditionally -- confirm it cannot be empty here.
          SourceLocation L1 =
              Lexer::findNextToken(
                  V->getTypeSourceInfo()->getTypeLoc().getEndLoc(),
                  SM, LO)->getEndLoc();
          SourceLocation L2 =
              Lexer::getLocForEndOfToken(V->getInit()->getEndLoc(), 1, SM, LO);
          Fixits.push_back(FixItHint::CreateRemoval({L1, L2}));
        }
        break;
      }

      case DeadIncrement:
        BugType = "Dead increment";
        LLVM_FALLTHROUGH;
      case Standard:
        // BugType is already set when falling through from DeadIncrement.
        if (!BugType) BugType = "Dead assignment";
        os << "Value stored to '" << *V << "' is never read";
        break;

      // eg.: f((x = foo()))
      case Enclosing:
        if (!Checker->WarnForDeadNestedAssignments)
          return;
        BugType = "Dead nested assignment";
        os << "Although the value stored to '" << *V
           << "' is used in the enclosing expression, the value is never "
              "actually read from '"
           << *V << "'";
        break;
    }

    BR.EmitBasicReport(AC->getDecl(), Checker, BugType, "Dead store", os.str(),
                       L, R, Fixits);
  }

  /// Reports a dead store of \p Val into \p VD (located at \p Ex) when \p VD
  /// has local storage, is not a reference, is not live, and carries none of
  /// the unused / blocks / objc_precise_lifetime attributes.
  void CheckVarDecl(const VarDecl *VD, const Expr *Ex, const Expr *Val,
                    DeadStoreKind dsk,
                    const LiveVariables::LivenessValues &Live) {

    if (!VD->hasLocalStorage())
      return;
    // Reference types confuse the dead stores checker.  Skip them
    // for now.
    if (VD->getType()->getAs<ReferenceType>())
      return;

    if (!isLive(Live, VD) &&
        !(VD->hasAttr<UnusedAttr>() || VD->hasAttr<BlocksAttr>() ||
          VD->hasAttr<ObjCPreciseLifetimeAttr>())) {

      PathDiagnosticLocation ExLoc =
        PathDiagnosticLocation::createBegin(Ex, BR.getSourceManager(), AC);
      Report(VD, dsk, ExLoc, Val->getSourceRange());
    }
  }

  /// Forwards to CheckVarDecl() when \p DR refers to a VarDecl.
  void CheckDeclRef(const DeclRefExpr *DR, const Expr *Val, DeadStoreKind dsk,
                    const LiveVariables::LivenessValues& Live) {
    if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl()))
      CheckVarDecl(VD, DR, Val, dsk, Live);
  }

  /// Returns true if \p B is a compound assignment, or if its right-hand side
  /// is a binary operator with \p VD as a direct operand (e.g. x = x + 1).
  bool isIncrement(VarDecl *VD, const BinaryOperator* B) {
    if (B->isCompoundAssignmentOp())
      return true;

    const Expr *RHS = B->getRHS()->IgnoreParenCasts();
    const BinaryOperator* BRHS = dyn_cast<BinaryOperator>(RHS);

    if (!BRHS)
      return false;

    const DeclRefExpr *DR;

    if ((DR = dyn_cast<DeclRefExpr>(BRHS->getLHS()->IgnoreParenCasts())))
      if (DR->getDecl() == VD)
        return true;

    if ((DR = dyn_cast<DeclRefExpr>(BRHS->getRHS()->IgnoreParenCasts())))
      if (DR->getDecl() == VD)
        return true;

    return false;
  }

  /// Observer callback: inspects assignments, post-increments that are the
  /// operand of a return statement, and variable declarations with
  /// initializers.
  void observeStmt(const Stmt *S, const CFGBlock *block,
                   const LiveVariables::LivenessValues &Live) override {

    currentBlock = block;

    // Skip statements in macros.
    if (S->getBeginLoc().isMacroID())
      return;

    // Only cover dead stores from regular assignments.  ++/-- dead stores
    // have never flagged a real bug.
    if (const BinaryOperator* B = dyn_cast<BinaryOperator>(S)) {
      if (!B->isAssignmentOp()) return; // Skip non-assignments.

      if (DeclRefExpr *DR = dyn_cast<DeclRefExpr>(B->getLHS()))
        if (VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) {
          // Special case: check for assigning null to a pointer.
          //  This is a common form of defensive programming.
          const Expr *RHS =
            LookThroughTransitiveAssignmentsAndCommaOperators(B->getRHS());
          RHS = RHS->IgnoreParenCasts();

          QualType T = VD->getType();
          // Stores to volatile-qualified variables are never reported.
          if (T.isVolatileQualified())
            return;
          if (T->isPointerType() || T->isObjCObjectPointerType()) {
            if (RHS->isNullPointerConstant(Ctx, Expr::NPC_ValueDependentIsNull))
              return;
          }

          // Special case: self-assignments.  These are often used to shut up
          //  "unused variable" compiler warnings.
          if (const DeclRefExpr *RhsDR = dyn_cast<DeclRefExpr>(RHS))
            if (VD == dyn_cast<VarDecl>(RhsDR->getDecl()))
              return;

          // Otherwise, issue a warning.
          DeadStoreKind dsk = Parents.isConsumedExpr(B)
                              ? Enclosing
                              : (isIncrement(VD,B) ? DeadIncrement : Standard);

          CheckVarDecl(VD, DR, B->getRHS(), dsk, Live);
        }
    }
    else if (const UnaryOperator* U = dyn_cast<UnaryOperator>(S)) {
      // Only postfix increments are considered ...
      if (!U->isIncrementOp() || U->isPrefix())
        return;

      // ... and only when their parent (ignoring parens/casts) is a return.
      const Stmt *parent = Parents.getParentIgnoreParenCasts(U);
      if (!parent || !isa<ReturnStmt>(parent))
        return;

      const Expr *Ex = U->getSubExpr()->IgnoreParenCasts();

      if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Ex))
        CheckDeclRef(DR, U, DeadIncrement, Live);
    }
    else if (const DeclStmt *DS = dyn_cast<DeclStmt>(S))
      // Iterate through the decls.  Warn if any initializers are complex
      // expressions that are not live (never used).
      // NOTE(review): the 'return' statements inside this loop leave
      // observeStmt entirely, so the remaining declarators of the same
      // DeclStmt are not examined -- confirm this is intended.
      for (const auto *DI : DS->decls()) {
        const auto *V = dyn_cast<VarDecl>(DI);

        if (!V)
          continue;

        if (V->hasLocalStorage()) {
          // Reference types confuse the dead stores checker.  Skip them
          // for now.
          if (V->getType()->getAs<ReferenceType>())
            return;

          if (const Expr *E = V->getInit()) {
            // Strip FullExpr wrappers around the initializer.
            while (const FullExpr *FE = dyn_cast<FullExpr>(E))
              E = FE->getSubExpr();

            // Look through transitive assignments, e.g.:
            // int x = y = 0;
            E = LookThroughTransitiveAssignmentsAndCommaOperators(E);

            // Don't warn on C++ objects (yet) until we can show that their
            // constructors/destructors don't have side effects.
            if (isa<CXXConstructExpr>(E))
              return;

            // A dead initialization is a variable that is dead after it
            // is initialized.  We don't flag warnings for those variables
            // marked 'unused' or 'objc_precise_lifetime'.
            if (!isLive(Live, V) &&
                !V->hasAttr<UnusedAttr>() &&
                !V->hasAttr<ObjCPreciseLifetimeAttr>()) {
              // Special case: check for initializations with constants.
              //
              //  e.g. : int x = 0;
              //
              // If x is EVER assigned a new value later, don't issue
              // a warning.  This is because such initialization can be
              // due to defensive programming.
              if (E->isEvaluatable(Ctx))
                return;

              if (const DeclRefExpr *DRE =
                  dyn_cast<DeclRefExpr>(E->IgnoreParenCasts()))
                if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
                  // Special case: check for initialization from constant
                  //  variables.
                  //
                  //  e.g. extern const int MyConstant;
                  //       int x = MyConstant;
                  //
                  if (VD->hasGlobalStorage() &&
                      VD->getType().isConstQualified())
                    return;
                  // Special case: check for initialization from scalar
                  //  parameters.  This is often a form of defensive
                  //  programming.  Non-scalars are still reported because
                  //  they more likely represent an actual algorithmic
                  //  bug.
                  if (isa<ParmVarDecl>(VD) && VD->getType()->isScalarType())
                    return;
                }

              PathDiagnosticLocation Loc =
                PathDiagnosticLocation::create(V, BR.getSourceManager());
              Report(V, DeadInit, Loc, E->getSourceRange());
            }
          }
        }
      }
  }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// Driver function to invoke the Dead-Stores checker on a CFG.
//===----------------------------------------------------------------------===//

namespace {
/// Statement callback that collects the variables treated as escaped: those
/// whose address is taken with unary '&' and those captured by reference in a
/// lambda.
class FindEscaped {
public:
  llvm::SmallPtrSet<const VarDecl*, 20> Escaped;

  void operator()(const Stmt *S) {
    // Check for '&'. Any VarDecl whose address has been taken we treat as
    // escaped.
    // FIXME: What about references?
    if (auto *LE = dyn_cast<LambdaExpr>(S)) {
      findLambdaReferenceCaptures(LE);
      return;
    }

    const UnaryOperator *U = dyn_cast<UnaryOperator>(S);
    if (!U)
      return;
    if (U->getOpcode() != UO_AddrOf)
      return;

    const Expr *E = U->getSubExpr()->IgnoreParenCasts();
    if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E))
      if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl()))
        Escaped.insert(VD);
  }

  // Treat local variables captured by reference in C++ lambdas as escaped.
  void findLambdaReferenceCaptures(const LambdaExpr *LE) {
    const CXXRecordDecl *LambdaClass = LE->getLambdaClass();
    llvm::DenseMap<const VarDecl *, FieldDecl *> CaptureFields;
    // Filled in by getCaptureFields(); not used afterwards.
    FieldDecl *ThisCaptureField;
    LambdaClass->getCaptureFields(CaptureFields, ThisCaptureField);

    for (const LambdaCapture &C : LE->captures()) {
      if (!C.capturesVariable())
        continue;

      VarDecl *VD = C.getCapturedVar();
      // Captures without a corresponding field are skipped.
      const FieldDecl *FD = CaptureFields[VD];
      if (!FD)
        continue;

      // If the capture field is a reference type, it is capture-by-reference.
      if (FD->getType()->isReferenceType())
        Escaped.insert(VD);
    }
  }
};
} // end anonymous namespace


//===----------------------------------------------------------------------===//
// DeadStoresChecker
//===----------------------------------------------------------------------===//

/// Runs the dead-store observer over all CFG blocks of \p D, after collecting
/// the escaped variables with FindEscaped.
void DeadStoresChecker::checkASTCodeBody(const Decl *D, AnalysisManager &mgr,
                                         BugReporter &BR) const {

  // Don't do anything for template instantiations.
  // Proving that code in a template instantiation is "dead"
  // means proving that it is dead in all instantiations.
  // This same problem exists with -Wunreachable-code.
  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
    if (FD->isTemplateInstantiation())
      return;

  if (LiveVariables *L = mgr.getAnalysis<LiveVariables>(D)) {
    CFG &cfg = *mgr.getCFG(D);
    AnalysisDeclContext *AC = mgr.getAnalysisDeclContext(D);
    ParentMap &pmap = mgr.getParentMap(D);
    FindEscaped FS;
    cfg.VisitBlockStmts(FS);
    DeadStoreObs A(cfg, BR.getContext(), BR, this, AC, pmap, FS.Escaped,
                   WarnForDeadNestedAssignments);
    L->runOnAllBlocks(A);
  }
}

/// Registers the checker and reads its two boolean options.
void ento::registerDeadStoresChecker(CheckerManager &Mgr) {
  auto *Chk = Mgr.registerChecker<DeadStoresChecker>();

  const AnalyzerOptions &AnOpts = Mgr.getAnalyzerOptions();
  Chk->WarnForDeadNestedAssignments =
      AnOpts.getCheckerBooleanOption(Chk, "WarnForDeadNestedAssignments");
  Chk->ShowFixIts =
      AnOpts.getCheckerBooleanOption(Chk, "ShowFixIts");
}

bool ento::shouldRegisterDeadStoresChecker(const LangOptions &LO) {
  return true;
}

// diff --git a/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp b/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp new file mode 100644 index 000000000000..0cb4be2c7fdc --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DebugCheckers.cpp @@ -0,0 +1,351 @@
//==- DebugCheckers.cpp - Debugging Checkers ---------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines checkers that display debugging information.
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/Analysis/Analyses/Dominators.h" +#include "clang/Analysis/Analyses/LiveVariables.h" +#include "clang/Analysis/CallGraph.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "llvm/Support/Process.h" + +using namespace clang; +using namespace ento; + +//===----------------------------------------------------------------------===// +// DominatorsTreeDumper +//===----------------------------------------------------------------------===// + +namespace { +class DominatorsTreeDumper : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (AnalysisDeclContext *AC = mgr.getAnalysisDeclContext(D)) { + CFGDomTree Dom; + Dom.buildDominatorTree(AC->getCFG()); + Dom.dump(); + } + } +}; +} + +void ento::registerDominatorsTreeDumper(CheckerManager &mgr) { + mgr.registerChecker<DominatorsTreeDumper>(); +} + +bool ento::shouldRegisterDominatorsTreeDumper(const LangOptions &LO) { + return true; +} + +//===----------------------------------------------------------------------===// +// PostDominatorsTreeDumper +//===----------------------------------------------------------------------===// + +namespace { +class PostDominatorsTreeDumper : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (AnalysisDeclContext *AC = mgr.getAnalysisDeclContext(D)) { + CFGPostDomTree Dom; + Dom.buildDominatorTree(AC->getCFG()); + 
Dom.dump(); + } + } +}; +} + +void ento::registerPostDominatorsTreeDumper(CheckerManager &mgr) { + mgr.registerChecker<PostDominatorsTreeDumper>(); +} + +bool ento::shouldRegisterPostDominatorsTreeDumper(const LangOptions &LO) { + return true; +} + +//===----------------------------------------------------------------------===// +// ControlDependencyTreeDumper +//===----------------------------------------------------------------------===// + +namespace { +class ControlDependencyTreeDumper : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (AnalysisDeclContext *AC = mgr.getAnalysisDeclContext(D)) { + ControlDependencyCalculator Dom(AC->getCFG()); + Dom.dump(); + } + } +}; +} + +void ento::registerControlDependencyTreeDumper(CheckerManager &mgr) { + mgr.registerChecker<ControlDependencyTreeDumper>(); +} + +bool ento::shouldRegisterControlDependencyTreeDumper(const LangOptions &LO) { + return true; +} + +//===----------------------------------------------------------------------===// +// LiveVariablesDumper +//===----------------------------------------------------------------------===// + +namespace { +class LiveVariablesDumper : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (LiveVariables* L = mgr.getAnalysis<LiveVariables>(D)) { + L->dumpBlockLiveness(mgr.getSourceManager()); + } + } +}; +} + +void ento::registerLiveVariablesDumper(CheckerManager &mgr) { + mgr.registerChecker<LiveVariablesDumper>(); +} + +bool ento::shouldRegisterLiveVariablesDumper(const LangOptions &LO) { + return true; +} + +//===----------------------------------------------------------------------===// +// LiveStatementsDumper +//===----------------------------------------------------------------------===// + +namespace { +class LiveStatementsDumper : public Checker<check::ASTCodeBody> { +public: + void 
checkASTCodeBody(const Decl *D, AnalysisManager& Mgr, + BugReporter &BR) const { + if (LiveVariables *L = Mgr.getAnalysis<RelaxedLiveVariables>(D)) + L->dumpStmtLiveness(Mgr.getSourceManager()); + } +}; +} + +void ento::registerLiveStatementsDumper(CheckerManager &mgr) { + mgr.registerChecker<LiveStatementsDumper>(); +} + +bool ento::shouldRegisterLiveStatementsDumper(const LangOptions &LO) { + return true; +} + +//===----------------------------------------------------------------------===// +// CFGViewer +//===----------------------------------------------------------------------===// + +namespace { +class CFGViewer : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + if (CFG *cfg = mgr.getCFG(D)) { + cfg->viewCFG(mgr.getLangOpts()); + } + } +}; +} + +void ento::registerCFGViewer(CheckerManager &mgr) { + mgr.registerChecker<CFGViewer>(); +} + +bool ento::shouldRegisterCFGViewer(const LangOptions &LO) { + return true; +} + +//===----------------------------------------------------------------------===// +// CFGDumper +//===----------------------------------------------------------------------===// + +namespace { +class CFGDumper : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + PrintingPolicy Policy(mgr.getLangOpts()); + Policy.TerseOutput = true; + Policy.PolishForDeclaration = true; + D->print(llvm::errs(), Policy); + + if (CFG *cfg = mgr.getCFG(D)) { + cfg->dump(mgr.getLangOpts(), + llvm::sys::Process::StandardErrHasColors()); + } + } +}; +} + +void ento::registerCFGDumper(CheckerManager &mgr) { + mgr.registerChecker<CFGDumper>(); +} + +bool ento::shouldRegisterCFGDumper(const LangOptions &LO) { + return true; +} + +//===----------------------------------------------------------------------===// +// CallGraphViewer 
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Builds the call graph of the translation unit and calls viewGraph() on it.
+class CallGraphViewer : public Checker< check::ASTDecl<TranslationUnitDecl> > {
+public:
+  void checkASTDecl(const TranslationUnitDecl *TU, AnalysisManager& mgr,
+                    BugReporter &BR) const {
+    CallGraph Graph;
+    // addToCallGraph takes a non-const declaration, hence the const_cast.
+    Graph.addToCallGraph(const_cast<TranslationUnitDecl*>(TU));
+    Graph.viewGraph();
+  }
+};
+}
+
+void ento::registerCallGraphViewer(CheckerManager &mgr) {
+  mgr.registerChecker<CallGraphViewer>();
+}
+
+bool ento::shouldRegisterCallGraphViewer(const LangOptions &LO) {
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// CallGraphDumper
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Builds the call graph of the translation unit and dumps it.
+class CallGraphDumper : public Checker< check::ASTDecl<TranslationUnitDecl> > {
+public:
+  void checkASTDecl(const TranslationUnitDecl *TU, AnalysisManager& mgr,
+                    BugReporter &BR) const {
+    CallGraph Graph;
+    // addToCallGraph takes a non-const declaration, hence the const_cast.
+    Graph.addToCallGraph(const_cast<TranslationUnitDecl*>(TU));
+    Graph.dump();
+  }
+};
+}
+
+void ento::registerCallGraphDumper(CheckerManager &mgr) {
+  mgr.registerChecker<CallGraphDumper>();
+}
+
+bool ento::shouldRegisterCallGraphDumper(const LangOptions &LO) {
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// ConfigDumper
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Prints the analyzer configuration table, sorted by key, to stderr at the
+/// end of the translation unit.
+class ConfigDumper : public Checker< check::EndOfTranslationUnit > {
+  typedef AnalyzerOptions::ConfigTable Table;
+
+  /// array_pod_sort comparator ordering table entries by their key.
+  static int compareEntry(const Table::MapEntryTy *const *LHS,
+                          const Table::MapEntryTy *const *RHS) {
+    return (*LHS)->getKey().compare((*RHS)->getKey());
+  }
+
+public:
+  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+                                 AnalysisManager& mgr,
+                                 BugReporter &BR) const {
+    const Table &Config = mgr.options.Config;
+
+    // Gather pointers to the entries so they can be sorted by key.
+    SmallVector<const Table::MapEntryTy *, 32> Entries;
+    for (const auto &Entry : Config)
+      Entries.push_back(&Entry);
+    llvm::array_pod_sort(Entries.begin(), Entries.end(), compareEntry);
+
+    llvm::errs() << "[config]\n";
+    for (const Table::MapEntryTy *Entry : Entries) {
+      // Empty values are shown as a pair of double quotes.
+      llvm::errs() << Entry->getKey() << " = "
+                   << (Entry->second.empty() ? "\"\"" : Entry->second)
+                   << '\n';
+    }
+
+    llvm::errs() << "[stats]\n" << "num-entries = " << Entries.size() << '\n';
+  }
+};
+}
+
+void ento::registerConfigDumper(CheckerManager &mgr) {
+  mgr.registerChecker<ConfigDumper>();
+}
+
+bool ento::shouldRegisterConfigDumper(const LangOptions &LO) {
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// ExplodedGraph Viewer
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Asks the engine to display the exploded graph when an analysis ends.
+class ExplodedGraphViewer : public Checker< check::EndAnalysis > {
+public:
+  ExplodedGraphViewer() {}
+  void checkEndAnalysis(ExplodedGraph &G, BugReporter &B,
+                        ExprEngine &Eng) const {
+    Eng.ViewGraph(0);
+  }
+};
+
+}
+
+void ento::registerExplodedGraphViewer(CheckerManager &mgr) {
+  mgr.registerChecker<ExplodedGraphViewer>();
+}
+
+bool ento::shouldRegisterExplodedGraphViewer(const LangOptions &LO) {
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Emits a report for every Stmt that the analyzer visits.
+//===----------------------------------------------------------------------===// + +namespace { + +class ReportStmts : public Checker<check::PreStmt<Stmt>> { + BuiltinBug BT_stmtLoc{this, "Statement"}; + +public: + void checkPreStmt(const Stmt *S, CheckerContext &C) const { + ExplodedNode *Node = C.generateNonFatalErrorNode(); + if (!Node) + return; + + auto Report = + std::make_unique<PathSensitiveBugReport>(BT_stmtLoc, "Statement", Node); + + C.emitReport(std::move(Report)); + } +}; + +} // end of anonymous namespace + +void ento::registerReportStmts(CheckerManager &mgr) { + mgr.registerChecker<ReportStmts>(); +} + +bool ento::shouldRegisterReportStmts(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/DeleteWithNonVirtualDtorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DeleteWithNonVirtualDtorChecker.cpp new file mode 100644 index 000000000000..45c1984c5e15 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DeleteWithNonVirtualDtorChecker.cpp @@ -0,0 +1,153 @@ +//===-- DeleteWithNonVirtualDtorChecker.cpp -----------------------*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines a checker for the OOP52-CPP CERT rule: Do not delete a polymorphic +// object without a virtual destructor. +// +// Diagnostic flags -Wnon-virtual-dtor and -Wdelete-non-virtual-dtor report if +// an object with a virtual function but a non-virtual destructor exists or is +// deleted, respectively. +// +// This check exceeds them by comparing the dynamic and static types of the +// object at the point of destruction and only warns if it happens through a +// pointer to a base type without a virtual destructor. 
The check places a note +// at the last point where the conversion from derived to base happened. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" + +using namespace clang; +using namespace ento; + +namespace { +class DeleteWithNonVirtualDtorChecker + : public Checker<check::PreStmt<CXXDeleteExpr>> { + mutable std::unique_ptr<BugType> BT; + + class DeleteBugVisitor : public BugReporterVisitor { + public: + DeleteBugVisitor() : Satisfied(false) {} + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int X = 0; + ID.AddPointer(&X); + } + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + + private: + bool Satisfied; + }; + +public: + void checkPreStmt(const CXXDeleteExpr *DE, CheckerContext &C) const; +}; +} // end anonymous namespace + +void DeleteWithNonVirtualDtorChecker::checkPreStmt(const CXXDeleteExpr *DE, + CheckerContext &C) const { + const Expr *DeletedObj = DE->getArgument(); + const MemRegion *MR = C.getSVal(DeletedObj).getAsRegion(); + if (!MR) + return; + + const auto *BaseClassRegion = MR->getAs<TypedValueRegion>(); + const auto *DerivedClassRegion = MR->getBaseRegion()->getAs<SymbolicRegion>(); + if (!BaseClassRegion || !DerivedClassRegion) + return; + + const auto *BaseClass = BaseClassRegion->getValueType()->getAsCXXRecordDecl(); + const auto *DerivedClass = + 
DerivedClassRegion->getSymbol()->getType()->getPointeeCXXRecordDecl(); + if (!BaseClass || !DerivedClass) + return; + + if (!BaseClass->hasDefinition() || !DerivedClass->hasDefinition()) + return; + + if (BaseClass->getDestructor()->isVirtual()) + return; + + if (!DerivedClass->isDerivedFrom(BaseClass)) + return; + + if (!BT) + BT.reset(new BugType(this, + "Destruction of a polymorphic object with no " + "virtual destructor", + "Logic error")); + + ExplodedNode *N = C.generateNonFatalErrorNode(); + auto R = std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N); + + // Mark region of problematic base class for later use in the BugVisitor. + R->markInteresting(BaseClassRegion); + R->addVisitor(std::make_unique<DeleteBugVisitor>()); + C.emitReport(std::move(R)); +} + +PathDiagnosticPieceRef +DeleteWithNonVirtualDtorChecker::DeleteBugVisitor::VisitNode( + const ExplodedNode *N, BugReporterContext &BRC, + PathSensitiveBugReport &BR) { + // Stop traversal after the first conversion was found on a path. + if (Satisfied) + return nullptr; + + const Stmt *S = N->getStmtForDiagnostics(); + if (!S) + return nullptr; + + const auto *CastE = dyn_cast<CastExpr>(S); + if (!CastE) + return nullptr; + + // Only interested in DerivedToBase implicit casts. + // Explicit casts can have different CastKinds. + if (const auto *ImplCastE = dyn_cast<ImplicitCastExpr>(CastE)) { + if (ImplCastE->getCastKind() != CK_DerivedToBase) + return nullptr; + } + + // Region associated with the current cast expression. + const MemRegion *M = N->getSVal(CastE).getAsRegion(); + if (!M) + return nullptr; + + // Check if target region was marked as problematic previously. + if (!BR.isInteresting(M)) + return nullptr; + + // Stop traversal on this path. 
+ Satisfied = true; + + SmallString<256> Buf; + llvm::raw_svector_ostream OS(Buf); + OS << "Conversion from derived to base happened here"; + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true); +} + +void ento::registerDeleteWithNonVirtualDtorChecker(CheckerManager &mgr) { + mgr.registerChecker<DeleteWithNonVirtualDtorChecker>(); +} + +bool ento::shouldRegisterDeleteWithNonVirtualDtorChecker( + const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp new file mode 100644 index 000000000000..e3de0b4f4a7f --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp @@ -0,0 +1,309 @@ +//===-- DereferenceChecker.cpp - Null dereference checker -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines NullDerefChecker, a builtin check in ExprEngine that performs +// checks for null pointers at loads and stores. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/ExprObjC.h" +#include "clang/AST/ExprOpenMP.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +class DereferenceChecker + : public Checker< check::Location, + check::Bind, + EventDispatcher<ImplicitNullDerefEvent> > { + mutable std::unique_ptr<BuiltinBug> BT_null; + mutable std::unique_ptr<BuiltinBug> BT_undef; + + void reportBug(ProgramStateRef State, const Stmt *S, CheckerContext &C) const; + +public: + void checkLocation(SVal location, bool isLoad, const Stmt* S, + CheckerContext &C) const; + void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; + + static void AddDerefSource(raw_ostream &os, + SmallVectorImpl<SourceRange> &Ranges, + const Expr *Ex, const ProgramState *state, + const LocationContext *LCtx, + bool loadedFrom = false); +}; +} // end anonymous namespace + +void +DereferenceChecker::AddDerefSource(raw_ostream &os, + SmallVectorImpl<SourceRange> &Ranges, + const Expr *Ex, + const ProgramState *state, + const LocationContext *LCtx, + bool loadedFrom) { + Ex = Ex->IgnoreParenLValueCasts(); + switch (Ex->getStmtClass()) { + default: + break; + case Stmt::DeclRefExprClass: { + const DeclRefExpr *DR = cast<DeclRefExpr>(Ex); + if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { + os << " (" << (loadedFrom ? 
"loaded from" : "from") + << " variable '" << VD->getName() << "')"; + Ranges.push_back(DR->getSourceRange()); + } + break; + } + case Stmt::MemberExprClass: { + const MemberExpr *ME = cast<MemberExpr>(Ex); + os << " (" << (loadedFrom ? "loaded from" : "via") + << " field '" << ME->getMemberNameInfo() << "')"; + SourceLocation L = ME->getMemberLoc(); + Ranges.push_back(SourceRange(L, L)); + break; + } + case Stmt::ObjCIvarRefExprClass: { + const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Ex); + os << " (" << (loadedFrom ? "loaded from" : "via") + << " ivar '" << IV->getDecl()->getName() << "')"; + SourceLocation L = IV->getLocation(); + Ranges.push_back(SourceRange(L, L)); + break; + } + } +} + +static const Expr *getDereferenceExpr(const Stmt *S, bool IsBind=false){ + const Expr *E = nullptr; + + // Walk through lvalue casts to get the original expression + // that syntactically caused the load. + if (const Expr *expr = dyn_cast<Expr>(S)) + E = expr->IgnoreParenLValueCasts(); + + if (IsBind) { + const VarDecl *VD; + const Expr *Init; + std::tie(VD, Init) = parseAssignment(S); + if (VD && Init) + E = Init; + } + return E; +} + +static bool suppressReport(const Expr *E) { + // Do not report dereferences on memory in non-default address spaces. + return E->getType().getQualifiers().hasAddressSpace(); +} + +static bool isDeclRefExprToReference(const Expr *E) { + if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) + return DRE->getDecl()->getType()->isReferenceType(); + return false; +} + +void DereferenceChecker::reportBug(ProgramStateRef State, const Stmt *S, + CheckerContext &C) const { + // Generate an error node. + ExplodedNode *N = C.generateErrorNode(State); + if (!N) + return; + + // We know that 'location' cannot be non-null. This is what + // we call an "explicit" null dereference. 
+ if (!BT_null) + BT_null.reset(new BuiltinBug(this, "Dereference of null pointer")); + + SmallString<100> buf; + llvm::raw_svector_ostream os(buf); + + SmallVector<SourceRange, 2> Ranges; + + switch (S->getStmtClass()) { + case Stmt::ArraySubscriptExprClass: { + os << "Array access"; + const ArraySubscriptExpr *AE = cast<ArraySubscriptExpr>(S); + AddDerefSource(os, Ranges, AE->getBase()->IgnoreParenCasts(), + State.get(), N->getLocationContext()); + os << " results in a null pointer dereference"; + break; + } + case Stmt::OMPArraySectionExprClass: { + os << "Array access"; + const OMPArraySectionExpr *AE = cast<OMPArraySectionExpr>(S); + AddDerefSource(os, Ranges, AE->getBase()->IgnoreParenCasts(), + State.get(), N->getLocationContext()); + os << " results in a null pointer dereference"; + break; + } + case Stmt::UnaryOperatorClass: { + os << "Dereference of null pointer"; + const UnaryOperator *U = cast<UnaryOperator>(S); + AddDerefSource(os, Ranges, U->getSubExpr()->IgnoreParens(), + State.get(), N->getLocationContext(), true); + break; + } + case Stmt::MemberExprClass: { + const MemberExpr *M = cast<MemberExpr>(S); + if (M->isArrow() || isDeclRefExprToReference(M->getBase())) { + os << "Access to field '" << M->getMemberNameInfo() + << "' results in a dereference of a null pointer"; + AddDerefSource(os, Ranges, M->getBase()->IgnoreParenCasts(), + State.get(), N->getLocationContext(), true); + } + break; + } + case Stmt::ObjCIvarRefExprClass: { + const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(S); + os << "Access to instance variable '" << *IV->getDecl() + << "' results in a dereference of a null pointer"; + AddDerefSource(os, Ranges, IV->getBase()->IgnoreParenCasts(), + State.get(), N->getLocationContext(), true); + break; + } + default: + break; + } + + auto report = std::make_unique<PathSensitiveBugReport>( + *BT_null, buf.empty() ? 
BT_null->getDescription() : StringRef(buf), N); + + bugreporter::trackExpressionValue(N, bugreporter::getDerefExpr(S), *report); + + for (SmallVectorImpl<SourceRange>::iterator + I = Ranges.begin(), E = Ranges.end(); I!=E; ++I) + report->addRange(*I); + + C.emitReport(std::move(report)); +} + +void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S, + CheckerContext &C) const { + // Check for dereference of an undefined value. + if (l.isUndef()) { + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT_undef) + BT_undef.reset( + new BuiltinBug(this, "Dereference of undefined pointer value")); + + auto report = std::make_unique<PathSensitiveBugReport>( + *BT_undef, BT_undef->getDescription(), N); + bugreporter::trackExpressionValue(N, bugreporter::getDerefExpr(S), *report); + C.emitReport(std::move(report)); + } + return; + } + + DefinedOrUnknownSVal location = l.castAs<DefinedOrUnknownSVal>(); + + // Check for null dereferences. + if (!location.getAs<Loc>()) + return; + + ProgramStateRef state = C.getState(); + + ProgramStateRef notNullState, nullState; + std::tie(notNullState, nullState) = state->assume(location); + + // The explicit NULL case. + if (nullState) { + if (!notNullState) { + const Expr *expr = getDereferenceExpr(S); + if (!suppressReport(expr)) { + reportBug(nullState, expr, C); + return; + } + } + + // Otherwise, we have the case where the location could either be + // null or not-null. Record the error node as an "implicit" null + // dereference. + if (ExplodedNode *N = C.generateSink(nullState, C.getPredecessor())) { + ImplicitNullDerefEvent event = {l, isLoad, N, &C.getBugReporter(), + /*IsDirectDereference=*/true}; + dispatchEvent(event); + } + } + + // From this point forward, we know that the location is not null. + C.addTransition(notNullState); +} + +void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S, + CheckerContext &C) const { + // If we're binding to a reference, check if the value is known to be null. 
+ if (V.isUndef()) + return; + + const MemRegion *MR = L.getAsRegion(); + const TypedValueRegion *TVR = dyn_cast_or_null<TypedValueRegion>(MR); + if (!TVR) + return; + + if (!TVR->getValueType()->isReferenceType()) + return; + + ProgramStateRef State = C.getState(); + + ProgramStateRef StNonNull, StNull; + std::tie(StNonNull, StNull) = State->assume(V.castAs<DefinedOrUnknownSVal>()); + + if (StNull) { + if (!StNonNull) { + const Expr *expr = getDereferenceExpr(S, /*IsBind=*/true); + if (!suppressReport(expr)) { + reportBug(StNull, expr, C); + return; + } + } + + // At this point the value could be either null or non-null. + // Record this as an "implicit" null dereference. + if (ExplodedNode *N = C.generateSink(StNull, C.getPredecessor())) { + ImplicitNullDerefEvent event = {V, /*isLoad=*/true, N, + &C.getBugReporter(), + /*IsDirectDereference=*/true}; + dispatchEvent(event); + } + } + + // Unlike a regular null dereference, initializing a reference with a + // dereferenced null pointer does not actually cause a runtime exception in + // Clang's implementation of references. + // + // int &r = *p; // safe?? + // if (p != NULL) return; // uh-oh + // r = 5; // trap here + // + // The standard says this is invalid as soon as we try to create a "null + // reference" (there is no such thing), but turning this into an assumption + // that 'p' is never null will not match our actual runtime behavior. + // So we do not record this assumption, allowing us to warn on the last line + // of this example. + // + // We do need to add a transition because we may have generated a sink for + // the "implicit" null dereference. 
+ C.addTransition(State, this); +} + +void ento::registerDereferenceChecker(CheckerManager &mgr) { + mgr.registerChecker<DereferenceChecker>(); +} + +bool ento::shouldRegisterDereferenceChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp b/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp new file mode 100644 index 000000000000..0058f3d3881f --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp @@ -0,0 +1,235 @@ +//=- DirectIvarAssignment.cpp - Check rules on ObjC properties -*- C++ ----*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Check that Objective C properties are set with the setter, not though a +// direct assignment. +// +// Two versions of a checker exist: one that checks all methods and the other +// that only checks the methods annotated with +// __attribute__((annotate("objc_no_direct_instance_variable_assignment"))) +// +// The checker does not warn about assignments to Ivars, annotated with +// __attribute__((objc_allow_direct_instance_variable_assignment"))). This +// annotation serves as a false positive suppression mechanism for the +// checker. The annotation is allowed on properties and Ivars. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/Attr.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "llvm/ADT/DenseMap.h" + +using namespace clang; +using namespace ento; + +namespace { + +/// The default method filter, which is used to filter out the methods on which +/// the check should not be performed. +/// +/// Checks for the init, dealloc, and any other functions that might be allowed +/// to perform direct instance variable assignment based on their name. +static bool DefaultMethodFilter(const ObjCMethodDecl *M) { + return M->getMethodFamily() == OMF_init || + M->getMethodFamily() == OMF_dealloc || + M->getMethodFamily() == OMF_copy || + M->getMethodFamily() == OMF_mutableCopy || + M->getSelector().getNameForSlot(0).find("init") != StringRef::npos || + M->getSelector().getNameForSlot(0).find("Init") != StringRef::npos; +} + +class DirectIvarAssignment : + public Checker<check::ASTDecl<ObjCImplementationDecl> > { + + typedef llvm::DenseMap<const ObjCIvarDecl*, + const ObjCPropertyDecl*> IvarToPropertyMapTy; + + /// A helper class, which walks the AST and locates all assignments to ivars + /// in the given function. 
+ class MethodCrawler : public ConstStmtVisitor<MethodCrawler> { + const IvarToPropertyMapTy &IvarToPropMap; + const ObjCMethodDecl *MD; + const ObjCInterfaceDecl *InterfD; + BugReporter &BR; + const CheckerBase *Checker; + LocationOrAnalysisDeclContext DCtx; + + public: + MethodCrawler(const IvarToPropertyMapTy &InMap, const ObjCMethodDecl *InMD, + const ObjCInterfaceDecl *InID, BugReporter &InBR, + const CheckerBase *Checker, AnalysisDeclContext *InDCtx) + : IvarToPropMap(InMap), MD(InMD), InterfD(InID), BR(InBR), + Checker(Checker), DCtx(InDCtx) {} + + void VisitStmt(const Stmt *S) { VisitChildren(S); } + + void VisitBinaryOperator(const BinaryOperator *BO); + + void VisitChildren(const Stmt *S) { + for (const Stmt *Child : S->children()) + if (Child) + this->Visit(Child); + } + }; + +public: + bool (*ShouldSkipMethod)(const ObjCMethodDecl *); + + DirectIvarAssignment() : ShouldSkipMethod(&DefaultMethodFilter) {} + + void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager& Mgr, + BugReporter &BR) const; +}; + +static const ObjCIvarDecl *findPropertyBackingIvar(const ObjCPropertyDecl *PD, + const ObjCInterfaceDecl *InterD, + ASTContext &Ctx) { + // Check for synthesized ivars. + ObjCIvarDecl *ID = PD->getPropertyIvarDecl(); + if (ID) + return ID; + + ObjCInterfaceDecl *NonConstInterD = const_cast<ObjCInterfaceDecl*>(InterD); + + // Check for existing "_PropName". + ID = NonConstInterD->lookupInstanceVariable(PD->getDefaultSynthIvarName(Ctx)); + if (ID) + return ID; + + // Check for existing "PropName". + IdentifierInfo *PropIdent = PD->getIdentifier(); + ID = NonConstInterD->lookupInstanceVariable(PropIdent); + + return ID; +} + +void DirectIvarAssignment::checkASTDecl(const ObjCImplementationDecl *D, + AnalysisManager& Mgr, + BugReporter &BR) const { + const ObjCInterfaceDecl *InterD = D->getClassInterface(); + + + IvarToPropertyMapTy IvarToPropMap; + + // Find all properties for this class. 
+ for (const auto *PD : InterD->instance_properties()) { + // Find the corresponding IVar. + const ObjCIvarDecl *ID = findPropertyBackingIvar(PD, InterD, + Mgr.getASTContext()); + + if (!ID) + continue; + + // Store the IVar to property mapping. + IvarToPropMap[ID] = PD; + } + + if (IvarToPropMap.empty()) + return; + + for (const auto *M : D->instance_methods()) { + AnalysisDeclContext *DCtx = Mgr.getAnalysisDeclContext(M); + + if ((*ShouldSkipMethod)(M)) + continue; + + const Stmt *Body = M->getBody(); + assert(Body); + + MethodCrawler MC(IvarToPropMap, M->getCanonicalDecl(), InterD, BR, this, + DCtx); + MC.VisitStmt(Body); + } +} + +static bool isAnnotatedToAllowDirectAssignment(const Decl *D) { + for (const auto *Ann : D->specific_attrs<AnnotateAttr>()) + if (Ann->getAnnotation() == + "objc_allow_direct_instance_variable_assignment") + return true; + return false; +} + +void DirectIvarAssignment::MethodCrawler::VisitBinaryOperator( + const BinaryOperator *BO) { + if (!BO->isAssignmentOp()) + return; + + const ObjCIvarRefExpr *IvarRef = + dyn_cast<ObjCIvarRefExpr>(BO->getLHS()->IgnoreParenCasts()); + + if (!IvarRef) + return; + + if (const ObjCIvarDecl *D = IvarRef->getDecl()) { + IvarToPropertyMapTy::const_iterator I = IvarToPropMap.find(D); + + if (I != IvarToPropMap.end()) { + const ObjCPropertyDecl *PD = I->second; + // Skip warnings on Ivars, annotated with + // objc_allow_direct_instance_variable_assignment. This annotation serves + // as a false positive suppression mechanism for the checker. The + // annotation is allowed on properties and ivars. 
+ if (isAnnotatedToAllowDirectAssignment(PD) || + isAnnotatedToAllowDirectAssignment(D)) + return; + + ObjCMethodDecl *GetterMethod = + InterfD->getInstanceMethod(PD->getGetterName()); + ObjCMethodDecl *SetterMethod = + InterfD->getInstanceMethod(PD->getSetterName()); + + if (SetterMethod && SetterMethod->getCanonicalDecl() == MD) + return; + + if (GetterMethod && GetterMethod->getCanonicalDecl() == MD) + return; + + BR.EmitBasicReport( + MD, Checker, "Property access", categories::CoreFoundationObjectiveC, + "Direct assignment to an instance variable backing a property; " + "use the setter instead", + PathDiagnosticLocation(IvarRef, BR.getSourceManager(), DCtx)); + } + } +} +} + +// Register the checker that checks for direct accesses in functions annotated +// with __attribute__((annotate("objc_no_direct_instance_variable_assignment"))). +static bool AttrFilter(const ObjCMethodDecl *M) { + for (const auto *Ann : M->specific_attrs<AnnotateAttr>()) + if (Ann->getAnnotation() == "objc_no_direct_instance_variable_assignment") + return false; + return true; +} + +// Register the checker that checks for direct accesses in all functions, +// except for the initialization and copy routines. 
+void ento::registerDirectIvarAssignment(CheckerManager &mgr) { + mgr.registerChecker<DirectIvarAssignment>(); +} + +bool ento::shouldRegisterDirectIvarAssignment(const LangOptions &LO) { + return true; +} + +void ento::registerDirectIvarAssignmentForAnnotatedFunctions( + CheckerManager &mgr) { + mgr.getChecker<DirectIvarAssignment>()->ShouldSkipMethod = &AttrFilter; +} + +bool ento::shouldRegisterDirectIvarAssignmentForAnnotatedFunctions( + const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp new file mode 100644 index 000000000000..8798bde88dcd --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp @@ -0,0 +1,106 @@ +//== DivZeroChecker.cpp - Division by zero checker --------------*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines DivZeroChecker, a builtin check in ExprEngine that performs +// checks for division by zeros. 
+// +//===----------------------------------------------------------------------===// + +#include "Taint.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; +using namespace taint; + +namespace { +class DivZeroChecker : public Checker< check::PreStmt<BinaryOperator> > { + mutable std::unique_ptr<BuiltinBug> BT; + void reportBug(const char *Msg, ProgramStateRef StateZero, CheckerContext &C, + std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const; + +public: + void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; +}; +} // end anonymous namespace + +static const Expr *getDenomExpr(const ExplodedNode *N) { + const Stmt *S = N->getLocationAs<PreStmt>()->getStmt(); + if (const auto *BE = dyn_cast<BinaryOperator>(S)) + return BE->getRHS(); + return nullptr; +} + +void DivZeroChecker::reportBug( + const char *Msg, ProgramStateRef StateZero, CheckerContext &C, + std::unique_ptr<BugReporterVisitor> Visitor) const { + if (ExplodedNode *N = C.generateErrorNode(StateZero)) { + if (!BT) + BT.reset(new BuiltinBug(this, "Division by zero")); + + auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + R->addVisitor(std::move(Visitor)); + bugreporter::trackExpressionValue(N, getDenomExpr(N), *R); + C.emitReport(std::move(R)); + } +} + +void DivZeroChecker::checkPreStmt(const BinaryOperator *B, + CheckerContext &C) const { + BinaryOperator::Opcode Op = B->getOpcode(); + if (Op != BO_Div && + Op != BO_Rem && + Op != BO_DivAssign && + Op != BO_RemAssign) + return; + + if (!B->getRHS()->getType()->isScalarType()) + return; + + SVal Denom = C.getSVal(B->getRHS()); + Optional<DefinedSVal> DV = Denom.getAs<DefinedSVal>(); + + // Divide-by-undefined handled in 
the generic checking for uses of + // undefined values. + if (!DV) + return; + + // Check for divide by zero. + ConstraintManager &CM = C.getConstraintManager(); + ProgramStateRef stateNotZero, stateZero; + std::tie(stateNotZero, stateZero) = CM.assumeDual(C.getState(), *DV); + + if (!stateNotZero) { + assert(stateZero); + reportBug("Division by zero", stateZero, C); + return; + } + + bool TaintedD = isTainted(C.getState(), *DV); + if ((stateNotZero && stateZero && TaintedD)) { + reportBug("Division by a tainted value, possibly zero", stateZero, C, + std::make_unique<taint::TaintBugVisitor>(*DV)); + return; + } + + // If we get here, then the denom should not be zero. We abandon the implicit + // zero denom case for now. + C.addTransition(stateNotZero); +} + +void ento::registerDivZeroChecker(CheckerManager &mgr) { + mgr.registerChecker<DivZeroChecker>(); +} + +bool ento::shouldRegisterDivZeroChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp new file mode 100644 index 000000000000..8cc38f9735f3 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp @@ -0,0 +1,208 @@ +//== DynamicTypeChecker.cpp ------------------------------------ -*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker looks for cases where the dynamic type of an object is unrelated +// to its static type. The type information utilized by this check is collected +// by the DynamicTypePropagation checker. This check does not report any type +// error for ObjC Generic types, in order to avoid duplicate errors from the +// ObjC Generics checker. 
This checker is not supposed to modify the program +// state, it is just the observer of the type information provided by other +// checkers. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" + +using namespace clang; +using namespace ento; + +namespace { +class DynamicTypeChecker : public Checker<check::PostStmt<ImplicitCastExpr>> { + mutable std::unique_ptr<BugType> BT; + void initBugType() const { + if (!BT) + BT.reset( + new BugType(this, "Dynamic and static type mismatch", "Type Error")); + } + + class DynamicTypeBugVisitor : public BugReporterVisitor { + public: + DynamicTypeBugVisitor(const MemRegion *Reg) : Reg(Reg) {} + + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int X = 0; + ID.AddPointer(&X); + ID.AddPointer(Reg); + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + + private: + // The tracked region. 
+ const MemRegion *Reg; + }; + + void reportTypeError(QualType DynamicType, QualType StaticType, + const MemRegion *Reg, const Stmt *ReportedNode, + CheckerContext &C) const; + +public: + void checkPostStmt(const ImplicitCastExpr *CE, CheckerContext &C) const; +}; +} + +void DynamicTypeChecker::reportTypeError(QualType DynamicType, + QualType StaticType, + const MemRegion *Reg, + const Stmt *ReportedNode, + CheckerContext &C) const { + initBugType(); + SmallString<192> Buf; + llvm::raw_svector_ostream OS(Buf); + OS << "Object has a dynamic type '"; + QualType::print(DynamicType.getTypePtr(), Qualifiers(), OS, C.getLangOpts(), + llvm::Twine()); + OS << "' which is incompatible with static type '"; + QualType::print(StaticType.getTypePtr(), Qualifiers(), OS, C.getLangOpts(), + llvm::Twine()); + OS << "'"; + auto R = std::make_unique<PathSensitiveBugReport>( + *BT, OS.str(), C.generateNonFatalErrorNode()); + R->markInteresting(Reg); + R->addVisitor(std::make_unique<DynamicTypeBugVisitor>(Reg)); + R->addRange(ReportedNode->getSourceRange()); + C.emitReport(std::move(R)); +} + +PathDiagnosticPieceRef DynamicTypeChecker::DynamicTypeBugVisitor::VisitNode( + const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) { + ProgramStateRef State = N->getState(); + ProgramStateRef StatePrev = N->getFirstPred()->getState(); + + DynamicTypeInfo TrackedType = getDynamicTypeInfo(State, Reg); + DynamicTypeInfo TrackedTypePrev = getDynamicTypeInfo(StatePrev, Reg); + if (!TrackedType.isValid()) + return nullptr; + + if (TrackedTypePrev.isValid() && + TrackedTypePrev.getType() == TrackedType.getType()) + return nullptr; + + // Retrieve the associated statement. 
+ const Stmt *S = N->getStmtForDiagnostics(); + if (!S) + return nullptr; + + const LangOptions &LangOpts = BRC.getASTContext().getLangOpts(); + + SmallString<256> Buf; + llvm::raw_svector_ostream OS(Buf); + OS << "Type '"; + QualType::print(TrackedType.getType().getTypePtr(), Qualifiers(), OS, + LangOpts, llvm::Twine()); + OS << "' is inferred from "; + + if (const auto *ExplicitCast = dyn_cast<ExplicitCastExpr>(S)) { + OS << "explicit cast (from '"; + QualType::print(ExplicitCast->getSubExpr()->getType().getTypePtr(), + Qualifiers(), OS, LangOpts, llvm::Twine()); + OS << "' to '"; + QualType::print(ExplicitCast->getType().getTypePtr(), Qualifiers(), OS, + LangOpts, llvm::Twine()); + OS << "')"; + } else if (const auto *ImplicitCast = dyn_cast<ImplicitCastExpr>(S)) { + OS << "implicit cast (from '"; + QualType::print(ImplicitCast->getSubExpr()->getType().getTypePtr(), + Qualifiers(), OS, LangOpts, llvm::Twine()); + OS << "' to '"; + QualType::print(ImplicitCast->getType().getTypePtr(), Qualifiers(), OS, + LangOpts, llvm::Twine()); + OS << "')"; + } else { + OS << "this context"; + } + + // Generate the extra diagnostic. + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true); +} + +static bool hasDefinition(const ObjCObjectPointerType *ObjPtr) { + const ObjCInterfaceDecl *Decl = ObjPtr->getInterfaceDecl(); + if (!Decl) + return false; + + return Decl->getDefinition(); +} + +// TODO: consider checking explicit casts? +void DynamicTypeChecker::checkPostStmt(const ImplicitCastExpr *CE, + CheckerContext &C) const { + // TODO: C++ support. 
+ if (CE->getCastKind() != CK_BitCast) + return; + + const MemRegion *Region = C.getSVal(CE).getAsRegion(); + if (!Region) + return; + + ProgramStateRef State = C.getState(); + DynamicTypeInfo DynTypeInfo = getDynamicTypeInfo(State, Region); + + if (!DynTypeInfo.isValid()) + return; + + QualType DynType = DynTypeInfo.getType(); + QualType StaticType = CE->getType(); + + const auto *DynObjCType = DynType->getAs<ObjCObjectPointerType>(); + const auto *StaticObjCType = StaticType->getAs<ObjCObjectPointerType>(); + + if (!DynObjCType || !StaticObjCType) + return; + + if (!hasDefinition(DynObjCType) || !hasDefinition(StaticObjCType)) + return; + + ASTContext &ASTCtxt = C.getASTContext(); + + // Strip kindofness to correctly detect subtyping relationships. + DynObjCType = DynObjCType->stripObjCKindOfTypeAndQuals(ASTCtxt); + StaticObjCType = StaticObjCType->stripObjCKindOfTypeAndQuals(ASTCtxt); + + // Specialized objects are handled by the generics checker. + if (StaticObjCType->isSpecialized()) + return; + + if (ASTCtxt.canAssignObjCInterfaces(StaticObjCType, DynObjCType)) + return; + + if (DynTypeInfo.canBeASubClass() && + ASTCtxt.canAssignObjCInterfaces(DynObjCType, StaticObjCType)) + return; + + reportTypeError(DynType, StaticType, Region, CE, C); +} + +void ento::registerDynamicTypeChecker(CheckerManager &mgr) { + mgr.registerChecker<DynamicTypeChecker>(); +} + +bool ento::shouldRegisterDynamicTypeChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp new file mode 100644 index 000000000000..cce3449b8873 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp @@ -0,0 +1,994 @@ +//===- DynamicTypePropagation.cpp ------------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains two checkers. One helps the static analyzer core to track +// types, the other does type inference on Obj-C generics and reports type +// errors. +// +// Dynamic Type Propagation: +// This checker defines the rules for dynamic type gathering and propagation. +// +// Generics Checker for Objective-C: +// This checker tries to find type errors that the compiler is not able to catch +// due to the implicit conversions that were introduced for backward +// compatibility. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ParentMap.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/Builtins.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" + +using namespace clang; +using namespace ento; + +// ProgramState trait - The type information is tracked by DynamicTypeMap. This is +// an auxiliary map that tracks more information about generic types, because in +// some cases the most derived type is not the most informative one about the +// type parameters. The types that are stored for each symbol in this map must +// be specialized. +// TODO: In some cases the type stored in this map is exactly the same as the one +// stored in DynamicTypeMap. We should not store duplicated information in those +// cases. 
+REGISTER_MAP_WITH_PROGRAMSTATE(MostSpecializedTypeArgsMap, SymbolRef, + const ObjCObjectPointerType *) + +namespace { +class DynamicTypePropagation: + public Checker< check::PreCall, + check::PostCall, + check::DeadSymbols, + check::PostStmt<CastExpr>, + check::PostStmt<CXXNewExpr>, + check::PreObjCMessage, + check::PostObjCMessage > { + const ObjCObjectType *getObjectTypeForAllocAndNew(const ObjCMessageExpr *MsgE, + CheckerContext &C) const; + + /// Return a better dynamic type if one can be derived from the cast. + const ObjCObjectPointerType *getBetterObjCType(const Expr *CastE, + CheckerContext &C) const; + + ExplodedNode *dynamicTypePropagationOnCasts(const CastExpr *CE, + ProgramStateRef &State, + CheckerContext &C) const; + + mutable std::unique_ptr<BugType> ObjCGenericsBugType; + void initBugType() const { + if (!ObjCGenericsBugType) + ObjCGenericsBugType.reset( + new BugType(this, "Generics", categories::CoreFoundationObjectiveC)); + } + + class GenericsBugVisitor : public BugReporterVisitor { + public: + GenericsBugVisitor(SymbolRef S) : Sym(S) {} + + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int X = 0; + ID.AddPointer(&X); + ID.AddPointer(Sym); + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + + private: + // The tracked symbol. 
+ SymbolRef Sym; + }; + + void reportGenericsBug(const ObjCObjectPointerType *From, + const ObjCObjectPointerType *To, ExplodedNode *N, + SymbolRef Sym, CheckerContext &C, + const Stmt *ReportedNode = nullptr) const; + +public: + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + void checkPostStmt(const CastExpr *CastE, CheckerContext &C) const; + void checkPostStmt(const CXXNewExpr *NewE, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; + void checkPreObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const; + void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const; + + /// This value is set to true, when the Generics checker is turned on. + DefaultBool CheckGenerics; +}; +} // end anonymous namespace + +void DynamicTypePropagation::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + ProgramStateRef State = removeDeadTypes(C.getState(), SR); + + MostSpecializedTypeArgsMapTy TyArgMap = + State->get<MostSpecializedTypeArgsMap>(); + for (MostSpecializedTypeArgsMapTy::iterator I = TyArgMap.begin(), + E = TyArgMap.end(); + I != E; ++I) { + if (SR.isDead(I->first)) { + State = State->remove<MostSpecializedTypeArgsMap>(I->first); + } + } + + C.addTransition(State); +} + +static void recordFixedType(const MemRegion *Region, const CXXMethodDecl *MD, + CheckerContext &C) { + assert(Region); + assert(MD); + + ASTContext &Ctx = C.getASTContext(); + QualType Ty = Ctx.getPointerType(Ctx.getRecordType(MD->getParent())); + + ProgramStateRef State = C.getState(); + State = setDynamicTypeInfo(State, Region, Ty, /*CanBeSubClassed=*/false); + C.addTransition(State); +} + +void DynamicTypePropagation::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + if (const CXXConstructorCall *Ctor = dyn_cast<CXXConstructorCall>(&Call)) { + // C++11 [class.cdtor]p4: When a virtual function is called directly or 
+ // indirectly from a constructor or from a destructor, including during + // the construction or destruction of the class's non-static data members, + // and the object to which the call applies is the object under + // construction or destruction, the function called is the final overrider + // in the constructor's or destructor's class and not one overriding it in + // a more-derived class. + + switch (Ctor->getOriginExpr()->getConstructionKind()) { + case CXXConstructExpr::CK_Complete: + case CXXConstructExpr::CK_Delegating: + // No additional type info necessary. + return; + case CXXConstructExpr::CK_NonVirtualBase: + case CXXConstructExpr::CK_VirtualBase: + if (const MemRegion *Target = Ctor->getCXXThisVal().getAsRegion()) + recordFixedType(Target, Ctor->getDecl(), C); + return; + } + + return; + } + + if (const CXXDestructorCall *Dtor = dyn_cast<CXXDestructorCall>(&Call)) { + // C++11 [class.cdtor]p4 (see above) + if (!Dtor->isBaseDestructor()) + return; + + const MemRegion *Target = Dtor->getCXXThisVal().getAsRegion(); + if (!Target) + return; + + const Decl *D = Dtor->getDecl(); + if (!D) + return; + + recordFixedType(Target, cast<CXXDestructorDecl>(D), C); + return; + } +} + +void DynamicTypePropagation::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + // We can obtain perfect type info for return values from some calls. + if (const ObjCMethodCall *Msg = dyn_cast<ObjCMethodCall>(&Call)) { + + // Get the returned value if it's a region. + const MemRegion *RetReg = Call.getReturnValue().getAsRegion(); + if (!RetReg) + return; + + ProgramStateRef State = C.getState(); + const ObjCMethodDecl *D = Msg->getDecl(); + + if (D && D->hasRelatedResultType()) { + switch (Msg->getMethodFamily()) { + default: + break; + + // We assume that the type of the object returned by alloc and new are the + // pointer to the object of the class specified in the receiver of the + // message. 
+ case OMF_alloc: + case OMF_new: { + // Get the type of object that will get created. + const ObjCMessageExpr *MsgE = Msg->getOriginExpr(); + const ObjCObjectType *ObjTy = getObjectTypeForAllocAndNew(MsgE, C); + if (!ObjTy) + return; + QualType DynResTy = + C.getASTContext().getObjCObjectPointerType(QualType(ObjTy, 0)); + C.addTransition(setDynamicTypeInfo(State, RetReg, DynResTy, false)); + break; + } + case OMF_init: { + // Assume, the result of the init method has the same dynamic type as + // the receiver and propagate the dynamic type info. + const MemRegion *RecReg = Msg->getReceiverSVal().getAsRegion(); + if (!RecReg) + return; + DynamicTypeInfo RecDynType = getDynamicTypeInfo(State, RecReg); + C.addTransition(setDynamicTypeInfo(State, RetReg, RecDynType)); + break; + } + } + } + return; + } + + if (const CXXConstructorCall *Ctor = dyn_cast<CXXConstructorCall>(&Call)) { + // We may need to undo the effects of our pre-call check. + switch (Ctor->getOriginExpr()->getConstructionKind()) { + case CXXConstructExpr::CK_Complete: + case CXXConstructExpr::CK_Delegating: + // No additional work necessary. + // Note: This will leave behind the actual type of the object for + // complete constructors, but arguably that's a good thing, since it + // means the dynamic type info will be correct even for objects + // constructed with operator new. + return; + case CXXConstructExpr::CK_NonVirtualBase: + case CXXConstructExpr::CK_VirtualBase: + if (const MemRegion *Target = Ctor->getCXXThisVal().getAsRegion()) { + // We just finished a base constructor. Now we can use the subclass's + // type when resolving virtual calls. + const LocationContext *LCtx = C.getLocationContext(); + + // FIXME: In C++17 classes with non-virtual bases may be treated as + // aggregates, and in such case no top-frame constructor will be called. + // Figure out if we need to do anything in this case. 
+ // FIXME: Instead of relying on the ParentMap, we should have the + // trigger-statement (InitListExpr in this case) available in this + // callback, ideally as part of CallEvent. + if (dyn_cast_or_null<InitListExpr>( + LCtx->getParentMap().getParent(Ctor->getOriginExpr()))) + return; + + recordFixedType(Target, cast<CXXConstructorDecl>(LCtx->getDecl()), C); + } + return; + } + } +} + +/// TODO: Handle explicit casts. +/// Handle C++ casts. +/// +/// Precondition: the cast is between ObjCObjectPointers. +ExplodedNode *DynamicTypePropagation::dynamicTypePropagationOnCasts( + const CastExpr *CE, ProgramStateRef &State, CheckerContext &C) const { + // We only track type info for regions. + const MemRegion *ToR = C.getSVal(CE).getAsRegion(); + if (!ToR) + return C.getPredecessor(); + + if (isa<ExplicitCastExpr>(CE)) + return C.getPredecessor(); + + if (const Type *NewTy = getBetterObjCType(CE, C)) { + State = setDynamicTypeInfo(State, ToR, QualType(NewTy, 0)); + return C.addTransition(State); + } + return C.getPredecessor(); +} + +void DynamicTypePropagation::checkPostStmt(const CXXNewExpr *NewE, + CheckerContext &C) const { + if (NewE->isArray()) + return; + + // We only track dynamic type info for regions. 
+ const MemRegion *MR = C.getSVal(NewE).getAsRegion(); + if (!MR) + return; + + C.addTransition(setDynamicTypeInfo(C.getState(), MR, NewE->getType(), + /*CanBeSubClassed=*/false)); +} + +const ObjCObjectType * +DynamicTypePropagation::getObjectTypeForAllocAndNew(const ObjCMessageExpr *MsgE, + CheckerContext &C) const { + if (MsgE->getReceiverKind() == ObjCMessageExpr::Class) { + if (const ObjCObjectType *ObjTy + = MsgE->getClassReceiver()->getAs<ObjCObjectType>()) + return ObjTy; + } + + if (MsgE->getReceiverKind() == ObjCMessageExpr::SuperClass) { + if (const ObjCObjectType *ObjTy + = MsgE->getSuperType()->getAs<ObjCObjectType>()) + return ObjTy; + } + + const Expr *RecE = MsgE->getInstanceReceiver(); + if (!RecE) + return nullptr; + + RecE= RecE->IgnoreParenImpCasts(); + if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(RecE)) { + const StackFrameContext *SFCtx = C.getStackFrame(); + // Are we calling [self alloc]? If this is self, get the type of the + // enclosing ObjC class. + if (DRE->getDecl() == SFCtx->getSelfDecl()) { + if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(SFCtx->getDecl())) + if (const ObjCObjectType *ObjTy = + dyn_cast<ObjCObjectType>(MD->getClassInterface()->getTypeForDecl())) + return ObjTy; + } + } + return nullptr; +} + +// Return a better dynamic type if one can be derived from the cast. +// Compare the current dynamic type of the region and the new type to which we +// are casting. If the new type is lower in the inheritance hierarchy, pick it. +const ObjCObjectPointerType * +DynamicTypePropagation::getBetterObjCType(const Expr *CastE, + CheckerContext &C) const { + const MemRegion *ToR = C.getSVal(CastE).getAsRegion(); + assert(ToR); + + // Get the old and new types. 
+ const ObjCObjectPointerType *NewTy = + CastE->getType()->getAs<ObjCObjectPointerType>(); + if (!NewTy) + return nullptr; + QualType OldDTy = getDynamicTypeInfo(C.getState(), ToR).getType(); + if (OldDTy.isNull()) { + return NewTy; + } + const ObjCObjectPointerType *OldTy = + OldDTy->getAs<ObjCObjectPointerType>(); + if (!OldTy) + return nullptr; + + // If the old type is 'id', the new one is more precise. + if (OldTy->isObjCIdType() && !NewTy->isObjCIdType()) + return NewTy; + + // Return new if it's a subclass of old. + const ObjCInterfaceDecl *ToI = NewTy->getInterfaceDecl(); + const ObjCInterfaceDecl *FromI = OldTy->getInterfaceDecl(); + if (ToI && FromI && FromI->isSuperClassOf(ToI)) + return NewTy; + + return nullptr; +} + +static const ObjCObjectPointerType *getMostInformativeDerivedClassImpl( + const ObjCObjectPointerType *From, const ObjCObjectPointerType *To, + const ObjCObjectPointerType *MostInformativeCandidate, ASTContext &C) { + // Checking if from and to are the same classes modulo specialization. + if (From->getInterfaceDecl()->getCanonicalDecl() == + To->getInterfaceDecl()->getCanonicalDecl()) { + if (To->isSpecialized()) { + assert(MostInformativeCandidate->isSpecialized()); + return MostInformativeCandidate; + } + return From; + } + + if (To->getObjectType()->getSuperClassType().isNull()) { + // If To has no super class and From and To aren't the same then + // To was not actually a descendent of From. In this case the best we can + // do is 'From'. 
+ return From; + } + + const auto *SuperOfTo = + To->getObjectType()->getSuperClassType()->castAs<ObjCObjectType>(); + assert(SuperOfTo); + QualType SuperPtrOfToQual = + C.getObjCObjectPointerType(QualType(SuperOfTo, 0)); + const auto *SuperPtrOfTo = SuperPtrOfToQual->castAs<ObjCObjectPointerType>(); + if (To->isUnspecialized()) + return getMostInformativeDerivedClassImpl(From, SuperPtrOfTo, SuperPtrOfTo, + C); + else + return getMostInformativeDerivedClassImpl(From, SuperPtrOfTo, + MostInformativeCandidate, C); +} + +/// A downcast may lose specialization information. E.g.: +/// MutableMap<T, U> : Map +/// The downcast to MutableMap loses the information about the types of the +/// Map (because the type parameters are not forwarded to Map), and in +/// general there is no way to recover that information from the +/// declaration. In order to have the most information, let's find the most +/// derived type that has all the type parameters forwarded. +/// +/// Get a subclass of \p From (which has a lower bound \p To) that does not +/// lose information about type parameters. \p To has to be a subclass of +/// \p From. From has to be specialized. +static const ObjCObjectPointerType * +getMostInformativeDerivedClass(const ObjCObjectPointerType *From, + const ObjCObjectPointerType *To, ASTContext &C) { + return getMostInformativeDerivedClassImpl(From, To, To, C); +} + +/// Inputs: +/// \param StaticLowerBound Static lower bound for a symbol. The dynamic lower +/// bound might be the subclass of this type. +/// \param StaticUpperBound A static upper bound for a symbol. +/// \p StaticLowerBound is expected to be the subclass of \p StaticUpperBound. +/// \param Current The type that was inferred for a symbol in a previous +/// context. Might be null when this is the first time that inference happens. +/// Precondition: +/// \p StaticLowerBound or \p StaticUpperBound is specialized. If \p Current +/// is not null, it is specialized. 
+/// Possible cases: +/// (1) The \p Current is null and \p StaticLowerBound <: \p StaticUpperBound +/// (2) \p StaticLowerBound <: \p Current <: \p StaticUpperBound +/// (3) \p Current <: \p StaticLowerBound <: \p StaticUpperBound +/// (4) \p StaticLowerBound <: \p StaticUpperBound <: \p Current +/// Effect: +/// Use getMostInformativeDerivedClass with the upper and lower bound of the +/// set {\p StaticLowerBound, \p Current, \p StaticUpperBound}. The computed +/// lower bound must be specialized. If the result differs from \p Current or +/// \p Current is null, store the result. +static bool +storeWhenMoreInformative(ProgramStateRef &State, SymbolRef Sym, + const ObjCObjectPointerType *const *Current, + const ObjCObjectPointerType *StaticLowerBound, + const ObjCObjectPointerType *StaticUpperBound, + ASTContext &C) { + // TODO: The above 4 cases are not exhaustive. In particular, it is possible + // for Current to be incomparable with StaticLowerBound, StaticUpperBound, + // or both. + // + // For example, suppose Foo<T> and Bar<T> are unrelated types. + // + // Foo<T> *f = ... + // Bar<T> *b = ... + // + // id t1 = b; + // f = t1; + // id t2 = f; // StaticLowerBound is Foo<T>, Current is Bar<T> + // + // We should either constrain the callers of this function so that the stated + // preconditions hold (and assert it) or rewrite the function to explicitly + // handle the additional cases. + + // Precondition + assert(StaticUpperBound->isSpecialized() || + StaticLowerBound->isSpecialized()); + assert(!Current || (*Current)->isSpecialized()); + + // Case (1) + if (!Current) { + if (StaticUpperBound->isUnspecialized()) { + State = State->set<MostSpecializedTypeArgsMap>(Sym, StaticLowerBound); + return true; + } + // Upper bound is specialized. 
+ const ObjCObjectPointerType *WithMostInfo = + getMostInformativeDerivedClass(StaticUpperBound, StaticLowerBound, C); + State = State->set<MostSpecializedTypeArgsMap>(Sym, WithMostInfo); + return true; + } + + // Case (3) + if (C.canAssignObjCInterfaces(StaticLowerBound, *Current)) { + return false; + } + + // Case (4) + if (C.canAssignObjCInterfaces(*Current, StaticUpperBound)) { + // The type arguments might not be forwarded at any point of inheritance. + const ObjCObjectPointerType *WithMostInfo = + getMostInformativeDerivedClass(*Current, StaticUpperBound, C); + WithMostInfo = + getMostInformativeDerivedClass(WithMostInfo, StaticLowerBound, C); + if (WithMostInfo == *Current) + return false; + State = State->set<MostSpecializedTypeArgsMap>(Sym, WithMostInfo); + return true; + } + + // Case (2) + const ObjCObjectPointerType *WithMostInfo = + getMostInformativeDerivedClass(*Current, StaticLowerBound, C); + if (WithMostInfo != *Current) { + State = State->set<MostSpecializedTypeArgsMap>(Sym, WithMostInfo); + return true; + } + + return false; +} + +/// Type inference based on static type information that is available for the +/// cast and the tracked type information for the given symbol. When the tracked +/// symbol and the destination type of the cast are unrelated, report an error. 
+void DynamicTypePropagation::checkPostStmt(const CastExpr *CE, + CheckerContext &C) const { + if (CE->getCastKind() != CK_BitCast) + return; + + QualType OriginType = CE->getSubExpr()->getType(); + QualType DestType = CE->getType(); + + const auto *OrigObjectPtrType = OriginType->getAs<ObjCObjectPointerType>(); + const auto *DestObjectPtrType = DestType->getAs<ObjCObjectPointerType>(); + + if (!OrigObjectPtrType || !DestObjectPtrType) + return; + + ProgramStateRef State = C.getState(); + ExplodedNode *AfterTypeProp = dynamicTypePropagationOnCasts(CE, State, C); + + ASTContext &ASTCtxt = C.getASTContext(); + + // This checker detects the subtyping relationships using the assignment + // rules. In order to be able to do this the kindofness must be stripped + // first. The checker treats every type as kindof type anyways: when the + // tracked type is the subtype of the static type it tries to look up the + // methods in the tracked type first. + OrigObjectPtrType = OrigObjectPtrType->stripObjCKindOfTypeAndQuals(ASTCtxt); + DestObjectPtrType = DestObjectPtrType->stripObjCKindOfTypeAndQuals(ASTCtxt); + + if (OrigObjectPtrType->isUnspecialized() && + DestObjectPtrType->isUnspecialized()) + return; + + SymbolRef Sym = C.getSVal(CE).getAsSymbol(); + if (!Sym) + return; + + const ObjCObjectPointerType *const *TrackedType = + State->get<MostSpecializedTypeArgsMap>(Sym); + + if (isa<ExplicitCastExpr>(CE)) { + // Treat explicit casts as an indication from the programmer that the + // Objective-C type system is not rich enough to express the needed + // invariant. In such cases, forget any existing information inferred + // about the type arguments. We don't assume the casted-to specialized + // type here because the invariant the programmer specifies in the cast + // may only hold at this particular program point and not later ones. + // We don't want a suppressing cast to require a cascade of casts down the + // line. 
+ if (TrackedType) { + State = State->remove<MostSpecializedTypeArgsMap>(Sym); + C.addTransition(State, AfterTypeProp); + } + return; + } + + // Check which assignments are legal. + bool OrigToDest = + ASTCtxt.canAssignObjCInterfaces(DestObjectPtrType, OrigObjectPtrType); + bool DestToOrig = + ASTCtxt.canAssignObjCInterfaces(OrigObjectPtrType, DestObjectPtrType); + + // The tracked type should be the sub or super class of the static destination + // type. When an (implicit) upcast or a downcast happens according to static + // types, and there is no subtyping relationship between the tracked and the + // static destination types, it indicates an error. + if (TrackedType && + !ASTCtxt.canAssignObjCInterfaces(DestObjectPtrType, *TrackedType) && + !ASTCtxt.canAssignObjCInterfaces(*TrackedType, DestObjectPtrType)) { + static CheckerProgramPointTag IllegalConv(this, "IllegalConversion"); + ExplodedNode *N = C.addTransition(State, AfterTypeProp, &IllegalConv); + reportGenericsBug(*TrackedType, DestObjectPtrType, N, Sym, C); + return; + } + + // Handle downcasts and upcasts. + + const ObjCObjectPointerType *LowerBound = DestObjectPtrType; + const ObjCObjectPointerType *UpperBound = OrigObjectPtrType; + if (OrigToDest && !DestToOrig) + std::swap(LowerBound, UpperBound); + + // The id type is not a real bound. Eliminate it. + LowerBound = LowerBound->isObjCIdType() ? UpperBound : LowerBound; + UpperBound = UpperBound->isObjCIdType() ? 
LowerBound : UpperBound; + + if (storeWhenMoreInformative(State, Sym, TrackedType, LowerBound, UpperBound, + ASTCtxt)) { + C.addTransition(State, AfterTypeProp); + } +} + +static const Expr *stripCastsAndSugar(const Expr *E) { + E = E->IgnoreParenImpCasts(); + if (const PseudoObjectExpr *POE = dyn_cast<PseudoObjectExpr>(E)) + E = POE->getSyntacticForm()->IgnoreParenImpCasts(); + if (const OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E)) + E = OVE->getSourceExpr()->IgnoreParenImpCasts(); + return E; +} + +static bool isObjCTypeParamDependent(QualType Type) { + // It is illegal to typedef parameterized types inside an interface. Therefore + // an Objective-C type can only be dependent on a type parameter when the type + // parameter structurally present in the type itself. + class IsObjCTypeParamDependentTypeVisitor + : public RecursiveASTVisitor<IsObjCTypeParamDependentTypeVisitor> { + public: + IsObjCTypeParamDependentTypeVisitor() : Result(false) {} + bool VisitObjCTypeParamType(const ObjCTypeParamType *Type) { + if (isa<ObjCTypeParamDecl>(Type->getDecl())) { + Result = true; + return false; + } + return true; + } + + bool Result; + }; + + IsObjCTypeParamDependentTypeVisitor Visitor; + Visitor.TraverseType(Type); + return Visitor.Result; +} + +/// A method might not be available in the interface indicated by the static +/// type. However it might be available in the tracked type. In order to +/// properly substitute the type parameters we need the declaration context of +/// the method. The more specialized the enclosing class of the method is, the +/// more likely that the parameter substitution will be successful. 
+static const ObjCMethodDecl * +findMethodDecl(const ObjCMessageExpr *MessageExpr, + const ObjCObjectPointerType *TrackedType, ASTContext &ASTCtxt) { + const ObjCMethodDecl *Method = nullptr; + + QualType ReceiverType = MessageExpr->getReceiverType(); + const auto *ReceiverObjectPtrType = + ReceiverType->getAs<ObjCObjectPointerType>(); + + // Do this "devirtualization" on instance and class methods only. Trust the + // static type on super and super class calls. + if (MessageExpr->getReceiverKind() == ObjCMessageExpr::Instance || + MessageExpr->getReceiverKind() == ObjCMessageExpr::Class) { + // When the receiver type is id, Class, or some super class of the tracked + // type, look up the method in the tracked type, not in the receiver type. + // This way we preserve more information. + if (ReceiverType->isObjCIdType() || ReceiverType->isObjCClassType() || + ASTCtxt.canAssignObjCInterfaces(ReceiverObjectPtrType, TrackedType)) { + const ObjCInterfaceDecl *InterfaceDecl = TrackedType->getInterfaceDecl(); + // The method might not be found. + Selector Sel = MessageExpr->getSelector(); + Method = InterfaceDecl->lookupInstanceMethod(Sel); + if (!Method) + Method = InterfaceDecl->lookupClassMethod(Sel); + } + } + + // Fallback to statick method lookup when the one based on the tracked type + // failed. + return Method ? Method : MessageExpr->getMethodDecl(); +} + +/// Get the returned ObjCObjectPointerType by a method based on the tracked type +/// information, or null pointer when the returned type is not an +/// ObjCObjectPointerType. +static QualType getReturnTypeForMethod( + const ObjCMethodDecl *Method, ArrayRef<QualType> TypeArgs, + const ObjCObjectPointerType *SelfType, ASTContext &C) { + QualType StaticResultType = Method->getReturnType(); + + // Is the return type declared as instance type? + if (StaticResultType == C.getObjCInstanceType()) + return QualType(SelfType, 0); + + // Check whether the result type depends on a type parameter. 
+ if (!isObjCTypeParamDependent(StaticResultType)) + return QualType(); + + QualType ResultType = StaticResultType.substObjCTypeArgs( + C, TypeArgs, ObjCSubstitutionContext::Result); + + return ResultType; +} + +/// When the receiver has a tracked type, use that type to validate the +/// argumments of the message expression and the return value. +void DynamicTypePropagation::checkPreObjCMessage(const ObjCMethodCall &M, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + SymbolRef Sym = M.getReceiverSVal().getAsSymbol(); + if (!Sym) + return; + + const ObjCObjectPointerType *const *TrackedType = + State->get<MostSpecializedTypeArgsMap>(Sym); + if (!TrackedType) + return; + + // Get the type arguments from tracked type and substitute type arguments + // before do the semantic check. + + ASTContext &ASTCtxt = C.getASTContext(); + const ObjCMessageExpr *MessageExpr = M.getOriginExpr(); + const ObjCMethodDecl *Method = + findMethodDecl(MessageExpr, *TrackedType, ASTCtxt); + + // It is possible to call non-existent methods in Obj-C. + if (!Method) + return; + + // If the method is declared on a class that has a non-invariant + // type parameter, don't warn about parameter mismatches after performing + // substitution. This prevents warning when the programmer has purposely + // casted the receiver to a super type or unspecialized type but the analyzer + // has a more precise tracked type than the programmer intends at the call + // site. + // + // For example, consider NSArray (which has a covariant type parameter) + // and NSMutableArray (a subclass of NSArray where the type parameter is + // invariant): + // NSMutableArray *a = [[NSMutableArray<NSString *> alloc] init; + // + // [a containsObject:number]; // Safe: -containsObject is defined on NSArray. 
+ // NSArray<NSObject *> *other = [a arrayByAddingObject:number] // Safe + // + // [a addObject:number] // Unsafe: -addObject: is defined on NSMutableArray + // + + const ObjCInterfaceDecl *Interface = Method->getClassInterface(); + if (!Interface) + return; + + ObjCTypeParamList *TypeParams = Interface->getTypeParamList(); + if (!TypeParams) + return; + + for (ObjCTypeParamDecl *TypeParam : *TypeParams) { + if (TypeParam->getVariance() != ObjCTypeParamVariance::Invariant) + return; + } + + Optional<ArrayRef<QualType>> TypeArgs = + (*TrackedType)->getObjCSubstitutions(Method->getDeclContext()); + // This case might happen when there is an unspecialized override of a + // specialized method. + if (!TypeArgs) + return; + + for (unsigned i = 0; i < Method->param_size(); i++) { + const Expr *Arg = MessageExpr->getArg(i); + const ParmVarDecl *Param = Method->parameters()[i]; + + QualType OrigParamType = Param->getType(); + if (!isObjCTypeParamDependent(OrigParamType)) + continue; + + QualType ParamType = OrigParamType.substObjCTypeArgs( + ASTCtxt, *TypeArgs, ObjCSubstitutionContext::Parameter); + // Check if it can be assigned + const auto *ParamObjectPtrType = ParamType->getAs<ObjCObjectPointerType>(); + const auto *ArgObjectPtrType = + stripCastsAndSugar(Arg)->getType()->getAs<ObjCObjectPointerType>(); + if (!ParamObjectPtrType || !ArgObjectPtrType) + continue; + + // Check if we have more concrete tracked type that is not a super type of + // the static argument type. + SVal ArgSVal = M.getArgSVal(i); + SymbolRef ArgSym = ArgSVal.getAsSymbol(); + if (ArgSym) { + const ObjCObjectPointerType *const *TrackedArgType = + State->get<MostSpecializedTypeArgsMap>(ArgSym); + if (TrackedArgType && + ASTCtxt.canAssignObjCInterfaces(ArgObjectPtrType, *TrackedArgType)) { + ArgObjectPtrType = *TrackedArgType; + } + } + + // Warn when argument is incompatible with the parameter. 
+ if (!ASTCtxt.canAssignObjCInterfaces(ParamObjectPtrType, + ArgObjectPtrType)) { + static CheckerProgramPointTag Tag(this, "ArgTypeMismatch"); + ExplodedNode *N = C.addTransition(State, &Tag); + reportGenericsBug(ArgObjectPtrType, ParamObjectPtrType, N, Sym, C, Arg); + return; + } + } +} + +/// This callback is used to infer the types for Class variables. This info is +/// used later to validate messages that sent to classes. Class variables are +/// initialized with by invoking the 'class' method on a class. +/// This method is also used to infer the type information for the return +/// types. +// TODO: right now it only tracks generic types. Extend this to track every +// type in the DynamicTypeMap and diagnose type errors! +void DynamicTypePropagation::checkPostObjCMessage(const ObjCMethodCall &M, + CheckerContext &C) const { + const ObjCMessageExpr *MessageExpr = M.getOriginExpr(); + + SymbolRef RetSym = M.getReturnValue().getAsSymbol(); + if (!RetSym) + return; + + Selector Sel = MessageExpr->getSelector(); + ProgramStateRef State = C.getState(); + // Inference for class variables. + // We are only interested in cases where the class method is invoked on a + // class. This method is provided by the runtime and available on all classes. + if (MessageExpr->getReceiverKind() == ObjCMessageExpr::Class && + Sel.getAsString() == "class") { + QualType ReceiverType = MessageExpr->getClassReceiver(); + const auto *ReceiverClassType = ReceiverType->castAs<ObjCObjectType>(); + if (!ReceiverClassType->isSpecialized()) + return; + + QualType ReceiverClassPointerType = + C.getASTContext().getObjCObjectPointerType( + QualType(ReceiverClassType, 0)); + const auto *InferredType = + ReceiverClassPointerType->castAs<ObjCObjectPointerType>(); + + State = State->set<MostSpecializedTypeArgsMap>(RetSym, InferredType); + C.addTransition(State); + return; + } + + // Tracking for return types. 
+ SymbolRef RecSym = M.getReceiverSVal().getAsSymbol(); + if (!RecSym) + return; + + const ObjCObjectPointerType *const *TrackedType = + State->get<MostSpecializedTypeArgsMap>(RecSym); + if (!TrackedType) + return; + + ASTContext &ASTCtxt = C.getASTContext(); + const ObjCMethodDecl *Method = + findMethodDecl(MessageExpr, *TrackedType, ASTCtxt); + if (!Method) + return; + + Optional<ArrayRef<QualType>> TypeArgs = + (*TrackedType)->getObjCSubstitutions(Method->getDeclContext()); + if (!TypeArgs) + return; + + QualType ResultType = + getReturnTypeForMethod(Method, *TypeArgs, *TrackedType, ASTCtxt); + // The static type is the same as the deduced type. + if (ResultType.isNull()) + return; + + const MemRegion *RetRegion = M.getReturnValue().getAsRegion(); + ExplodedNode *Pred = C.getPredecessor(); + // When there is an entry available for the return symbol in DynamicTypeMap, + // the call was inlined, and the information in the DynamicTypeMap is should + // be precise. + if (RetRegion && !getRawDynamicTypeInfo(State, RetRegion)) { + // TODO: we have duplicated information in DynamicTypeMap and + // MostSpecializedTypeArgsMap. We should only store anything in the later if + // the stored data differs from the one stored in the former. + State = setDynamicTypeInfo(State, RetRegion, ResultType, + /*CanBeSubClassed=*/true); + Pred = C.addTransition(State); + } + + const auto *ResultPtrType = ResultType->getAs<ObjCObjectPointerType>(); + + if (!ResultPtrType || ResultPtrType->isUnspecialized()) + return; + + // When the result is a specialized type and it is not tracked yet, track it + // for the result symbol. 
+ if (!State->get<MostSpecializedTypeArgsMap>(RetSym)) { + State = State->set<MostSpecializedTypeArgsMap>(RetSym, ResultPtrType); + C.addTransition(State, Pred); + } +} + +void DynamicTypePropagation::reportGenericsBug( + const ObjCObjectPointerType *From, const ObjCObjectPointerType *To, + ExplodedNode *N, SymbolRef Sym, CheckerContext &C, + const Stmt *ReportedNode) const { + if (!CheckGenerics) + return; + + initBugType(); + SmallString<192> Buf; + llvm::raw_svector_ostream OS(Buf); + OS << "Conversion from value of type '"; + QualType::print(From, Qualifiers(), OS, C.getLangOpts(), llvm::Twine()); + OS << "' to incompatible type '"; + QualType::print(To, Qualifiers(), OS, C.getLangOpts(), llvm::Twine()); + OS << "'"; + auto R = std::make_unique<PathSensitiveBugReport>(*ObjCGenericsBugType, + OS.str(), N); + R->markInteresting(Sym); + R->addVisitor(std::make_unique<GenericsBugVisitor>(Sym)); + if (ReportedNode) + R->addRange(ReportedNode->getSourceRange()); + C.emitReport(std::move(R)); +} + +PathDiagnosticPieceRef DynamicTypePropagation::GenericsBugVisitor::VisitNode( + const ExplodedNode *N, BugReporterContext &BRC, + PathSensitiveBugReport &BR) { + ProgramStateRef state = N->getState(); + ProgramStateRef statePrev = N->getFirstPred()->getState(); + + const ObjCObjectPointerType *const *TrackedType = + state->get<MostSpecializedTypeArgsMap>(Sym); + const ObjCObjectPointerType *const *TrackedTypePrev = + statePrev->get<MostSpecializedTypeArgsMap>(Sym); + if (!TrackedType) + return nullptr; + + if (TrackedTypePrev && *TrackedTypePrev == *TrackedType) + return nullptr; + + // Retrieve the associated statement. 
+ const Stmt *S = N->getStmtForDiagnostics(); + if (!S) + return nullptr; + + const LangOptions &LangOpts = BRC.getASTContext().getLangOpts(); + + SmallString<256> Buf; + llvm::raw_svector_ostream OS(Buf); + OS << "Type '"; + QualType::print(*TrackedType, Qualifiers(), OS, LangOpts, llvm::Twine()); + OS << "' is inferred from "; + + if (const auto *ExplicitCast = dyn_cast<ExplicitCastExpr>(S)) { + OS << "explicit cast (from '"; + QualType::print(ExplicitCast->getSubExpr()->getType().getTypePtr(), + Qualifiers(), OS, LangOpts, llvm::Twine()); + OS << "' to '"; + QualType::print(ExplicitCast->getType().getTypePtr(), Qualifiers(), OS, + LangOpts, llvm::Twine()); + OS << "')"; + } else if (const auto *ImplicitCast = dyn_cast<ImplicitCastExpr>(S)) { + OS << "implicit cast (from '"; + QualType::print(ImplicitCast->getSubExpr()->getType().getTypePtr(), + Qualifiers(), OS, LangOpts, llvm::Twine()); + OS << "' to '"; + QualType::print(ImplicitCast->getType().getTypePtr(), Qualifiers(), OS, + LangOpts, llvm::Twine()); + OS << "')"; + } else { + OS << "this context"; + } + + // Generate the extra diagnostic. + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true); +} + +/// Register checkers. 
+void ento::registerObjCGenericsChecker(CheckerManager &mgr) { + DynamicTypePropagation *checker = mgr.getChecker<DynamicTypePropagation>(); + checker->CheckGenerics = true; +} + +bool ento::shouldRegisterObjCGenericsChecker(const LangOptions &LO) { + return true; +} + +void ento::registerDynamicTypePropagation(CheckerManager &mgr) { + mgr.registerChecker<DynamicTypePropagation>(); +} + +bool ento::shouldRegisterDynamicTypePropagation(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp new file mode 100644 index 000000000000..481a5685a71f --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp @@ -0,0 +1,147 @@ +//===- EnumCastOutOfRangeChecker.cpp ---------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The EnumCastOutOfRangeChecker is responsible for checking integer to +// enumeration casts that could result in undefined values. This could happen +// if the value that we cast from is out of the value range of the enumeration. +// Reference: +// [ISO/IEC 14882-2014] ISO/IEC 14882-2014. +// Programming Languages — C++, Fourth Edition. 2014. +// C++ Standard, [dcl.enum], in paragraph 8, which defines the range of an enum +// C++ Standard, [expr.static.cast], paragraph 10, which defines the behaviour +// of casting an integer value that is out of range +// SEI CERT C++ Coding Standard, INT50-CPP. 
Do not cast to an out-of-range +// enumeration value +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +// This evaluator checks two SVals for equality. The first SVal is provided via +// the constructor, the second is the parameter of the overloaded () operator. +// It uses the in-built ConstraintManager to resolve the equlity to possible or +// not possible ProgramStates. +class ConstraintBasedEQEvaluator { + const DefinedOrUnknownSVal CompareValue; + const ProgramStateRef PS; + SValBuilder &SVB; + +public: + ConstraintBasedEQEvaluator(CheckerContext &C, + const DefinedOrUnknownSVal CompareValue) + : CompareValue(CompareValue), PS(C.getState()), SVB(C.getSValBuilder()) {} + + bool operator()(const llvm::APSInt &EnumDeclInitValue) { + DefinedOrUnknownSVal EnumDeclValue = SVB.makeIntVal(EnumDeclInitValue); + DefinedOrUnknownSVal ElemEqualsValueToCast = + SVB.evalEQ(PS, EnumDeclValue, CompareValue); + + return static_cast<bool>(PS->assume(ElemEqualsValueToCast, true)); + } +}; + +// This checker checks CastExpr statements. +// If the value provided to the cast is one of the values the enumeration can +// represent, the said value matches the enumeration. If the checker can +// establish the impossibility of matching it gives a warning. +// Being conservative, it does not warn if there is slight possibility the +// value can be matching. 
+class EnumCastOutOfRangeChecker : public Checker<check::PreStmt<CastExpr>> { + mutable std::unique_ptr<BuiltinBug> EnumValueCastOutOfRange; + void reportWarning(CheckerContext &C) const; + +public: + void checkPreStmt(const CastExpr *CE, CheckerContext &C) const; +}; + +using EnumValueVector = llvm::SmallVector<llvm::APSInt, 6>; + +// Collects all of the values an enum can represent (as SVals). +EnumValueVector getDeclValuesForEnum(const EnumDecl *ED) { + EnumValueVector DeclValues( + std::distance(ED->enumerator_begin(), ED->enumerator_end())); + llvm::transform(ED->enumerators(), DeclValues.begin(), + [](const EnumConstantDecl *D) { return D->getInitVal(); }); + return DeclValues; +} +} // namespace + +void EnumCastOutOfRangeChecker::reportWarning(CheckerContext &C) const { + if (const ExplodedNode *N = C.generateNonFatalErrorNode()) { + if (!EnumValueCastOutOfRange) + EnumValueCastOutOfRange.reset( + new BuiltinBug(this, "Enum cast out of range", + "The value provided to the cast expression is not in " + "the valid range of values for the enum")); + C.emitReport(std::make_unique<PathSensitiveBugReport>( + *EnumValueCastOutOfRange, EnumValueCastOutOfRange->getDescription(), + N)); + } +} + +void EnumCastOutOfRangeChecker::checkPreStmt(const CastExpr *CE, + CheckerContext &C) const { + + // Only perform enum range check on casts where such checks are valid. For + // all other cast kinds (where enum range checks are unnecessary or invalid), + // just return immediately. TODO: The set of casts whitelisted for enum + // range checking may be incomplete. Better to add a missing cast kind to + // enable a missing check than to generate false negatives and have to remove + // those later. + switch (CE->getCastKind()) { + case CK_IntegralCast: + break; + + default: + return; + break; + } + + // Get the value of the expression to cast. 
+ const llvm::Optional<DefinedOrUnknownSVal> ValueToCast = + C.getSVal(CE->getSubExpr()).getAs<DefinedOrUnknownSVal>(); + + // If the value cannot be reasoned about (not even a DefinedOrUnknownSVal), + // don't analyze further. + if (!ValueToCast) + return; + + const QualType T = CE->getType(); + // Check whether the cast type is an enum. + if (!T->isEnumeralType()) + return; + + // If the cast is an enum, get its declaration. + // If the isEnumeralType() returned true, then the declaration must exist + // even if it is a stub declaration. It is up to the getDeclValuesForEnum() + // function to handle this. + const EnumDecl *ED = T->castAs<EnumType>()->getDecl(); + + EnumValueVector DeclValues = getDeclValuesForEnum(ED); + // Check if any of the enum values possibly match. + bool PossibleValueMatch = llvm::any_of( + DeclValues, ConstraintBasedEQEvaluator(C, *ValueToCast)); + + // If there is no value that can possibly match any of the enum values, then + // warn. + if (!PossibleValueMatch) + reportWarning(C); +} + +void ento::registerEnumCastOutOfRangeChecker(CheckerManager &mgr) { + mgr.registerChecker<EnumCastOutOfRangeChecker>(); +} + +bool ento::shouldRegisterEnumCastOutOfRangeChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp new file mode 100644 index 000000000000..17c813962a23 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp @@ -0,0 +1,419 @@ +//==- ExprInspectionChecker.cpp - Used for regression tests ------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Checkers/SValExplainer.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/IssueHash.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/ScopedPrinter.h" + +using namespace clang; +using namespace ento; + +namespace { +class ExprInspectionChecker : public Checker<eval::Call, check::DeadSymbols, + check::EndAnalysis> { + mutable std::unique_ptr<BugType> BT; + + // These stats are per-analysis, not per-branch, hence they shouldn't + // stay inside the program state. + struct ReachedStat { + ExplodedNode *ExampleNode; + unsigned NumTimesReached; + }; + mutable llvm::DenseMap<const CallExpr *, ReachedStat> ReachedStats; + + void analyzerEval(const CallExpr *CE, CheckerContext &C) const; + void analyzerCheckInlined(const CallExpr *CE, CheckerContext &C) const; + void analyzerWarnIfReached(const CallExpr *CE, CheckerContext &C) const; + void analyzerNumTimesReached(const CallExpr *CE, CheckerContext &C) const; + void analyzerCrash(const CallExpr *CE, CheckerContext &C) const; + void analyzerWarnOnDeadSymbol(const CallExpr *CE, CheckerContext &C) const; + void analyzerDump(const CallExpr *CE, CheckerContext &C) const; + void analyzerExplain(const CallExpr *CE, CheckerContext &C) const; + void analyzerPrintState(const CallExpr *CE, CheckerContext &C) const; + void analyzerGetExtent(const CallExpr *CE, CheckerContext &C) const; + void analyzerHashDump(const CallExpr *CE, CheckerContext &C) const; + void analyzerDenote(const CallExpr *CE, CheckerContext &C) const; + void analyzerExpress(const 
CallExpr *CE, CheckerContext &C) const; + + typedef void (ExprInspectionChecker::*FnCheck)(const CallExpr *, + CheckerContext &C) const; + + ExplodedNode *reportBug(llvm::StringRef Msg, CheckerContext &C) const; + ExplodedNode *reportBug(llvm::StringRef Msg, BugReporter &BR, + ExplodedNode *N) const; + +public: + bool evalCall(const CallEvent &Call, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; + void checkEndAnalysis(ExplodedGraph &G, BugReporter &BR, + ExprEngine &Eng) const; +}; +} + +REGISTER_SET_WITH_PROGRAMSTATE(MarkedSymbols, SymbolRef) +REGISTER_MAP_WITH_PROGRAMSTATE(DenotedSymbols, SymbolRef, const StringLiteral *) + +bool ExprInspectionChecker::evalCall(const CallEvent &Call, + CheckerContext &C) const { + const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); + if (!CE) + return false; + + // These checks should have no effect on the surrounding environment + // (globals should not be invalidated, etc), hence the use of evalCall. 
+ FnCheck Handler = llvm::StringSwitch<FnCheck>(C.getCalleeName(CE)) + .Case("clang_analyzer_eval", &ExprInspectionChecker::analyzerEval) + .Case("clang_analyzer_checkInlined", + &ExprInspectionChecker::analyzerCheckInlined) + .Case("clang_analyzer_crash", &ExprInspectionChecker::analyzerCrash) + .Case("clang_analyzer_warnIfReached", + &ExprInspectionChecker::analyzerWarnIfReached) + .Case("clang_analyzer_warnOnDeadSymbol", + &ExprInspectionChecker::analyzerWarnOnDeadSymbol) + .StartsWith("clang_analyzer_explain", &ExprInspectionChecker::analyzerExplain) + .StartsWith("clang_analyzer_dump", &ExprInspectionChecker::analyzerDump) + .Case("clang_analyzer_getExtent", &ExprInspectionChecker::analyzerGetExtent) + .Case("clang_analyzer_printState", + &ExprInspectionChecker::analyzerPrintState) + .Case("clang_analyzer_numTimesReached", + &ExprInspectionChecker::analyzerNumTimesReached) + .Case("clang_analyzer_hashDump", &ExprInspectionChecker::analyzerHashDump) + .Case("clang_analyzer_denote", &ExprInspectionChecker::analyzerDenote) + .Case("clang_analyzer_express", &ExprInspectionChecker::analyzerExpress) + .Default(nullptr); + + if (!Handler) + return false; + + (this->*Handler)(CE, C); + return true; +} + +static const char *getArgumentValueString(const CallExpr *CE, + CheckerContext &C) { + if (CE->getNumArgs() == 0) + return "Missing assertion argument"; + + ExplodedNode *N = C.getPredecessor(); + const LocationContext *LC = N->getLocationContext(); + ProgramStateRef State = N->getState(); + + const Expr *Assertion = CE->getArg(0); + SVal AssertionVal = State->getSVal(Assertion, LC); + + if (AssertionVal.isUndef()) + return "UNDEFINED"; + + ProgramStateRef StTrue, StFalse; + std::tie(StTrue, StFalse) = + State->assume(AssertionVal.castAs<DefinedOrUnknownSVal>()); + + if (StTrue) { + if (StFalse) + return "UNKNOWN"; + else + return "TRUE"; + } else { + if (StFalse) + return "FALSE"; + else + llvm_unreachable("Invalid constraint; neither true or false."); + } +} + 
+ExplodedNode *ExprInspectionChecker::reportBug(llvm::StringRef Msg, + CheckerContext &C) const { + ExplodedNode *N = C.generateNonFatalErrorNode(); + reportBug(Msg, C.getBugReporter(), N); + return N; +} + +ExplodedNode *ExprInspectionChecker::reportBug(llvm::StringRef Msg, + BugReporter &BR, + ExplodedNode *N) const { + if (!N) + return nullptr; + + if (!BT) + BT.reset(new BugType(this, "Checking analyzer assumptions", "debug")); + + BR.emitReport(std::make_unique<PathSensitiveBugReport>(*BT, Msg, N)); + return N; +} + +void ExprInspectionChecker::analyzerEval(const CallExpr *CE, + CheckerContext &C) const { + const LocationContext *LC = C.getPredecessor()->getLocationContext(); + + // A specific instantiation of an inlined function may have more constrained + // values than can generally be assumed. Skip the check. + if (LC->getStackFrame()->getParent() != nullptr) + return; + + reportBug(getArgumentValueString(CE, C), C); +} + +void ExprInspectionChecker::analyzerWarnIfReached(const CallExpr *CE, + CheckerContext &C) const { + reportBug("REACHABLE", C); +} + +void ExprInspectionChecker::analyzerNumTimesReached(const CallExpr *CE, + CheckerContext &C) const { + ++ReachedStats[CE].NumTimesReached; + if (!ReachedStats[CE].ExampleNode) { + // Later, in checkEndAnalysis, we'd throw a report against it. + ReachedStats[CE].ExampleNode = C.generateNonFatalErrorNode(); + } +} + +void ExprInspectionChecker::analyzerCheckInlined(const CallExpr *CE, + CheckerContext &C) const { + const LocationContext *LC = C.getPredecessor()->getLocationContext(); + + // An inlined function could conceivably also be analyzed as a top-level + // function. We ignore this case and only emit a message (TRUE or FALSE) + // when we are analyzing it as an inlined function. This means that + // clang_analyzer_checkInlined(true) should always print TRUE, but + // clang_analyzer_checkInlined(false) should never actually print anything. 
+ if (LC->getStackFrame()->getParent() == nullptr) + return; + + reportBug(getArgumentValueString(CE, C), C); +} + +void ExprInspectionChecker::analyzerExplain(const CallExpr *CE, + CheckerContext &C) const { + if (CE->getNumArgs() == 0) { + reportBug("Missing argument for explaining", C); + return; + } + + SVal V = C.getSVal(CE->getArg(0)); + SValExplainer Ex(C.getASTContext()); + reportBug(Ex.Visit(V), C); +} + +void ExprInspectionChecker::analyzerDump(const CallExpr *CE, + CheckerContext &C) const { + if (CE->getNumArgs() == 0) { + reportBug("Missing argument for dumping", C); + return; + } + + SVal V = C.getSVal(CE->getArg(0)); + + llvm::SmallString<32> Str; + llvm::raw_svector_ostream OS(Str); + V.dumpToStream(OS); + reportBug(OS.str(), C); +} + +void ExprInspectionChecker::analyzerGetExtent(const CallExpr *CE, + CheckerContext &C) const { + if (CE->getNumArgs() == 0) { + reportBug("Missing region for obtaining extent", C); + return; + } + + auto MR = dyn_cast_or_null<SubRegion>(C.getSVal(CE->getArg(0)).getAsRegion()); + if (!MR) { + reportBug("Obtaining extent of a non-region", C); + return; + } + + ProgramStateRef State = C.getState(); + State = State->BindExpr(CE, C.getLocationContext(), + MR->getExtent(C.getSValBuilder())); + C.addTransition(State); +} + +void ExprInspectionChecker::analyzerPrintState(const CallExpr *CE, + CheckerContext &C) const { + C.getState()->dump(); +} + +void ExprInspectionChecker::analyzerWarnOnDeadSymbol(const CallExpr *CE, + CheckerContext &C) const { + if (CE->getNumArgs() == 0) + return; + SVal Val = C.getSVal(CE->getArg(0)); + SymbolRef Sym = Val.getAsSymbol(); + if (!Sym) + return; + + ProgramStateRef State = C.getState(); + State = State->add<MarkedSymbols>(Sym); + C.addTransition(State); +} + +void ExprInspectionChecker::checkDeadSymbols(SymbolReaper &SymReaper, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const MarkedSymbolsTy &Syms = State->get<MarkedSymbols>(); + ExplodedNode *N = 
C.getPredecessor(); + for (auto I = Syms.begin(), E = Syms.end(); I != E; ++I) { + SymbolRef Sym = *I; + if (!SymReaper.isDead(Sym)) + continue; + + // The non-fatal error node should be the same for all reports. + if (ExplodedNode *BugNode = reportBug("SYMBOL DEAD", C)) + N = BugNode; + State = State->remove<MarkedSymbols>(Sym); + } + + for (auto I : State->get<DenotedSymbols>()) { + SymbolRef Sym = I.first; + if (!SymReaper.isLive(Sym)) + State = State->remove<DenotedSymbols>(Sym); + } + + C.addTransition(State, N); +} + +void ExprInspectionChecker::checkEndAnalysis(ExplodedGraph &G, BugReporter &BR, + ExprEngine &Eng) const { + for (auto Item: ReachedStats) { + unsigned NumTimesReached = Item.second.NumTimesReached; + ExplodedNode *N = Item.second.ExampleNode; + + reportBug(llvm::to_string(NumTimesReached), BR, N); + } + ReachedStats.clear(); +} + +void ExprInspectionChecker::analyzerCrash(const CallExpr *CE, + CheckerContext &C) const { + LLVM_BUILTIN_TRAP; +} + +void ExprInspectionChecker::analyzerHashDump(const CallExpr *CE, + CheckerContext &C) const { + const LangOptions &Opts = C.getLangOpts(); + const SourceManager &SM = C.getSourceManager(); + FullSourceLoc FL(CE->getArg(0)->getBeginLoc(), SM); + std::string HashContent = + GetIssueString(SM, FL, getCheckerName().getName(), "Category", + C.getLocationContext()->getDecl(), Opts); + + reportBug(HashContent, C); +} + +void ExprInspectionChecker::analyzerDenote(const CallExpr *CE, + CheckerContext &C) const { + if (CE->getNumArgs() < 2) { + reportBug("clang_analyzer_denote() requires a symbol and a string literal", + C); + return; + } + + SymbolRef Sym = C.getSVal(CE->getArg(0)).getAsSymbol(); + if (!Sym) { + reportBug("Not a symbol", C); + return; + } + + const auto *E = dyn_cast<StringLiteral>(CE->getArg(1)->IgnoreParenCasts()); + if (!E) { + reportBug("Not a string literal", C); + return; + } + + ProgramStateRef State = C.getState(); + + C.addTransition(C.getState()->set<DenotedSymbols>(Sym, E)); +} + 
+namespace { +class SymbolExpressor + : public SymExprVisitor<SymbolExpressor, Optional<std::string>> { + ProgramStateRef State; + +public: + SymbolExpressor(ProgramStateRef State) : State(State) {} + + Optional<std::string> lookup(const SymExpr *S) { + if (const StringLiteral *const *SLPtr = State->get<DenotedSymbols>(S)) { + const StringLiteral *SL = *SLPtr; + return std::string(SL->getBytes()); + } + return None; + } + + Optional<std::string> VisitSymExpr(const SymExpr *S) { + return lookup(S); + } + + Optional<std::string> VisitSymIntExpr(const SymIntExpr *S) { + if (Optional<std::string> Str = lookup(S)) + return Str; + if (Optional<std::string> Str = Visit(S->getLHS())) + return (*Str + " " + BinaryOperator::getOpcodeStr(S->getOpcode()) + " " + + std::to_string(S->getRHS().getLimitedValue()) + + (S->getRHS().isUnsigned() ? "U" : "")) + .str(); + return None; + } + + Optional<std::string> VisitSymSymExpr(const SymSymExpr *S) { + if (Optional<std::string> Str = lookup(S)) + return Str; + if (Optional<std::string> Str1 = Visit(S->getLHS())) + if (Optional<std::string> Str2 = Visit(S->getRHS())) + return (*Str1 + " " + BinaryOperator::getOpcodeStr(S->getOpcode()) + + " " + *Str2).str(); + return None; + } + + Optional<std::string> VisitSymbolCast(const SymbolCast *S) { + if (Optional<std::string> Str = lookup(S)) + return Str; + if (Optional<std::string> Str = Visit(S->getOperand())) + return (Twine("(") + S->getType().getAsString() + ")" + *Str).str(); + return None; + } +}; +} // namespace + +void ExprInspectionChecker::analyzerExpress(const CallExpr *CE, + CheckerContext &C) const { + if (CE->getNumArgs() == 0) { + reportBug("clang_analyzer_express() requires a symbol", C); + return; + } + + SymbolRef Sym = C.getSVal(CE->getArg(0)).getAsSymbol(); + if (!Sym) { + reportBug("Not a symbol", C); + return; + } + + SymbolExpressor V(C.getState()); + auto Str = V.Visit(Sym); + if (!Str) { + reportBug("Unable to express", C); + return; + } + + reportBug(*Str, C); +} + 
+void ento::registerExprInspectionChecker(CheckerManager &Mgr) { + Mgr.registerChecker<ExprInspectionChecker>(); +} + +bool ento::shouldRegisterExprInspectionChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp new file mode 100644 index 000000000000..b315a8452285 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp @@ -0,0 +1,71 @@ +//=== FixedAddressChecker.cpp - Fixed address usage checker ----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files defines FixedAddressChecker, a builtin checker that checks for +// assignment of a fixed address to a pointer. +// This check corresponds to CWE-587. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class FixedAddressChecker + : public Checker< check::PreStmt<BinaryOperator> > { + mutable std::unique_ptr<BuiltinBug> BT; + +public: + void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; +}; +} + +void FixedAddressChecker::checkPreStmt(const BinaryOperator *B, + CheckerContext &C) const { + // Using a fixed address is not portable because that address will probably + // not be valid in all environments or platforms. 
+ + if (B->getOpcode() != BO_Assign) + return; + + QualType T = B->getType(); + if (!T->isPointerType()) + return; + + SVal RV = C.getSVal(B->getRHS()); + + if (!RV.isConstant() || RV.isZeroConstant()) + return; + + if (ExplodedNode *N = C.generateNonFatalErrorNode()) { + if (!BT) + BT.reset( + new BuiltinBug(this, "Use fixed address", + "Using a fixed address is not portable because that " + "address will probably not be valid in all " + "environments or platforms.")); + auto R = + std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N); + R->addRange(B->getRHS()->getSourceRange()); + C.emitReport(std::move(R)); + } +} + +void ento::registerFixedAddressChecker(CheckerManager &mgr) { + mgr.registerChecker<FixedAddressChecker>(); +} + +bool ento::shouldRegisterFixedAddressChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp new file mode 100644 index 000000000000..d471c23b83bf --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp @@ -0,0 +1,232 @@ +//===- GCDAntipatternChecker.cpp ---------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines GCDAntipatternChecker which checks against a common +// antipattern when synchronous API is emulated from asynchronous callbacks +// using a semaphore: +// +// dispatch_semaphore_t sema = dispatch_semaphore_create(0); +// +// AnyCFunctionCall(^{ +// // code… +// dispatch_semaphore_signal(sema); +// }) +// dispatch_semaphore_wait(sema, *) +// +// Such code is a common performance problem, due to inability of GCD to +// properly handle QoS when a combination of queues and semaphores is used. +// Good code would either use asynchronous API (when available), or perform +// the necessary action in asynchronous callback. +// +// Currently, the check is performed using a simple heuristical AST pattern +// matching. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "llvm/Support/Debug.h" + +using namespace clang; +using namespace ento; +using namespace ast_matchers; + +namespace { + +// ID of a node at which the diagnostic would be emitted. 
+const char *WarnAtNode = "waitcall"; + +class GCDAntipatternChecker : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const; +}; + +auto callsName(const char *FunctionName) + -> decltype(callee(functionDecl())) { + return callee(functionDecl(hasName(FunctionName))); +} + +auto equalsBoundArgDecl(int ArgIdx, const char *DeclName) + -> decltype(hasArgument(0, expr())) { + return hasArgument(ArgIdx, ignoringParenCasts(declRefExpr( + to(varDecl(equalsBoundNode(DeclName)))))); +} + +auto bindAssignmentToDecl(const char *DeclName) -> decltype(hasLHS(expr())) { + return hasLHS(ignoringParenImpCasts( + declRefExpr(to(varDecl().bind(DeclName))))); +} + +/// The pattern is very common in tests, and it is OK to use it there. +/// We have to heuristics for detecting tests: method name starts with "test" +/// (used in XCTest), and a class name contains "mock" or "test" (used in +/// helpers which are not tests themselves, but used exclusively in tests). 
+static bool isTest(const Decl *D) { + if (const auto* ND = dyn_cast<NamedDecl>(D)) { + std::string DeclName = ND->getNameAsString(); + if (StringRef(DeclName).startswith("test")) + return true; + } + if (const auto *OD = dyn_cast<ObjCMethodDecl>(D)) { + if (const auto *CD = dyn_cast<ObjCContainerDecl>(OD->getParent())) { + std::string ContainerName = CD->getNameAsString(); + StringRef CN(ContainerName); + if (CN.contains_lower("test") || CN.contains_lower("mock")) + return true; + } + } + return false; +} + +static auto findGCDAntiPatternWithSemaphore() -> decltype(compoundStmt()) { + + const char *SemaphoreBinding = "semaphore_name"; + auto SemaphoreCreateM = callExpr(allOf( + callsName("dispatch_semaphore_create"), + hasArgument(0, ignoringParenCasts(integerLiteral(equals(0)))))); + + auto SemaphoreBindingM = anyOf( + forEachDescendant( + varDecl(hasDescendant(SemaphoreCreateM)).bind(SemaphoreBinding)), + forEachDescendant(binaryOperator(bindAssignmentToDecl(SemaphoreBinding), + hasRHS(SemaphoreCreateM)))); + + auto HasBlockArgumentM = hasAnyArgument(hasType( + hasCanonicalType(blockPointerType()) + )); + + auto ArgCallsSignalM = hasAnyArgument(stmt(hasDescendant(callExpr( + allOf( + callsName("dispatch_semaphore_signal"), + equalsBoundArgDecl(0, SemaphoreBinding) + ))))); + + auto HasBlockAndCallsSignalM = allOf(HasBlockArgumentM, ArgCallsSignalM); + + auto HasBlockCallingSignalM = + forEachDescendant( + stmt(anyOf( + callExpr(HasBlockAndCallsSignalM), + objcMessageExpr(HasBlockAndCallsSignalM) + ))); + + auto SemaphoreWaitM = forEachDescendant( + callExpr( + allOf( + callsName("dispatch_semaphore_wait"), + equalsBoundArgDecl(0, SemaphoreBinding) + ) + ).bind(WarnAtNode)); + + return compoundStmt( + SemaphoreBindingM, HasBlockCallingSignalM, SemaphoreWaitM); +} + +static auto findGCDAntiPatternWithGroup() -> decltype(compoundStmt()) { + + const char *GroupBinding = "group_name"; + auto DispatchGroupCreateM = callExpr(callsName("dispatch_group_create")); + + 
auto GroupBindingM = anyOf( + forEachDescendant( + varDecl(hasDescendant(DispatchGroupCreateM)).bind(GroupBinding)), + forEachDescendant(binaryOperator(bindAssignmentToDecl(GroupBinding), + hasRHS(DispatchGroupCreateM)))); + + auto GroupEnterM = forEachDescendant( + stmt(callExpr(allOf(callsName("dispatch_group_enter"), + equalsBoundArgDecl(0, GroupBinding))))); + + auto HasBlockArgumentM = hasAnyArgument(hasType( + hasCanonicalType(blockPointerType()) + )); + + auto ArgCallsSignalM = hasAnyArgument(stmt(hasDescendant(callExpr( + allOf( + callsName("dispatch_group_leave"), + equalsBoundArgDecl(0, GroupBinding) + ))))); + + auto HasBlockAndCallsLeaveM = allOf(HasBlockArgumentM, ArgCallsSignalM); + + auto AcceptsBlockM = + forEachDescendant( + stmt(anyOf( + callExpr(HasBlockAndCallsLeaveM), + objcMessageExpr(HasBlockAndCallsLeaveM) + ))); + + auto GroupWaitM = forEachDescendant( + callExpr( + allOf( + callsName("dispatch_group_wait"), + equalsBoundArgDecl(0, GroupBinding) + ) + ).bind(WarnAtNode)); + + return compoundStmt(GroupBindingM, GroupEnterM, AcceptsBlockM, GroupWaitM); +} + +static void emitDiagnostics(const BoundNodes &Nodes, + const char* Type, + BugReporter &BR, + AnalysisDeclContext *ADC, + const GCDAntipatternChecker *Checker) { + const auto *SW = Nodes.getNodeAs<CallExpr>(WarnAtNode); + assert(SW); + + std::string Diagnostics; + llvm::raw_string_ostream OS(Diagnostics); + OS << "Waiting on a callback using a " << Type << " creates useless threads " + << "and is subject to priority inversion; consider " + << "using a synchronous API or changing the caller to be asynchronous"; + + BR.EmitBasicReport( + ADC->getDecl(), + Checker, + /*Name=*/"GCD performance anti-pattern", + /*BugCategory=*/"Performance", + OS.str(), + PathDiagnosticLocation::createBegin(SW, BR.getSourceManager(), ADC), + SW->getSourceRange()); +} + +void GCDAntipatternChecker::checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const { + if (isTest(D)) + return; + + 
AnalysisDeclContext *ADC = AM.getAnalysisDeclContext(D); + + auto SemaphoreMatcherM = findGCDAntiPatternWithSemaphore(); + auto Matches = match(SemaphoreMatcherM, *D->getBody(), AM.getASTContext()); + for (BoundNodes Match : Matches) + emitDiagnostics(Match, "semaphore", BR, ADC, this); + + auto GroupMatcherM = findGCDAntiPatternWithGroup(); + Matches = match(GroupMatcherM, *D->getBody(), AM.getASTContext()); + for (BoundNodes Match : Matches) + emitDiagnostics(Match, "group", BR, ADC, this); +} + +} // end of anonymous namespace + +void ento::registerGCDAntipattern(CheckerManager &Mgr) { + Mgr.registerChecker<GCDAntipatternChecker>(); +} + +bool ento::shouldRegisterGCDAntipattern(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/GTestChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GTestChecker.cpp new file mode 100644 index 000000000000..f4308f510f0b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/GTestChecker.cpp @@ -0,0 +1,298 @@ +//==- GTestChecker.cpp - Model gtest API --*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker models the behavior of un-inlined APIs from the gtest +// unit-testing library to avoid false positives when using assertions from +// that library. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/Expr.h" +#include "clang/Basic/LangOptions.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +// Modeling of un-inlined AssertionResult constructors +// +// The gtest unit testing API provides macros for assertions that expand +// into an if statement that calls a series of constructors and returns +// when the "assertion" is false. +// +// For example, +// +// ASSERT_TRUE(a == b) +// +// expands into: +// +// switch (0) +// case 0: +// default: +// if (const ::testing::AssertionResult gtest_ar_ = +// ::testing::AssertionResult((a == b))) +// ; +// else +// return ::testing::internal::AssertHelper( +// ::testing::TestPartResult::kFatalFailure, +// "<path to project>", +// <line number>, +// ::testing::internal::GetBoolAssertionFailureMessage( +// gtest_ar_, "a == b", "false", "true") +// .c_str()) = ::testing::Message(); +// +// where AssertionResult is defined similarly to +// +// class AssertionResult { +// public: +// AssertionResult(const AssertionResult& other); +// explicit AssertionResult(bool success) : success_(success) {} +// operator bool() const { return success_; } +// ... +// private: +// bool success_; +// }; +// +// In order for the analyzer to correctly handle this assertion, it needs to +// know that the boolean value of the expression "a == b" is stored the +// 'success_' field of the original AssertionResult temporary and propagated +// (via the copy constructor) into the 'success_' field of the object stored +// in 'gtest_ar_'. 
That boolean value will then be returned from the bool +// conversion method in the if statement. This guarantees that the assertion +// holds when the return path is not taken. +// +// If the success value is not properly propagated, then the eager case split +// on evaluating the expression can cause pernicious false positives +// on the non-return path: +// +// ASSERT(ptr != NULL) +// *ptr = 7; // False positive null pointer dereference here +// +// Unfortunately, the bool constructor cannot be inlined (because its +// implementation is not present in the headers) and the copy constructor is +// not inlined (because it is constructed into a temporary and the analyzer +// does not inline these since it does not yet reliably call temporary +// destructors). +// +// This checker compensates for the missing inlining by propagating the +// _success value across the bool and copy constructors so the assertion behaves +// as expected. + +namespace { +class GTestChecker : public Checker<check::PostCall> { + + mutable IdentifierInfo *AssertionResultII; + mutable IdentifierInfo *SuccessII; + +public: + GTestChecker(); + + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + +private: + void modelAssertionResultBoolConstructor(const CXXConstructorCall *Call, + bool IsRef, CheckerContext &C) const; + + void modelAssertionResultCopyConstructor(const CXXConstructorCall *Call, + CheckerContext &C) const; + + void initIdentifierInfo(ASTContext &Ctx) const; + + SVal + getAssertionResultSuccessFieldValue(const CXXRecordDecl *AssertionResultDecl, + SVal Instance, + ProgramStateRef State) const; + + static ProgramStateRef assumeValuesEqual(SVal Val1, SVal Val2, + ProgramStateRef State, + CheckerContext &C); +}; +} // End anonymous namespace. + +GTestChecker::GTestChecker() : AssertionResultII(nullptr), SuccessII(nullptr) {} + +/// Model a call to an un-inlined AssertionResult(bool) or +/// AssertionResult(bool &, ...). 
+/// To do so, constrain the value of the newly-constructed instance's 'success_' +/// field to be equal to the passed-in boolean value. +/// +/// \param IsRef Whether the boolean parameter is a reference or not. +void GTestChecker::modelAssertionResultBoolConstructor( + const CXXConstructorCall *Call, bool IsRef, CheckerContext &C) const { + assert(Call->getNumArgs() >= 1 && Call->getNumArgs() <= 2); + + ProgramStateRef State = C.getState(); + SVal BooleanArgVal = Call->getArgSVal(0); + if (IsRef) { + // The argument is a reference, so load from it to get the boolean value. + if (!BooleanArgVal.getAs<Loc>()) + return; + BooleanArgVal = C.getState()->getSVal(BooleanArgVal.castAs<Loc>()); + } + + SVal ThisVal = Call->getCXXThisVal(); + + SVal ThisSuccess = getAssertionResultSuccessFieldValue( + Call->getDecl()->getParent(), ThisVal, State); + + State = assumeValuesEqual(ThisSuccess, BooleanArgVal, State, C); + C.addTransition(State); +} + +/// Model a call to an un-inlined AssertionResult copy constructor: +/// +/// AssertionResult(const &AssertionResult other) +/// +/// To do so, constrain the value of the newly-constructed instance's +/// 'success_' field to be equal to the value of the pass-in instance's +/// 'success_' field. +void GTestChecker::modelAssertionResultCopyConstructor( + const CXXConstructorCall *Call, CheckerContext &C) const { + assert(Call->getNumArgs() == 1); + + // The first parameter of the copy constructor must be the other + // instance to initialize this instances fields from. 
+ SVal OtherVal = Call->getArgSVal(0); + SVal ThisVal = Call->getCXXThisVal(); + + const CXXRecordDecl *AssertResultClassDecl = Call->getDecl()->getParent(); + ProgramStateRef State = C.getState(); + + SVal ThisSuccess = getAssertionResultSuccessFieldValue(AssertResultClassDecl, + ThisVal, State); + SVal OtherSuccess = getAssertionResultSuccessFieldValue(AssertResultClassDecl, + OtherVal, State); + + State = assumeValuesEqual(ThisSuccess, OtherSuccess, State, C); + C.addTransition(State); +} + +/// Model calls to AssertionResult constructors that are not inlined. +void GTestChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + /// If the constructor was inlined, there is no need model it. + if (C.wasInlined) + return; + + initIdentifierInfo(C.getASTContext()); + + auto *CtorCall = dyn_cast<CXXConstructorCall>(&Call); + if (!CtorCall) + return; + + const CXXConstructorDecl *CtorDecl = CtorCall->getDecl(); + const CXXRecordDecl *CtorParent = CtorDecl->getParent(); + if (CtorParent->getIdentifier() != AssertionResultII) + return; + + unsigned ParamCount = CtorDecl->getNumParams(); + + // Call the appropriate modeling method based the parameters and their + // types. 
+ + // We have AssertionResult(const &AssertionResult) + if (CtorDecl->isCopyConstructor() && ParamCount == 1) { + modelAssertionResultCopyConstructor(CtorCall, C); + return; + } + + // There are two possible boolean constructors, depending on which + // version of gtest is being used: + // + // v1.7 and earlier: + // AssertionResult(bool success) + // + // v1.8 and greater: + // template <typename T> + // AssertionResult(const T& success, + // typename internal::EnableIf< + // !internal::ImplicitlyConvertible<T, + // AssertionResult>::value>::type*) + // + CanQualType BoolTy = C.getASTContext().BoolTy; + if (ParamCount == 1 && CtorDecl->getParamDecl(0)->getType() == BoolTy) { + // We have AssertionResult(bool) + modelAssertionResultBoolConstructor(CtorCall, /*IsRef=*/false, C); + return; + } + if (ParamCount == 2){ + auto *RefTy = CtorDecl->getParamDecl(0)->getType()->getAs<ReferenceType>(); + if (RefTy && + RefTy->getPointeeType()->getCanonicalTypeUnqualified() == BoolTy) { + // We have AssertionResult(bool &, ...) + modelAssertionResultBoolConstructor(CtorCall, /*IsRef=*/true, C); + return; + } + } +} + +void GTestChecker::initIdentifierInfo(ASTContext &Ctx) const { + if (AssertionResultII) + return; + + AssertionResultII = &Ctx.Idents.get("AssertionResult"); + SuccessII = &Ctx.Idents.get("success_"); +} + +/// Returns the value stored in the 'success_' field of the passed-in +/// AssertionResult instance. 
+SVal GTestChecker::getAssertionResultSuccessFieldValue( + const CXXRecordDecl *AssertionResultDecl, SVal Instance, + ProgramStateRef State) const { + + DeclContext::lookup_result Result = AssertionResultDecl->lookup(SuccessII); + if (Result.empty()) + return UnknownVal(); + + auto *SuccessField = dyn_cast<FieldDecl>(Result.front()); + if (!SuccessField) + return UnknownVal(); + + Optional<Loc> FieldLoc = + State->getLValue(SuccessField, Instance).getAs<Loc>(); + if (!FieldLoc.hasValue()) + return UnknownVal(); + + return State->getSVal(*FieldLoc); +} + +/// Constrain the passed-in state to assume two values are equal. +ProgramStateRef GTestChecker::assumeValuesEqual(SVal Val1, SVal Val2, + ProgramStateRef State, + CheckerContext &C) { + if (!Val1.getAs<DefinedOrUnknownSVal>() || + !Val2.getAs<DefinedOrUnknownSVal>()) + return State; + + auto ValuesEqual = + C.getSValBuilder().evalEQ(State, Val1.castAs<DefinedOrUnknownSVal>(), + Val2.castAs<DefinedOrUnknownSVal>()); + + if (!ValuesEqual.getAs<DefinedSVal>()) + return State; + + State = C.getConstraintManager().assume( + State, ValuesEqual.castAs<DefinedSVal>(), true); + + return State; +} + +void ento::registerGTestChecker(CheckerManager &Mgr) { + Mgr.registerChecker<GTestChecker>(); +} + +bool ento::shouldRegisterGTestChecker(const LangOptions &LO) { + // gtest is a C++ API so there is no sense running the checker + // if not compiling for C++. + return LO.CPlusPlus; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp new file mode 100644 index 000000000000..d442b26b3959 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -0,0 +1,855 @@ +//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker defines the attack surface for generic taint propagation. +// +// The taint information produced by it might be useful to other checkers. For +// example, checkers should report errors which involve tainted data more +// aggressively, even if the involved symbols are under constrained. +// +//===----------------------------------------------------------------------===// + +#include "Taint.h" +#include "Yaml.h" +#include "clang/AST/Attr.h" +#include "clang/Basic/Builtins.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/YAMLTraits.h" +#include <limits> +#include <utility> + +using namespace clang; +using namespace ento; +using namespace taint; + +namespace { +class GenericTaintChecker + : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> { +public: + static void *getTag() { + static int Tag; + return &Tag; + } + + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + + void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, + const char *Sep) const override; + + using ArgVector = SmallVector<unsigned, 2>; + using SignedArgVector = SmallVector<int, 2>; + + enum class VariadicType { None, Src, Dst }; + + /// Used to parse the configuration file. 
+ struct TaintConfiguration { + using NameArgsPair = std::pair<std::string, ArgVector>; + + struct Propagation { + std::string Name; + ArgVector SrcArgs; + SignedArgVector DstArgs; + VariadicType VarType; + unsigned VarIndex; + }; + + std::vector<Propagation> Propagations; + std::vector<NameArgsPair> Filters; + std::vector<NameArgsPair> Sinks; + + TaintConfiguration() = default; + TaintConfiguration(const TaintConfiguration &) = default; + TaintConfiguration(TaintConfiguration &&) = default; + TaintConfiguration &operator=(const TaintConfiguration &) = default; + TaintConfiguration &operator=(TaintConfiguration &&) = default; + }; + + /// Convert SignedArgVector to ArgVector. + ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, + SignedArgVector Args); + + /// Parse the config. + void parseConfiguration(CheckerManager &Mgr, const std::string &Option, + TaintConfiguration &&Config); + + static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; + /// Denotes the return vale. + static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - + 1}; + +private: + mutable std::unique_ptr<BugType> BT; + void initBugType() const { + if (!BT) + BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); + } + + /// Catch taint related bugs. Check if tainted data is passed to a + /// system call etc. + bool checkPre(const CallExpr *CE, CheckerContext &C) const; + + /// Add taint sources on a pre-visit. + void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; + + /// Propagate taint generated at pre-visit. + bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; + + /// Check if the region the expression evaluates to is the standard input, + /// and thus, is tainted. + static bool isStdin(const Expr *E, CheckerContext &C); + + /// Given a pointer argument, return the value it points to. 
+ static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); + + /// Check for CWE-134: Uncontrolled Format String. + static constexpr llvm::StringLiteral MsgUncontrolledFormatString = + "Untrusted data is used as a format string " + "(CWE-134: Uncontrolled Format String)"; + bool checkUncontrolledFormatString(const CallExpr *CE, + CheckerContext &C) const; + + /// Check for: + /// CERT/STR02-C. "Sanitize data passed to complex subsystems" + /// CWE-78, "Failure to Sanitize Data into an OS Command" + static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = + "Untrusted data is passed to a system call " + "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; + bool checkSystemCall(const CallExpr *CE, StringRef Name, + CheckerContext &C) const; + + /// Check if tainted data is used as a buffer size ins strn.. functions, + /// and allocators. + static constexpr llvm::StringLiteral MsgTaintedBufferSize = + "Untrusted data is used to specify the buffer size " + "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " + "for character data and the null terminator)"; + bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, + CheckerContext &C) const; + + /// Check if tainted data is used as a custom sink's parameter. + static constexpr llvm::StringLiteral MsgCustomSink = + "Untrusted data is passed to a user-defined sink"; + bool checkCustomSinks(const CallExpr *CE, StringRef Name, + CheckerContext &C) const; + + /// Generate a report if the expression is tainted or points to tainted data. + bool generateReportIfTainted(const Expr *E, StringRef Msg, + CheckerContext &C) const; + + struct TaintPropagationRule; + using NameRuleMap = llvm::StringMap<TaintPropagationRule>; + using NameArgMap = llvm::StringMap<ArgVector>; + + /// A struct used to specify taint propagation rules for a function. 
+ /// + /// If any of the possible taint source arguments is tainted, all of the + /// destination arguments should also be tainted. Use InvalidArgIndex in the + /// src list to specify that all of the arguments can introduce taint. Use + /// InvalidArgIndex in the dst arguments to signify that all the non-const + /// pointer and reference arguments might be tainted on return. If + /// ReturnValueIndex is added to the dst list, the return value will be + /// tainted. + struct TaintPropagationRule { + using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *, + CheckerContext &C); + + /// List of arguments which can be taint sources and should be checked. + ArgVector SrcArgs; + /// List of arguments which should be tainted on function return. + ArgVector DstArgs; + /// Index for the first variadic parameter if exist. + unsigned VariadicIndex; + /// Show when a function has variadic parameters. If it has, it marks all + /// of them as source or destination. + VariadicType VarType; + /// Special function for tainted source determination. If defined, it can + /// override the default behavior. + PropagationFuncType PropagationFunc; + + TaintPropagationRule() + : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), + PropagationFunc(nullptr) {} + + TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, + VariadicType Var = VariadicType::None, + unsigned VarIndex = InvalidArgIndex, + PropagationFuncType Func = nullptr) + : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), + VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} + + /// Get the propagation rule for a given function. 
+ static TaintPropagationRule + getTaintPropagationRule(const NameRuleMap &CustomPropagations, + const FunctionDecl *FDecl, StringRef Name, + CheckerContext &C); + + void addSrcArg(unsigned A) { SrcArgs.push_back(A); } + void addDstArg(unsigned A) { DstArgs.push_back(A); } + + bool isNull() const { + return SrcArgs.empty() && DstArgs.empty() && + VariadicType::None == VarType; + } + + bool isDestinationArgument(unsigned ArgNum) const { + return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); + } + + static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, + CheckerContext &C) { + if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) + return true; + + if (!E->getType().getTypePtr()->isPointerType()) + return false; + + Optional<SVal> V = getPointedToSVal(C, E); + return (V && isTainted(State, *V)); + } + + /// Pre-process a function which propagates taint according to the + /// taint rule. + ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; + + // Functions for custom taintedness propagation. + static bool postSocket(bool IsTainted, const CallExpr *CE, + CheckerContext &C); + }; + + /// Defines a map between the propagation function's name and + /// TaintPropagationRule. + NameRuleMap CustomPropagations; + + /// Defines a map between the filter function's name and filtering args. + NameArgMap CustomFilters; + + /// Defines a map between the sink function's name and sinking args. 
+ NameArgMap CustomSinks; +}; + +const unsigned GenericTaintChecker::ReturnValueIndex; +const unsigned GenericTaintChecker::InvalidArgIndex; + +// FIXME: these lines can be removed in C++17 +constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; +constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; +constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; +constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; +} // end of anonymous namespace + +using TaintConfig = GenericTaintChecker::TaintConfiguration; + +LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) +LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair) + +namespace llvm { +namespace yaml { +template <> struct MappingTraits<TaintConfig> { + static void mapping(IO &IO, TaintConfig &Config) { + IO.mapOptional("Propagations", Config.Propagations); + IO.mapOptional("Filters", Config.Filters); + IO.mapOptional("Sinks", Config.Sinks); + } +}; + +template <> struct MappingTraits<TaintConfig::Propagation> { + static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { + IO.mapRequired("Name", Propagation.Name); + IO.mapOptional("SrcArgs", Propagation.SrcArgs); + IO.mapOptional("DstArgs", Propagation.DstArgs); + IO.mapOptional("VariadicType", Propagation.VarType, + GenericTaintChecker::VariadicType::None); + IO.mapOptional("VariadicIndex", Propagation.VarIndex, + GenericTaintChecker::InvalidArgIndex); + } +}; + +template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { + static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { + IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); + IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); + IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); + } +}; + +template <> struct MappingTraits<TaintConfig::NameArgsPair> { + static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) { + 
IO.mapRequired("Name", NameArg.first); + IO.mapRequired("Args", NameArg.second); + } +}; +} // namespace yaml +} // namespace llvm + +/// A set which is used to pass information from call pre-visit instruction +/// to the call post-visit. The values are unsigned integers, which are either +/// ReturnValueIndex, or indexes of the pointer/reference argument, which +/// points to data, which should be tainted on return. +REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) + +GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector( + CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) { + ArgVector Result; + for (int Arg : Args) { + if (Arg == -1) + Result.push_back(ReturnValueIndex); + else if (Arg < -1) { + Result.push_back(InvalidArgIndex); + Mgr.reportInvalidCheckerOptionValue( + this, Option, + "an argument number for propagation rules greater or equal to -1"); + } else + Result.push_back(static_cast<unsigned>(Arg)); + } + return Result; +} + +void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, + const std::string &Option, + TaintConfiguration &&Config) { + for (auto &P : Config.Propagations) { + GenericTaintChecker::CustomPropagations.try_emplace( + P.Name, std::move(P.SrcArgs), + convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex); + } + + for (auto &F : Config.Filters) { + GenericTaintChecker::CustomFilters.try_emplace(F.first, + std::move(F.second)); + } + + for (auto &S : Config.Sinks) { + GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second)); + } +} + +GenericTaintChecker::TaintPropagationRule +GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( + const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl, + StringRef Name, CheckerContext &C) { + // TODO: Currently, we might lose precision here: we always mark a return + // value as tainted even if it's just a pointer, pointing to tainted data. 
+ + // Check for exact name match for functions without builtin substitutes. + TaintPropagationRule Rule = + llvm::StringSwitch<TaintPropagationRule>(Name) + // Source functions + // TODO: Add support for vfscanf & family. + .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("getch", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("getchar_unlocked", + TaintPropagationRule({}, {ReturnValueIndex})) + .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) + .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1)) + .Case("socket", + TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None, + InvalidArgIndex, + &TaintPropagationRule::postSocket)) + .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex})) + // Propagating functions + .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex})) + .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2)) + .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("getdelim", TaintPropagationRule({3}, {0})) + .Case("getline", TaintPropagationRule({2}, {0})) + .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("pread", + TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex})) + .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex})) + .Case("strchr", TaintPropagationRule({0}, 
{ReturnValueIndex})) + .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex})) + .Default(TaintPropagationRule()); + + if (!Rule.isNull()) + return Rule; + + // Check if it's one of the memory setting/copying functions. + // This check is specialized but faster then calling isCLibraryFunction. + unsigned BId = 0; + if ((BId = FDecl->getMemoryFunctionKind())) + switch (BId) { + case Builtin::BImemcpy: + case Builtin::BImemmove: + case Builtin::BIstrncpy: + case Builtin::BIstrncat: + return TaintPropagationRule({1, 2}, {0, ReturnValueIndex}); + case Builtin::BIstrlcpy: + case Builtin::BIstrlcat: + return TaintPropagationRule({1, 2}, {0}); + case Builtin::BIstrndup: + return TaintPropagationRule({0, 1}, {ReturnValueIndex}); + + default: + break; + }; + + // Process all other functions which could be defined as builtins. + if (Rule.isNull()) { + if (C.isCLibraryFunction(FDecl, "snprintf")) + return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src, + 3); + else if (C.isCLibraryFunction(FDecl, "sprintf")) + return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src, + 2); + else if (C.isCLibraryFunction(FDecl, "strcpy") || + C.isCLibraryFunction(FDecl, "stpcpy") || + C.isCLibraryFunction(FDecl, "strcat")) + return TaintPropagationRule({1}, {0, ReturnValueIndex}); + else if (C.isCLibraryFunction(FDecl, "bcopy")) + return TaintPropagationRule({0, 2}, {1}); + else if (C.isCLibraryFunction(FDecl, "strdup") || + C.isCLibraryFunction(FDecl, "strdupa")) + return TaintPropagationRule({0}, {ReturnValueIndex}); + else if (C.isCLibraryFunction(FDecl, "wcsdup")) + return TaintPropagationRule({0}, {ReturnValueIndex}); + } + + // Skipping the following functions, since they might be used for cleansing + // or smart memory copy: + // - memccpy - copying until hitting a special character. 
+ + auto It = CustomPropagations.find(Name); + if (It != CustomPropagations.end()) + return It->getValue(); + + return TaintPropagationRule(); +} + +void GenericTaintChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + // Check for taintedness related errors first: system call, uncontrolled + // format string, tainted buffer size. + if (checkPre(CE, C)) + return; + + // Marks the function's arguments and/or return value tainted if it present in + // the list. + addSourcesPre(CE, C); +} + +void GenericTaintChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + // Set the marked values as tainted. The return value only accessible from + // checkPostStmt. + propagateFromPre(CE, C); +} + +void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const { + printTaint(State, Out, NL, Sep); +} + +void GenericTaintChecker::addSourcesPre(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = nullptr; + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + if (!FDecl || FDecl->getKind() != Decl::Function) + return; + + StringRef Name = C.getCalleeName(FDecl); + if (Name.empty()) + return; + + // First, try generating a propagation rule for this function. + TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( + this->CustomPropagations, FDecl, Name, C); + if (!Rule.isNull()) { + State = Rule.process(CE, C); + if (!State) + return; + C.addTransition(State); + return; + } + + if (!State) + return; + C.addTransition(State); +} + +bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // Depending on what was tainted at pre-visit, we determined a set of + // arguments which should be tainted after the function returns. These are + // stored in the state as TaintArgsOnPostVisit set. 
+ TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); + if (TaintArgs.isEmpty()) + return false; + + for (unsigned ArgNum : TaintArgs) { + // Special handling for the tainted return value. + if (ArgNum == ReturnValueIndex) { + State = addTaint(State, CE, C.getLocationContext()); + continue; + } + + // The arguments are pointer arguments. The data they are pointing at is + // tainted after the call. + if (CE->getNumArgs() < (ArgNum + 1)) + return false; + const Expr *Arg = CE->getArg(ArgNum); + Optional<SVal> V = getPointedToSVal(C, Arg); + if (V) + State = addTaint(State, *V); + } + + // Clear up the taint info from the state. + State = State->remove<TaintArgsOnPostVisit>(); + + if (State != C.getState()) { + C.addTransition(State); + return true; + } + return false; +} + +bool GenericTaintChecker::checkPre(const CallExpr *CE, + CheckerContext &C) const { + + if (checkUncontrolledFormatString(CE, C)) + return true; + + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + if (!FDecl || FDecl->getKind() != Decl::Function) + return false; + + StringRef Name = C.getCalleeName(FDecl); + if (Name.empty()) + return false; + + if (checkSystemCall(CE, Name, C)) + return true; + + if (checkTaintedBufferSize(CE, FDecl, C)) + return true; + + if (checkCustomSinks(CE, Name, C)) + return true; + + return false; +} + +Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, + const Expr *Arg) { + ProgramStateRef State = C.getState(); + SVal AddrVal = C.getSVal(Arg->IgnoreParens()); + if (AddrVal.isUnknownOrUndef()) + return None; + + Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); + if (!AddrLoc) + return None; + + QualType ArgTy = Arg->getType().getCanonicalType(); + if (!ArgTy->isPointerType()) + return None; + + QualType ValTy = ArgTy->getPointeeType(); + + // Do not dereference void pointers. Treat them as byte pointers instead. + // FIXME: we might want to consider more than just the first byte. 
+ if (ValTy->isVoidType()) + ValTy = C.getASTContext().CharTy; + + return State->getSVal(*AddrLoc, ValTy); +} + +ProgramStateRef +GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // Check for taint in arguments. + bool IsTainted = true; + for (unsigned ArgNum : SrcArgs) { + if (ArgNum >= CE->getNumArgs()) + continue; + + if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) + break; + } + + // Check for taint in variadic arguments. + if (!IsTainted && VariadicType::Src == VarType) { + // Check if any of the arguments is tainted + for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { + if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) + break; + } + } + + if (PropagationFunc) + IsTainted = PropagationFunc(IsTainted, CE, C); + + if (!IsTainted) + return State; + + // Mark the arguments which should be tainted after the function returns. + for (unsigned ArgNum : DstArgs) { + // Should mark the return value? + if (ArgNum == ReturnValueIndex) { + State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); + continue; + } + + if (ArgNum >= CE->getNumArgs()) + continue; + + // Mark the given argument. + State = State->add<TaintArgsOnPostVisit>(ArgNum); + } + + // Mark all variadic arguments tainted if present. + if (VariadicType::Dst == VarType) { + // For all pointer and references that were passed in: + // If they are not pointing to const data, mark data as tainted. + // TODO: So far we are just going one level down; ideally we'd need to + // recurse here. + for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { + const Expr *Arg = CE->getArg(i); + // Process pointer argument. 
+ const Type *ArgTy = Arg->getType().getTypePtr(); + QualType PType = ArgTy->getPointeeType(); + if ((!PType.isNull() && !PType.isConstQualified()) || + (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) + State = State->add<TaintArgsOnPostVisit>(i); + } + } + + return State; +} + +// If argument 0(protocol domain) is network, the return value should get taint. +bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/, + const CallExpr *CE, + CheckerContext &C) { + SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); + StringRef DomName = C.getMacroNameOrSpelling(DomLoc); + // White list the internal communication protocols. + if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || + DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) + return false; + + return true; +} + +bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { + ProgramStateRef State = C.getState(); + SVal Val = C.getSVal(E); + + // stdin is a pointer, so it would be a region. + const MemRegion *MemReg = Val.getAsRegion(); + + // The region should be symbolic, we do not know it's value. + const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); + if (!SymReg) + return false; + + // Get it's symbol and find the declaration region it's pointing to. + const SymbolRegionValue *Sm = + dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); + if (!Sm) + return false; + const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); + if (!DeclReg) + return false; + + // This region corresponds to a declaration, find out if it's a global/extern + // variable named stdin with the proper type. 
+ if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { + D = D->getCanonicalDecl(); + if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { + const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); + if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == + C.getASTContext().getFILEType().getCanonicalType()) + return true; + } + } + return false; +} + +static bool getPrintfFormatArgumentNum(const CallExpr *CE, + const CheckerContext &C, + unsigned &ArgNum) { + // Find if the function contains a format string argument. + // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, + // vsnprintf, syslog, custom annotated functions. + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + if (!FDecl) + return false; + for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { + ArgNum = Format->getFormatIdx() - 1; + if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) + return true; + } + + // Or if a function is named setproctitle (this is a heuristic). + if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { + ArgNum = 0; + return true; + } + + return false; +} + +bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, + CheckerContext &C) const { + assert(E); + + // Check for taint. + ProgramStateRef State = C.getState(); + Optional<SVal> PointedToSVal = getPointedToSVal(C, E); + SVal TaintedSVal; + if (PointedToSVal && isTainted(State, *PointedToSVal)) + TaintedSVal = *PointedToSVal; + else if (isTainted(State, E, C.getLocationContext())) + TaintedSVal = C.getSVal(E); + else + return false; + + // Generate diagnostic. 
+ if (ExplodedNode *N = C.generateNonFatalErrorNode()) { + initBugType(); + auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + report->addRange(E->getSourceRange()); + report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); + C.emitReport(std::move(report)); + return true; + } + return false; +} + +bool GenericTaintChecker::checkUncontrolledFormatString( + const CallExpr *CE, CheckerContext &C) const { + // Check if the function contains a format string argument. + unsigned ArgNum = 0; + if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) + return false; + + // If either the format string content or the pointer itself are tainted, + // warn. + return generateReportIfTainted(CE->getArg(ArgNum), + MsgUncontrolledFormatString, C); +} + +bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, + CheckerContext &C) const { + // TODO: It might make sense to run this check on demand. In some cases, + // we should check if the environment has been cleansed here. We also might + // need to know if the user was reset before these calls(seteuid). + unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) + .Case("system", 0) + .Case("popen", 0) + .Case("execl", 0) + .Case("execle", 0) + .Case("execlp", 0) + .Case("execv", 0) + .Case("execvp", 0) + .Case("execvP", 0) + .Case("execve", 0) + .Case("dlopen", 0) + .Default(InvalidArgIndex); + + if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1)) + return false; + + return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); +} + +// TODO: Should this check be a part of the CString checker? +// If yes, should taint be a global setting? +bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, + const FunctionDecl *FDecl, + CheckerContext &C) const { + // If the function has a buffer size argument, set ArgNum. 
+ unsigned ArgNum = InvalidArgIndex; + unsigned BId = 0; + if ((BId = FDecl->getMemoryFunctionKind())) + switch (BId) { + case Builtin::BImemcpy: + case Builtin::BImemmove: + case Builtin::BIstrncpy: + ArgNum = 2; + break; + case Builtin::BIstrndup: + ArgNum = 1; + break; + default: + break; + }; + + if (ArgNum == InvalidArgIndex) { + if (C.isCLibraryFunction(FDecl, "malloc") || + C.isCLibraryFunction(FDecl, "calloc") || + C.isCLibraryFunction(FDecl, "alloca")) + ArgNum = 0; + else if (C.isCLibraryFunction(FDecl, "memccpy")) + ArgNum = 3; + else if (C.isCLibraryFunction(FDecl, "realloc")) + ArgNum = 1; + else if (C.isCLibraryFunction(FDecl, "bcopy")) + ArgNum = 2; + } + + return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && + generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); +} + +bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name, + CheckerContext &C) const { + auto It = CustomSinks.find(Name); + if (It == CustomSinks.end()) + return false; + + const GenericTaintChecker::ArgVector &Args = It->getValue(); + for (unsigned ArgNum : Args) { + if (ArgNum >= CE->getNumArgs()) + continue; + + if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C)) + return true; + } + + return false; +} + +void ento::registerGenericTaintChecker(CheckerManager &Mgr) { + auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); + std::string Option{"Config"}; + StringRef ConfigFile = + Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); + llvm::Optional<TaintConfig> Config = + getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); + if (Config) + Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); +} + +bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp new file mode 100644 index 000000000000..cc2cfb774227 
--- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp @@ -0,0 +1,518 @@ +//== IdenticalExprChecker.cpp - Identical expression checker----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This defines IdenticalExprChecker, a check that warns about +/// unintended use of identical expressions. +/// +/// It checks for use of identical expressions with comparison operators and +/// inside conditional expressions. +/// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1, + const Stmt *Stmt2, bool IgnoreSideEffects = false); +//===----------------------------------------------------------------------===// +// FindIdenticalExprVisitor - Identify nodes using identical expressions. 
+//===----------------------------------------------------------------------===// + +namespace { +class FindIdenticalExprVisitor + : public RecursiveASTVisitor<FindIdenticalExprVisitor> { + BugReporter &BR; + const CheckerBase *Checker; + AnalysisDeclContext *AC; +public: + explicit FindIdenticalExprVisitor(BugReporter &B, + const CheckerBase *Checker, + AnalysisDeclContext *A) + : BR(B), Checker(Checker), AC(A) {} + // FindIdenticalExprVisitor only visits nodes + // that are binary operators, if statements or + // conditional operators. + bool VisitBinaryOperator(const BinaryOperator *B); + bool VisitIfStmt(const IfStmt *I); + bool VisitConditionalOperator(const ConditionalOperator *C); + +private: + void reportIdenticalExpr(const BinaryOperator *B, bool CheckBitwise, + ArrayRef<SourceRange> Sr); + void checkBitwiseOrLogicalOp(const BinaryOperator *B, bool CheckBitwise); + void checkComparisonOp(const BinaryOperator *B); +}; +} // end anonymous namespace + +void FindIdenticalExprVisitor::reportIdenticalExpr(const BinaryOperator *B, + bool CheckBitwise, + ArrayRef<SourceRange> Sr) { + StringRef Message; + if (CheckBitwise) + Message = "identical expressions on both sides of bitwise operator"; + else + Message = "identical expressions on both sides of logical operator"; + + PathDiagnosticLocation ELoc = + PathDiagnosticLocation::createOperatorLoc(B, BR.getSourceManager()); + BR.EmitBasicReport(AC->getDecl(), Checker, + "Use of identical expressions", + categories::LogicError, + Message, ELoc, Sr); +} + +void FindIdenticalExprVisitor::checkBitwiseOrLogicalOp(const BinaryOperator *B, + bool CheckBitwise) { + SourceRange Sr[2]; + + const Expr *LHS = B->getLHS(); + const Expr *RHS = B->getRHS(); + + // Split operators as long as we still have operators to split on. We will + // get called for every binary operator in an expression so there is no need + // to check every one against each other here, just the right most one with + // the others. 
+ while (const BinaryOperator *B2 = dyn_cast<BinaryOperator>(LHS)) { + if (B->getOpcode() != B2->getOpcode()) + break; + if (isIdenticalStmt(AC->getASTContext(), RHS, B2->getRHS())) { + Sr[0] = RHS->getSourceRange(); + Sr[1] = B2->getRHS()->getSourceRange(); + reportIdenticalExpr(B, CheckBitwise, Sr); + } + LHS = B2->getLHS(); + } + + if (isIdenticalStmt(AC->getASTContext(), RHS, LHS)) { + Sr[0] = RHS->getSourceRange(); + Sr[1] = LHS->getSourceRange(); + reportIdenticalExpr(B, CheckBitwise, Sr); + } +} + +bool FindIdenticalExprVisitor::VisitIfStmt(const IfStmt *I) { + const Stmt *Stmt1 = I->getThen(); + const Stmt *Stmt2 = I->getElse(); + + // Check for identical inner condition: + // + // if (x<10) { + // if (x<10) { + // .. + if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(Stmt1)) { + if (!CS->body_empty()) { + const IfStmt *InnerIf = dyn_cast<IfStmt>(*CS->body_begin()); + if (InnerIf && isIdenticalStmt(AC->getASTContext(), I->getCond(), InnerIf->getCond(), /*IgnoreSideEffects=*/ false)) { + PathDiagnosticLocation ELoc(InnerIf->getCond(), BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), Checker, "Identical conditions", + categories::LogicError, + "conditions of the inner and outer statements are identical", + ELoc); + } + } + } + + // Check for identical conditions: + // + // if (b) { + // foo1(); + // } else if (b) { + // foo2(); + // } + if (Stmt1 && Stmt2) { + const Expr *Cond1 = I->getCond(); + const Stmt *Else = Stmt2; + while (const IfStmt *I2 = dyn_cast_or_null<IfStmt>(Else)) { + const Expr *Cond2 = I2->getCond(); + if (isIdenticalStmt(AC->getASTContext(), Cond1, Cond2, false)) { + SourceRange Sr = Cond1->getSourceRange(); + PathDiagnosticLocation ELoc(Cond2, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), Checker, "Identical conditions", + categories::LogicError, + "expression is identical to previous condition", + ELoc, Sr); + } + Else = I2->getElse(); + } + } + + if (!Stmt1 || !Stmt2) + return true; + + // Special 
handling for code like:
  //
  // if (b) {
  //   i = 1;
  // } else
  //   i = 1;
  // A compound statement holding exactly one statement is replaced by that
  // statement, so `{ i = 1; }` and `i = 1;` are compared on equal footing.
  if (const CompoundStmt *CompStmt = dyn_cast<CompoundStmt>(Stmt1)) {
    if (CompStmt->size() == 1)
      Stmt1 = CompStmt->body_back();
  }
  if (const CompoundStmt *CompStmt = dyn_cast<CompoundStmt>(Stmt2)) {
    if (CompStmt->size() == 1)
      Stmt2 = CompStmt->body_back();
  }

  // The last argument (IgnoreSideEffects) is true: branches are compared even
  // when they contain side effects.
  if (isIdenticalStmt(AC->getASTContext(), Stmt1, Stmt2, true)) {
      PathDiagnosticLocation ELoc =
          PathDiagnosticLocation::createBegin(I, BR.getSourceManager(), AC);
      BR.EmitBasicReport(AC->getDecl(), Checker,
                         "Identical branches",
                         categories::LogicError,
                         "true and false branches are identical", ELoc);
  }
  return true;
}

/// Routes a binary operator to the matching check based on its opcode:
/// bitwise and logical operators go to checkBitwiseOrLogicalOp (with the
/// second argument telling the two apart), comparison operators go to
/// checkComparisonOp.
///
/// \returns always true, so that the traversal continues into sub-expressions.
bool FindIdenticalExprVisitor::VisitBinaryOperator(const BinaryOperator *B) {
  BinaryOperator::Opcode Op = B->getOpcode();

  if (BinaryOperator::isBitwiseOp(Op))
    checkBitwiseOrLogicalOp(B, true);

  if (BinaryOperator::isLogicalOp(Op))
    checkBitwiseOrLogicalOp(B, false);

  if (BinaryOperator::isComparisonOp(Op))
    checkComparisonOp(B);

  // We want to visit ALL nodes (subexpressions of binary comparison
  // expressions too) that contains comparison operators.
  // True is always returned to traverse ALL nodes.
  return true;
}

/// Emits a "Compare of identical expressions" report when both operands of
/// the comparison \p B are identical according to isIdenticalStmt, unless one
/// of the floating-point special cases below suppresses the warning.
void FindIdenticalExprVisitor::checkComparisonOp(const BinaryOperator *B) {
  BinaryOperator::Opcode Op = B->getOpcode();

  //
  // Special case for floating-point representation.
  //
  // If expressions on both sides of comparison operator are of type float,
  // then for some comparison operators no warning shall be
  // reported even if the expressions are identical from a symbolic point of
  // view. Comparison between expressions, declared variables and literals
  // are treated differently.
  //
  // != and == between float literals that have the same value should NOT warn.
  // < > between float literals that have the same value SHOULD warn.
  //
  // != and == between the same float declaration should NOT warn.
  // < > between the same float declaration SHOULD warn.
  //
  // != and == between eq. expressions that evaluates into float
  //           should NOT warn.
  // < >       between eq. expressions that evaluates into float
  //           should NOT warn.
  //
  // The operands are inspected with parentheses and implicit casts stripped.
  const Expr *LHS = B->getLHS()->IgnoreParenImpCasts();
  const Expr *RHS = B->getRHS()->IgnoreParenImpCasts();

  const DeclRefExpr *DeclRef1 = dyn_cast<DeclRefExpr>(LHS);
  const DeclRefExpr *DeclRef2 = dyn_cast<DeclRefExpr>(RHS);
  const FloatingLiteral *FloatLit1 = dyn_cast<FloatingLiteral>(LHS);
  const FloatingLiteral *FloatLit2 = dyn_cast<FloatingLiteral>(RHS);
  if ((DeclRef1) && (DeclRef2)) {
    // Both sides are plain declaration references: only `==` / `!=` on the
    // very same floating-point declaration is exempt from the warning.
    if ((DeclRef1->getType()->hasFloatingRepresentation()) &&
        (DeclRef2->getType()->hasFloatingRepresentation())) {
      if (DeclRef1->getDecl() == DeclRef2->getDecl()) {
        if ((Op == BO_EQ) || (Op == BO_NE)) {
          return;
        }
      }
    }
  } else if ((FloatLit1) && (FloatLit2)) {
    // Both sides are floating literals: only `==` / `!=` on bitwise-equal
    // values is exempt from the warning.
    if (FloatLit1->getValue().bitwiseIsEqual(FloatLit2->getValue())) {
      if ((Op == BO_EQ) || (Op == BO_NE)) {
        return;
      }
    }
  } else if (LHS->getType()->hasFloatingRepresentation()) {
    // If any side of comparison operator still has floating-point
    // representation, then it's an expression. Don't warn.
    // Here only LHS is checked since RHS will be implicit casted to float.
    return;
  } else {
    // No special case with floating-point representation, report as usual.
  }

  // Note: the comparison below uses the original (unstripped) operands and
  // relies on the default value of isIdenticalStmt's fourth parameter, which
  // is declared outside this chunk.
  if (isIdenticalStmt(AC->getASTContext(), B->getLHS(), B->getRHS())) {
    PathDiagnosticLocation ELoc =
        PathDiagnosticLocation::createOperatorLoc(B, BR.getSourceManager());
    StringRef Message;
    if (Op == BO_Cmp)
      Message = "comparison of identical expressions always evaluates to "
                "'equal'";
    else if (((Op == BO_EQ) || (Op == BO_LE) || (Op == BO_GE)))
      Message = "comparison of identical expressions always evaluates to true";
    else
      Message = "comparison of identical expressions always evaluates to false";
    BR.EmitBasicReport(AC->getDecl(), Checker,
                       "Compare of identical expressions",
                       categories::LogicError, Message, ELoc);
  }
}

/// Reports a conditional operator whose true and false expressions are
/// identical, highlighting the source ranges of both expressions.
///
/// \returns always true, so that the traversal continues into sub-expressions.
bool FindIdenticalExprVisitor::VisitConditionalOperator(
    const ConditionalOperator *C) {

  // Check if expressions in conditional expression are identical
  // from a symbolic point of view.

  if (isIdenticalStmt(AC->getASTContext(), C->getTrueExpr(),
                      C->getFalseExpr(), true)) {
    PathDiagnosticLocation ELoc =
        PathDiagnosticLocation::createConditionalColonLoc(
            C, BR.getSourceManager());

    SourceRange Sr[2];
    Sr[0] = C->getTrueExpr()->getSourceRange();
    Sr[1] = C->getFalseExpr()->getSourceRange();
    BR.EmitBasicReport(
        AC->getDecl(), Checker,
        "Identical expressions in conditional expression",
        categories::LogicError,
        "identical expressions on both sides of ':' in conditional expression",
        ELoc, Sr);
  }
  // We want to visit ALL nodes (expressions in conditional
  // expressions too) that contains conditional operators,
  // thus always return true to traverse ALL nodes.
  return true;
}

/// Determines whether two statement trees are identical regarding
/// operators and symbols.
///
/// Exceptions: expressions containing macros or functions with possible side
/// effects are never considered identical.
/// Limitations: (t + u) and (u + t) are not considered identical.
/// t*(u + t) and t*u + t*t are not considered identical.
+/// +static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1, + const Stmt *Stmt2, bool IgnoreSideEffects) { + + if (!Stmt1 || !Stmt2) { + return !Stmt1 && !Stmt2; + } + + // If Stmt1 & Stmt2 are of different class then they are not + // identical statements. + if (Stmt1->getStmtClass() != Stmt2->getStmtClass()) + return false; + + const Expr *Expr1 = dyn_cast<Expr>(Stmt1); + const Expr *Expr2 = dyn_cast<Expr>(Stmt2); + + if (Expr1 && Expr2) { + // If Stmt1 has side effects then don't warn even if expressions + // are identical. + if (!IgnoreSideEffects && Expr1->HasSideEffects(Ctx)) + return false; + // If either expression comes from a macro then don't warn even if + // the expressions are identical. + if ((Expr1->getExprLoc().isMacroID()) || (Expr2->getExprLoc().isMacroID())) + return false; + + // If all children of two expressions are identical, return true. + Expr::const_child_iterator I1 = Expr1->child_begin(); + Expr::const_child_iterator I2 = Expr2->child_begin(); + while (I1 != Expr1->child_end() && I2 != Expr2->child_end()) { + if (!*I1 || !*I2 || !isIdenticalStmt(Ctx, *I1, *I2, IgnoreSideEffects)) + return false; + ++I1; + ++I2; + } + // If there are different number of children in the statements, return + // false. 
+ if (I1 != Expr1->child_end()) + return false; + if (I2 != Expr2->child_end()) + return false; + } + + switch (Stmt1->getStmtClass()) { + default: + return false; + case Stmt::CallExprClass: + case Stmt::ArraySubscriptExprClass: + case Stmt::OMPArraySectionExprClass: + case Stmt::ImplicitCastExprClass: + case Stmt::ParenExprClass: + case Stmt::BreakStmtClass: + case Stmt::ContinueStmtClass: + case Stmt::NullStmtClass: + return true; + case Stmt::CStyleCastExprClass: { + const CStyleCastExpr* CastExpr1 = cast<CStyleCastExpr>(Stmt1); + const CStyleCastExpr* CastExpr2 = cast<CStyleCastExpr>(Stmt2); + + return CastExpr1->getTypeAsWritten() == CastExpr2->getTypeAsWritten(); + } + case Stmt::ReturnStmtClass: { + const ReturnStmt *ReturnStmt1 = cast<ReturnStmt>(Stmt1); + const ReturnStmt *ReturnStmt2 = cast<ReturnStmt>(Stmt2); + + return isIdenticalStmt(Ctx, ReturnStmt1->getRetValue(), + ReturnStmt2->getRetValue(), IgnoreSideEffects); + } + case Stmt::ForStmtClass: { + const ForStmt *ForStmt1 = cast<ForStmt>(Stmt1); + const ForStmt *ForStmt2 = cast<ForStmt>(Stmt2); + + if (!isIdenticalStmt(Ctx, ForStmt1->getInit(), ForStmt2->getInit(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, ForStmt1->getCond(), ForStmt2->getCond(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, ForStmt1->getInc(), ForStmt2->getInc(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, ForStmt1->getBody(), ForStmt2->getBody(), + IgnoreSideEffects)) + return false; + return true; + } + case Stmt::DoStmtClass: { + const DoStmt *DStmt1 = cast<DoStmt>(Stmt1); + const DoStmt *DStmt2 = cast<DoStmt>(Stmt2); + + if (!isIdenticalStmt(Ctx, DStmt1->getCond(), DStmt2->getCond(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, DStmt1->getBody(), DStmt2->getBody(), + IgnoreSideEffects)) + return false; + return true; + } + case Stmt::WhileStmtClass: { + const WhileStmt *WStmt1 = cast<WhileStmt>(Stmt1); + const WhileStmt *WStmt2 = 
cast<WhileStmt>(Stmt2); + + if (!isIdenticalStmt(Ctx, WStmt1->getCond(), WStmt2->getCond(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, WStmt1->getBody(), WStmt2->getBody(), + IgnoreSideEffects)) + return false; + return true; + } + case Stmt::IfStmtClass: { + const IfStmt *IStmt1 = cast<IfStmt>(Stmt1); + const IfStmt *IStmt2 = cast<IfStmt>(Stmt2); + + if (!isIdenticalStmt(Ctx, IStmt1->getCond(), IStmt2->getCond(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, IStmt1->getThen(), IStmt2->getThen(), + IgnoreSideEffects)) + return false; + if (!isIdenticalStmt(Ctx, IStmt1->getElse(), IStmt2->getElse(), + IgnoreSideEffects)) + return false; + return true; + } + case Stmt::CompoundStmtClass: { + const CompoundStmt *CompStmt1 = cast<CompoundStmt>(Stmt1); + const CompoundStmt *CompStmt2 = cast<CompoundStmt>(Stmt2); + + if (CompStmt1->size() != CompStmt2->size()) + return false; + + CompoundStmt::const_body_iterator I1 = CompStmt1->body_begin(); + CompoundStmt::const_body_iterator I2 = CompStmt2->body_begin(); + while (I1 != CompStmt1->body_end() && I2 != CompStmt2->body_end()) { + if (!isIdenticalStmt(Ctx, *I1, *I2, IgnoreSideEffects)) + return false; + ++I1; + ++I2; + } + + return true; + } + case Stmt::CompoundAssignOperatorClass: + case Stmt::BinaryOperatorClass: { + const BinaryOperator *BinOp1 = cast<BinaryOperator>(Stmt1); + const BinaryOperator *BinOp2 = cast<BinaryOperator>(Stmt2); + return BinOp1->getOpcode() == BinOp2->getOpcode(); + } + case Stmt::CharacterLiteralClass: { + const CharacterLiteral *CharLit1 = cast<CharacterLiteral>(Stmt1); + const CharacterLiteral *CharLit2 = cast<CharacterLiteral>(Stmt2); + return CharLit1->getValue() == CharLit2->getValue(); + } + case Stmt::DeclRefExprClass: { + const DeclRefExpr *DeclRef1 = cast<DeclRefExpr>(Stmt1); + const DeclRefExpr *DeclRef2 = cast<DeclRefExpr>(Stmt2); + return DeclRef1->getDecl() == DeclRef2->getDecl(); + } + case Stmt::IntegerLiteralClass: { + const 
IntegerLiteral *IntLit1 = cast<IntegerLiteral>(Stmt1); + const IntegerLiteral *IntLit2 = cast<IntegerLiteral>(Stmt2); + + llvm::APInt I1 = IntLit1->getValue(); + llvm::APInt I2 = IntLit2->getValue(); + if (I1.getBitWidth() != I2.getBitWidth()) + return false; + return I1 == I2; + } + case Stmt::FloatingLiteralClass: { + const FloatingLiteral *FloatLit1 = cast<FloatingLiteral>(Stmt1); + const FloatingLiteral *FloatLit2 = cast<FloatingLiteral>(Stmt2); + return FloatLit1->getValue().bitwiseIsEqual(FloatLit2->getValue()); + } + case Stmt::StringLiteralClass: { + const StringLiteral *StringLit1 = cast<StringLiteral>(Stmt1); + const StringLiteral *StringLit2 = cast<StringLiteral>(Stmt2); + return StringLit1->getBytes() == StringLit2->getBytes(); + } + case Stmt::MemberExprClass: { + const MemberExpr *MemberStmt1 = cast<MemberExpr>(Stmt1); + const MemberExpr *MemberStmt2 = cast<MemberExpr>(Stmt2); + return MemberStmt1->getMemberDecl() == MemberStmt2->getMemberDecl(); + } + case Stmt::UnaryOperatorClass: { + const UnaryOperator *UnaryOp1 = cast<UnaryOperator>(Stmt1); + const UnaryOperator *UnaryOp2 = cast<UnaryOperator>(Stmt2); + return UnaryOp1->getOpcode() == UnaryOp2->getOpcode(); + } + } +} + +//===----------------------------------------------------------------------===// +// FindIdenticalExprChecker +//===----------------------------------------------------------------------===// + +namespace { +class FindIdenticalExprChecker : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr, + BugReporter &BR) const { + FindIdenticalExprVisitor Visitor(BR, this, Mgr.getAnalysisDeclContext(D)); + Visitor.TraverseDecl(const_cast<Decl *>(D)); + } +}; +} // end anonymous namespace + +void ento::registerIdenticalExprChecker(CheckerManager &Mgr) { + Mgr.registerChecker<FindIdenticalExprChecker>(); +} + +bool ento::shouldRegisterIdenticalExprChecker(const LangOptions &LO) { + return true; +} diff --git 
a/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp new file mode 100644 index 000000000000..b0d101c88517 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp @@ -0,0 +1,312 @@ +//=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a check that marks a raw pointer to a C++ container's +// inner buffer released when the object is destroyed. This information can +// be used by MallocChecker to detect use-after-free problems. +// +//===----------------------------------------------------------------------===// + +#include "AllocationState.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "InterCheckerAPI.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +// Associate container objects with a set of raw pointer symbols. 
+REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef) +REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet) + + +namespace { + +class InnerPointerChecker + : public Checker<check::DeadSymbols, check::PostCall> { + + CallDescription AppendFn, AssignFn, ClearFn, CStrFn, DataFn, EraseFn, + InsertFn, PopBackFn, PushBackFn, ReplaceFn, ReserveFn, ResizeFn, + ShrinkToFitFn, SwapFn; + +public: + class InnerPointerBRVisitor : public BugReporterVisitor { + SymbolRef PtrToBuf; + + public: + InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {} + + static void *getTag() { + static int Tag = 0; + return &Tag; + } + + void Profile(llvm::FoldingSetNodeID &ID) const override { + ID.AddPointer(getTag()); + } + + virtual PathDiagnosticPieceRef + VisitNode(const ExplodedNode *N, BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + + // FIXME: Scan the map once in the visitor's constructor and do a direct + // lookup by region. + bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) { + RawPtrMapTy Map = State->get<RawPtrMap>(); + for (const auto Entry : Map) { + if (Entry.second.contains(Sym)) + return true; + } + return false; + } + }; + + InnerPointerChecker() + : AppendFn({"std", "basic_string", "append"}), + AssignFn({"std", "basic_string", "assign"}), + ClearFn({"std", "basic_string", "clear"}), + CStrFn({"std", "basic_string", "c_str"}), + DataFn({"std", "basic_string", "data"}), + EraseFn({"std", "basic_string", "erase"}), + InsertFn({"std", "basic_string", "insert"}), + PopBackFn({"std", "basic_string", "pop_back"}), + PushBackFn({"std", "basic_string", "push_back"}), + ReplaceFn({"std", "basic_string", "replace"}), + ReserveFn({"std", "basic_string", "reserve"}), + ResizeFn({"std", "basic_string", "resize"}), + ShrinkToFitFn({"std", "basic_string", "shrink_to_fit"}), + SwapFn({"std", "basic_string", "swap"}) {} + + /// Check whether the called member function potentially invalidates + /// pointers referring to the container object's 
inner buffer. + bool isInvalidatingMemberFunction(const CallEvent &Call) const; + + /// Mark pointer symbols associated with the given memory region released + /// in the program state. + void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State, + const MemRegion *ObjRegion, + CheckerContext &C) const; + + /// Standard library functions that take a non-const `basic_string` argument by + /// reference may invalidate its inner pointers. Check for these cases and + /// mark the pointers released. + void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State, + CheckerContext &C) const; + + /// Record the connection between raw pointers referring to a container + /// object's inner buffer and the object's memory region in the program state. + /// Mark potentially invalidated pointers released. + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + + /// Clean up the program state map. + void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; +}; + +} // end anonymous namespace + +bool InnerPointerChecker::isInvalidatingMemberFunction( + const CallEvent &Call) const { + if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(&Call)) { + OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator(); + if (Opc == OO_Equal || Opc == OO_PlusEqual) + return true; + return false; + } + return (isa<CXXDestructorCall>(Call) || Call.isCalled(AppendFn) || + Call.isCalled(AssignFn) || Call.isCalled(ClearFn) || + Call.isCalled(EraseFn) || Call.isCalled(InsertFn) || + Call.isCalled(PopBackFn) || Call.isCalled(PushBackFn) || + Call.isCalled(ReplaceFn) || Call.isCalled(ReserveFn) || + Call.isCalled(ResizeFn) || Call.isCalled(ShrinkToFitFn) || + Call.isCalled(SwapFn)); +} + +void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call, + ProgramStateRef State, + const MemRegion *MR, + CheckerContext &C) const { + if (const PtrSet *PS = State->get<RawPtrMap>(MR)) { + const Expr *Origin = 
Call.getOriginExpr(); + for (const auto Symbol : *PS) { + // NOTE: `Origin` may be null, and will be stored so in the symbol's + // `RefState` in MallocChecker's `RegionState` program state map. + State = allocation_state::markReleased(State, Symbol, Origin); + } + State = State->remove<RawPtrMap>(MR); + C.addTransition(State); + return; + } +} + +void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call, + ProgramStateRef State, + CheckerContext &C) const { + if (const auto *FC = dyn_cast<AnyFunctionCall>(&Call)) { + const FunctionDecl *FD = FC->getDecl(); + if (!FD || !FD->isInStdNamespace()) + return; + + for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) { + QualType ParamTy = FD->getParamDecl(I)->getType(); + if (!ParamTy->isReferenceType() || + ParamTy->getPointeeType().isConstQualified()) + continue; + + // In case of member operator calls, `this` is counted as an + // argument but not as a parameter. + bool isaMemberOpCall = isa<CXXMemberOperatorCall>(FC); + unsigned ArgI = isaMemberOpCall ? I+1 : I; + + SVal Arg = FC->getArgSVal(ArgI); + const auto *ArgRegion = + dyn_cast_or_null<TypedValueRegion>(Arg.getAsRegion()); + if (!ArgRegion) + continue; + + markPtrSymbolsReleased(Call, State, ArgRegion, C); + } + } +} + +// [string.require] +// +// "References, pointers, and iterators referring to the elements of a +// basic_string sequence may be invalidated by the following uses of that +// basic_string object: +// +// -- As an argument to any standard library function taking a reference +// to non-const basic_string as an argument. For example, as an argument to +// non-member functions swap(), operator>>(), and getline(), or as an argument +// to basic_string::swap(). +// +// -- Calling non-const member functions, except operator[], at, front, back, +// begin, rbegin, end, and rend." 
+ +void InnerPointerChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + if (const auto *ICall = dyn_cast<CXXInstanceCall>(&Call)) { + // TODO: Do we need these to be typed? + const auto *ObjRegion = dyn_cast_or_null<TypedValueRegion>( + ICall->getCXXThisVal().getAsRegion()); + if (!ObjRegion) + return; + + if (Call.isCalled(CStrFn) || Call.isCalled(DataFn)) { + SVal RawPtr = Call.getReturnValue(); + if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) { + // Start tracking this raw pointer by adding it to the set of symbols + // associated with this container object in the program state map. + + PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); + const PtrSet *SetPtr = State->get<RawPtrMap>(ObjRegion); + PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet(); + assert(C.wasInlined || !Set.contains(Sym)); + Set = F.add(Set, Sym); + + State = State->set<RawPtrMap>(ObjRegion, Set); + C.addTransition(State); + } + return; + } + + // Check [string.require] / second point. + if (isInvalidatingMemberFunction(Call)) { + markPtrSymbolsReleased(Call, State, ObjRegion, C); + return; + } + } + + // Check [string.require] / first point. + checkFunctionArguments(Call, State, C); +} + +void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>(); + RawPtrMapTy RPM = State->get<RawPtrMap>(); + for (const auto Entry : RPM) { + if (!SymReaper.isLiveRegion(Entry.first)) { + // Due to incomplete destructor support, some dead regions might + // remain in the program state map. Clean them up. 
+ State = State->remove<RawPtrMap>(Entry.first); + } + if (const PtrSet *OldSet = State->get<RawPtrMap>(Entry.first)) { + PtrSet CleanedUpSet = *OldSet; + for (const auto Symbol : Entry.second) { + if (!SymReaper.isLive(Symbol)) + CleanedUpSet = F.remove(CleanedUpSet, Symbol); + } + State = CleanedUpSet.isEmpty() + ? State->remove<RawPtrMap>(Entry.first) + : State->set<RawPtrMap>(Entry.first, CleanedUpSet); + } + } + C.addTransition(State); +} + +namespace clang { +namespace ento { +namespace allocation_state { + +std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) { + return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(Sym); +} + +const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) { + RawPtrMapTy Map = State->get<RawPtrMap>(); + for (const auto Entry : Map) { + if (Entry.second.contains(Sym)) { + return Entry.first; + } + } + return nullptr; +} + +} // end namespace allocation_state +} // end namespace ento +} // end namespace clang + +PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode( + const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) { + if (!isSymbolTracked(N->getState(), PtrToBuf) || + isSymbolTracked(N->getFirstPred()->getState(), PtrToBuf)) + return nullptr; + + const Stmt *S = N->getStmtForDiagnostics(); + if (!S) + return nullptr; + + const MemRegion *ObjRegion = + allocation_state::getContainerObjRegion(N->getState(), PtrToBuf); + const auto *TypedRegion = cast<TypedValueRegion>(ObjRegion); + QualType ObjTy = TypedRegion->getValueType(); + + SmallString<256> Buf; + llvm::raw_svector_ostream OS(Buf); + OS << "Pointer to inner buffer of '" << ObjTy.getAsString() + << "' obtained here"; + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true); +} + +void ento::registerInnerPointerChecker(CheckerManager &Mgr) { + 
registerInnerPointerCheckerAux(Mgr); + Mgr.registerChecker<InnerPointerChecker>(); +} + +bool ento::shouldRegisterInnerPointerChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h b/clang/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h new file mode 100644 index 000000000000..9642588d6a41 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h @@ -0,0 +1,23 @@ +//==--- InterCheckerAPI.h ---------------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file allows introduction of checker dependencies. It contains APIs for +// inter-checker communications. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_INTERCHECKERAPI_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_INTERCHECKERAPI_H +namespace clang { +class CheckerManager; + +namespace ento { + +/// Register the part of MallocChecker connected to InnerPointerChecker. +void registerInnerPointerCheckerAux(CheckerManager &Mgr); + +}} +#endif /* INTERCHECKERAPI_H_ */ diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp new file mode 100644 index 000000000000..97ace68569ef --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp @@ -0,0 +1,2390 @@ +//===-- IteratorChecker.cpp ---------------------------------------*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines a checker for using iterators outside their range (past end). Usage
// means here dereferencing, incrementing etc.
//
//===----------------------------------------------------------------------===//
//
// In the code, an iterator can be represented as a:
// * type-I: typedef-ed pointer. Operations over such iterator, such as
//           comparisons or increments, are modeled straightforwardly by the
//           analyzer.
// * type-II: structure with its method bodies available. Operations over such
//            iterator are inlined by the analyzer, and results of modeling
//            these operations are exposing implementation details of the
//            iterators, which is not necessarily helping.
// * type-III: completely opaque structure. Operations over such iterator are
//             modeled conservatively, producing conjured symbols everywhere.
//
// To handle all these types in a common way we introduce a structure called
// IteratorPosition which is an abstraction of the position the iterator
// represents using symbolic expressions. The checker handles all the
// operations on this structure.
//
// Additionally, depending on the circumstances, iterators of types II and III
// can be represented as:
// * type-IIa, type-IIIa: conjured structure symbols - when returned by value
//                        from conservatively evaluated methods such as
//                        `.begin()`.
// * type-IIb, type-IIIb: memory regions of iterator-typed objects, such as
//                        variables or temporaries, when the iterator object is
//                        currently treated as an lvalue.
// * type-IIc, type-IIIc: compound values of iterator-typed objects, when the
//                        iterator object is treated as an rvalue taken of a
//                        particular lvalue, e.g. a copy of "type-a" iterator
//                        object, or an iterator that existed before the
//                        analysis has started.
//
// To handle any of these three different representations stored in an SVal we
// use setter and getter functions which separate the three cases. To store
// them we use a pointer union of symbol and memory region.
//
// The checker works the following way: We record the begin and the
// past-end iterator for all containers whenever their `.begin()` and `.end()`
// are called. Since the Constraint Manager cannot handle such SVals we need
// to take over its role. We post-check equality and non-equality comparisons
// and record that the two sides are equal if we are in the 'equal' branch
// (true-branch for `==` and false-branch for `!=`).
//
// In case of type-I or type-II iterators we get a concrete integer as a result
// of the comparison (1 or 0) but in case of type-III we only get a Symbol. In
// this latter case we record the symbol and reload it in evalAssume() and do
// the propagation there. We also handle (maybe double) negated comparisons
// which are represented in the form of (x == 0 or x != 0) where x is the
// comparison itself.
//
// Since `SimpleConstraintManager` cannot handle complex symbolic expressions
// we only use expressions of the format S, S+n or S-n for iterator positions
// where S is a conjured symbol and n is an unsigned concrete integer. When
// making an assumption e.g. `S1 + n == S2 + m` we store `S1 - S2 == m - n` as
// a constraint which we later retrieve when doing an actual comparison.
+ +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h" + +#include <utility> + +using namespace clang; +using namespace ento; + +namespace { + +// Abstract position of an iterator. This helps to handle all three kinds +// of operators in a common way by using a symbolic position. +struct IteratorPosition { +private: + + // Container the iterator belongs to + const MemRegion *Cont; + + // Whether iterator is valid + const bool Valid; + + // Abstract offset + const SymbolRef Offset; + + IteratorPosition(const MemRegion *C, bool V, SymbolRef Of) + : Cont(C), Valid(V), Offset(Of) {} + +public: + const MemRegion *getContainer() const { return Cont; } + bool isValid() const { return Valid; } + SymbolRef getOffset() const { return Offset; } + + IteratorPosition invalidate() const { + return IteratorPosition(Cont, false, Offset); + } + + static IteratorPosition getPosition(const MemRegion *C, SymbolRef Of) { + return IteratorPosition(C, true, Of); + } + + IteratorPosition setTo(SymbolRef NewOf) const { + return IteratorPosition(Cont, Valid, NewOf); + } + + IteratorPosition reAssign(const MemRegion *NewCont) const { + return IteratorPosition(NewCont, Valid, Offset); + } + + bool operator==(const IteratorPosition &X) const { + return Cont == X.Cont && Valid == X.Valid && Offset == X.Offset; + } + + bool operator!=(const IteratorPosition &X) const { + return Cont != X.Cont || Valid != X.Valid || Offset != X.Offset; + } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddPointer(Cont); + ID.AddInteger(Valid); + ID.Add(Offset); + } +}; + +// Structure to record the symbolic begin and end position of a container 
struct ContainerData {
private:
  // Either symbol may be null: fromBegin() and fromEnd() each leave the other
  // one unset.
  const SymbolRef Begin, End;

  ContainerData(SymbolRef B, SymbolRef E) : Begin(B), End(E) {}

public:
  // Record with only the begin symbol set.
  static ContainerData fromBegin(SymbolRef B) {
    return ContainerData(B, nullptr);
  }

  // Record with only the end symbol set.
  static ContainerData fromEnd(SymbolRef E) {
    return ContainerData(nullptr, E);
  }

  SymbolRef getBegin() const { return Begin; }
  SymbolRef getEnd() const { return End; }

  // Copy of this record with the begin symbol replaced.
  ContainerData newBegin(SymbolRef B) const { return ContainerData(B, End); }

  // Copy of this record with the end symbol replaced.
  ContainerData newEnd(SymbolRef E) const { return ContainerData(Begin, E); }

  bool operator==(const ContainerData &X) const {
    return Begin == X.Begin && End == X.End;
  }

  bool operator!=(const ContainerData &X) const {
    return Begin != X.Begin || End != X.End;
  }

  // Folds both symbols into the folding-set ID.
  void Profile(llvm::FoldingSetNodeID &ID) const {
    ID.Add(Begin);
    ID.Add(End);
  }
};

// Checker class for the iterator checks enumerated in CheckKind below. Only
// declarations are visible in this part of the file; the member definitions
// live further down.
class IteratorChecker
    : public Checker<check::PreCall, check::PostCall,
                     check::PostStmt<MaterializeTemporaryExpr>, check::Bind,
                     check::LiveSymbols, check::DeadSymbols> {

  // One bug type per kind of report (see the report*Bug members below).
  std::unique_ptr<BugType> OutOfRangeBugType;
  std::unique_ptr<BugType> MismatchedBugType;
  std::unique_ptr<BugType> InvalidatedBugType;

  // Private helpers, grouped by name prefix: handle*, verify*, and report*
  // (which take the message, the offending value(s) and an error node).
  void handleComparison(CheckerContext &C, const Expr *CE, const SVal &RetVal,
                        const SVal &LVal, const SVal &RVal,
                        OverloadedOperatorKind Op) const;
  void processComparison(CheckerContext &C, ProgramStateRef State,
                         SymbolRef Sym1, SymbolRef Sym2, const SVal &RetVal,
                         OverloadedOperatorKind Op) const;
  void verifyAccess(CheckerContext &C, const SVal &Val) const;
  void verifyDereference(CheckerContext &C, const SVal &Val) const;
  void handleIncrement(CheckerContext &C, const SVal &RetVal, const SVal &Iter,
                       bool Postfix) const;
  void handleDecrement(CheckerContext &C, const SVal &RetVal, const SVal &Iter,
                       bool Postfix) const;
  void handleRandomIncrOrDecr(CheckerContext &C, OverloadedOperatorKind Op,
                              const SVal &RetVal, const SVal &LHS,
                              const SVal &RHS) const;
  void handleBegin(CheckerContext &C, const Expr *CE, const SVal &RetVal,
                   const SVal &Cont) const;
  void handleEnd(CheckerContext &C, const Expr *CE, const SVal &RetVal,
                 const SVal &Cont) const;
  void assignToContainer(CheckerContext &C, const Expr *CE, const SVal &RetVal,
                         const MemRegion *Cont) const;
  void handleAssign(CheckerContext &C, const SVal &Cont,
                    const Expr *CE = nullptr,
                    const SVal &OldCont = UndefinedVal()) const;
  void handleClear(CheckerContext &C, const SVal &Cont) const;
  void handlePushBack(CheckerContext &C, const SVal &Cont) const;
  void handlePopBack(CheckerContext &C, const SVal &Cont) const;
  void handlePushFront(CheckerContext &C, const SVal &Cont) const;
  void handlePopFront(CheckerContext &C, const SVal &Cont) const;
  void handleInsert(CheckerContext &C, const SVal &Iter) const;
  void handleErase(CheckerContext &C, const SVal &Iter) const;
  void handleErase(CheckerContext &C, const SVal &Iter1,
                   const SVal &Iter2) const;
  void handleEraseAfter(CheckerContext &C, const SVal &Iter) const;
  void handleEraseAfter(CheckerContext &C, const SVal &Iter1,
                        const SVal &Iter2) const;
  void verifyIncrement(CheckerContext &C, const SVal &Iter) const;
  void verifyDecrement(CheckerContext &C, const SVal &Iter) const;
  void verifyRandomIncrOrDecr(CheckerContext &C, OverloadedOperatorKind Op,
                              const SVal &LHS, const SVal &RHS) const;
  void verifyMatch(CheckerContext &C, const SVal &Iter,
                   const MemRegion *Cont) const;
  void verifyMatch(CheckerContext &C, const SVal &Iter1,
                   const SVal &Iter2) const;
  IteratorPosition advancePosition(CheckerContext &C, OverloadedOperatorKind Op,
                                   const IteratorPosition &Pos,
                                   const SVal &Distance) const;
  void reportOutOfRangeBug(const StringRef &Message, const SVal &Val,
                           CheckerContext &C, ExplodedNode *ErrNode) const;
  void reportMismatchedBug(const StringRef &Message, const SVal &Val1,
                           const SVal &Val2, CheckerContext &C,
                           ExplodedNode *ErrNode) const;
  void reportMismatchedBug(const StringRef &Message, const SVal &Val,
                           const MemRegion *Reg, CheckerContext &C,
                           ExplodedNode *ErrNode) const;
  void reportInvalidatedBug(const StringRef &Message, const SVal &Val,
                            CheckerContext &C, ExplodedNode *ErrNode) const;

public:
  IteratorChecker();

  // Sub-checks hosted by this class; CK_NumCheckKinds sizes the arrays below.
  enum CheckKind {
    CK_IteratorRangeChecker,
    CK_MismatchedIteratorChecker,
    CK_InvalidatedIteratorChecker,
    CK_NumCheckKinds
  };

  // Per-sub-check enabled flag and name, indexed by CheckKind.
  DefaultBool ChecksEnabled[CK_NumCheckKinds];
  CheckerNameRef CheckNames[CK_NumCheckKinds];

  // Checker callbacks.
  // NOTE(review): the PostStmt overloads for CXXConstructExpr and DeclStmt are
  // declared here, but only PostStmt<MaterializeTemporaryExpr> appears in the
  // Checker<> base list above - confirm whether they are still needed.
  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
  void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const;
  void checkPostStmt(const CXXConstructExpr *CCE, CheckerContext &C) const;
  void checkPostStmt(const DeclStmt *DS, CheckerContext &C) const;
  void checkPostStmt(const MaterializeTemporaryExpr *MTE,
                     CheckerContext &C) const;
  void checkLiveSymbols(ProgramStateRef State, SymbolReaper &SR) const;
  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
};
} // namespace

// Program-state maps: iterator positions keyed by symbol and by memory region,
// and container data keyed by the container's region.
REGISTER_MAP_WITH_PROGRAMSTATE(IteratorSymbolMap, SymbolRef, IteratorPosition)
REGISTER_MAP_WITH_PROGRAMSTATE(IteratorRegionMap, const MemRegion *,
                               IteratorPosition)

REGISTER_MAP_WITH_PROGRAMSTATE(ContainerMap, const MemRegion *, ContainerData)

namespace {

// Forward declarations of file-local helpers; their definitions are not
// visible in this part of the file.
bool isIteratorType(const QualType &Type);
bool isIterator(const CXXRecordDecl *CRD);
bool isComparisonOperator(OverloadedOperatorKind OK);
bool isBeginCall(const FunctionDecl *Func);
bool isEndCall(const FunctionDecl *Func);
bool isAssignCall(const FunctionDecl *Func);
bool isClearCall(const FunctionDecl *Func);
bool isPushBackCall(const FunctionDecl *Func);
bool isEmplaceBackCall(const FunctionDecl *Func);
bool isPopBackCall(const FunctionDecl *Func);
bool isPushFrontCall(const FunctionDecl *Func);
bool isEmplaceFrontCall(const FunctionDecl *Func);
bool isPopFrontCall(const FunctionDecl *Func);
+bool isInsertCall(const FunctionDecl *Func); +bool isEraseCall(const FunctionDecl *Func); +bool isEraseAfterCall(const FunctionDecl *Func); +bool isEmplaceCall(const FunctionDecl *Func); +bool isAssignmentOperator(OverloadedOperatorKind OK); +bool isSimpleComparisonOperator(OverloadedOperatorKind OK); +bool isAccessOperator(OverloadedOperatorKind OK); +bool isDereferenceOperator(OverloadedOperatorKind OK); +bool isIncrementOperator(OverloadedOperatorKind OK); +bool isDecrementOperator(OverloadedOperatorKind OK); +bool isRandomIncrOrDecrOperator(OverloadedOperatorKind OK); +bool hasSubscriptOperator(ProgramStateRef State, const MemRegion *Reg); +bool frontModifiable(ProgramStateRef State, const MemRegion *Reg); +bool backModifiable(ProgramStateRef State, const MemRegion *Reg); +SymbolRef getContainerBegin(ProgramStateRef State, const MemRegion *Cont); +SymbolRef getContainerEnd(ProgramStateRef State, const MemRegion *Cont); +ProgramStateRef createContainerBegin(ProgramStateRef State, + const MemRegion *Cont, const Expr *E, + QualType T, const LocationContext *LCtx, + unsigned BlockCount); +ProgramStateRef createContainerEnd(ProgramStateRef State, const MemRegion *Cont, + const Expr *E, QualType T, + const LocationContext *LCtx, + unsigned BlockCount); +const IteratorPosition *getIteratorPosition(ProgramStateRef State, + const SVal &Val); +ProgramStateRef setIteratorPosition(ProgramStateRef State, const SVal &Val, + const IteratorPosition &Pos); +ProgramStateRef removeIteratorPosition(ProgramStateRef State, const SVal &Val); +ProgramStateRef assumeNoOverflow(ProgramStateRef State, SymbolRef Sym, + long Scale); +ProgramStateRef invalidateAllIteratorPositions(ProgramStateRef State, + const MemRegion *Cont); +ProgramStateRef +invalidateAllIteratorPositionsExcept(ProgramStateRef State, + const MemRegion *Cont, SymbolRef Offset, + BinaryOperator::Opcode Opc); +ProgramStateRef invalidateIteratorPositions(ProgramStateRef State, + SymbolRef Offset, + BinaryOperator::Opcode 
Opc); +ProgramStateRef invalidateIteratorPositions(ProgramStateRef State, + SymbolRef Offset1, + BinaryOperator::Opcode Opc1, + SymbolRef Offset2, + BinaryOperator::Opcode Opc2); +ProgramStateRef reassignAllIteratorPositions(ProgramStateRef State, + const MemRegion *Cont, + const MemRegion *NewCont); +ProgramStateRef reassignAllIteratorPositionsUnless(ProgramStateRef State, + const MemRegion *Cont, + const MemRegion *NewCont, + SymbolRef Offset, + BinaryOperator::Opcode Opc); +ProgramStateRef rebaseSymbolInIteratorPositionsIf( + ProgramStateRef State, SValBuilder &SVB, SymbolRef OldSym, + SymbolRef NewSym, SymbolRef CondSym, BinaryOperator::Opcode Opc); +ProgramStateRef relateSymbols(ProgramStateRef State, SymbolRef Sym1, + SymbolRef Sym2, bool Equal); +const ContainerData *getContainerData(ProgramStateRef State, + const MemRegion *Cont); +ProgramStateRef setContainerData(ProgramStateRef State, const MemRegion *Cont, + const ContainerData &CData); +bool hasLiveIterators(ProgramStateRef State, const MemRegion *Cont); +bool isBoundThroughLazyCompoundVal(const Environment &Env, + const MemRegion *Reg); +bool isPastTheEnd(ProgramStateRef State, const IteratorPosition &Pos); +bool isAheadOfRange(ProgramStateRef State, const IteratorPosition &Pos); +bool isBehindPastTheEnd(ProgramStateRef State, const IteratorPosition &Pos); +bool isZero(ProgramStateRef State, const NonLoc &Val); +} // namespace + +IteratorChecker::IteratorChecker() { + OutOfRangeBugType.reset( + new BugType(this, "Iterator out of range", "Misuse of STL APIs")); + MismatchedBugType.reset( + new BugType(this, "Iterator(s) mismatched", "Misuse of STL APIs", + /*SuppressOnSink=*/true)); + InvalidatedBugType.reset( + new BugType(this, "Iterator invalidated", "Misuse of STL APIs")); +} + +void IteratorChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + // Check for out of range access or access of invalidated position and + // iterator mismatches + const auto *Func = 
dyn_cast_or_null<FunctionDecl>(Call.getDecl()); + if (!Func) + return; + + if (Func->isOverloadedOperator()) { + if (ChecksEnabled[CK_InvalidatedIteratorChecker] && + isAccessOperator(Func->getOverloadedOperator())) { + // Check for any kind of access of invalidated iterator positions + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + verifyAccess(C, InstCall->getCXXThisVal()); + } else { + verifyAccess(C, Call.getArgSVal(0)); + } + } + if (ChecksEnabled[CK_IteratorRangeChecker]) { + if (isIncrementOperator(Func->getOverloadedOperator())) { + // Check for out-of-range incrementions + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + verifyIncrement(C, InstCall->getCXXThisVal()); + } else { + if (Call.getNumArgs() >= 1) { + verifyIncrement(C, Call.getArgSVal(0)); + } + } + } else if (isDecrementOperator(Func->getOverloadedOperator())) { + // Check for out-of-range decrementions + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + verifyDecrement(C, InstCall->getCXXThisVal()); + } else { + if (Call.getNumArgs() >= 1) { + verifyDecrement(C, Call.getArgSVal(0)); + } + } + } else if (isRandomIncrOrDecrOperator(Func->getOverloadedOperator())) { + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + // Check for out-of-range incrementions and decrementions + if (Call.getNumArgs() >= 1 && + Call.getArgExpr(0)->getType()->isIntegralOrEnumerationType()) { + verifyRandomIncrOrDecr(C, Func->getOverloadedOperator(), + InstCall->getCXXThisVal(), + Call.getArgSVal(0)); + } + } else { + if (Call.getNumArgs() >= 2 && + Call.getArgExpr(1)->getType()->isIntegralOrEnumerationType()) { + verifyRandomIncrOrDecr(C, Func->getOverloadedOperator(), + Call.getArgSVal(0), Call.getArgSVal(1)); + } + } + } else if (isDereferenceOperator(Func->getOverloadedOperator())) { + // Check for dereference of out-of-range iterators + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + verifyDereference(C, InstCall->getCXXThisVal()); + 
} else { + verifyDereference(C, Call.getArgSVal(0)); + } + } + } else if (ChecksEnabled[CK_MismatchedIteratorChecker] && + isComparisonOperator(Func->getOverloadedOperator())) { + // Check for comparisons of iterators of different containers + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + if (Call.getNumArgs() < 1) + return; + + if (!isIteratorType(InstCall->getCXXThisExpr()->getType()) || + !isIteratorType(Call.getArgExpr(0)->getType())) + return; + + verifyMatch(C, InstCall->getCXXThisVal(), Call.getArgSVal(0)); + } else { + if (Call.getNumArgs() < 2) + return; + + if (!isIteratorType(Call.getArgExpr(0)->getType()) || + !isIteratorType(Call.getArgExpr(1)->getType())) + return; + + verifyMatch(C, Call.getArgSVal(0), Call.getArgSVal(1)); + } + } + } else if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + if (!ChecksEnabled[CK_MismatchedIteratorChecker]) + return; + + const auto *ContReg = InstCall->getCXXThisVal().getAsRegion(); + if (!ContReg) + return; + // Check for erase, insert and emplace using iterator of another container + if (isEraseCall(Func) || isEraseAfterCall(Func)) { + verifyMatch(C, Call.getArgSVal(0), + InstCall->getCXXThisVal().getAsRegion()); + if (Call.getNumArgs() == 2) { + verifyMatch(C, Call.getArgSVal(1), + InstCall->getCXXThisVal().getAsRegion()); + } + } else if (isInsertCall(Func)) { + verifyMatch(C, Call.getArgSVal(0), + InstCall->getCXXThisVal().getAsRegion()); + if (Call.getNumArgs() == 3 && + isIteratorType(Call.getArgExpr(1)->getType()) && + isIteratorType(Call.getArgExpr(2)->getType())) { + verifyMatch(C, Call.getArgSVal(1), Call.getArgSVal(2)); + } + } else if (isEmplaceCall(Func)) { + verifyMatch(C, Call.getArgSVal(0), + InstCall->getCXXThisVal().getAsRegion()); + } + } else if (isa<CXXConstructorCall>(&Call)) { + // Check match of first-last iterator pair in a constructor of a container + if (Call.getNumArgs() < 2) + return; + + const auto *Ctr = cast<CXXConstructorDecl>(Call.getDecl()); + if 
(Ctr->getNumParams() < 2) + return; + + if (Ctr->getParamDecl(0)->getName() != "first" || + Ctr->getParamDecl(1)->getName() != "last") + return; + + if (!isIteratorType(Call.getArgExpr(0)->getType()) || + !isIteratorType(Call.getArgExpr(1)->getType())) + return; + + verifyMatch(C, Call.getArgSVal(0), Call.getArgSVal(1)); + } else { + // The main purpose of iterators is to abstract away from different + // containers and provide a (maybe limited) uniform access to them. + // This implies that any correctly written template function that + // works on multiple containers using iterators takes different + // template parameters for different containers. So we can safely + // assume that passing iterators of different containers as arguments + // whose type replaces the same template parameter is a bug. + // + // Example: + // template<typename I1, typename I2> + // void f(I1 first1, I1 last1, I2 first2, I2 last2); + // + // In this case the first two arguments to f() must be iterators must belong + // to the same container and the last to also to the same container but + // not necessarily to the same as the first two. 
+ + if (!ChecksEnabled[CK_MismatchedIteratorChecker]) + return; + + const auto *Templ = Func->getPrimaryTemplate(); + if (!Templ) + return; + + const auto *TParams = Templ->getTemplateParameters(); + const auto *TArgs = Func->getTemplateSpecializationArgs(); + + // Iterate over all the template parameters + for (size_t I = 0; I < TParams->size(); ++I) { + const auto *TPDecl = dyn_cast<TemplateTypeParmDecl>(TParams->getParam(I)); + if (!TPDecl) + continue; + + if (TPDecl->isParameterPack()) + continue; + + const auto TAType = TArgs->get(I).getAsType(); + if (!isIteratorType(TAType)) + continue; + + SVal LHS = UndefinedVal(); + + // For every template parameter which is an iterator type in the + // instantiation look for all functions' parameters' type by it and + // check whether they belong to the same container + for (auto J = 0U; J < Func->getNumParams(); ++J) { + const auto *Param = Func->getParamDecl(J); + const auto *ParamType = + Param->getType()->getAs<SubstTemplateTypeParmType>(); + if (!ParamType || + ParamType->getReplacedParameter()->getDecl() != TPDecl) + continue; + if (LHS.isUndef()) { + LHS = Call.getArgSVal(J); + } else { + verifyMatch(C, LHS, Call.getArgSVal(J)); + } + } + } + } +} + +void IteratorChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + // Record new iterator positions and iterator position changes + const auto *Func = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); + if (!Func) + return; + + if (Func->isOverloadedOperator()) { + const auto Op = Func->getOverloadedOperator(); + if (isAssignmentOperator(Op)) { + // Overloaded 'operator=' must be a non-static member function. 
+ const auto *InstCall = cast<CXXInstanceCall>(&Call); + if (cast<CXXMethodDecl>(Func)->isMoveAssignmentOperator()) { + handleAssign(C, InstCall->getCXXThisVal(), Call.getOriginExpr(), + Call.getArgSVal(0)); + return; + } + + handleAssign(C, InstCall->getCXXThisVal()); + return; + } else if (isSimpleComparisonOperator(Op)) { + const auto *OrigExpr = Call.getOriginExpr(); + if (!OrigExpr) + return; + + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + handleComparison(C, OrigExpr, Call.getReturnValue(), + InstCall->getCXXThisVal(), Call.getArgSVal(0), Op); + return; + } + + handleComparison(C, OrigExpr, Call.getReturnValue(), Call.getArgSVal(0), + Call.getArgSVal(1), Op); + return; + } else if (isRandomIncrOrDecrOperator(Func->getOverloadedOperator())) { + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + if (Call.getNumArgs() >= 1 && + Call.getArgExpr(0)->getType()->isIntegralOrEnumerationType()) { + handleRandomIncrOrDecr(C, Func->getOverloadedOperator(), + Call.getReturnValue(), + InstCall->getCXXThisVal(), Call.getArgSVal(0)); + return; + } + } else { + if (Call.getNumArgs() >= 2 && + Call.getArgExpr(1)->getType()->isIntegralOrEnumerationType()) { + handleRandomIncrOrDecr(C, Func->getOverloadedOperator(), + Call.getReturnValue(), Call.getArgSVal(0), + Call.getArgSVal(1)); + return; + } + } + } else if (isIncrementOperator(Func->getOverloadedOperator())) { + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + handleIncrement(C, Call.getReturnValue(), InstCall->getCXXThisVal(), + Call.getNumArgs()); + return; + } + + handleIncrement(C, Call.getReturnValue(), Call.getArgSVal(0), + Call.getNumArgs()); + return; + } else if (isDecrementOperator(Func->getOverloadedOperator())) { + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + handleDecrement(C, Call.getReturnValue(), InstCall->getCXXThisVal(), + Call.getNumArgs()); + return; + } + + handleDecrement(C, Call.getReturnValue(), Call.getArgSVal(0), + 
Call.getNumArgs()); + return; + } + } else { + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + if (isAssignCall(Func)) { + handleAssign(C, InstCall->getCXXThisVal()); + return; + } + + if (isClearCall(Func)) { + handleClear(C, InstCall->getCXXThisVal()); + return; + } + + if (isPushBackCall(Func) || isEmplaceBackCall(Func)) { + handlePushBack(C, InstCall->getCXXThisVal()); + return; + } + + if (isPopBackCall(Func)) { + handlePopBack(C, InstCall->getCXXThisVal()); + return; + } + + if (isPushFrontCall(Func) || isEmplaceFrontCall(Func)) { + handlePushFront(C, InstCall->getCXXThisVal()); + return; + } + + if (isPopFrontCall(Func)) { + handlePopFront(C, InstCall->getCXXThisVal()); + return; + } + + if (isInsertCall(Func) || isEmplaceCall(Func)) { + handleInsert(C, Call.getArgSVal(0)); + return; + } + + if (isEraseCall(Func)) { + if (Call.getNumArgs() == 1) { + handleErase(C, Call.getArgSVal(0)); + return; + } + + if (Call.getNumArgs() == 2) { + handleErase(C, Call.getArgSVal(0), Call.getArgSVal(1)); + return; + } + } + + if (isEraseAfterCall(Func)) { + if (Call.getNumArgs() == 1) { + handleEraseAfter(C, Call.getArgSVal(0)); + return; + } + + if (Call.getNumArgs() == 2) { + handleEraseAfter(C, Call.getArgSVal(0), Call.getArgSVal(1)); + return; + } + } + } + + const auto *OrigExpr = Call.getOriginExpr(); + if (!OrigExpr) + return; + + if (!isIteratorType(Call.getResultType())) + return; + + auto State = C.getState(); + + if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) { + if (isBeginCall(Func)) { + handleBegin(C, OrigExpr, Call.getReturnValue(), + InstCall->getCXXThisVal()); + return; + } + + if (isEndCall(Func)) { + handleEnd(C, OrigExpr, Call.getReturnValue(), + InstCall->getCXXThisVal()); + return; + } + } + + // Already bound to container? 
+ if (getIteratorPosition(State, Call.getReturnValue())) + return; + + // Copy-like and move constructors + if (isa<CXXConstructorCall>(&Call) && Call.getNumArgs() == 1) { + if (const auto *Pos = getIteratorPosition(State, Call.getArgSVal(0))) { + State = setIteratorPosition(State, Call.getReturnValue(), *Pos); + if (cast<CXXConstructorDecl>(Func)->isMoveConstructor()) { + State = removeIteratorPosition(State, Call.getArgSVal(0)); + } + C.addTransition(State); + return; + } + } + + // Assumption: if return value is an iterator which is not yet bound to a + // container, then look for the first iterator argument, and + // bind the return value to the same container. This approach + // works for STL algorithms. + // FIXME: Add a more conservative mode + for (unsigned i = 0; i < Call.getNumArgs(); ++i) { + if (isIteratorType(Call.getArgExpr(i)->getType())) { + if (const auto *Pos = getIteratorPosition(State, Call.getArgSVal(i))) { + assignToContainer(C, OrigExpr, Call.getReturnValue(), + Pos->getContainer()); + return; + } + } + } + } +} + +void IteratorChecker::checkBind(SVal Loc, SVal Val, const Stmt *S, + CheckerContext &C) const { + auto State = C.getState(); + const auto *Pos = getIteratorPosition(State, Val); + if (Pos) { + State = setIteratorPosition(State, Loc, *Pos); + C.addTransition(State); + } else { + const auto *OldPos = getIteratorPosition(State, Loc); + if (OldPos) { + State = removeIteratorPosition(State, Loc); + C.addTransition(State); + } + } +} + +void IteratorChecker::checkPostStmt(const MaterializeTemporaryExpr *MTE, + CheckerContext &C) const { + /* Transfer iterator state to temporary objects */ + auto State = C.getState(); + const auto *Pos = + getIteratorPosition(State, C.getSVal(MTE->GetTemporaryExpr())); + if (!Pos) + return; + State = setIteratorPosition(State, C.getSVal(MTE), *Pos); + C.addTransition(State); +} + +void IteratorChecker::checkLiveSymbols(ProgramStateRef State, + SymbolReaper &SR) const { + // Keep symbolic expressions of 
iterator positions, container begins and ends + // alive + auto RegionMap = State->get<IteratorRegionMap>(); + for (const auto Reg : RegionMap) { + const auto Offset = Reg.second.getOffset(); + for (auto i = Offset->symbol_begin(); i != Offset->symbol_end(); ++i) + if (isa<SymbolData>(*i)) + SR.markLive(*i); + } + + auto SymbolMap = State->get<IteratorSymbolMap>(); + for (const auto Sym : SymbolMap) { + const auto Offset = Sym.second.getOffset(); + for (auto i = Offset->symbol_begin(); i != Offset->symbol_end(); ++i) + if (isa<SymbolData>(*i)) + SR.markLive(*i); + } + + auto ContMap = State->get<ContainerMap>(); + for (const auto Cont : ContMap) { + const auto CData = Cont.second; + if (CData.getBegin()) { + SR.markLive(CData.getBegin()); + if(const auto *SIE = dyn_cast<SymIntExpr>(CData.getBegin())) + SR.markLive(SIE->getLHS()); + } + if (CData.getEnd()) { + SR.markLive(CData.getEnd()); + if(const auto *SIE = dyn_cast<SymIntExpr>(CData.getEnd())) + SR.markLive(SIE->getLHS()); + } + } +} + +void IteratorChecker::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + // Cleanup + auto State = C.getState(); + + auto RegionMap = State->get<IteratorRegionMap>(); + for (const auto Reg : RegionMap) { + if (!SR.isLiveRegion(Reg.first)) { + // The region behind the `LazyCompoundVal` is often cleaned up before + // the `LazyCompoundVal` itself. If there are iterator positions keyed + // by these regions their cleanup must be deferred. 
+ if (!isBoundThroughLazyCompoundVal(State->getEnvironment(), Reg.first)) { + State = State->remove<IteratorRegionMap>(Reg.first); + } + } + } + + auto SymbolMap = State->get<IteratorSymbolMap>(); + for (const auto Sym : SymbolMap) { + if (!SR.isLive(Sym.first)) { + State = State->remove<IteratorSymbolMap>(Sym.first); + } + } + + auto ContMap = State->get<ContainerMap>(); + for (const auto Cont : ContMap) { + if (!SR.isLiveRegion(Cont.first)) { + // We must keep the container data while it has live iterators to be able + // to compare them to the begin and the end of the container. + if (!hasLiveIterators(State, Cont.first)) { + State = State->remove<ContainerMap>(Cont.first); + } + } + } + + C.addTransition(State); +} + +void IteratorChecker::handleComparison(CheckerContext &C, const Expr *CE, + const SVal &RetVal, const SVal &LVal, + const SVal &RVal, + OverloadedOperatorKind Op) const { + // Record the operands and the operator of the comparison for the next + // evalAssume, if the result is a symbolic expression. If it is a concrete + // value (only one branch is possible), then transfer the state between + // the operands according to the operator and the result + auto State = C.getState(); + const auto *LPos = getIteratorPosition(State, LVal); + const auto *RPos = getIteratorPosition(State, RVal); + const MemRegion *Cont = nullptr; + if (LPos) { + Cont = LPos->getContainer(); + } else if (RPos) { + Cont = RPos->getContainer(); + } + if (!Cont) + return; + + // At least one of the iterators have recorded positions. If one of them has + // not then create a new symbol for the offset. 
+ SymbolRef Sym; + if (!LPos || !RPos) { + auto &SymMgr = C.getSymbolManager(); + Sym = SymMgr.conjureSymbol(CE, C.getLocationContext(), + C.getASTContext().LongTy, C.blockCount()); + State = assumeNoOverflow(State, Sym, 4); + } + + if (!LPos) { + State = setIteratorPosition(State, LVal, + IteratorPosition::getPosition(Cont, Sym)); + LPos = getIteratorPosition(State, LVal); + } else if (!RPos) { + State = setIteratorPosition(State, RVal, + IteratorPosition::getPosition(Cont, Sym)); + RPos = getIteratorPosition(State, RVal); + } + + processComparison(C, State, LPos->getOffset(), RPos->getOffset(), RetVal, Op); +} + +void IteratorChecker::processComparison(CheckerContext &C, + ProgramStateRef State, SymbolRef Sym1, + SymbolRef Sym2, const SVal &RetVal, + OverloadedOperatorKind Op) const { + if (const auto TruthVal = RetVal.getAs<nonloc::ConcreteInt>()) { + if ((State = relateSymbols(State, Sym1, Sym2, + (Op == OO_EqualEqual) == + (TruthVal->getValue() != 0)))) { + C.addTransition(State); + } else { + C.generateSink(State, C.getPredecessor()); + } + return; + } + + const auto ConditionVal = RetVal.getAs<DefinedSVal>(); + if (!ConditionVal) + return; + + if (auto StateTrue = relateSymbols(State, Sym1, Sym2, Op == OO_EqualEqual)) { + StateTrue = StateTrue->assume(*ConditionVal, true); + C.addTransition(StateTrue); + } + + if (auto StateFalse = relateSymbols(State, Sym1, Sym2, Op != OO_EqualEqual)) { + StateFalse = StateFalse->assume(*ConditionVal, false); + C.addTransition(StateFalse); + } +} + +void IteratorChecker::verifyDereference(CheckerContext &C, + const SVal &Val) const { + auto State = C.getState(); + const auto *Pos = getIteratorPosition(State, Val); + if (Pos && isPastTheEnd(State, *Pos)) { + auto *N = C.generateErrorNode(State); + if (!N) + return; + reportOutOfRangeBug("Past-the-end iterator dereferenced.", Val, C, N); + return; + } +} + +void IteratorChecker::verifyAccess(CheckerContext &C, const SVal &Val) const { + auto State = C.getState(); + const auto 
*Pos = getIteratorPosition(State, Val); + if (Pos && !Pos->isValid()) { + auto *N = C.generateErrorNode(State); + if (!N) { + return; + } + reportInvalidatedBug("Invalidated iterator accessed.", Val, C, N); + } +} + +void IteratorChecker::handleIncrement(CheckerContext &C, const SVal &RetVal, + const SVal &Iter, bool Postfix) const { + // Increment the symbolic expressions which represents the position of the + // iterator + auto State = C.getState(); + const auto *Pos = getIteratorPosition(State, Iter); + if (Pos) { + auto &SymMgr = C.getSymbolManager(); + auto &BVF = SymMgr.getBasicVals(); + const auto NewPos = + advancePosition(C, OO_Plus, *Pos, + nonloc::ConcreteInt(BVF.getValue(llvm::APSInt::get(1)))); + State = setIteratorPosition(State, Iter, NewPos); + State = setIteratorPosition(State, RetVal, Postfix ? *Pos : NewPos); + C.addTransition(State); + } +} + +void IteratorChecker::handleDecrement(CheckerContext &C, const SVal &RetVal, + const SVal &Iter, bool Postfix) const { + // Decrement the symbolic expressions which represents the position of the + // iterator + auto State = C.getState(); + const auto *Pos = getIteratorPosition(State, Iter); + if (Pos) { + auto &SymMgr = C.getSymbolManager(); + auto &BVF = SymMgr.getBasicVals(); + const auto NewPos = + advancePosition(C, OO_Minus, *Pos, + nonloc::ConcreteInt(BVF.getValue(llvm::APSInt::get(1)))); + State = setIteratorPosition(State, Iter, NewPos); + State = setIteratorPosition(State, RetVal, Postfix ? 
*Pos : NewPos); + C.addTransition(State); + } +} + +void IteratorChecker::handleRandomIncrOrDecr(CheckerContext &C, + OverloadedOperatorKind Op, + const SVal &RetVal, + const SVal &LHS, + const SVal &RHS) const { + // Increment or decrement the symbolic expressions which represents the + // position of the iterator + auto State = C.getState(); + const auto *Pos = getIteratorPosition(State, LHS); + if (!Pos) + return; + + const auto *value = &RHS; + if (auto loc = RHS.getAs<Loc>()) { + const auto val = State->getRawSVal(*loc); + value = &val; + } + + auto &TgtVal = (Op == OO_PlusEqual || Op == OO_MinusEqual) ? LHS : RetVal; + State = + setIteratorPosition(State, TgtVal, advancePosition(C, Op, *Pos, *value)); + C.addTransition(State); +} + +void IteratorChecker::verifyIncrement(CheckerContext &C, + const SVal &Iter) const { + auto &BVF = C.getSValBuilder().getBasicValueFactory(); + verifyRandomIncrOrDecr(C, OO_Plus, Iter, + nonloc::ConcreteInt(BVF.getValue(llvm::APSInt::get(1)))); +} + +void IteratorChecker::verifyDecrement(CheckerContext &C, + const SVal &Iter) const { + auto &BVF = C.getSValBuilder().getBasicValueFactory(); + verifyRandomIncrOrDecr(C, OO_Minus, Iter, + nonloc::ConcreteInt(BVF.getValue(llvm::APSInt::get(1)))); +} + +void IteratorChecker::verifyRandomIncrOrDecr(CheckerContext &C, + OverloadedOperatorKind Op, + const SVal &LHS, + const SVal &RHS) const { + auto State = C.getState(); + + // If the iterator is initially inside its range, then the operation is valid + const auto *Pos = getIteratorPosition(State, LHS); + if (!Pos) + return; + + auto Value = RHS; + if (auto ValAsLoc = RHS.getAs<Loc>()) { + Value = State->getRawSVal(*ValAsLoc); + } + + if (Value.isUnknown()) + return; + + // Incremention or decremention by 0 is never a bug. 
+ if (isZero(State, Value.castAs<NonLoc>())) + return; + + // The result may be the past-end iterator of the container, but any other + // out of range position is undefined behaviour + if (isAheadOfRange(State, advancePosition(C, Op, *Pos, Value))) { + auto *N = C.generateErrorNode(State); + if (!N) + return; + reportOutOfRangeBug("Iterator decremented ahead of its valid range.", LHS, + C, N); + } + if (isBehindPastTheEnd(State, advancePosition(C, Op, *Pos, Value))) { + auto *N = C.generateErrorNode(State); + if (!N) + return; + reportOutOfRangeBug("Iterator incremented behind the past-the-end " + "iterator.", LHS, C, N); + } +} + +void IteratorChecker::verifyMatch(CheckerContext &C, const SVal &Iter, + const MemRegion *Cont) const { + // Verify match between a container and the container of an iterator + Cont = Cont->getMostDerivedObjectRegion(); + + if (const auto *ContSym = Cont->getSymbolicBase()) { + if (isa<SymbolConjured>(ContSym->getSymbol())) + return; + } + + auto State = C.getState(); + const auto *Pos = getIteratorPosition(State, Iter); + if (!Pos) + return; + + const auto *IterCont = Pos->getContainer(); + + // Skip symbolic regions based on conjured symbols. Two conjured symbols + // may or may not be the same. For example, the same function can return + // the same or a different container but we get different conjured symbols + // for each call. This may cause false positives so omit them from the check. 
+ if (const auto *ContSym = IterCont->getSymbolicBase()) { + if (isa<SymbolConjured>(ContSym->getSymbol())) + return; + } + + if (IterCont != Cont) { + auto *N = C.generateNonFatalErrorNode(State); + if (!N) { + return; + } + reportMismatchedBug("Container accessed using foreign iterator argument.", + Iter, Cont, C, N); + } +} + +void IteratorChecker::verifyMatch(CheckerContext &C, const SVal &Iter1, + const SVal &Iter2) const { + // Verify match between the containers of two iterators + auto State = C.getState(); + const auto *Pos1 = getIteratorPosition(State, Iter1); + if (!Pos1) + return; + + const auto *IterCont1 = Pos1->getContainer(); + + // Skip symbolic regions based on conjured symbols. Two conjured symbols + // may or may not be the same. For example, the same function can return + // the same or a different container but we get different conjured symbols + // for each call. This may cause false positives so omit them from the check. + if (const auto *ContSym = IterCont1->getSymbolicBase()) { + if (isa<SymbolConjured>(ContSym->getSymbol())) + return; + } + + const auto *Pos2 = getIteratorPosition(State, Iter2); + if (!Pos2) + return; + + const auto *IterCont2 = Pos2->getContainer(); + if (const auto *ContSym = IterCont2->getSymbolicBase()) { + if (isa<SymbolConjured>(ContSym->getSymbol())) + return; + } + + if (IterCont1 != IterCont2) { + auto *N = C.generateNonFatalErrorNode(State); + if (!N) + return; + reportMismatchedBug("Iterators of different containers used where the " + "same container is expected.", Iter1, Iter2, C, N); + } +} + +void IteratorChecker::handleBegin(CheckerContext &C, const Expr *CE, + const SVal &RetVal, const SVal &Cont) const { + const auto *ContReg = Cont.getAsRegion(); + if (!ContReg) + return; + + ContReg = ContReg->getMostDerivedObjectRegion(); + + // If the container already has a begin symbol then use it. Otherwise first + // create a new one. 
+ auto State = C.getState(); + auto BeginSym = getContainerBegin(State, ContReg); + if (!BeginSym) { + State = createContainerBegin(State, ContReg, CE, C.getASTContext().LongTy, + C.getLocationContext(), C.blockCount()); + BeginSym = getContainerBegin(State, ContReg); + } + State = setIteratorPosition(State, RetVal, + IteratorPosition::getPosition(ContReg, BeginSym)); + C.addTransition(State); +} + +void IteratorChecker::handleEnd(CheckerContext &C, const Expr *CE, + const SVal &RetVal, const SVal &Cont) const { + const auto *ContReg = Cont.getAsRegion(); + if (!ContReg) + return; + + ContReg = ContReg->getMostDerivedObjectRegion(); + + // If the container already has an end symbol then use it. Otherwise first + // create a new one. + auto State = C.getState(); + auto EndSym = getContainerEnd(State, ContReg); + if (!EndSym) { + State = createContainerEnd(State, ContReg, CE, C.getASTContext().LongTy, + C.getLocationContext(), C.blockCount()); + EndSym = getContainerEnd(State, ContReg); + } + State = setIteratorPosition(State, RetVal, + IteratorPosition::getPosition(ContReg, EndSym)); + C.addTransition(State); +} + +void IteratorChecker::assignToContainer(CheckerContext &C, const Expr *CE, + const SVal &RetVal, + const MemRegion *Cont) const { + Cont = Cont->getMostDerivedObjectRegion(); + + auto State = C.getState(); + auto &SymMgr = C.getSymbolManager(); + auto Sym = SymMgr.conjureSymbol(CE, C.getLocationContext(), + C.getASTContext().LongTy, C.blockCount()); + State = assumeNoOverflow(State, Sym, 4); + State = setIteratorPosition(State, RetVal, + IteratorPosition::getPosition(Cont, Sym)); + C.addTransition(State); +} + +void IteratorChecker::handleAssign(CheckerContext &C, const SVal &Cont, + const Expr *CE, const SVal &OldCont) const { + const auto *ContReg = Cont.getAsRegion(); + if (!ContReg) + return; + + ContReg = ContReg->getMostDerivedObjectRegion(); + + // Assignment of a new value to a container always invalidates all its + // iterators + auto State = 
C.getState(); + const auto CData = getContainerData(State, ContReg); + if (CData) { + State = invalidateAllIteratorPositions(State, ContReg); + } + + // In case of move, iterators of the old container (except the past-end + // iterators) remain valid but refer to the new container + if (!OldCont.isUndef()) { + const auto *OldContReg = OldCont.getAsRegion(); + if (OldContReg) { + OldContReg = OldContReg->getMostDerivedObjectRegion(); + const auto OldCData = getContainerData(State, OldContReg); + if (OldCData) { + if (const auto OldEndSym = OldCData->getEnd()) { + // If we already assigned an "end" symbol to the old container, then + // first reassign all iterator positions to the new container which + // are not past the container (thus not greater or equal to the + // current "end" symbol). + State = reassignAllIteratorPositionsUnless(State, OldContReg, ContReg, + OldEndSym, BO_GE); + auto &SymMgr = C.getSymbolManager(); + auto &SVB = C.getSValBuilder(); + // Then generate and assign a new "end" symbol for the new container. + auto NewEndSym = + SymMgr.conjureSymbol(CE, C.getLocationContext(), + C.getASTContext().LongTy, C.blockCount()); + State = assumeNoOverflow(State, NewEndSym, 4); + if (CData) { + State = setContainerData(State, ContReg, CData->newEnd(NewEndSym)); + } else { + State = setContainerData(State, ContReg, + ContainerData::fromEnd(NewEndSym)); + } + // Finally, replace the old "end" symbol in the already reassigned + // iterator positions with the new "end" symbol. + State = rebaseSymbolInIteratorPositionsIf( + State, SVB, OldEndSym, NewEndSym, OldEndSym, BO_LT); + } else { + // There was no "end" symbol assigned yet to the old container, + // so reassign all iterator positions to the new container. 
+ State = reassignAllIteratorPositions(State, OldContReg, ContReg); + } + if (const auto OldBeginSym = OldCData->getBegin()) { + // If we already assigned a "begin" symbol to the old container, then + // assign it to the new container and remove it from the old one. + if (CData) { + State = + setContainerData(State, ContReg, CData->newBegin(OldBeginSym)); + } else { + State = setContainerData(State, ContReg, + ContainerData::fromBegin(OldBeginSym)); + } + State = + setContainerData(State, OldContReg, OldCData->newEnd(nullptr)); + } + } else { + // There was neither "begin" nor "end" symbol assigned yet to the old + // container, so reassign all iterator positions to the new container. + State = reassignAllIteratorPositions(State, OldContReg, ContReg); + } + } + } + C.addTransition(State); +} + +void IteratorChecker::handleClear(CheckerContext &C, const SVal &Cont) const { + const auto *ContReg = Cont.getAsRegion(); + if (!ContReg) + return; + + ContReg = ContReg->getMostDerivedObjectRegion(); + + // The clear() operation invalidates all the iterators, except the past-end + // iterators of list-like containers + auto State = C.getState(); + if (!hasSubscriptOperator(State, ContReg) || + !backModifiable(State, ContReg)) { + const auto CData = getContainerData(State, ContReg); + if (CData) { + if (const auto EndSym = CData->getEnd()) { + State = + invalidateAllIteratorPositionsExcept(State, ContReg, EndSym, BO_GE); + C.addTransition(State); + return; + } + } + } + State = invalidateAllIteratorPositions(State, ContReg); + C.addTransition(State); +} + +void IteratorChecker::handlePushBack(CheckerContext &C, + const SVal &Cont) const { + const auto *ContReg = Cont.getAsRegion(); + if (!ContReg) + return; + + ContReg = ContReg->getMostDerivedObjectRegion(); + + // For deque-like containers invalidate all iterator positions + auto State = C.getState(); + if (hasSubscriptOperator(State, ContReg) && frontModifiable(State, ContReg)) { + State = 
invalidateAllIteratorPositions(State, ContReg); + C.addTransition(State); + return; + } + + const auto CData = getContainerData(State, ContReg); + if (!CData) + return; + + // For vector-like containers invalidate the past-end iterator positions + if (const auto EndSym = CData->getEnd()) { + if (hasSubscriptOperator(State, ContReg)) { + State = invalidateIteratorPositions(State, EndSym, BO_GE); + } + auto &SymMgr = C.getSymbolManager(); + auto &BVF = SymMgr.getBasicVals(); + auto &SVB = C.getSValBuilder(); + const auto newEndSym = + SVB.evalBinOp(State, BO_Add, + nonloc::SymbolVal(EndSym), + nonloc::ConcreteInt(BVF.getValue(llvm::APSInt::get(1))), + SymMgr.getType(EndSym)).getAsSymbol(); + State = setContainerData(State, ContReg, CData->newEnd(newEndSym)); + } + C.addTransition(State); +} + +void IteratorChecker::handlePopBack(CheckerContext &C, const SVal &Cont) const { + const auto *ContReg = Cont.getAsRegion(); + if (!ContReg) + return; + + ContReg = ContReg->getMostDerivedObjectRegion(); + + auto State = C.getState(); + const auto CData = getContainerData(State, ContReg); + if (!CData) + return; + + if (const auto EndSym = CData->getEnd()) { + auto &SymMgr = C.getSymbolManager(); + auto &BVF = SymMgr.getBasicVals(); + auto &SVB = C.getSValBuilder(); + const auto BackSym = + SVB.evalBinOp(State, BO_Sub, + nonloc::SymbolVal(EndSym), + nonloc::ConcreteInt(BVF.getValue(llvm::APSInt::get(1))), + SymMgr.getType(EndSym)).getAsSymbol(); + // For vector-like and deque-like containers invalidate the last and the + // past-end iterator positions. 
For list-like containers only invalidate + // the last position + if (hasSubscriptOperator(State, ContReg) && + backModifiable(State, ContReg)) { + State = invalidateIteratorPositions(State, BackSym, BO_GE); + State = setContainerData(State, ContReg, CData->newEnd(nullptr)); + } else { + State = invalidateIteratorPositions(State, BackSym, BO_EQ); + } + auto newEndSym = BackSym; + State = setContainerData(State, ContReg, CData->newEnd(newEndSym)); + C.addTransition(State); + } +} + +void IteratorChecker::handlePushFront(CheckerContext &C, + const SVal &Cont) const { + const auto *ContReg = Cont.getAsRegion(); + if (!ContReg) + return; + + ContReg = ContReg->getMostDerivedObjectRegion(); + + // For deque-like containers invalidate all iterator positions + auto State = C.getState(); + if (hasSubscriptOperator(State, ContReg)) { + State = invalidateAllIteratorPositions(State, ContReg); + C.addTransition(State); + } else { + const auto CData = getContainerData(State, ContReg); + if (!CData) + return; + + if (const auto BeginSym = CData->getBegin()) { + auto &SymMgr = C.getSymbolManager(); + auto &BVF = SymMgr.getBasicVals(); + auto &SVB = C.getSValBuilder(); + const auto newBeginSym = + SVB.evalBinOp(State, BO_Sub, + nonloc::SymbolVal(BeginSym), + nonloc::ConcreteInt(BVF.getValue(llvm::APSInt::get(1))), + SymMgr.getType(BeginSym)).getAsSymbol(); + State = setContainerData(State, ContReg, CData->newBegin(newBeginSym)); + C.addTransition(State); + } + } +} + +void IteratorChecker::handlePopFront(CheckerContext &C, + const SVal &Cont) const { + const auto *ContReg = Cont.getAsRegion(); + if (!ContReg) + return; + + ContReg = ContReg->getMostDerivedObjectRegion(); + + auto State = C.getState(); + const auto CData = getContainerData(State, ContReg); + if (!CData) + return; + + // For deque-like containers invalidate all iterator positions. 
For list-like + // iterators only invalidate the first position + if (const auto BeginSym = CData->getBegin()) { + if (hasSubscriptOperator(State, ContReg)) { + State = invalidateIteratorPositions(State, BeginSym, BO_LE); + } else { + State = invalidateIteratorPositions(State, BeginSym, BO_EQ); + } + auto &SymMgr = C.getSymbolManager(); + auto &BVF = SymMgr.getBasicVals(); + auto &SVB = C.getSValBuilder(); + const auto newBeginSym = + SVB.evalBinOp(State, BO_Add, + nonloc::SymbolVal(BeginSym), + nonloc::ConcreteInt(BVF.getValue(llvm::APSInt::get(1))), + SymMgr.getType(BeginSym)).getAsSymbol(); + State = setContainerData(State, ContReg, CData->newBegin(newBeginSym)); + C.addTransition(State); + } +} + +void IteratorChecker::handleInsert(CheckerContext &C, const SVal &Iter) const { + auto State = C.getState(); + const auto *Pos = getIteratorPosition(State, Iter); + if (!Pos) + return; + + // For deque-like containers invalidate all iterator positions. For + // vector-like containers invalidate iterator positions after the insertion. + const auto *Cont = Pos->getContainer(); + if (hasSubscriptOperator(State, Cont) && backModifiable(State, Cont)) { + if (frontModifiable(State, Cont)) { + State = invalidateAllIteratorPositions(State, Cont); + } else { + State = invalidateIteratorPositions(State, Pos->getOffset(), BO_GE); + } + if (const auto *CData = getContainerData(State, Cont)) { + if (const auto EndSym = CData->getEnd()) { + State = invalidateIteratorPositions(State, EndSym, BO_GE); + State = setContainerData(State, Cont, CData->newEnd(nullptr)); + } + } + C.addTransition(State); + } +} + +void IteratorChecker::handleErase(CheckerContext &C, const SVal &Iter) const { + auto State = C.getState(); + const auto *Pos = getIteratorPosition(State, Iter); + if (!Pos) + return; + + // For deque-like containers invalidate all iterator positions. For + // vector-like containers invalidate iterator positions at and after the + // deletion. 
For list-like containers only invalidate the deleted position. + const auto *Cont = Pos->getContainer(); + if (hasSubscriptOperator(State, Cont) && backModifiable(State, Cont)) { + if (frontModifiable(State, Cont)) { + State = invalidateAllIteratorPositions(State, Cont); + } else { + State = invalidateIteratorPositions(State, Pos->getOffset(), BO_GE); + } + if (const auto *CData = getContainerData(State, Cont)) { + if (const auto EndSym = CData->getEnd()) { + State = invalidateIteratorPositions(State, EndSym, BO_GE); + State = setContainerData(State, Cont, CData->newEnd(nullptr)); + } + } + } else { + State = invalidateIteratorPositions(State, Pos->getOffset(), BO_EQ); + } + C.addTransition(State); +} + +void IteratorChecker::handleErase(CheckerContext &C, const SVal &Iter1, + const SVal &Iter2) const { + auto State = C.getState(); + const auto *Pos1 = getIteratorPosition(State, Iter1); + const auto *Pos2 = getIteratorPosition(State, Iter2); + if (!Pos1 || !Pos2) + return; + + // For deque-like containers invalidate all iterator positions. For + // vector-like containers invalidate iterator positions at and after the + // deletion range. For list-like containers only invalidate the deleted + // position range [first..last]. 
+ const auto *Cont = Pos1->getContainer(); + if (hasSubscriptOperator(State, Cont) && backModifiable(State, Cont)) { + if (frontModifiable(State, Cont)) { + State = invalidateAllIteratorPositions(State, Cont); + } else { + State = invalidateIteratorPositions(State, Pos1->getOffset(), BO_GE); + } + if (const auto *CData = getContainerData(State, Cont)) { + if (const auto EndSym = CData->getEnd()) { + State = invalidateIteratorPositions(State, EndSym, BO_GE); + State = setContainerData(State, Cont, CData->newEnd(nullptr)); + } + } + } else { + State = invalidateIteratorPositions(State, Pos1->getOffset(), BO_GE, + Pos2->getOffset(), BO_LT); + } + C.addTransition(State); +} + +void IteratorChecker::handleEraseAfter(CheckerContext &C, + const SVal &Iter) const { + auto State = C.getState(); + const auto *Pos = getIteratorPosition(State, Iter); + if (!Pos) + return; + + // Invalidate the deleted iterator position, which is the position of the + // parameter plus one. + auto &SymMgr = C.getSymbolManager(); + auto &BVF = SymMgr.getBasicVals(); + auto &SVB = C.getSValBuilder(); + const auto NextSym = + SVB.evalBinOp(State, BO_Add, + nonloc::SymbolVal(Pos->getOffset()), + nonloc::ConcreteInt(BVF.getValue(llvm::APSInt::get(1))), + SymMgr.getType(Pos->getOffset())).getAsSymbol(); + State = invalidateIteratorPositions(State, NextSym, BO_EQ); + C.addTransition(State); +} + +void IteratorChecker::handleEraseAfter(CheckerContext &C, const SVal &Iter1, + const SVal &Iter2) const { + auto State = C.getState(); + const auto *Pos1 = getIteratorPosition(State, Iter1); + const auto *Pos2 = getIteratorPosition(State, Iter2); + if (!Pos1 || !Pos2) + return; + + // Invalidate the deleted iterator position range (first..last) + State = invalidateIteratorPositions(State, Pos1->getOffset(), BO_GT, + Pos2->getOffset(), BO_LT); + C.addTransition(State); +} + +IteratorPosition IteratorChecker::advancePosition(CheckerContext &C, + OverloadedOperatorKind Op, + const IteratorPosition &Pos, + const 
SVal &Distance) const { + auto State = C.getState(); + auto &SymMgr = C.getSymbolManager(); + auto &SVB = C.getSValBuilder(); + + assert ((Op == OO_Plus || Op == OO_PlusEqual || + Op == OO_Minus || Op == OO_MinusEqual) && + "Advance operator must be one of +, -, += and -=."); + auto BinOp = (Op == OO_Plus || Op == OO_PlusEqual) ? BO_Add : BO_Sub; + if (const auto IntDist = Distance.getAs<nonloc::ConcreteInt>()) { + // For concrete integers we can calculate the new position + return Pos.setTo(SVB.evalBinOp(State, BinOp, + nonloc::SymbolVal(Pos.getOffset()), *IntDist, + SymMgr.getType(Pos.getOffset())) + .getAsSymbol()); + } else { + // For other symbols create a new symbol to keep expressions simple + const auto &LCtx = C.getLocationContext(); + const auto NewPosSym = SymMgr.conjureSymbol(nullptr, LCtx, + SymMgr.getType(Pos.getOffset()), + C.blockCount()); + State = assumeNoOverflow(State, NewPosSym, 4); + return Pos.setTo(NewPosSym); + } +} + +void IteratorChecker::reportOutOfRangeBug(const StringRef &Message, + const SVal &Val, CheckerContext &C, + ExplodedNode *ErrNode) const { + auto R = std::make_unique<PathSensitiveBugReport>(*OutOfRangeBugType, Message, + ErrNode); + R->markInteresting(Val); + C.emitReport(std::move(R)); +} + +void IteratorChecker::reportMismatchedBug(const StringRef &Message, + const SVal &Val1, const SVal &Val2, + CheckerContext &C, + ExplodedNode *ErrNode) const { + auto R = std::make_unique<PathSensitiveBugReport>(*MismatchedBugType, Message, + ErrNode); + R->markInteresting(Val1); + R->markInteresting(Val2); + C.emitReport(std::move(R)); +} + +void IteratorChecker::reportMismatchedBug(const StringRef &Message, + const SVal &Val, const MemRegion *Reg, + CheckerContext &C, + ExplodedNode *ErrNode) const { + auto R = std::make_unique<PathSensitiveBugReport>(*MismatchedBugType, Message, + ErrNode); + R->markInteresting(Val); + R->markInteresting(Reg); + C.emitReport(std::move(R)); +} + +void IteratorChecker::reportInvalidatedBug(const 
StringRef &Message, + const SVal &Val, CheckerContext &C, + ExplodedNode *ErrNode) const { + auto R = std::make_unique<PathSensitiveBugReport>(*InvalidatedBugType, + Message, ErrNode); + R->markInteresting(Val); + C.emitReport(std::move(R)); +} + +namespace { + +bool isLess(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2); +bool isGreater(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2); +bool isEqual(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2); +bool compare(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2, + BinaryOperator::Opcode Opc); +bool compare(ProgramStateRef State, NonLoc NL1, NonLoc NL2, + BinaryOperator::Opcode Opc); +const CXXRecordDecl *getCXXRecordDecl(ProgramStateRef State, + const MemRegion *Reg); +SymbolRef rebaseSymbol(ProgramStateRef State, SValBuilder &SVB, SymbolRef Expr, + SymbolRef OldSym, SymbolRef NewSym); + +bool isIteratorType(const QualType &Type) { + if (Type->isPointerType()) + return true; + + const auto *CRD = Type->getUnqualifiedDesugaredType()->getAsCXXRecordDecl(); + return isIterator(CRD); +} + +bool isIterator(const CXXRecordDecl *CRD) { + if (!CRD) + return false; + + const auto Name = CRD->getName(); + if (!(Name.endswith_lower("iterator") || Name.endswith_lower("iter") || + Name.endswith_lower("it"))) + return false; + + bool HasCopyCtor = false, HasCopyAssign = true, HasDtor = false, + HasPreIncrOp = false, HasPostIncrOp = false, HasDerefOp = false; + for (const auto *Method : CRD->methods()) { + if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(Method)) { + if (Ctor->isCopyConstructor()) { + HasCopyCtor = !Ctor->isDeleted() && Ctor->getAccess() == AS_public; + } + continue; + } + if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(Method)) { + HasDtor = !Dtor->isDeleted() && Dtor->getAccess() == AS_public; + continue; + } + if (Method->isCopyAssignmentOperator()) { + HasCopyAssign = !Method->isDeleted() && Method->getAccess() == AS_public; + continue; + } + if (!Method->isOverloadedOperator()) 
+ continue; + const auto OPK = Method->getOverloadedOperator(); + if (OPK == OO_PlusPlus) { + HasPreIncrOp = HasPreIncrOp || (Method->getNumParams() == 0); + HasPostIncrOp = HasPostIncrOp || (Method->getNumParams() == 1); + continue; + } + if (OPK == OO_Star) { + HasDerefOp = (Method->getNumParams() == 0); + continue; + } + } + + return HasCopyCtor && HasCopyAssign && HasDtor && HasPreIncrOp && + HasPostIncrOp && HasDerefOp; +} + +bool isComparisonOperator(OverloadedOperatorKind OK) { + return OK == OO_EqualEqual || OK == OO_ExclaimEqual || OK == OO_Less || + OK == OO_LessEqual || OK == OO_Greater || OK == OO_GreaterEqual; +} + +bool isBeginCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + return IdInfo->getName().endswith_lower("begin"); +} + +bool isEndCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + return IdInfo->getName().endswith_lower("end"); +} + +bool isAssignCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + if (Func->getNumParams() > 2) + return false; + return IdInfo->getName() == "assign"; +} + +bool isClearCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + if (Func->getNumParams() > 0) + return false; + return IdInfo->getName() == "clear"; +} + +bool isPushBackCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + if (Func->getNumParams() != 1) + return false; + return IdInfo->getName() == "push_back"; +} + +bool isEmplaceBackCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + if (Func->getNumParams() < 1) + return false; + return IdInfo->getName() == "emplace_back"; +} + +bool isPopBackCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; 
+ if (Func->getNumParams() > 0) + return false; + return IdInfo->getName() == "pop_back"; +} + +bool isPushFrontCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + if (Func->getNumParams() != 1) + return false; + return IdInfo->getName() == "push_front"; +} + +bool isEmplaceFrontCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + if (Func->getNumParams() < 1) + return false; + return IdInfo->getName() == "emplace_front"; +} + +bool isPopFrontCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + if (Func->getNumParams() > 0) + return false; + return IdInfo->getName() == "pop_front"; +} + +bool isInsertCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + if (Func->getNumParams() < 2 || Func->getNumParams() > 3) + return false; + if (!isIteratorType(Func->getParamDecl(0)->getType())) + return false; + return IdInfo->getName() == "insert"; +} + +bool isEmplaceCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + if (Func->getNumParams() < 2) + return false; + if (!isIteratorType(Func->getParamDecl(0)->getType())) + return false; + return IdInfo->getName() == "emplace"; +} + +bool isEraseCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + if (Func->getNumParams() < 1 || Func->getNumParams() > 2) + return false; + if (!isIteratorType(Func->getParamDecl(0)->getType())) + return false; + if (Func->getNumParams() == 2 && + !isIteratorType(Func->getParamDecl(1)->getType())) + return false; + return IdInfo->getName() == "erase"; +} + +bool isEraseAfterCall(const FunctionDecl *Func) { + const auto *IdInfo = Func->getIdentifier(); + if (!IdInfo) + return false; + if (Func->getNumParams() < 1 || Func->getNumParams() > 2) + return 
false; + if (!isIteratorType(Func->getParamDecl(0)->getType())) + return false; + if (Func->getNumParams() == 2 && + !isIteratorType(Func->getParamDecl(1)->getType())) + return false; + return IdInfo->getName() == "erase_after"; +} + +bool isAssignmentOperator(OverloadedOperatorKind OK) { return OK == OO_Equal; } + +bool isSimpleComparisonOperator(OverloadedOperatorKind OK) { + return OK == OO_EqualEqual || OK == OO_ExclaimEqual; +} + +bool isAccessOperator(OverloadedOperatorKind OK) { + return isDereferenceOperator(OK) || isIncrementOperator(OK) || + isDecrementOperator(OK) || isRandomIncrOrDecrOperator(OK); +} + +bool isDereferenceOperator(OverloadedOperatorKind OK) { + return OK == OO_Star || OK == OO_Arrow || OK == OO_ArrowStar || + OK == OO_Subscript; +} + +bool isIncrementOperator(OverloadedOperatorKind OK) { + return OK == OO_PlusPlus; +} + +bool isDecrementOperator(OverloadedOperatorKind OK) { + return OK == OO_MinusMinus; +} + +bool isRandomIncrOrDecrOperator(OverloadedOperatorKind OK) { + return OK == OO_Plus || OK == OO_PlusEqual || OK == OO_Minus || + OK == OO_MinusEqual; +} + +bool hasSubscriptOperator(ProgramStateRef State, const MemRegion *Reg) { + const auto *CRD = getCXXRecordDecl(State, Reg); + if (!CRD) + return false; + + for (const auto *Method : CRD->methods()) { + if (!Method->isOverloadedOperator()) + continue; + const auto OPK = Method->getOverloadedOperator(); + if (OPK == OO_Subscript) { + return true; + } + } + return false; +} + +bool frontModifiable(ProgramStateRef State, const MemRegion *Reg) { + const auto *CRD = getCXXRecordDecl(State, Reg); + if (!CRD) + return false; + + for (const auto *Method : CRD->methods()) { + if (!Method->getDeclName().isIdentifier()) + continue; + if (Method->getName() == "push_front" || Method->getName() == "pop_front") { + return true; + } + } + return false; +} + +bool backModifiable(ProgramStateRef State, const MemRegion *Reg) { + const auto *CRD = getCXXRecordDecl(State, Reg); + if (!CRD) + return 
false; + + for (const auto *Method : CRD->methods()) { + if (!Method->getDeclName().isIdentifier()) + continue; + if (Method->getName() == "push_back" || Method->getName() == "pop_back") { + return true; + } + } + return false; +} + +const CXXRecordDecl *getCXXRecordDecl(ProgramStateRef State, + const MemRegion *Reg) { + auto TI = getDynamicTypeInfo(State, Reg); + if (!TI.isValid()) + return nullptr; + + auto Type = TI.getType(); + if (const auto *RefT = Type->getAs<ReferenceType>()) { + Type = RefT->getPointeeType(); + } + + return Type->getUnqualifiedDesugaredType()->getAsCXXRecordDecl(); +} + +SymbolRef getContainerBegin(ProgramStateRef State, const MemRegion *Cont) { + const auto *CDataPtr = getContainerData(State, Cont); + if (!CDataPtr) + return nullptr; + + return CDataPtr->getBegin(); +} + +SymbolRef getContainerEnd(ProgramStateRef State, const MemRegion *Cont) { + const auto *CDataPtr = getContainerData(State, Cont); + if (!CDataPtr) + return nullptr; + + return CDataPtr->getEnd(); +} + +ProgramStateRef createContainerBegin(ProgramStateRef State, + const MemRegion *Cont, const Expr *E, + QualType T, const LocationContext *LCtx, + unsigned BlockCount) { + // Only create if it does not exist + const auto *CDataPtr = getContainerData(State, Cont); + if (CDataPtr && CDataPtr->getBegin()) + return State; + + auto &SymMgr = State->getSymbolManager(); + const SymbolConjured *Sym = SymMgr.conjureSymbol(E, LCtx, T, BlockCount, + "begin"); + State = assumeNoOverflow(State, Sym, 4); + + if (CDataPtr) { + const auto CData = CDataPtr->newBegin(Sym); + return setContainerData(State, Cont, CData); + } + + const auto CData = ContainerData::fromBegin(Sym); + return setContainerData(State, Cont, CData); +} + +ProgramStateRef createContainerEnd(ProgramStateRef State, const MemRegion *Cont, + const Expr *E, QualType T, + const LocationContext *LCtx, + unsigned BlockCount) { + // Only create if it does not exist + const auto *CDataPtr = getContainerData(State, Cont); + if 
(CDataPtr && CDataPtr->getEnd()) + return State; + + auto &SymMgr = State->getSymbolManager(); + const SymbolConjured *Sym = SymMgr.conjureSymbol(E, LCtx, T, BlockCount, + "end"); + State = assumeNoOverflow(State, Sym, 4); + + if (CDataPtr) { + const auto CData = CDataPtr->newEnd(Sym); + return setContainerData(State, Cont, CData); + } + + const auto CData = ContainerData::fromEnd(Sym); + return setContainerData(State, Cont, CData); +} + +const ContainerData *getContainerData(ProgramStateRef State, + const MemRegion *Cont) { + return State->get<ContainerMap>(Cont); +} + +ProgramStateRef setContainerData(ProgramStateRef State, const MemRegion *Cont, + const ContainerData &CData) { + return State->set<ContainerMap>(Cont, CData); +} + +const IteratorPosition *getIteratorPosition(ProgramStateRef State, + const SVal &Val) { + if (auto Reg = Val.getAsRegion()) { + Reg = Reg->getMostDerivedObjectRegion(); + return State->get<IteratorRegionMap>(Reg); + } else if (const auto Sym = Val.getAsSymbol()) { + return State->get<IteratorSymbolMap>(Sym); + } else if (const auto LCVal = Val.getAs<nonloc::LazyCompoundVal>()) { + return State->get<IteratorRegionMap>(LCVal->getRegion()); + } + return nullptr; +} + +ProgramStateRef setIteratorPosition(ProgramStateRef State, const SVal &Val, + const IteratorPosition &Pos) { + if (auto Reg = Val.getAsRegion()) { + Reg = Reg->getMostDerivedObjectRegion(); + return State->set<IteratorRegionMap>(Reg, Pos); + } else if (const auto Sym = Val.getAsSymbol()) { + return State->set<IteratorSymbolMap>(Sym, Pos); + } else if (const auto LCVal = Val.getAs<nonloc::LazyCompoundVal>()) { + return State->set<IteratorRegionMap>(LCVal->getRegion(), Pos); + } + return nullptr; +} + +ProgramStateRef removeIteratorPosition(ProgramStateRef State, const SVal &Val) { + if (auto Reg = Val.getAsRegion()) { + Reg = Reg->getMostDerivedObjectRegion(); + return State->remove<IteratorRegionMap>(Reg); + } else if (const auto Sym = Val.getAsSymbol()) { + return 
State->remove<IteratorSymbolMap>(Sym); + } else if (const auto LCVal = Val.getAs<nonloc::LazyCompoundVal>()) { + return State->remove<IteratorRegionMap>(LCVal->getRegion()); + } + return nullptr; +} + +ProgramStateRef relateSymbols(ProgramStateRef State, SymbolRef Sym1, + SymbolRef Sym2, bool Equal) { + auto &SVB = State->getStateManager().getSValBuilder(); + + // FIXME: This code should be reworked as follows: + // 1. Subtract the operands using evalBinOp(). + // 2. Assume that the result doesn't overflow. + // 3. Compare the result to 0. + // 4. Assume the result of the comparison. + const auto comparison = + SVB.evalBinOp(State, BO_EQ, nonloc::SymbolVal(Sym1), + nonloc::SymbolVal(Sym2), SVB.getConditionType()); + + assert(comparison.getAs<DefinedSVal>() && + "Symbol comparison must be a `DefinedSVal`"); + + auto NewState = State->assume(comparison.castAs<DefinedSVal>(), Equal); + if (!NewState) + return nullptr; + + if (const auto CompSym = comparison.getAsSymbol()) { + assert(isa<SymIntExpr>(CompSym) && + "Symbol comparison must be a `SymIntExpr`"); + assert(BinaryOperator::isComparisonOp( + cast<SymIntExpr>(CompSym)->getOpcode()) && + "Symbol comparison must be a comparison"); + return assumeNoOverflow(NewState, cast<SymIntExpr>(CompSym)->getLHS(), 2); + } + + return NewState; +} + +bool hasLiveIterators(ProgramStateRef State, const MemRegion *Cont) { + auto RegionMap = State->get<IteratorRegionMap>(); + for (const auto Reg : RegionMap) { + if (Reg.second.getContainer() == Cont) + return true; + } + + auto SymbolMap = State->get<IteratorSymbolMap>(); + for (const auto Sym : SymbolMap) { + if (Sym.second.getContainer() == Cont) + return true; + } + + return false; +} + +bool isBoundThroughLazyCompoundVal(const Environment &Env, + const MemRegion *Reg) { + for (const auto Binding: Env) { + if (const auto LCVal = Binding.second.getAs<nonloc::LazyCompoundVal>()) { + if (LCVal->getRegion() == Reg) + return true; + } + } + + return false; +} + +// This function 
tells the analyzer's engine that symbols produced by our +// checker, most notably iterator positions, are relatively small. +// A distance between items in the container should not be very large. +// By assuming that it is within around 1/8 of the address space, +// we can help the analyzer perform operations on these symbols +// without being afraid of integer overflows. +// FIXME: Should we provide it as an API, so that all checkers could use it? +ProgramStateRef assumeNoOverflow(ProgramStateRef State, SymbolRef Sym, + long Scale) { + SValBuilder &SVB = State->getStateManager().getSValBuilder(); + BasicValueFactory &BV = SVB.getBasicValueFactory(); + + QualType T = Sym->getType(); + assert(T->isSignedIntegerOrEnumerationType()); + APSIntType AT = BV.getAPSIntType(T); + + ProgramStateRef NewState = State; + + llvm::APSInt Max = AT.getMaxValue() / AT.getValue(Scale); + SVal IsCappedFromAbove = + SVB.evalBinOpNN(State, BO_LE, nonloc::SymbolVal(Sym), + nonloc::ConcreteInt(Max), SVB.getConditionType()); + if (auto DV = IsCappedFromAbove.getAs<DefinedSVal>()) { + NewState = NewState->assume(*DV, true); + if (!NewState) + return State; + } + + llvm::APSInt Min = -Max; + SVal IsCappedFromBelow = + SVB.evalBinOpNN(State, BO_GE, nonloc::SymbolVal(Sym), + nonloc::ConcreteInt(Min), SVB.getConditionType()); + if (auto DV = IsCappedFromBelow.getAs<DefinedSVal>()) { + NewState = NewState->assume(*DV, true); + if (!NewState) + return State; + } + + return NewState; +} + +template <typename Condition, typename Process> +ProgramStateRef processIteratorPositions(ProgramStateRef State, Condition Cond, + Process Proc) { + auto &RegionMapFactory = State->get_context<IteratorRegionMap>(); + auto RegionMap = State->get<IteratorRegionMap>(); + bool Changed = false; + for (const auto Reg : RegionMap) { + if (Cond(Reg.second)) { + RegionMap = RegionMapFactory.add(RegionMap, Reg.first, Proc(Reg.second)); + Changed = true; + } + } + + if (Changed) + State = 
State->set<IteratorRegionMap>(RegionMap); + + auto &SymbolMapFactory = State->get_context<IteratorSymbolMap>(); + auto SymbolMap = State->get<IteratorSymbolMap>(); + Changed = false; + for (const auto Sym : SymbolMap) { + if (Cond(Sym.second)) { + SymbolMap = SymbolMapFactory.add(SymbolMap, Sym.first, Proc(Sym.second)); + Changed = true; + } + } + + if (Changed) + State = State->set<IteratorSymbolMap>(SymbolMap); + + return State; +} + +ProgramStateRef invalidateAllIteratorPositions(ProgramStateRef State, + const MemRegion *Cont) { + auto MatchCont = [&](const IteratorPosition &Pos) { + return Pos.getContainer() == Cont; + }; + auto Invalidate = [&](const IteratorPosition &Pos) { + return Pos.invalidate(); + }; + return processIteratorPositions(State, MatchCont, Invalidate); +} + +ProgramStateRef +invalidateAllIteratorPositionsExcept(ProgramStateRef State, + const MemRegion *Cont, SymbolRef Offset, + BinaryOperator::Opcode Opc) { + auto MatchContAndCompare = [&](const IteratorPosition &Pos) { + return Pos.getContainer() == Cont && + !compare(State, Pos.getOffset(), Offset, Opc); + }; + auto Invalidate = [&](const IteratorPosition &Pos) { + return Pos.invalidate(); + }; + return processIteratorPositions(State, MatchContAndCompare, Invalidate); +} + +ProgramStateRef invalidateIteratorPositions(ProgramStateRef State, + SymbolRef Offset, + BinaryOperator::Opcode Opc) { + auto Compare = [&](const IteratorPosition &Pos) { + return compare(State, Pos.getOffset(), Offset, Opc); + }; + auto Invalidate = [&](const IteratorPosition &Pos) { + return Pos.invalidate(); + }; + return processIteratorPositions(State, Compare, Invalidate); +} + +ProgramStateRef invalidateIteratorPositions(ProgramStateRef State, + SymbolRef Offset1, + BinaryOperator::Opcode Opc1, + SymbolRef Offset2, + BinaryOperator::Opcode Opc2) { + auto Compare = [&](const IteratorPosition &Pos) { + return compare(State, Pos.getOffset(), Offset1, Opc1) && + compare(State, Pos.getOffset(), Offset2, Opc2); + }; + 
auto Invalidate = [&](const IteratorPosition &Pos) { + return Pos.invalidate(); + }; + return processIteratorPositions(State, Compare, Invalidate); +} + +ProgramStateRef reassignAllIteratorPositions(ProgramStateRef State, + const MemRegion *Cont, + const MemRegion *NewCont) { + auto MatchCont = [&](const IteratorPosition &Pos) { + return Pos.getContainer() == Cont; + }; + auto ReAssign = [&](const IteratorPosition &Pos) { + return Pos.reAssign(NewCont); + }; + return processIteratorPositions(State, MatchCont, ReAssign); +} + +ProgramStateRef reassignAllIteratorPositionsUnless(ProgramStateRef State, + const MemRegion *Cont, + const MemRegion *NewCont, + SymbolRef Offset, + BinaryOperator::Opcode Opc) { + auto MatchContAndCompare = [&](const IteratorPosition &Pos) { + return Pos.getContainer() == Cont && + !compare(State, Pos.getOffset(), Offset, Opc); + }; + auto ReAssign = [&](const IteratorPosition &Pos) { + return Pos.reAssign(NewCont); + }; + return processIteratorPositions(State, MatchContAndCompare, ReAssign); +} + +// This function rebases symbolic expression `OldSym + Int` to `NewSym + Int`, +// `OldSym - Int` to `NewSym - Int` and `OldSym` to `NewSym` in any iterator +// position offsets where `CondSym` is true. +ProgramStateRef rebaseSymbolInIteratorPositionsIf( + ProgramStateRef State, SValBuilder &SVB, SymbolRef OldSym, + SymbolRef NewSym, SymbolRef CondSym, BinaryOperator::Opcode Opc) { + auto LessThanEnd = [&](const IteratorPosition &Pos) { + return compare(State, Pos.getOffset(), CondSym, Opc); + }; + auto RebaseSymbol = [&](const IteratorPosition &Pos) { + return Pos.setTo(rebaseSymbol(State, SVB, Pos.getOffset(), OldSym, + NewSym)); + }; + return processIteratorPositions(State, LessThanEnd, RebaseSymbol); +} + +// This function rebases symbolic expression `OldExpr + Int` to `NewExpr + Int`, +// `OldExpr - Int` to `NewExpr - Int` and `OldExpr` to `NewExpr` in expression +// `OrigExpr`. 
+SymbolRef rebaseSymbol(ProgramStateRef State, SValBuilder &SVB, + SymbolRef OrigExpr, SymbolRef OldExpr, + SymbolRef NewSym) { + auto &SymMgr = SVB.getSymbolManager(); + auto Diff = SVB.evalBinOpNN(State, BO_Sub, nonloc::SymbolVal(OrigExpr), + nonloc::SymbolVal(OldExpr), + SymMgr.getType(OrigExpr)); + + const auto DiffInt = Diff.getAs<nonloc::ConcreteInt>(); + if (!DiffInt) + return OrigExpr; + + return SVB.evalBinOpNN(State, BO_Add, *DiffInt, nonloc::SymbolVal(NewSym), + SymMgr.getType(OrigExpr)).getAsSymbol(); +} + +bool isZero(ProgramStateRef State, const NonLoc &Val) { + auto &BVF = State->getBasicVals(); + return compare(State, Val, + nonloc::ConcreteInt(BVF.getValue(llvm::APSInt::get(0))), + BO_EQ); +} + +bool isPastTheEnd(ProgramStateRef State, const IteratorPosition &Pos) { + const auto *Cont = Pos.getContainer(); + const auto *CData = getContainerData(State, Cont); + if (!CData) + return false; + + const auto End = CData->getEnd(); + if (End) { + if (isEqual(State, Pos.getOffset(), End)) { + return true; + } + } + + return false; +} + +bool isAheadOfRange(ProgramStateRef State, const IteratorPosition &Pos) { + const auto *Cont = Pos.getContainer(); + const auto *CData = getContainerData(State, Cont); + if (!CData) + return false; + + const auto Beg = CData->getBegin(); + if (Beg) { + if (isLess(State, Pos.getOffset(), Beg)) { + return true; + } + } + + return false; +} + +bool isBehindPastTheEnd(ProgramStateRef State, const IteratorPosition &Pos) { + const auto *Cont = Pos.getContainer(); + const auto *CData = getContainerData(State, Cont); + if (!CData) + return false; + + const auto End = CData->getEnd(); + if (End) { + if (isGreater(State, Pos.getOffset(), End)) { + return true; + } + } + + return false; +} + +bool isLess(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2) { + return compare(State, Sym1, Sym2, BO_LT); +} + +bool isGreater(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2) { + return compare(State, Sym1, Sym2, BO_GT); +} + +bool 
isEqual(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2) { + return compare(State, Sym1, Sym2, BO_EQ); +} + +bool compare(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2, + BinaryOperator::Opcode Opc) { + return compare(State, nonloc::SymbolVal(Sym1), nonloc::SymbolVal(Sym2), Opc); +} + +bool compare(ProgramStateRef State, NonLoc NL1, NonLoc NL2, + BinaryOperator::Opcode Opc) { + auto &SVB = State->getStateManager().getSValBuilder(); + + const auto comparison = + SVB.evalBinOp(State, Opc, NL1, NL2, SVB.getConditionType()); + + assert(comparison.getAs<DefinedSVal>() && + "Symbol comparison must be a `DefinedSVal`"); + + return !State->assume(comparison.castAs<DefinedSVal>(), false); +} + +} // namespace + +void ento::registerIteratorModeling(CheckerManager &mgr) { + mgr.registerChecker<IteratorChecker>(); +} + +bool ento::shouldRegisterIteratorModeling(const LangOptions &LO) { + return true; +} + +#define REGISTER_CHECKER(name) \ + void ento::register##name(CheckerManager &Mgr) { \ + auto *checker = Mgr.getChecker<IteratorChecker>(); \ + checker->ChecksEnabled[IteratorChecker::CK_##name] = true; \ + checker->CheckNames[IteratorChecker::CK_##name] = \ + Mgr.getCurrentCheckerName(); \ + } \ + \ + bool ento::shouldRegister##name(const LangOptions &LO) { return true; } + +REGISTER_CHECKER(IteratorRangeChecker) +REGISTER_CHECKER(MismatchedIteratorChecker) +REGISTER_CHECKER(InvalidatedIteratorChecker) diff --git a/clang/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp new file mode 100644 index 000000000000..0d64fbd6f62e --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp @@ -0,0 +1,757 @@ +//===- IvarInvalidationChecker.cpp ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This checker implements annotation-driven invalidation checking. If a class
// contains a method annotated with 'objc_instance_variable_invalidator',
// - (void) foo
//          __attribute__((annotate("objc_instance_variable_invalidator")));
// all the "invalidatable" instance variables of this class should be
// invalidated. We call an instance variable invalidatable if it is an object
// of a class which contains an invalidation method. There could be multiple
// methods annotated with such annotations per class; any one of them can be
// used to invalidate the ivar. An ivar or property is considered to be
// invalidated if it is assigned 'nil' or an invalidation method has been
// called on it. An invalidation method should either invalidate all
// the ivars or call another invalidation method (on self).
//
// The partial invalidator annotation addresses cases where ivars are
// invalidated by other methods, which might or might not be called from
// the invalidation method. The checker checks that each invalidation
// method and all the partial methods cumulatively invalidate all ivars.
+// __attribute__((annotate("objc_instance_variable_invalidator_partial"))); +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/Attr.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang; +using namespace ento; + +namespace { +struct ChecksFilter { + /// Check for missing invalidation method declarations. + DefaultBool check_MissingInvalidationMethod; + /// Check that all ivars are invalidated. + DefaultBool check_InstanceVariableInvalidation; + + CheckerNameRef checkName_MissingInvalidationMethod; + CheckerNameRef checkName_InstanceVariableInvalidation; +}; + +class IvarInvalidationCheckerImpl { + typedef llvm::SmallSetVector<const ObjCMethodDecl*, 2> MethodSet; + typedef llvm::DenseMap<const ObjCMethodDecl*, + const ObjCIvarDecl*> MethToIvarMapTy; + typedef llvm::DenseMap<const ObjCPropertyDecl*, + const ObjCIvarDecl*> PropToIvarMapTy; + typedef llvm::DenseMap<const ObjCIvarDecl*, + const ObjCPropertyDecl*> IvarToPropMapTy; + + struct InvalidationInfo { + /// Has the ivar been invalidated? + bool IsInvalidated; + + /// The methods which can be used to invalidate the ivar. 
+ MethodSet InvalidationMethods; + + InvalidationInfo() : IsInvalidated(false) {} + void addInvalidationMethod(const ObjCMethodDecl *MD) { + InvalidationMethods.insert(MD); + } + + bool needsInvalidation() const { + return !InvalidationMethods.empty(); + } + + bool hasMethod(const ObjCMethodDecl *MD) { + if (IsInvalidated) + return true; + for (MethodSet::iterator I = InvalidationMethods.begin(), + E = InvalidationMethods.end(); I != E; ++I) { + if (*I == MD) { + IsInvalidated = true; + return true; + } + } + return false; + } + }; + + typedef llvm::DenseMap<const ObjCIvarDecl*, InvalidationInfo> IvarSet; + + /// Statement visitor, which walks the method body and flags the ivars + /// referenced in it (either directly or via property). + class MethodCrawler : public ConstStmtVisitor<MethodCrawler> { + /// The set of Ivars which need to be invalidated. + IvarSet &IVars; + + /// Flag is set as the result of a message send to another + /// invalidation method. + bool &CalledAnotherInvalidationMethod; + + /// Property setter to ivar mapping. + const MethToIvarMapTy &PropertySetterToIvarMap; + + /// Property getter to ivar mapping. + const MethToIvarMapTy &PropertyGetterToIvarMap; + + /// Property to ivar mapping. + const PropToIvarMapTy &PropertyToIvarMap; + + /// The invalidation method being currently processed. + const ObjCMethodDecl *InvalidationMethod; + + ASTContext &Ctx; + + /// Peel off parens, casts, OpaqueValueExpr, and PseudoObjectExpr. + const Expr *peel(const Expr *E) const; + + /// Does this expression represent zero: '0'? + bool isZero(const Expr *E) const; + + /// Mark the given ivar as invalidated. + void markInvalidated(const ObjCIvarDecl *Iv); + + /// Checks if IvarRef refers to the tracked IVar, if yes, marks it as + /// invalidated. + void checkObjCIvarRefExpr(const ObjCIvarRefExpr *IvarRef); + + /// Checks if ObjCPropertyRefExpr refers to the tracked IVar, if yes, marks + /// it as invalidated. 
+ void checkObjCPropertyRefExpr(const ObjCPropertyRefExpr *PA); + + /// Checks if ObjCMessageExpr refers to (is a getter for) the tracked IVar, + /// if yes, marks it as invalidated. + void checkObjCMessageExpr(const ObjCMessageExpr *ME); + + /// Checks if the Expr refers to an ivar, if yes, marks it as invalidated. + void check(const Expr *E); + + public: + MethodCrawler(IvarSet &InIVars, + bool &InCalledAnotherInvalidationMethod, + const MethToIvarMapTy &InPropertySetterToIvarMap, + const MethToIvarMapTy &InPropertyGetterToIvarMap, + const PropToIvarMapTy &InPropertyToIvarMap, + ASTContext &InCtx) + : IVars(InIVars), + CalledAnotherInvalidationMethod(InCalledAnotherInvalidationMethod), + PropertySetterToIvarMap(InPropertySetterToIvarMap), + PropertyGetterToIvarMap(InPropertyGetterToIvarMap), + PropertyToIvarMap(InPropertyToIvarMap), + InvalidationMethod(nullptr), + Ctx(InCtx) {} + + void VisitStmt(const Stmt *S) { VisitChildren(S); } + + void VisitBinaryOperator(const BinaryOperator *BO); + + void VisitObjCMessageExpr(const ObjCMessageExpr *ME); + + void VisitChildren(const Stmt *S) { + for (const auto *Child : S->children()) { + if (Child) + this->Visit(Child); + if (CalledAnotherInvalidationMethod) + return; + } + } + }; + + /// Check if the any of the methods inside the interface are annotated with + /// the invalidation annotation, update the IvarInfo accordingly. + /// \param LookForPartial is set when we are searching for partial + /// invalidators. + static void containsInvalidationMethod(const ObjCContainerDecl *D, + InvalidationInfo &Out, + bool LookForPartial); + + /// Check if ivar should be tracked and add to TrackedIvars if positive. + /// Returns true if ivar should be tracked. + static bool trackIvar(const ObjCIvarDecl *Iv, IvarSet &TrackedIvars, + const ObjCIvarDecl **FirstIvarDecl); + + /// Given the property declaration, and the list of tracked ivars, finds + /// the ivar backing the property when possible. 
Returns '0' when no such + /// ivar could be found. + static const ObjCIvarDecl *findPropertyBackingIvar( + const ObjCPropertyDecl *Prop, + const ObjCInterfaceDecl *InterfaceD, + IvarSet &TrackedIvars, + const ObjCIvarDecl **FirstIvarDecl); + + /// Print ivar name or the property if the given ivar backs a property. + static void printIvar(llvm::raw_svector_ostream &os, + const ObjCIvarDecl *IvarDecl, + const IvarToPropMapTy &IvarToPopertyMap); + + void reportNoInvalidationMethod(CheckerNameRef CheckName, + const ObjCIvarDecl *FirstIvarDecl, + const IvarToPropMapTy &IvarToPopertyMap, + const ObjCInterfaceDecl *InterfaceD, + bool MissingDeclaration) const; + + void reportIvarNeedsInvalidation(const ObjCIvarDecl *IvarD, + const IvarToPropMapTy &IvarToPopertyMap, + const ObjCMethodDecl *MethodD) const; + + AnalysisManager& Mgr; + BugReporter &BR; + /// Filter on the checks performed. + const ChecksFilter &Filter; + +public: + IvarInvalidationCheckerImpl(AnalysisManager& InMgr, + BugReporter &InBR, + const ChecksFilter &InFilter) : + Mgr (InMgr), BR(InBR), Filter(InFilter) {} + + void visit(const ObjCImplementationDecl *D) const; +}; + +static bool isInvalidationMethod(const ObjCMethodDecl *M, bool LookForPartial) { + for (const auto *Ann : M->specific_attrs<AnnotateAttr>()) { + if (!LookForPartial && + Ann->getAnnotation() == "objc_instance_variable_invalidator") + return true; + if (LookForPartial && + Ann->getAnnotation() == "objc_instance_variable_invalidator_partial") + return true; + } + return false; +} + +void IvarInvalidationCheckerImpl::containsInvalidationMethod( + const ObjCContainerDecl *D, InvalidationInfo &OutInfo, bool Partial) { + + if (!D) + return; + + assert(!isa<ObjCImplementationDecl>(D)); + // TODO: Cache the results. + + // Check all methods. 
+ for (const auto *MDI : D->methods()) + if (isInvalidationMethod(MDI, Partial)) + OutInfo.addInvalidationMethod( + cast<ObjCMethodDecl>(MDI->getCanonicalDecl())); + + // If interface, check all parent protocols and super. + if (const ObjCInterfaceDecl *InterfD = dyn_cast<ObjCInterfaceDecl>(D)) { + + // Visit all protocols. + for (const auto *I : InterfD->protocols()) + containsInvalidationMethod(I->getDefinition(), OutInfo, Partial); + + // Visit all categories in case the invalidation method is declared in + // a category. + for (const auto *Ext : InterfD->visible_extensions()) + containsInvalidationMethod(Ext, OutInfo, Partial); + + containsInvalidationMethod(InterfD->getSuperClass(), OutInfo, Partial); + return; + } + + // If protocol, check all parent protocols. + if (const ObjCProtocolDecl *ProtD = dyn_cast<ObjCProtocolDecl>(D)) { + for (const auto *I : ProtD->protocols()) { + containsInvalidationMethod(I->getDefinition(), OutInfo, Partial); + } + return; + } +} + +bool IvarInvalidationCheckerImpl::trackIvar(const ObjCIvarDecl *Iv, + IvarSet &TrackedIvars, + const ObjCIvarDecl **FirstIvarDecl) { + QualType IvQTy = Iv->getType(); + const ObjCObjectPointerType *IvTy = IvQTy->getAs<ObjCObjectPointerType>(); + if (!IvTy) + return false; + const ObjCInterfaceDecl *IvInterf = IvTy->getInterfaceDecl(); + + InvalidationInfo Info; + containsInvalidationMethod(IvInterf, Info, /*LookForPartial*/ false); + if (Info.needsInvalidation()) { + const ObjCIvarDecl *I = cast<ObjCIvarDecl>(Iv->getCanonicalDecl()); + TrackedIvars[I] = Info; + if (!*FirstIvarDecl) + *FirstIvarDecl = I; + return true; + } + return false; +} + +const ObjCIvarDecl *IvarInvalidationCheckerImpl::findPropertyBackingIvar( + const ObjCPropertyDecl *Prop, + const ObjCInterfaceDecl *InterfaceD, + IvarSet &TrackedIvars, + const ObjCIvarDecl **FirstIvarDecl) { + const ObjCIvarDecl *IvarD = nullptr; + + // Lookup for the synthesized case. 
+ IvarD = Prop->getPropertyIvarDecl(); + // We only track the ivars/properties that are defined in the current + // class (not the parent). + if (IvarD && IvarD->getContainingInterface() == InterfaceD) { + if (TrackedIvars.count(IvarD)) { + return IvarD; + } + // If the ivar is synthesized we still want to track it. + if (trackIvar(IvarD, TrackedIvars, FirstIvarDecl)) + return IvarD; + } + + // Lookup IVars named "_PropName"or "PropName" among the tracked Ivars. + StringRef PropName = Prop->getIdentifier()->getName(); + for (IvarSet::const_iterator I = TrackedIvars.begin(), + E = TrackedIvars.end(); I != E; ++I) { + const ObjCIvarDecl *Iv = I->first; + StringRef IvarName = Iv->getName(); + + if (IvarName == PropName) + return Iv; + + SmallString<128> PropNameWithUnderscore; + { + llvm::raw_svector_ostream os(PropNameWithUnderscore); + os << '_' << PropName; + } + if (IvarName == PropNameWithUnderscore) + return Iv; + } + + // Note, this is a possible source of false positives. We could look at the + // getter implementation to find the ivar when its name is not derived from + // the property name. + return nullptr; +} + +void IvarInvalidationCheckerImpl::printIvar(llvm::raw_svector_ostream &os, + const ObjCIvarDecl *IvarDecl, + const IvarToPropMapTy &IvarToPopertyMap) { + if (IvarDecl->getSynthesize()) { + const ObjCPropertyDecl *PD = IvarToPopertyMap.lookup(IvarDecl); + assert(PD &&"Do we synthesize ivars for something other than properties?"); + os << "Property "<< PD->getName() << " "; + } else { + os << "Instance variable "<< IvarDecl->getName() << " "; + } +} + +// Check that the invalidatable interfaces with ivars/properties implement the +// invalidation methods. +void IvarInvalidationCheckerImpl:: +visit(const ObjCImplementationDecl *ImplD) const { + // Collect all ivars that need cleanup. + IvarSet Ivars; + // Record the first Ivar needing invalidation; used in reporting when only + // one ivar is sufficient. 
Cannot grab the first on the Ivars set to ensure + // deterministic output. + const ObjCIvarDecl *FirstIvarDecl = nullptr; + const ObjCInterfaceDecl *InterfaceD = ImplD->getClassInterface(); + + // Collect ivars declared in this class, its extensions and its implementation + ObjCInterfaceDecl *IDecl = const_cast<ObjCInterfaceDecl *>(InterfaceD); + for (const ObjCIvarDecl *Iv = IDecl->all_declared_ivar_begin(); Iv; + Iv= Iv->getNextIvar()) + trackIvar(Iv, Ivars, &FirstIvarDecl); + + // Construct Property/Property Accessor to Ivar maps to assist checking if an + // ivar which is backing a property has been reset. + MethToIvarMapTy PropSetterToIvarMap; + MethToIvarMapTy PropGetterToIvarMap; + PropToIvarMapTy PropertyToIvarMap; + IvarToPropMapTy IvarToPopertyMap; + + ObjCInterfaceDecl::PropertyMap PropMap; + ObjCInterfaceDecl::PropertyDeclOrder PropOrder; + InterfaceD->collectPropertiesToImplement(PropMap, PropOrder); + + for (ObjCInterfaceDecl::PropertyMap::iterator + I = PropMap.begin(), E = PropMap.end(); I != E; ++I) { + const ObjCPropertyDecl *PD = I->second; + if (PD->isClassProperty()) + continue; + + const ObjCIvarDecl *ID = findPropertyBackingIvar(PD, InterfaceD, Ivars, + &FirstIvarDecl); + if (!ID) + continue; + + // Store the mappings. + PD = cast<ObjCPropertyDecl>(PD->getCanonicalDecl()); + PropertyToIvarMap[PD] = ID; + IvarToPopertyMap[ID] = PD; + + // Find the setter and the getter. + const ObjCMethodDecl *SetterD = PD->getSetterMethodDecl(); + if (SetterD) { + SetterD = SetterD->getCanonicalDecl(); + PropSetterToIvarMap[SetterD] = ID; + } + + const ObjCMethodDecl *GetterD = PD->getGetterMethodDecl(); + if (GetterD) { + GetterD = GetterD->getCanonicalDecl(); + PropGetterToIvarMap[GetterD] = ID; + } + } + + // If no ivars need invalidation, there is nothing to check here. + if (Ivars.empty()) + return; + + // Find all partial invalidation methods. 
+ InvalidationInfo PartialInfo; + containsInvalidationMethod(InterfaceD, PartialInfo, /*LookForPartial*/ true); + + // Remove ivars invalidated by the partial invalidation methods. They do not + // need to be invalidated in the regular invalidation methods. + bool AtImplementationContainsAtLeastOnePartialInvalidationMethod = false; + for (MethodSet::iterator + I = PartialInfo.InvalidationMethods.begin(), + E = PartialInfo.InvalidationMethods.end(); I != E; ++I) { + const ObjCMethodDecl *InterfD = *I; + + // Get the corresponding method in the @implementation. + const ObjCMethodDecl *D = ImplD->getMethod(InterfD->getSelector(), + InterfD->isInstanceMethod()); + if (D && D->hasBody()) { + AtImplementationContainsAtLeastOnePartialInvalidationMethod = true; + + bool CalledAnotherInvalidationMethod = false; + // The MethodCrowler is going to remove the invalidated ivars. + MethodCrawler(Ivars, + CalledAnotherInvalidationMethod, + PropSetterToIvarMap, + PropGetterToIvarMap, + PropertyToIvarMap, + BR.getContext()).VisitStmt(D->getBody()); + // If another invalidation method was called, trust that full invalidation + // has occurred. + if (CalledAnotherInvalidationMethod) + Ivars.clear(); + } + } + + // If all ivars have been invalidated by partial invalidators, there is + // nothing to check here. + if (Ivars.empty()) + return; + + // Find all invalidation methods in this @interface declaration and parents. + InvalidationInfo Info; + containsInvalidationMethod(InterfaceD, Info, /*LookForPartial*/ false); + + // Report an error in case none of the invalidation methods are declared. + if (!Info.needsInvalidation() && !PartialInfo.needsInvalidation()) { + if (Filter.check_MissingInvalidationMethod) + reportNoInvalidationMethod(Filter.checkName_MissingInvalidationMethod, + FirstIvarDecl, IvarToPopertyMap, InterfaceD, + /*MissingDeclaration*/ true); + // If there are no invalidation methods, there is no ivar validation work + // to be done. 
+ return; + } + + // Only check if Ivars are invalidated when InstanceVariableInvalidation + // has been requested. + if (!Filter.check_InstanceVariableInvalidation) + return; + + // Check that all ivars are invalidated by the invalidation methods. + bool AtImplementationContainsAtLeastOneInvalidationMethod = false; + for (MethodSet::iterator I = Info.InvalidationMethods.begin(), + E = Info.InvalidationMethods.end(); I != E; ++I) { + const ObjCMethodDecl *InterfD = *I; + + // Get the corresponding method in the @implementation. + const ObjCMethodDecl *D = ImplD->getMethod(InterfD->getSelector(), + InterfD->isInstanceMethod()); + if (D && D->hasBody()) { + AtImplementationContainsAtLeastOneInvalidationMethod = true; + + // Get a copy of ivars needing invalidation. + IvarSet IvarsI = Ivars; + + bool CalledAnotherInvalidationMethod = false; + MethodCrawler(IvarsI, + CalledAnotherInvalidationMethod, + PropSetterToIvarMap, + PropGetterToIvarMap, + PropertyToIvarMap, + BR.getContext()).VisitStmt(D->getBody()); + // If another invalidation method was called, trust that full invalidation + // has occurred. + if (CalledAnotherInvalidationMethod) + continue; + + // Warn on the ivars that were not invalidated by the method. + for (IvarSet::const_iterator + I = IvarsI.begin(), E = IvarsI.end(); I != E; ++I) + reportIvarNeedsInvalidation(I->first, IvarToPopertyMap, D); + } + } + + // Report an error in case none of the invalidation methods are implemented. + if (!AtImplementationContainsAtLeastOneInvalidationMethod) { + if (AtImplementationContainsAtLeastOnePartialInvalidationMethod) { + // Warn on the ivars that were not invalidated by the prrtial + // invalidation methods. + for (IvarSet::const_iterator + I = Ivars.begin(), E = Ivars.end(); I != E; ++I) + reportIvarNeedsInvalidation(I->first, IvarToPopertyMap, nullptr); + } else { + // Otherwise, no invalidation methods were implemented. 
+ reportNoInvalidationMethod(Filter.checkName_InstanceVariableInvalidation, + FirstIvarDecl, IvarToPopertyMap, InterfaceD, + /*MissingDeclaration*/ false); + } + } +} + +void IvarInvalidationCheckerImpl::reportNoInvalidationMethod( + CheckerNameRef CheckName, const ObjCIvarDecl *FirstIvarDecl, + const IvarToPropMapTy &IvarToPopertyMap, + const ObjCInterfaceDecl *InterfaceD, bool MissingDeclaration) const { + SmallString<128> sbuf; + llvm::raw_svector_ostream os(sbuf); + assert(FirstIvarDecl); + printIvar(os, FirstIvarDecl, IvarToPopertyMap); + os << "needs to be invalidated; "; + if (MissingDeclaration) + os << "no invalidation method is declared for "; + else + os << "no invalidation method is defined in the @implementation for "; + os << InterfaceD->getName(); + + PathDiagnosticLocation IvarDecLocation = + PathDiagnosticLocation::createBegin(FirstIvarDecl, BR.getSourceManager()); + + BR.EmitBasicReport(FirstIvarDecl, CheckName, "Incomplete invalidation", + categories::CoreFoundationObjectiveC, os.str(), + IvarDecLocation); +} + +void IvarInvalidationCheckerImpl:: +reportIvarNeedsInvalidation(const ObjCIvarDecl *IvarD, + const IvarToPropMapTy &IvarToPopertyMap, + const ObjCMethodDecl *MethodD) const { + SmallString<128> sbuf; + llvm::raw_svector_ostream os(sbuf); + printIvar(os, IvarD, IvarToPopertyMap); + os << "needs to be invalidated or set to nil"; + if (MethodD) { + PathDiagnosticLocation MethodDecLocation = + PathDiagnosticLocation::createEnd(MethodD->getBody(), + BR.getSourceManager(), + Mgr.getAnalysisDeclContext(MethodD)); + BR.EmitBasicReport(MethodD, Filter.checkName_InstanceVariableInvalidation, + "Incomplete invalidation", + categories::CoreFoundationObjectiveC, os.str(), + MethodDecLocation); + } else { + BR.EmitBasicReport( + IvarD, Filter.checkName_InstanceVariableInvalidation, + "Incomplete invalidation", categories::CoreFoundationObjectiveC, + os.str(), + PathDiagnosticLocation::createBegin(IvarD, BR.getSourceManager())); + } +} + +void 
IvarInvalidationCheckerImpl::MethodCrawler::markInvalidated( + const ObjCIvarDecl *Iv) { + IvarSet::iterator I = IVars.find(Iv); + if (I != IVars.end()) { + // If InvalidationMethod is present, we are processing the message send and + // should ensure we are invalidating with the appropriate method, + // otherwise, we are processing setting to 'nil'. + if (!InvalidationMethod || I->second.hasMethod(InvalidationMethod)) + IVars.erase(I); + } +} + +const Expr *IvarInvalidationCheckerImpl::MethodCrawler::peel(const Expr *E) const { + E = E->IgnoreParenCasts(); + if (const PseudoObjectExpr *POE = dyn_cast<PseudoObjectExpr>(E)) + E = POE->getSyntacticForm()->IgnoreParenCasts(); + if (const OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E)) + E = OVE->getSourceExpr()->IgnoreParenCasts(); + return E; +} + +void IvarInvalidationCheckerImpl::MethodCrawler::checkObjCIvarRefExpr( + const ObjCIvarRefExpr *IvarRef) { + if (const Decl *D = IvarRef->getDecl()) + markInvalidated(cast<ObjCIvarDecl>(D->getCanonicalDecl())); +} + +void IvarInvalidationCheckerImpl::MethodCrawler::checkObjCMessageExpr( + const ObjCMessageExpr *ME) { + const ObjCMethodDecl *MD = ME->getMethodDecl(); + if (MD) { + MD = MD->getCanonicalDecl(); + MethToIvarMapTy::const_iterator IvI = PropertyGetterToIvarMap.find(MD); + if (IvI != PropertyGetterToIvarMap.end()) + markInvalidated(IvI->second); + } +} + +void IvarInvalidationCheckerImpl::MethodCrawler::checkObjCPropertyRefExpr( + const ObjCPropertyRefExpr *PA) { + + if (PA->isExplicitProperty()) { + const ObjCPropertyDecl *PD = PA->getExplicitProperty(); + if (PD) { + PD = cast<ObjCPropertyDecl>(PD->getCanonicalDecl()); + PropToIvarMapTy::const_iterator IvI = PropertyToIvarMap.find(PD); + if (IvI != PropertyToIvarMap.end()) + markInvalidated(IvI->second); + return; + } + } + + if (PA->isImplicitProperty()) { + const ObjCMethodDecl *MD = PA->getImplicitPropertySetter(); + if (MD) { + MD = MD->getCanonicalDecl(); + MethToIvarMapTy::const_iterator IvI 
=PropertyGetterToIvarMap.find(MD); + if (IvI != PropertyGetterToIvarMap.end()) + markInvalidated(IvI->second); + return; + } + } +} + +bool IvarInvalidationCheckerImpl::MethodCrawler::isZero(const Expr *E) const { + E = peel(E); + + return (E->isNullPointerConstant(Ctx, Expr::NPC_ValueDependentIsNotNull) + != Expr::NPCK_NotNull); +} + +void IvarInvalidationCheckerImpl::MethodCrawler::check(const Expr *E) { + E = peel(E); + + if (const ObjCIvarRefExpr *IvarRef = dyn_cast<ObjCIvarRefExpr>(E)) { + checkObjCIvarRefExpr(IvarRef); + return; + } + + if (const ObjCPropertyRefExpr *PropRef = dyn_cast<ObjCPropertyRefExpr>(E)) { + checkObjCPropertyRefExpr(PropRef); + return; + } + + if (const ObjCMessageExpr *MsgExpr = dyn_cast<ObjCMessageExpr>(E)) { + checkObjCMessageExpr(MsgExpr); + return; + } +} + +void IvarInvalidationCheckerImpl::MethodCrawler::VisitBinaryOperator( + const BinaryOperator *BO) { + VisitStmt(BO); + + // Do we assign/compare against zero? If yes, check the variable we are + // assigning to. + BinaryOperatorKind Opcode = BO->getOpcode(); + if (Opcode != BO_Assign && + Opcode != BO_EQ && + Opcode != BO_NE) + return; + + if (isZero(BO->getRHS())) { + check(BO->getLHS()); + return; + } + + if (Opcode != BO_Assign && isZero(BO->getLHS())) { + check(BO->getRHS()); + return; + } +} + +void IvarInvalidationCheckerImpl::MethodCrawler::VisitObjCMessageExpr( + const ObjCMessageExpr *ME) { + const ObjCMethodDecl *MD = ME->getMethodDecl(); + const Expr *Receiver = ME->getInstanceReceiver(); + + // Stop if we are calling '[self invalidate]'. + if (Receiver && isInvalidationMethod(MD, /*LookForPartial*/ false)) + if (Receiver->isObjCSelfExpr()) { + CalledAnotherInvalidationMethod = true; + return; + } + + // Check if we call a setter and set the property to 'nil'. 
+ if (MD && (ME->getNumArgs() == 1) && isZero(ME->getArg(0))) { + MD = MD->getCanonicalDecl(); + MethToIvarMapTy::const_iterator IvI = PropertySetterToIvarMap.find(MD); + if (IvI != PropertySetterToIvarMap.end()) { + markInvalidated(IvI->second); + return; + } + } + + // Check if we call the 'invalidation' routine on the ivar. + if (Receiver) { + InvalidationMethod = MD; + check(Receiver->IgnoreParenCasts()); + InvalidationMethod = nullptr; + } + + VisitStmt(ME); +} +} // end anonymous namespace + +// Register the checkers. +namespace { +class IvarInvalidationChecker : + public Checker<check::ASTDecl<ObjCImplementationDecl> > { +public: + ChecksFilter Filter; +public: + void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager& Mgr, + BugReporter &BR) const { + IvarInvalidationCheckerImpl Walker(Mgr, BR, Filter); + Walker.visit(D); + } +}; +} // end anonymous namespace + +void ento::registerIvarInvalidationModeling(CheckerManager &mgr) { + mgr.registerChecker<IvarInvalidationChecker>(); +} + +bool ento::shouldRegisterIvarInvalidationModeling(const LangOptions &LO) { + return true; +} + +#define REGISTER_CHECKER(name) \ + void ento::register##name(CheckerManager &mgr) { \ + IvarInvalidationChecker *checker = \ + mgr.getChecker<IvarInvalidationChecker>(); \ + checker->Filter.check_##name = true; \ + checker->Filter.checkName_##name = mgr.getCurrentCheckerName(); \ + } \ + \ + bool ento::shouldRegister##name(const LangOptions &LO) { return true; } + +REGISTER_CHECKER(InstanceVariableInvalidation) +REGISTER_CHECKER(MissingInvalidationMethod) diff --git a/clang/lib/StaticAnalyzer/Checkers/LLVMConventionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/LLVMConventionsChecker.cpp new file mode 100644 index 000000000000..7522fdd0a99b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/LLVMConventionsChecker.cpp @@ -0,0 +1,319 @@ +//=== LLVMConventionsChecker.cpp - Check LLVM codebase conventions ---*- C++ -*- +// +// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines LLVMConventionsChecker, a bunch of small little checks +// for checking specific coding conventions in the LLVM/Clang codebase. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +//===----------------------------------------------------------------------===// +// Generic type checking routines. +//===----------------------------------------------------------------------===// + +static bool IsLLVMStringRef(QualType T) { + const RecordType *RT = T->getAs<RecordType>(); + if (!RT) + return false; + + return StringRef(QualType(RT, 0).getAsString()) == "class StringRef"; +} + +/// Check whether the declaration is semantically inside the top-level +/// namespace named by ns. 
+static bool InNamespace(const Decl *D, StringRef NS) { + const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(D->getDeclContext()); + if (!ND) + return false; + const IdentifierInfo *II = ND->getIdentifier(); + if (!II || !II->getName().equals(NS)) + return false; + return isa<TranslationUnitDecl>(ND->getDeclContext()); +} + +static bool IsStdString(QualType T) { + if (const ElaboratedType *QT = T->getAs<ElaboratedType>()) + T = QT->getNamedType(); + + const TypedefType *TT = T->getAs<TypedefType>(); + if (!TT) + return false; + + const TypedefNameDecl *TD = TT->getDecl(); + + if (!TD->isInStdNamespace()) + return false; + + return TD->getName() == "string"; +} + +static bool IsClangType(const RecordDecl *RD) { + return RD->getName() == "Type" && InNamespace(RD, "clang"); +} + +static bool IsClangDecl(const RecordDecl *RD) { + return RD->getName() == "Decl" && InNamespace(RD, "clang"); +} + +static bool IsClangStmt(const RecordDecl *RD) { + return RD->getName() == "Stmt" && InNamespace(RD, "clang"); +} + +static bool IsClangAttr(const RecordDecl *RD) { + return RD->getName() == "Attr" && InNamespace(RD, "clang"); +} + +static bool IsStdVector(QualType T) { + const TemplateSpecializationType *TS = T->getAs<TemplateSpecializationType>(); + if (!TS) + return false; + + TemplateName TM = TS->getTemplateName(); + TemplateDecl *TD = TM.getAsTemplateDecl(); + + if (!TD || !InNamespace(TD, "std")) + return false; + + return TD->getName() == "vector"; +} + +static bool IsSmallVector(QualType T) { + const TemplateSpecializationType *TS = T->getAs<TemplateSpecializationType>(); + if (!TS) + return false; + + TemplateName TM = TS->getTemplateName(); + TemplateDecl *TD = TM.getAsTemplateDecl(); + + if (!TD || !InNamespace(TD, "llvm")) + return false; + + return TD->getName() == "SmallVector"; +} + +//===----------------------------------------------------------------------===// +// CHECK: a StringRef should not be bound to a temporary std::string whose +// lifetime is shorter 
than the StringRef's. +//===----------------------------------------------------------------------===// + +namespace { +class StringRefCheckerVisitor : public StmtVisitor<StringRefCheckerVisitor> { + const Decl *DeclWithIssue; + BugReporter &BR; + const CheckerBase *Checker; + +public: + StringRefCheckerVisitor(const Decl *declWithIssue, BugReporter &br, + const CheckerBase *checker) + : DeclWithIssue(declWithIssue), BR(br), Checker(checker) {} + void VisitChildren(Stmt *S) { + for (Stmt *Child : S->children()) + if (Child) + Visit(Child); + } + void VisitStmt(Stmt *S) { VisitChildren(S); } + void VisitDeclStmt(DeclStmt *DS); +private: + void VisitVarDecl(VarDecl *VD); +}; +} // end anonymous namespace + +static void CheckStringRefAssignedTemporary(const Decl *D, BugReporter &BR, + const CheckerBase *Checker) { + StringRefCheckerVisitor walker(D, BR, Checker); + walker.Visit(D->getBody()); +} + +void StringRefCheckerVisitor::VisitDeclStmt(DeclStmt *S) { + VisitChildren(S); + + for (auto *I : S->decls()) + if (VarDecl *VD = dyn_cast<VarDecl>(I)) + VisitVarDecl(VD); +} + +void StringRefCheckerVisitor::VisitVarDecl(VarDecl *VD) { + Expr *Init = VD->getInit(); + if (!Init) + return; + + // Pattern match for: + // StringRef x = call() (where call returns std::string) + if (!IsLLVMStringRef(VD->getType())) + return; + ExprWithCleanups *Ex1 = dyn_cast<ExprWithCleanups>(Init); + if (!Ex1) + return; + CXXConstructExpr *Ex2 = dyn_cast<CXXConstructExpr>(Ex1->getSubExpr()); + if (!Ex2 || Ex2->getNumArgs() != 1) + return; + ImplicitCastExpr *Ex3 = dyn_cast<ImplicitCastExpr>(Ex2->getArg(0)); + if (!Ex3) + return; + CXXConstructExpr *Ex4 = dyn_cast<CXXConstructExpr>(Ex3->getSubExpr()); + if (!Ex4 || Ex4->getNumArgs() != 1) + return; + ImplicitCastExpr *Ex5 = dyn_cast<ImplicitCastExpr>(Ex4->getArg(0)); + if (!Ex5) + return; + CXXBindTemporaryExpr *Ex6 = dyn_cast<CXXBindTemporaryExpr>(Ex5->getSubExpr()); + if (!Ex6 || !IsStdString(Ex6->getType())) + return; + + // Okay, badness! 
Report an error. + const char *desc = "StringRef should not be bound to temporary " + "std::string that it outlives"; + PathDiagnosticLocation VDLoc = + PathDiagnosticLocation::createBegin(VD, BR.getSourceManager()); + BR.EmitBasicReport(DeclWithIssue, Checker, desc, "LLVM Conventions", desc, + VDLoc, Init->getSourceRange()); +} + +//===----------------------------------------------------------------------===// +// CHECK: Clang AST nodes should not have fields that can allocate +// memory. +//===----------------------------------------------------------------------===// + +static bool AllocatesMemory(QualType T) { + return IsStdVector(T) || IsStdString(T) || IsSmallVector(T); +} + +// This type checking could be sped up via dynamic programming. +static bool IsPartOfAST(const CXXRecordDecl *R) { + if (IsClangStmt(R) || IsClangType(R) || IsClangDecl(R) || IsClangAttr(R)) + return true; + + for (const auto &BS : R->bases()) { + QualType T = BS.getType(); + if (const RecordType *baseT = T->getAs<RecordType>()) { + CXXRecordDecl *baseD = cast<CXXRecordDecl>(baseT->getDecl()); + if (IsPartOfAST(baseD)) + return true; + } + } + + return false; +} + +namespace { +class ASTFieldVisitor { + SmallVector<FieldDecl*, 10> FieldChain; + const CXXRecordDecl *Root; + BugReporter &BR; + const CheckerBase *Checker; + +public: + ASTFieldVisitor(const CXXRecordDecl *root, BugReporter &br, + const CheckerBase *checker) + : Root(root), BR(br), Checker(checker) {} + + void Visit(FieldDecl *D); + void ReportError(QualType T); +}; +} // end anonymous namespace + +static void CheckASTMemory(const CXXRecordDecl *R, BugReporter &BR, + const CheckerBase *Checker) { + if (!IsPartOfAST(R)) + return; + + for (auto *I : R->fields()) { + ASTFieldVisitor walker(R, BR, Checker); + walker.Visit(I); + } +} + +void ASTFieldVisitor::Visit(FieldDecl *D) { + FieldChain.push_back(D); + + QualType T = D->getType(); + + if (AllocatesMemory(T)) + ReportError(T); + + if (const RecordType *RT = 
T->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl()->getDefinition(); + for (auto *I : RD->fields()) + Visit(I); + } + + FieldChain.pop_back(); +} + +void ASTFieldVisitor::ReportError(QualType T) { + SmallString<1024> buf; + llvm::raw_svector_ostream os(buf); + + os << "AST class '" << Root->getName() << "' has a field '" + << FieldChain.front()->getName() << "' that allocates heap memory"; + if (FieldChain.size() > 1) { + os << " via the following chain: "; + bool isFirst = true; + for (SmallVectorImpl<FieldDecl*>::iterator I=FieldChain.begin(), + E=FieldChain.end(); I!=E; ++I) { + if (!isFirst) + os << '.'; + else + isFirst = false; + os << (*I)->getName(); + } + } + os << " (type " << FieldChain.back()->getType().getAsString() << ")"; + + // Note that this will fire for every translation unit that uses this + // class. This is suboptimal, but at least scan-build will merge + // duplicate HTML reports. In the future we need a unified way of merging + // duplicate reports across translation units. For C++ classes we cannot + // just report warnings when we see an out-of-line method definition for a + // class, as that heuristic doesn't always work (the complete definition of + // the class may be in the header file, for example). 
+ PathDiagnosticLocation L = PathDiagnosticLocation::createBegin( + FieldChain.front(), BR.getSourceManager()); + BR.EmitBasicReport(Root, Checker, "AST node allocates heap memory", + "LLVM Conventions", os.str(), L); +} + +//===----------------------------------------------------------------------===// +// LLVMConventionsChecker +//===----------------------------------------------------------------------===// + +namespace { +class LLVMConventionsChecker : public Checker< + check::ASTDecl<CXXRecordDecl>, + check::ASTCodeBody > { +public: + void checkASTDecl(const CXXRecordDecl *R, AnalysisManager& mgr, + BugReporter &BR) const { + if (R->isCompleteDefinition()) + CheckASTMemory(R, BR, this); + } + + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + CheckStringRefAssignedTemporary(D, BR, this); + } +}; +} + +void ento::registerLLVMConventionsChecker(CheckerManager &mgr) { + mgr.registerChecker<LLVMConventionsChecker>(); +} + +bool ento::shouldRegisterLLVMConventionsChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp new file mode 100644 index 000000000000..a81015b6e524 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp @@ -0,0 +1,1422 @@ +//=- LocalizationChecker.cpp -------------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a set of checks for localizability including: +// 1) A checker that warns about uses of non-localized NSStrings passed to +// UI methods expecting localized strings +// 2) A syntactic checker that warns against the bad practice of +// not including a comment in NSLocalizedString macros. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Lex/Lexer.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "llvm/Support/Unicode.h" + +using namespace clang; +using namespace ento; + +namespace { +struct LocalizedState { +private: + enum Kind { NonLocalized, Localized } K; + LocalizedState(Kind InK) : K(InK) {} + +public: + bool isLocalized() const { return K == Localized; } + bool isNonLocalized() const { return K == NonLocalized; } + + static LocalizedState getLocalized() { return LocalizedState(Localized); } + static LocalizedState getNonLocalized() { + return LocalizedState(NonLocalized); + } + + // Overload the == operator + bool operator==(const LocalizedState &X) const { return K == X.K; } + + // LLVMs equivalent of a hash function + void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(K); } +}; + +class 
NonLocalizedStringChecker + : public Checker<check::PreCall, check::PostCall, check::PreObjCMessage, + check::PostObjCMessage, + check::PostStmt<ObjCStringLiteral>> { + + mutable std::unique_ptr<BugType> BT; + + // Methods that require a localized string + mutable llvm::DenseMap<const IdentifierInfo *, + llvm::DenseMap<Selector, uint8_t>> UIMethods; + // Methods that return a localized string + mutable llvm::SmallSet<std::pair<const IdentifierInfo *, Selector>, 12> LSM; + // C Functions that return a localized string + mutable llvm::SmallSet<const IdentifierInfo *, 5> LSF; + + void initUIMethods(ASTContext &Ctx) const; + void initLocStringsMethods(ASTContext &Ctx) const; + + bool hasNonLocalizedState(SVal S, CheckerContext &C) const; + bool hasLocalizedState(SVal S, CheckerContext &C) const; + void setNonLocalizedState(SVal S, CheckerContext &C) const; + void setLocalizedState(SVal S, CheckerContext &C) const; + + bool isAnnotatedAsReturningLocalized(const Decl *D) const; + bool isAnnotatedAsTakingLocalized(const Decl *D) const; + void reportLocalizationError(SVal S, const CallEvent &M, CheckerContext &C, + int argumentNumber = 0) const; + + int getLocalizedArgumentForSelector(const IdentifierInfo *Receiver, + Selector S) const; + +public: + NonLocalizedStringChecker(); + + // When this parameter is set to true, the checker assumes all + // methods that return NSStrings are unlocalized. Thus, more false + // positives will be reported. 
+ DefaultBool IsAggressive; + + void checkPreObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const; + void checkPostObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const; + void checkPostStmt(const ObjCStringLiteral *SL, CheckerContext &C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; +}; + +} // end anonymous namespace + +REGISTER_MAP_WITH_PROGRAMSTATE(LocalizedMemMap, const MemRegion *, + LocalizedState) + +NonLocalizedStringChecker::NonLocalizedStringChecker() { + BT.reset(new BugType(this, "Unlocalizable string", + "Localizability Issue (Apple)")); +} + +namespace { +class NonLocalizedStringBRVisitor final : public BugReporterVisitor { + + const MemRegion *NonLocalizedString; + bool Satisfied; + +public: + NonLocalizedStringBRVisitor(const MemRegion *NonLocalizedString) + : NonLocalizedString(NonLocalizedString), Satisfied(false) { + assert(NonLocalizedString); + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *Succ, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + + void Profile(llvm::FoldingSetNodeID &ID) const override { + ID.Add(NonLocalizedString); + } +}; +} // End anonymous namespace. 
+ +#define NEW_RECEIVER(receiver) \ + llvm::DenseMap<Selector, uint8_t> &receiver##M = \ + UIMethods.insert({&Ctx.Idents.get(#receiver), \ + llvm::DenseMap<Selector, uint8_t>()}) \ + .first->second; +#define ADD_NULLARY_METHOD(receiver, method, argument) \ + receiver##M.insert( \ + {Ctx.Selectors.getNullarySelector(&Ctx.Idents.get(#method)), argument}); +#define ADD_UNARY_METHOD(receiver, method, argument) \ + receiver##M.insert( \ + {Ctx.Selectors.getUnarySelector(&Ctx.Idents.get(#method)), argument}); +#define ADD_METHOD(receiver, method_list, count, argument) \ + receiver##M.insert({Ctx.Selectors.getSelector(count, method_list), argument}); + +/// Initializes a list of methods that require a localized string +/// Format: {"ClassName", {{"selectorName:", LocStringArg#}, ...}, ...} +void NonLocalizedStringChecker::initUIMethods(ASTContext &Ctx) const { + if (!UIMethods.empty()) + return; + + // UI Methods + NEW_RECEIVER(UISearchDisplayController) + ADD_UNARY_METHOD(UISearchDisplayController, setSearchResultsTitle, 0) + + NEW_RECEIVER(UITabBarItem) + IdentifierInfo *initWithTitleUITabBarItemTag[] = { + &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("image"), + &Ctx.Idents.get("tag")}; + ADD_METHOD(UITabBarItem, initWithTitleUITabBarItemTag, 3, 0) + IdentifierInfo *initWithTitleUITabBarItemImage[] = { + &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("image"), + &Ctx.Idents.get("selectedImage")}; + ADD_METHOD(UITabBarItem, initWithTitleUITabBarItemImage, 3, 0) + + NEW_RECEIVER(NSDockTile) + ADD_UNARY_METHOD(NSDockTile, setBadgeLabel, 0) + + NEW_RECEIVER(NSStatusItem) + ADD_UNARY_METHOD(NSStatusItem, setTitle, 0) + ADD_UNARY_METHOD(NSStatusItem, setToolTip, 0) + + NEW_RECEIVER(UITableViewRowAction) + IdentifierInfo *rowActionWithStyleUITableViewRowAction[] = { + &Ctx.Idents.get("rowActionWithStyle"), &Ctx.Idents.get("title"), + &Ctx.Idents.get("handler")}; + ADD_METHOD(UITableViewRowAction, rowActionWithStyleUITableViewRowAction, 3, 1) + 
ADD_UNARY_METHOD(UITableViewRowAction, setTitle, 0) + + NEW_RECEIVER(NSBox) + ADD_UNARY_METHOD(NSBox, setTitle, 0) + + NEW_RECEIVER(NSButton) + ADD_UNARY_METHOD(NSButton, setTitle, 0) + ADD_UNARY_METHOD(NSButton, setAlternateTitle, 0) + IdentifierInfo *radioButtonWithTitleNSButton[] = { + &Ctx.Idents.get("radioButtonWithTitle"), &Ctx.Idents.get("target"), + &Ctx.Idents.get("action")}; + ADD_METHOD(NSButton, radioButtonWithTitleNSButton, 3, 0) + IdentifierInfo *buttonWithTitleNSButtonImage[] = { + &Ctx.Idents.get("buttonWithTitle"), &Ctx.Idents.get("image"), + &Ctx.Idents.get("target"), &Ctx.Idents.get("action")}; + ADD_METHOD(NSButton, buttonWithTitleNSButtonImage, 4, 0) + IdentifierInfo *checkboxWithTitleNSButton[] = { + &Ctx.Idents.get("checkboxWithTitle"), &Ctx.Idents.get("target"), + &Ctx.Idents.get("action")}; + ADD_METHOD(NSButton, checkboxWithTitleNSButton, 3, 0) + IdentifierInfo *buttonWithTitleNSButtonTarget[] = { + &Ctx.Idents.get("buttonWithTitle"), &Ctx.Idents.get("target"), + &Ctx.Idents.get("action")}; + ADD_METHOD(NSButton, buttonWithTitleNSButtonTarget, 3, 0) + + NEW_RECEIVER(NSSavePanel) + ADD_UNARY_METHOD(NSSavePanel, setPrompt, 0) + ADD_UNARY_METHOD(NSSavePanel, setTitle, 0) + ADD_UNARY_METHOD(NSSavePanel, setNameFieldLabel, 0) + ADD_UNARY_METHOD(NSSavePanel, setNameFieldStringValue, 0) + ADD_UNARY_METHOD(NSSavePanel, setMessage, 0) + + NEW_RECEIVER(UIPrintInfo) + ADD_UNARY_METHOD(UIPrintInfo, setJobName, 0) + + NEW_RECEIVER(NSTabViewItem) + ADD_UNARY_METHOD(NSTabViewItem, setLabel, 0) + ADD_UNARY_METHOD(NSTabViewItem, setToolTip, 0) + + NEW_RECEIVER(NSBrowser) + IdentifierInfo *setTitleNSBrowser[] = {&Ctx.Idents.get("setTitle"), + &Ctx.Idents.get("ofColumn")}; + ADD_METHOD(NSBrowser, setTitleNSBrowser, 2, 0) + + NEW_RECEIVER(UIAccessibilityElement) + ADD_UNARY_METHOD(UIAccessibilityElement, setAccessibilityLabel, 0) + ADD_UNARY_METHOD(UIAccessibilityElement, setAccessibilityHint, 0) + ADD_UNARY_METHOD(UIAccessibilityElement, 
setAccessibilityValue, 0) + + NEW_RECEIVER(UIAlertAction) + IdentifierInfo *actionWithTitleUIAlertAction[] = { + &Ctx.Idents.get("actionWithTitle"), &Ctx.Idents.get("style"), + &Ctx.Idents.get("handler")}; + ADD_METHOD(UIAlertAction, actionWithTitleUIAlertAction, 3, 0) + + NEW_RECEIVER(NSPopUpButton) + ADD_UNARY_METHOD(NSPopUpButton, addItemWithTitle, 0) + IdentifierInfo *insertItemWithTitleNSPopUpButton[] = { + &Ctx.Idents.get("insertItemWithTitle"), &Ctx.Idents.get("atIndex")}; + ADD_METHOD(NSPopUpButton, insertItemWithTitleNSPopUpButton, 2, 0) + ADD_UNARY_METHOD(NSPopUpButton, removeItemWithTitle, 0) + ADD_UNARY_METHOD(NSPopUpButton, selectItemWithTitle, 0) + ADD_UNARY_METHOD(NSPopUpButton, setTitle, 0) + + NEW_RECEIVER(NSTableViewRowAction) + IdentifierInfo *rowActionWithStyleNSTableViewRowAction[] = { + &Ctx.Idents.get("rowActionWithStyle"), &Ctx.Idents.get("title"), + &Ctx.Idents.get("handler")}; + ADD_METHOD(NSTableViewRowAction, rowActionWithStyleNSTableViewRowAction, 3, 1) + ADD_UNARY_METHOD(NSTableViewRowAction, setTitle, 0) + + NEW_RECEIVER(NSImage) + ADD_UNARY_METHOD(NSImage, setAccessibilityDescription, 0) + + NEW_RECEIVER(NSUserActivity) + ADD_UNARY_METHOD(NSUserActivity, setTitle, 0) + + NEW_RECEIVER(NSPathControlItem) + ADD_UNARY_METHOD(NSPathControlItem, setTitle, 0) + + NEW_RECEIVER(NSCell) + ADD_UNARY_METHOD(NSCell, initTextCell, 0) + ADD_UNARY_METHOD(NSCell, setTitle, 0) + ADD_UNARY_METHOD(NSCell, setStringValue, 0) + + NEW_RECEIVER(NSPathControl) + ADD_UNARY_METHOD(NSPathControl, setPlaceholderString, 0) + + NEW_RECEIVER(UIAccessibility) + ADD_UNARY_METHOD(UIAccessibility, setAccessibilityLabel, 0) + ADD_UNARY_METHOD(UIAccessibility, setAccessibilityHint, 0) + ADD_UNARY_METHOD(UIAccessibility, setAccessibilityValue, 0) + + NEW_RECEIVER(NSTableColumn) + ADD_UNARY_METHOD(NSTableColumn, setTitle, 0) + ADD_UNARY_METHOD(NSTableColumn, setHeaderToolTip, 0) + + NEW_RECEIVER(NSSegmentedControl) + IdentifierInfo *setLabelNSSegmentedControl[] = { + 
&Ctx.Idents.get("setLabel"), &Ctx.Idents.get("forSegment")}; + ADD_METHOD(NSSegmentedControl, setLabelNSSegmentedControl, 2, 0) + IdentifierInfo *setToolTipNSSegmentedControl[] = { + &Ctx.Idents.get("setToolTip"), &Ctx.Idents.get("forSegment")}; + ADD_METHOD(NSSegmentedControl, setToolTipNSSegmentedControl, 2, 0) + + NEW_RECEIVER(NSButtonCell) + ADD_UNARY_METHOD(NSButtonCell, setTitle, 0) + ADD_UNARY_METHOD(NSButtonCell, setAlternateTitle, 0) + + NEW_RECEIVER(NSDatePickerCell) + ADD_UNARY_METHOD(NSDatePickerCell, initTextCell, 0) + + NEW_RECEIVER(NSSliderCell) + ADD_UNARY_METHOD(NSSliderCell, setTitle, 0) + + NEW_RECEIVER(NSControl) + ADD_UNARY_METHOD(NSControl, setStringValue, 0) + + NEW_RECEIVER(NSAccessibility) + ADD_UNARY_METHOD(NSAccessibility, setAccessibilityValueDescription, 0) + ADD_UNARY_METHOD(NSAccessibility, setAccessibilityLabel, 0) + ADD_UNARY_METHOD(NSAccessibility, setAccessibilityTitle, 0) + ADD_UNARY_METHOD(NSAccessibility, setAccessibilityPlaceholderValue, 0) + ADD_UNARY_METHOD(NSAccessibility, setAccessibilityHelp, 0) + + NEW_RECEIVER(NSMatrix) + IdentifierInfo *setToolTipNSMatrix[] = {&Ctx.Idents.get("setToolTip"), + &Ctx.Idents.get("forCell")}; + ADD_METHOD(NSMatrix, setToolTipNSMatrix, 2, 0) + + NEW_RECEIVER(NSPrintPanel) + ADD_UNARY_METHOD(NSPrintPanel, setDefaultButtonTitle, 0) + + NEW_RECEIVER(UILocalNotification) + ADD_UNARY_METHOD(UILocalNotification, setAlertBody, 0) + ADD_UNARY_METHOD(UILocalNotification, setAlertAction, 0) + ADD_UNARY_METHOD(UILocalNotification, setAlertTitle, 0) + + NEW_RECEIVER(NSSlider) + ADD_UNARY_METHOD(NSSlider, setTitle, 0) + + NEW_RECEIVER(UIMenuItem) + IdentifierInfo *initWithTitleUIMenuItem[] = {&Ctx.Idents.get("initWithTitle"), + &Ctx.Idents.get("action")}; + ADD_METHOD(UIMenuItem, initWithTitleUIMenuItem, 2, 0) + ADD_UNARY_METHOD(UIMenuItem, setTitle, 0) + + NEW_RECEIVER(UIAlertController) + IdentifierInfo *alertControllerWithTitleUIAlertController[] = { + &Ctx.Idents.get("alertControllerWithTitle"), 
&Ctx.Idents.get("message"), + &Ctx.Idents.get("preferredStyle")}; + ADD_METHOD(UIAlertController, alertControllerWithTitleUIAlertController, 3, 1) + ADD_UNARY_METHOD(UIAlertController, setTitle, 0) + ADD_UNARY_METHOD(UIAlertController, setMessage, 0) + + NEW_RECEIVER(UIApplicationShortcutItem) + IdentifierInfo *initWithTypeUIApplicationShortcutItemIcon[] = { + &Ctx.Idents.get("initWithType"), &Ctx.Idents.get("localizedTitle"), + &Ctx.Idents.get("localizedSubtitle"), &Ctx.Idents.get("icon"), + &Ctx.Idents.get("userInfo")}; + ADD_METHOD(UIApplicationShortcutItem, + initWithTypeUIApplicationShortcutItemIcon, 5, 1) + IdentifierInfo *initWithTypeUIApplicationShortcutItem[] = { + &Ctx.Idents.get("initWithType"), &Ctx.Idents.get("localizedTitle")}; + ADD_METHOD(UIApplicationShortcutItem, initWithTypeUIApplicationShortcutItem, + 2, 1) + + NEW_RECEIVER(UIActionSheet) + IdentifierInfo *initWithTitleUIActionSheet[] = { + &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("delegate"), + &Ctx.Idents.get("cancelButtonTitle"), + &Ctx.Idents.get("destructiveButtonTitle"), + &Ctx.Idents.get("otherButtonTitles")}; + ADD_METHOD(UIActionSheet, initWithTitleUIActionSheet, 5, 0) + ADD_UNARY_METHOD(UIActionSheet, addButtonWithTitle, 0) + ADD_UNARY_METHOD(UIActionSheet, setTitle, 0) + + NEW_RECEIVER(UIAccessibilityCustomAction) + IdentifierInfo *initWithNameUIAccessibilityCustomAction[] = { + &Ctx.Idents.get("initWithName"), &Ctx.Idents.get("target"), + &Ctx.Idents.get("selector")}; + ADD_METHOD(UIAccessibilityCustomAction, + initWithNameUIAccessibilityCustomAction, 3, 0) + ADD_UNARY_METHOD(UIAccessibilityCustomAction, setName, 0) + + NEW_RECEIVER(UISearchBar) + ADD_UNARY_METHOD(UISearchBar, setText, 0) + ADD_UNARY_METHOD(UISearchBar, setPrompt, 0) + ADD_UNARY_METHOD(UISearchBar, setPlaceholder, 0) + + NEW_RECEIVER(UIBarItem) + ADD_UNARY_METHOD(UIBarItem, setTitle, 0) + + NEW_RECEIVER(UITextView) + ADD_UNARY_METHOD(UITextView, setText, 0) + + NEW_RECEIVER(NSView) + ADD_UNARY_METHOD(NSView, 
setToolTip, 0) + + NEW_RECEIVER(NSTextField) + ADD_UNARY_METHOD(NSTextField, setPlaceholderString, 0) + ADD_UNARY_METHOD(NSTextField, textFieldWithString, 0) + ADD_UNARY_METHOD(NSTextField, wrappingLabelWithString, 0) + ADD_UNARY_METHOD(NSTextField, labelWithString, 0) + + NEW_RECEIVER(NSAttributedString) + ADD_UNARY_METHOD(NSAttributedString, initWithString, 0) + IdentifierInfo *initWithStringNSAttributedString[] = { + &Ctx.Idents.get("initWithString"), &Ctx.Idents.get("attributes")}; + ADD_METHOD(NSAttributedString, initWithStringNSAttributedString, 2, 0) + + NEW_RECEIVER(NSText) + ADD_UNARY_METHOD(NSText, setString, 0) + + NEW_RECEIVER(UIKeyCommand) + IdentifierInfo *keyCommandWithInputUIKeyCommand[] = { + &Ctx.Idents.get("keyCommandWithInput"), &Ctx.Idents.get("modifierFlags"), + &Ctx.Idents.get("action"), &Ctx.Idents.get("discoverabilityTitle")}; + ADD_METHOD(UIKeyCommand, keyCommandWithInputUIKeyCommand, 4, 3) + ADD_UNARY_METHOD(UIKeyCommand, setDiscoverabilityTitle, 0) + + NEW_RECEIVER(UILabel) + ADD_UNARY_METHOD(UILabel, setText, 0) + + NEW_RECEIVER(NSAlert) + IdentifierInfo *alertWithMessageTextNSAlert[] = { + &Ctx.Idents.get("alertWithMessageText"), &Ctx.Idents.get("defaultButton"), + &Ctx.Idents.get("alternateButton"), &Ctx.Idents.get("otherButton"), + &Ctx.Idents.get("informativeTextWithFormat")}; + ADD_METHOD(NSAlert, alertWithMessageTextNSAlert, 5, 0) + ADD_UNARY_METHOD(NSAlert, addButtonWithTitle, 0) + ADD_UNARY_METHOD(NSAlert, setMessageText, 0) + ADD_UNARY_METHOD(NSAlert, setInformativeText, 0) + ADD_UNARY_METHOD(NSAlert, setHelpAnchor, 0) + + NEW_RECEIVER(UIMutableApplicationShortcutItem) + ADD_UNARY_METHOD(UIMutableApplicationShortcutItem, setLocalizedTitle, 0) + ADD_UNARY_METHOD(UIMutableApplicationShortcutItem, setLocalizedSubtitle, 0) + + NEW_RECEIVER(UIButton) + IdentifierInfo *setTitleUIButton[] = {&Ctx.Idents.get("setTitle"), + &Ctx.Idents.get("forState")}; + ADD_METHOD(UIButton, setTitleUIButton, 2, 0) + + NEW_RECEIVER(NSWindow) + 
ADD_UNARY_METHOD(NSWindow, setTitle, 0) + IdentifierInfo *minFrameWidthWithTitleNSWindow[] = { + &Ctx.Idents.get("minFrameWidthWithTitle"), &Ctx.Idents.get("styleMask")}; + ADD_METHOD(NSWindow, minFrameWidthWithTitleNSWindow, 2, 0) + ADD_UNARY_METHOD(NSWindow, setMiniwindowTitle, 0) + + NEW_RECEIVER(NSPathCell) + ADD_UNARY_METHOD(NSPathCell, setPlaceholderString, 0) + + NEW_RECEIVER(UIDocumentMenuViewController) + IdentifierInfo *addOptionWithTitleUIDocumentMenuViewController[] = { + &Ctx.Idents.get("addOptionWithTitle"), &Ctx.Idents.get("image"), + &Ctx.Idents.get("order"), &Ctx.Idents.get("handler")}; + ADD_METHOD(UIDocumentMenuViewController, + addOptionWithTitleUIDocumentMenuViewController, 4, 0) + + NEW_RECEIVER(UINavigationItem) + ADD_UNARY_METHOD(UINavigationItem, initWithTitle, 0) + ADD_UNARY_METHOD(UINavigationItem, setTitle, 0) + ADD_UNARY_METHOD(UINavigationItem, setPrompt, 0) + + NEW_RECEIVER(UIAlertView) + IdentifierInfo *initWithTitleUIAlertView[] = { + &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("message"), + &Ctx.Idents.get("delegate"), &Ctx.Idents.get("cancelButtonTitle"), + &Ctx.Idents.get("otherButtonTitles")}; + ADD_METHOD(UIAlertView, initWithTitleUIAlertView, 5, 0) + ADD_UNARY_METHOD(UIAlertView, addButtonWithTitle, 0) + ADD_UNARY_METHOD(UIAlertView, setTitle, 0) + ADD_UNARY_METHOD(UIAlertView, setMessage, 0) + + NEW_RECEIVER(NSFormCell) + ADD_UNARY_METHOD(NSFormCell, initTextCell, 0) + ADD_UNARY_METHOD(NSFormCell, setTitle, 0) + ADD_UNARY_METHOD(NSFormCell, setPlaceholderString, 0) + + NEW_RECEIVER(NSUserNotification) + ADD_UNARY_METHOD(NSUserNotification, setTitle, 0) + ADD_UNARY_METHOD(NSUserNotification, setSubtitle, 0) + ADD_UNARY_METHOD(NSUserNotification, setInformativeText, 0) + ADD_UNARY_METHOD(NSUserNotification, setActionButtonTitle, 0) + ADD_UNARY_METHOD(NSUserNotification, setOtherButtonTitle, 0) + ADD_UNARY_METHOD(NSUserNotification, setResponsePlaceholder, 0) + + NEW_RECEIVER(NSToolbarItem) + 
ADD_UNARY_METHOD(NSToolbarItem, setLabel, 0) + ADD_UNARY_METHOD(NSToolbarItem, setPaletteLabel, 0) + ADD_UNARY_METHOD(NSToolbarItem, setToolTip, 0) + + NEW_RECEIVER(NSProgress) + ADD_UNARY_METHOD(NSProgress, setLocalizedDescription, 0) + ADD_UNARY_METHOD(NSProgress, setLocalizedAdditionalDescription, 0) + + NEW_RECEIVER(NSSegmentedCell) + IdentifierInfo *setLabelNSSegmentedCell[] = {&Ctx.Idents.get("setLabel"), + &Ctx.Idents.get("forSegment")}; + ADD_METHOD(NSSegmentedCell, setLabelNSSegmentedCell, 2, 0) + IdentifierInfo *setToolTipNSSegmentedCell[] = {&Ctx.Idents.get("setToolTip"), + &Ctx.Idents.get("forSegment")}; + ADD_METHOD(NSSegmentedCell, setToolTipNSSegmentedCell, 2, 0) + + NEW_RECEIVER(NSUndoManager) + ADD_UNARY_METHOD(NSUndoManager, setActionName, 0) + ADD_UNARY_METHOD(NSUndoManager, undoMenuTitleForUndoActionName, 0) + ADD_UNARY_METHOD(NSUndoManager, redoMenuTitleForUndoActionName, 0) + + NEW_RECEIVER(NSMenuItem) + IdentifierInfo *initWithTitleNSMenuItem[] = { + &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("action"), + &Ctx.Idents.get("keyEquivalent")}; + ADD_METHOD(NSMenuItem, initWithTitleNSMenuItem, 3, 0) + ADD_UNARY_METHOD(NSMenuItem, setTitle, 0) + ADD_UNARY_METHOD(NSMenuItem, setToolTip, 0) + + NEW_RECEIVER(NSPopUpButtonCell) + IdentifierInfo *initTextCellNSPopUpButtonCell[] = { + &Ctx.Idents.get("initTextCell"), &Ctx.Idents.get("pullsDown")}; + ADD_METHOD(NSPopUpButtonCell, initTextCellNSPopUpButtonCell, 2, 0) + ADD_UNARY_METHOD(NSPopUpButtonCell, addItemWithTitle, 0) + IdentifierInfo *insertItemWithTitleNSPopUpButtonCell[] = { + &Ctx.Idents.get("insertItemWithTitle"), &Ctx.Idents.get("atIndex")}; + ADD_METHOD(NSPopUpButtonCell, insertItemWithTitleNSPopUpButtonCell, 2, 0) + ADD_UNARY_METHOD(NSPopUpButtonCell, removeItemWithTitle, 0) + ADD_UNARY_METHOD(NSPopUpButtonCell, selectItemWithTitle, 0) + ADD_UNARY_METHOD(NSPopUpButtonCell, setTitle, 0) + + NEW_RECEIVER(NSViewController) + ADD_UNARY_METHOD(NSViewController, setTitle, 0) + + 
NEW_RECEIVER(NSMenu) + ADD_UNARY_METHOD(NSMenu, initWithTitle, 0) + IdentifierInfo *insertItemWithTitleNSMenu[] = { + &Ctx.Idents.get("insertItemWithTitle"), &Ctx.Idents.get("action"), + &Ctx.Idents.get("keyEquivalent"), &Ctx.Idents.get("atIndex")}; + ADD_METHOD(NSMenu, insertItemWithTitleNSMenu, 4, 0) + IdentifierInfo *addItemWithTitleNSMenu[] = { + &Ctx.Idents.get("addItemWithTitle"), &Ctx.Idents.get("action"), + &Ctx.Idents.get("keyEquivalent")}; + ADD_METHOD(NSMenu, addItemWithTitleNSMenu, 3, 0) + ADD_UNARY_METHOD(NSMenu, setTitle, 0) + + NEW_RECEIVER(UIMutableUserNotificationAction) + ADD_UNARY_METHOD(UIMutableUserNotificationAction, setTitle, 0) + + NEW_RECEIVER(NSForm) + ADD_UNARY_METHOD(NSForm, addEntry, 0) + IdentifierInfo *insertEntryNSForm[] = {&Ctx.Idents.get("insertEntry"), + &Ctx.Idents.get("atIndex")}; + ADD_METHOD(NSForm, insertEntryNSForm, 2, 0) + + NEW_RECEIVER(NSTextFieldCell) + ADD_UNARY_METHOD(NSTextFieldCell, setPlaceholderString, 0) + + NEW_RECEIVER(NSUserNotificationAction) + IdentifierInfo *actionWithIdentifierNSUserNotificationAction[] = { + &Ctx.Idents.get("actionWithIdentifier"), &Ctx.Idents.get("title")}; + ADD_METHOD(NSUserNotificationAction, + actionWithIdentifierNSUserNotificationAction, 2, 1) + + NEW_RECEIVER(UITextField) + ADD_UNARY_METHOD(UITextField, setText, 0) + ADD_UNARY_METHOD(UITextField, setPlaceholder, 0) + + NEW_RECEIVER(UIBarButtonItem) + IdentifierInfo *initWithTitleUIBarButtonItem[] = { + &Ctx.Idents.get("initWithTitle"), &Ctx.Idents.get("style"), + &Ctx.Idents.get("target"), &Ctx.Idents.get("action")}; + ADD_METHOD(UIBarButtonItem, initWithTitleUIBarButtonItem, 4, 0) + + NEW_RECEIVER(UIViewController) + ADD_UNARY_METHOD(UIViewController, setTitle, 0) + + NEW_RECEIVER(UISegmentedControl) + IdentifierInfo *insertSegmentWithTitleUISegmentedControl[] = { + &Ctx.Idents.get("insertSegmentWithTitle"), &Ctx.Idents.get("atIndex"), + &Ctx.Idents.get("animated")}; + ADD_METHOD(UISegmentedControl, 
insertSegmentWithTitleUISegmentedControl, 3, 0) + IdentifierInfo *setTitleUISegmentedControl[] = { + &Ctx.Idents.get("setTitle"), &Ctx.Idents.get("forSegmentAtIndex")}; + ADD_METHOD(UISegmentedControl, setTitleUISegmentedControl, 2, 0) + + NEW_RECEIVER(NSAccessibilityCustomRotorItemResult) + IdentifierInfo + *initWithItemLoadingTokenNSAccessibilityCustomRotorItemResult[] = { + &Ctx.Idents.get("initWithItemLoadingToken"), + &Ctx.Idents.get("customLabel")}; + ADD_METHOD(NSAccessibilityCustomRotorItemResult, + initWithItemLoadingTokenNSAccessibilityCustomRotorItemResult, 2, 1) + ADD_UNARY_METHOD(NSAccessibilityCustomRotorItemResult, setCustomLabel, 0) + + NEW_RECEIVER(UIContextualAction) + IdentifierInfo *contextualActionWithStyleUIContextualAction[] = { + &Ctx.Idents.get("contextualActionWithStyle"), &Ctx.Idents.get("title"), + &Ctx.Idents.get("handler")}; + ADD_METHOD(UIContextualAction, contextualActionWithStyleUIContextualAction, 3, + 1) + ADD_UNARY_METHOD(UIContextualAction, setTitle, 0) + + NEW_RECEIVER(NSAccessibilityCustomRotor) + IdentifierInfo *initWithLabelNSAccessibilityCustomRotor[] = { + &Ctx.Idents.get("initWithLabel"), &Ctx.Idents.get("itemSearchDelegate")}; + ADD_METHOD(NSAccessibilityCustomRotor, + initWithLabelNSAccessibilityCustomRotor, 2, 0) + ADD_UNARY_METHOD(NSAccessibilityCustomRotor, setLabel, 0) + + NEW_RECEIVER(NSWindowTab) + ADD_UNARY_METHOD(NSWindowTab, setTitle, 0) + ADD_UNARY_METHOD(NSWindowTab, setToolTip, 0) + + NEW_RECEIVER(NSAccessibilityCustomAction) + IdentifierInfo *initWithNameNSAccessibilityCustomAction[] = { + &Ctx.Idents.get("initWithName"), &Ctx.Idents.get("handler")}; + ADD_METHOD(NSAccessibilityCustomAction, + initWithNameNSAccessibilityCustomAction, 2, 0) + IdentifierInfo *initWithNameTargetNSAccessibilityCustomAction[] = { + &Ctx.Idents.get("initWithName"), &Ctx.Idents.get("target"), + &Ctx.Idents.get("selector")}; + ADD_METHOD(NSAccessibilityCustomAction, + initWithNameTargetNSAccessibilityCustomAction, 3, 0) + 
ADD_UNARY_METHOD(NSAccessibilityCustomAction, setName, 0) +} + +#define LSF_INSERT(function_name) LSF.insert(&Ctx.Idents.get(function_name)); +#define LSM_INSERT_NULLARY(receiver, method_name) \ + LSM.insert({&Ctx.Idents.get(receiver), Ctx.Selectors.getNullarySelector( \ + &Ctx.Idents.get(method_name))}); +#define LSM_INSERT_UNARY(receiver, method_name) \ + LSM.insert({&Ctx.Idents.get(receiver), \ + Ctx.Selectors.getUnarySelector(&Ctx.Idents.get(method_name))}); +#define LSM_INSERT_SELECTOR(receiver, method_list, arguments) \ + LSM.insert({&Ctx.Idents.get(receiver), \ + Ctx.Selectors.getSelector(arguments, method_list)}); + +/// Initializes a list of methods and C functions that return a localized string +void NonLocalizedStringChecker::initLocStringsMethods(ASTContext &Ctx) const { + if (!LSM.empty()) + return; + + IdentifierInfo *LocalizedStringMacro[] = { + &Ctx.Idents.get("localizedStringForKey"), &Ctx.Idents.get("value"), + &Ctx.Idents.get("table")}; + LSM_INSERT_SELECTOR("NSBundle", LocalizedStringMacro, 3) + LSM_INSERT_UNARY("NSDateFormatter", "stringFromDate") + IdentifierInfo *LocalizedStringFromDate[] = { + &Ctx.Idents.get("localizedStringFromDate"), &Ctx.Idents.get("dateStyle"), + &Ctx.Idents.get("timeStyle")}; + LSM_INSERT_SELECTOR("NSDateFormatter", LocalizedStringFromDate, 3) + LSM_INSERT_UNARY("NSNumberFormatter", "stringFromNumber") + LSM_INSERT_NULLARY("UITextField", "text") + LSM_INSERT_NULLARY("UITextView", "text") + LSM_INSERT_NULLARY("UILabel", "text") + + LSF_INSERT("CFDateFormatterCreateStringWithDate"); + LSF_INSERT("CFDateFormatterCreateStringWithAbsoluteTime"); + LSF_INSERT("CFNumberFormatterCreateStringWithNumber"); +} + +/// Checks to see if the method / function declaration includes +/// __attribute__((annotate("returns_localized_nsstring"))) +bool NonLocalizedStringChecker::isAnnotatedAsReturningLocalized( + const Decl *D) const { + if (!D) + return false; + return std::any_of( + D->specific_attr_begin<AnnotateAttr>(), + 
D->specific_attr_end<AnnotateAttr>(), [](const AnnotateAttr *Ann) { + return Ann->getAnnotation() == "returns_localized_nsstring"; + }); +} + +/// Checks to see if the method / function declaration includes +/// __attribute__((annotate("takes_localized_nsstring"))) +bool NonLocalizedStringChecker::isAnnotatedAsTakingLocalized( + const Decl *D) const { + if (!D) + return false; + return std::any_of( + D->specific_attr_begin<AnnotateAttr>(), + D->specific_attr_end<AnnotateAttr>(), [](const AnnotateAttr *Ann) { + return Ann->getAnnotation() == "takes_localized_nsstring"; + }); +} + +/// Returns true if the given SVal is marked as Localized in the program state +bool NonLocalizedStringChecker::hasLocalizedState(SVal S, + CheckerContext &C) const { + const MemRegion *mt = S.getAsRegion(); + if (mt) { + const LocalizedState *LS = C.getState()->get<LocalizedMemMap>(mt); + if (LS && LS->isLocalized()) + return true; + } + return false; +} + +/// Returns true if the given SVal is marked as NonLocalized in the program +/// state +bool NonLocalizedStringChecker::hasNonLocalizedState(SVal S, + CheckerContext &C) const { + const MemRegion *mt = S.getAsRegion(); + if (mt) { + const LocalizedState *LS = C.getState()->get<LocalizedMemMap>(mt); + if (LS && LS->isNonLocalized()) + return true; + } + return false; +} + +/// Marks the given SVal as Localized in the program state +void NonLocalizedStringChecker::setLocalizedState(const SVal S, + CheckerContext &C) const { + const MemRegion *mt = S.getAsRegion(); + if (mt) { + ProgramStateRef State = + C.getState()->set<LocalizedMemMap>(mt, LocalizedState::getLocalized()); + C.addTransition(State); + } +} + +/// Marks the given SVal as NonLocalized in the program state +void NonLocalizedStringChecker::setNonLocalizedState(const SVal S, + CheckerContext &C) const { + const MemRegion *mt = S.getAsRegion(); + if (mt) { + ProgramStateRef State = C.getState()->set<LocalizedMemMap>( + mt, LocalizedState::getNonLocalized()); + 
C.addTransition(State); + } +} + + +static bool isDebuggingName(std::string name) { + return StringRef(name).lower().find("debug") != StringRef::npos; +} + +/// Returns true when, heuristically, the analyzer may be analyzing debugging +/// code. We use this to suppress localization diagnostics in un-localized user +/// interfaces that are only used for debugging and are therefore not user +/// facing. +static bool isDebuggingContext(CheckerContext &C) { + const Decl *D = C.getCurrentAnalysisDeclContext()->getDecl(); + if (!D) + return false; + + if (auto *ND = dyn_cast<NamedDecl>(D)) { + if (isDebuggingName(ND->getNameAsString())) + return true; + } + + const DeclContext *DC = D->getDeclContext(); + + if (auto *CD = dyn_cast<ObjCContainerDecl>(DC)) { + if (isDebuggingName(CD->getNameAsString())) + return true; + } + + return false; +} + + +/// Reports a localization error for the passed in method call and SVal +void NonLocalizedStringChecker::reportLocalizationError( + SVal S, const CallEvent &M, CheckerContext &C, int argumentNumber) const { + + // Don't warn about localization errors in classes and methods that + // may be debug code. + if (isDebuggingContext(C)) + return; + + static CheckerProgramPointTag Tag("NonLocalizedStringChecker", + "UnlocalizedString"); + ExplodedNode *ErrNode = C.addTransition(C.getState(), C.getPredecessor(), &Tag); + + if (!ErrNode) + return; + + // Generate the bug report. 
+ auto R = std::make_unique<PathSensitiveBugReport>( + *BT, "User-facing text should use localized string macro", ErrNode); + if (argumentNumber) { + R->addRange(M.getArgExpr(argumentNumber - 1)->getSourceRange()); + } else { + R->addRange(M.getSourceRange()); + } + R->markInteresting(S); + + const MemRegion *StringRegion = S.getAsRegion(); + if (StringRegion) + R->addVisitor(std::make_unique<NonLocalizedStringBRVisitor>(StringRegion)); + + C.emitReport(std::move(R)); +} + +/// Returns the argument number requiring localized string if it exists +/// otherwise, returns -1 +int NonLocalizedStringChecker::getLocalizedArgumentForSelector( + const IdentifierInfo *Receiver, Selector S) const { + auto method = UIMethods.find(Receiver); + + if (method == UIMethods.end()) + return -1; + + auto argumentIterator = method->getSecond().find(S); + + if (argumentIterator == method->getSecond().end()) + return -1; + + int argumentNumber = argumentIterator->getSecond(); + return argumentNumber; +} + +/// Check if the string being passed in has NonLocalized state +void NonLocalizedStringChecker::checkPreObjCMessage(const ObjCMethodCall &msg, + CheckerContext &C) const { + initUIMethods(C.getASTContext()); + + const ObjCInterfaceDecl *OD = msg.getReceiverInterface(); + if (!OD) + return; + const IdentifierInfo *odInfo = OD->getIdentifier(); + + Selector S = msg.getSelector(); + + std::string SelectorString = S.getAsString(); + StringRef SelectorName = SelectorString; + assert(!SelectorName.empty()); + + if (odInfo->isStr("NSString")) { + // Handle the case where the receiver is an NSString + // These special NSString methods draw to the screen + + if (!(SelectorName.startswith("drawAtPoint") || + SelectorName.startswith("drawInRect") || + SelectorName.startswith("drawWithRect"))) + return; + + SVal svTitle = msg.getReceiverSVal(); + + bool isNonLocalized = hasNonLocalizedState(svTitle, C); + + if (isNonLocalized) { + reportLocalizationError(svTitle, msg, C); + } + } + + int 
argumentNumber = getLocalizedArgumentForSelector(odInfo, S); + // Go up each hierarchy of superclasses and their protocols + while (argumentNumber < 0 && OD->getSuperClass() != nullptr) { + for (const auto *P : OD->all_referenced_protocols()) { + argumentNumber = getLocalizedArgumentForSelector(P->getIdentifier(), S); + if (argumentNumber >= 0) + break; + } + if (argumentNumber < 0) { + OD = OD->getSuperClass(); + argumentNumber = getLocalizedArgumentForSelector(OD->getIdentifier(), S); + } + } + + if (argumentNumber < 0) { // There was no match in UIMethods + if (const Decl *D = msg.getDecl()) { + if (const ObjCMethodDecl *OMD = dyn_cast_or_null<ObjCMethodDecl>(D)) { + auto formals = OMD->parameters(); + for (unsigned i = 0, ei = formals.size(); i != ei; ++i) { + if (isAnnotatedAsTakingLocalized(formals[i])) { + argumentNumber = i; + break; + } + } + } + } + } + + if (argumentNumber < 0) // Still no match + return; + + SVal svTitle = msg.getArgSVal(argumentNumber); + + if (const ObjCStringRegion *SR = + dyn_cast_or_null<ObjCStringRegion>(svTitle.getAsRegion())) { + StringRef stringValue = + SR->getObjCStringLiteral()->getString()->getString(); + if ((stringValue.trim().size() == 0 && stringValue.size() > 0) || + stringValue.empty()) + return; + if (!IsAggressive && llvm::sys::unicode::columnWidthUTF8(stringValue) < 2) + return; + } + + bool isNonLocalized = hasNonLocalizedState(svTitle, C); + + if (isNonLocalized) { + reportLocalizationError(svTitle, msg, C, argumentNumber + 1); + } +} + +void NonLocalizedStringChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + const auto *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); + if (!FD) + return; + + auto formals = FD->parameters(); + for (unsigned i = 0, ei = std::min(static_cast<unsigned>(formals.size()), + Call.getNumArgs()); i != ei; ++i) { + if (isAnnotatedAsTakingLocalized(formals[i])) { + auto actual = Call.getArgSVal(i); + if (hasNonLocalizedState(actual, C)) { + 
reportLocalizationError(actual, Call, C, i + 1); + } + } + } +} + +static inline bool isNSStringType(QualType T, ASTContext &Ctx) { + + const ObjCObjectPointerType *PT = T->getAs<ObjCObjectPointerType>(); + if (!PT) + return false; + + ObjCInterfaceDecl *Cls = PT->getObjectType()->getInterface(); + if (!Cls) + return false; + + IdentifierInfo *ClsName = Cls->getIdentifier(); + + // FIXME: Should we walk the chain of classes? + return ClsName == &Ctx.Idents.get("NSString") || + ClsName == &Ctx.Idents.get("NSMutableString"); +} + +/// Marks a string being returned by any call as localized +/// if it is in LocStringFunctions (LSF) or the function is annotated. +/// Otherwise, we mark it as NonLocalized (Aggressive) or +/// NonLocalized only if it is not backed by a SymRegion (Non-Aggressive), +/// basically leaving only string literals as NonLocalized. +void NonLocalizedStringChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + initLocStringsMethods(C.getASTContext()); + + if (!Call.getOriginExpr()) + return; + + // Anything that takes in a localized NSString as an argument + // and returns an NSString will be assumed to be returning a + // localized NSString. 
(Counter: Incorrectly combining two LocalizedStrings) + const QualType RT = Call.getResultType(); + if (isNSStringType(RT, C.getASTContext())) { + for (unsigned i = 0; i < Call.getNumArgs(); ++i) { + SVal argValue = Call.getArgSVal(i); + if (hasLocalizedState(argValue, C)) { + SVal sv = Call.getReturnValue(); + setLocalizedState(sv, C); + return; + } + } + } + + const Decl *D = Call.getDecl(); + if (!D) + return; + + const IdentifierInfo *Identifier = Call.getCalleeIdentifier(); + + SVal sv = Call.getReturnValue(); + if (isAnnotatedAsReturningLocalized(D) || LSF.count(Identifier) != 0) { + setLocalizedState(sv, C); + } else if (isNSStringType(RT, C.getASTContext()) && + !hasLocalizedState(sv, C)) { + if (IsAggressive) { + setNonLocalizedState(sv, C); + } else { + const SymbolicRegion *SymReg = + dyn_cast_or_null<SymbolicRegion>(sv.getAsRegion()); + if (!SymReg) + setNonLocalizedState(sv, C); + } + } +} + +/// Marks a string being returned by an ObjC method as localized +/// if it is in LocStringMethods or the method is annotated +void NonLocalizedStringChecker::checkPostObjCMessage(const ObjCMethodCall &msg, + CheckerContext &C) const { + initLocStringsMethods(C.getASTContext()); + + if (!msg.isInstanceMessage()) + return; + + const ObjCInterfaceDecl *OD = msg.getReceiverInterface(); + if (!OD) + return; + const IdentifierInfo *odInfo = OD->getIdentifier(); + + Selector S = msg.getSelector(); + std::string SelectorName = S.getAsString(); + + std::pair<const IdentifierInfo *, Selector> MethodDescription = {odInfo, S}; + + if (LSM.count(MethodDescription) || + isAnnotatedAsReturningLocalized(msg.getDecl())) { + SVal sv = msg.getReturnValue(); + setLocalizedState(sv, C); + } +} + +/// Marks every Objective-C string literal as NonLocalized +void NonLocalizedStringChecker::checkPostStmt(const ObjCStringLiteral *SL, + CheckerContext &C) const { + SVal sv = C.getSVal(SL); + setNonLocalizedState(sv, C); +} + +PathDiagnosticPieceRef +NonLocalizedStringBRVisitor::VisitNode(const 
ExplodedNode *Succ, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) { + if (Satisfied) + return nullptr; + + Optional<StmtPoint> Point = Succ->getLocation().getAs<StmtPoint>(); + if (!Point.hasValue()) + return nullptr; + + auto *LiteralExpr = dyn_cast<ObjCStringLiteral>(Point->getStmt()); + if (!LiteralExpr) + return nullptr; + + SVal LiteralSVal = Succ->getSVal(LiteralExpr); + if (LiteralSVal.getAsRegion() != NonLocalizedString) + return nullptr; + + Satisfied = true; + + PathDiagnosticLocation L = + PathDiagnosticLocation::create(*Point, BRC.getSourceManager()); + + if (!L.isValid() || !L.asLocation().isValid()) + return nullptr; + + auto Piece = std::make_shared<PathDiagnosticEventPiece>( + L, "Non-localized string literal here"); + Piece->addRange(LiteralExpr->getSourceRange()); + + return std::move(Piece); +} + +namespace { +class EmptyLocalizationContextChecker + : public Checker<check::ASTDecl<ObjCImplementationDecl>> { + + // A helper class, which walks the AST + class MethodCrawler : public ConstStmtVisitor<MethodCrawler> { + const ObjCMethodDecl *MD; + BugReporter &BR; + AnalysisManager &Mgr; + const CheckerBase *Checker; + LocationOrAnalysisDeclContext DCtx; + + public: + MethodCrawler(const ObjCMethodDecl *InMD, BugReporter &InBR, + const CheckerBase *Checker, AnalysisManager &InMgr, + AnalysisDeclContext *InDCtx) + : MD(InMD), BR(InBR), Mgr(InMgr), Checker(Checker), DCtx(InDCtx) {} + + void VisitStmt(const Stmt *S) { VisitChildren(S); } + + void VisitObjCMessageExpr(const ObjCMessageExpr *ME); + + void reportEmptyContextError(const ObjCMessageExpr *M) const; + + void VisitChildren(const Stmt *S) { + for (const Stmt *Child : S->children()) { + if (Child) + this->Visit(Child); + } + } + }; + +public: + void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager &Mgr, + BugReporter &BR) const; +}; +} // end anonymous namespace + +void EmptyLocalizationContextChecker::checkASTDecl( + const ObjCImplementationDecl *D, AnalysisManager &Mgr, + 
BugReporter &BR) const { + + for (const ObjCMethodDecl *M : D->methods()) { + AnalysisDeclContext *DCtx = Mgr.getAnalysisDeclContext(M); + + const Stmt *Body = M->getBody(); + assert(Body); + + MethodCrawler MC(M->getCanonicalDecl(), BR, this, Mgr, DCtx); + MC.VisitStmt(Body); + } +} + +/// This check attempts to match these macros, assuming they are defined as +/// follows: +/// +/// #define NSLocalizedString(key, comment) \ +/// [[NSBundle mainBundle] localizedStringForKey:(key) value:@"" table:nil] +/// #define NSLocalizedStringFromTable(key, tbl, comment) \ +/// [[NSBundle mainBundle] localizedStringForKey:(key) value:@"" table:(tbl)] +/// #define NSLocalizedStringFromTableInBundle(key, tbl, bundle, comment) \ +/// [bundle localizedStringForKey:(key) value:@"" table:(tbl)] +/// #define NSLocalizedStringWithDefaultValue(key, tbl, bundle, val, comment) +/// +/// We cannot use the path sensitive check because the macro argument we are +/// checking for (comment) is not used and thus not present in the AST, +/// so we use Lexer on the original macro call and retrieve the value of +/// the comment. If it's empty or nil, we raise a warning. +void EmptyLocalizationContextChecker::MethodCrawler::VisitObjCMessageExpr( + const ObjCMessageExpr *ME) { + + // FIXME: We may be able to use PPCallbacks to check for empty context + // comments as part of preprocessing and avoid this re-lexing hack. + const ObjCInterfaceDecl *OD = ME->getReceiverInterface(); + if (!OD) + return; + + const IdentifierInfo *odInfo = OD->getIdentifier(); + + if (!(odInfo->isStr("NSBundle") && + ME->getSelector().getAsString() == + "localizedStringForKey:value:table:")) { + return; + } + + SourceRange R = ME->getSourceRange(); + if (!R.getBegin().isMacroID()) + return; + + // getImmediateMacroCallerLoc gets the location of the immediate macro + // caller, one level up the stack toward the initial macro typed into the + // source, so SL should point to the NSLocalizedString macro. 
+ SourceLocation SL = + Mgr.getSourceManager().getImmediateMacroCallerLoc(R.getBegin()); + std::pair<FileID, unsigned> SLInfo = + Mgr.getSourceManager().getDecomposedLoc(SL); + + SrcMgr::SLocEntry SE = Mgr.getSourceManager().getSLocEntry(SLInfo.first); + + // If NSLocalizedString macro is wrapped in another macro, we need to + // unwrap the expansion until we get to the NSLocalizedStringMacro. + while (SE.isExpansion()) { + SL = SE.getExpansion().getSpellingLoc(); + SLInfo = Mgr.getSourceManager().getDecomposedLoc(SL); + SE = Mgr.getSourceManager().getSLocEntry(SLInfo.first); + } + + bool Invalid = false; + const llvm::MemoryBuffer *BF = + Mgr.getSourceManager().getBuffer(SLInfo.first, SL, &Invalid); + if (Invalid) + return; + + Lexer TheLexer(SL, LangOptions(), BF->getBufferStart(), + BF->getBufferStart() + SLInfo.second, BF->getBufferEnd()); + + Token I; + Token Result; // This will hold the token just before the last ')' + int p_count = 0; // This is for parenthesis matching + while (!TheLexer.LexFromRawLexer(I)) { + if (I.getKind() == tok::l_paren) + ++p_count; + if (I.getKind() == tok::r_paren) { + if (p_count == 1) + break; + --p_count; + } + Result = I; + } + + if (isAnyIdentifier(Result.getKind())) { + if (Result.getRawIdentifier().equals("nil")) { + reportEmptyContextError(ME); + return; + } + } + + if (!isStringLiteral(Result.getKind())) + return; + + StringRef Comment = + StringRef(Result.getLiteralData(), Result.getLength()).trim('"'); + + if ((Comment.trim().size() == 0 && Comment.size() > 0) || // Is Whitespace + Comment.empty()) { + reportEmptyContextError(ME); + } +} + +void EmptyLocalizationContextChecker::MethodCrawler::reportEmptyContextError( + const ObjCMessageExpr *ME) const { + // Generate the bug report. 
+ BR.EmitBasicReport(MD, Checker, "Context Missing", + "Localizability Issue (Apple)", + "Localized string macro should include a non-empty " + "comment for translators", + PathDiagnosticLocation(ME, BR.getSourceManager(), DCtx)); +} + +namespace { +class PluralMisuseChecker : public Checker<check::ASTCodeBody> { + + // A helper class, which walks the AST + class MethodCrawler : public RecursiveASTVisitor<MethodCrawler> { + BugReporter &BR; + const CheckerBase *Checker; + AnalysisDeclContext *AC; + + // This functions like a stack. We push on any IfStmt or + // ConditionalOperator that matches the condition + // and pop it off when we leave that statement + llvm::SmallVector<const clang::Stmt *, 8> MatchingStatements; + // This is true when we are the direct-child of a + // matching statement + bool InMatchingStatement = false; + + public: + explicit MethodCrawler(BugReporter &InBR, const CheckerBase *Checker, + AnalysisDeclContext *InAC) + : BR(InBR), Checker(Checker), AC(InAC) {} + + bool VisitIfStmt(const IfStmt *I); + bool EndVisitIfStmt(IfStmt *I); + bool TraverseIfStmt(IfStmt *x); + bool VisitConditionalOperator(const ConditionalOperator *C); + bool TraverseConditionalOperator(ConditionalOperator *C); + bool VisitCallExpr(const CallExpr *CE); + bool VisitObjCMessageExpr(const ObjCMessageExpr *ME); + + private: + void reportPluralMisuseError(const Stmt *S) const; + bool isCheckingPlurality(const Expr *E) const; + }; + +public: + void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr, + BugReporter &BR) const { + MethodCrawler Visitor(BR, this, Mgr.getAnalysisDeclContext(D)); + Visitor.TraverseDecl(const_cast<Decl *>(D)); + } +}; +} // end anonymous namespace + +// Checks the condition of the IfStmt and returns true if one +// of the following heuristics is met: +// 1) The condition is a variable with "singular" or "plural" in the name +// 2) The condition is a binary operator with 1 or 2 on the right-hand side +bool 
PluralMisuseChecker::MethodCrawler::isCheckingPlurality( + const Expr *Condition) const { + const BinaryOperator *BO = nullptr; + // Accounts for when a VarDecl represents a BinaryOperator + if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Condition)) { + if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) { + const Expr *InitExpr = VD->getInit(); + if (InitExpr) { + if (const BinaryOperator *B = + dyn_cast<BinaryOperator>(InitExpr->IgnoreParenImpCasts())) { + BO = B; + } + } + if (VD->getName().lower().find("plural") != StringRef::npos || + VD->getName().lower().find("singular") != StringRef::npos) { + return true; + } + } + } else if (const BinaryOperator *B = dyn_cast<BinaryOperator>(Condition)) { + BO = B; + } + + if (BO == nullptr) + return false; + + if (IntegerLiteral *IL = dyn_cast_or_null<IntegerLiteral>( + BO->getRHS()->IgnoreParenImpCasts())) { + llvm::APInt Value = IL->getValue(); + if (Value == 1 || Value == 2) { + return true; + } + } + return false; +} + +// A CallExpr with "LOC" in its identifier that takes in a string literal +// has been shown to almost always be a function that returns a localized +// string. Raise a diagnostic when this is in a statement that matches +// the condition. +bool PluralMisuseChecker::MethodCrawler::VisitCallExpr(const CallExpr *CE) { + if (InMatchingStatement) { + if (const FunctionDecl *FD = CE->getDirectCallee()) { + std::string NormalizedName = + StringRef(FD->getNameInfo().getAsString()).lower(); + if (NormalizedName.find("loc") != std::string::npos) { + for (const Expr *Arg : CE->arguments()) { + if (isa<ObjCStringLiteral>(Arg)) + reportPluralMisuseError(CE); + } + } + } + } + return true; +} + +// The other case is for NSLocalizedString which also returns +// a localized string. It's a macro for the ObjCMessageExpr +// [NSBundle localizedStringForKey:value:table:] Raise a +// diagnostic when this is in a statement that matches +// the condition. 
+bool PluralMisuseChecker::MethodCrawler::VisitObjCMessageExpr( + const ObjCMessageExpr *ME) { + const ObjCInterfaceDecl *OD = ME->getReceiverInterface(); + if (!OD) + return true; + + const IdentifierInfo *odInfo = OD->getIdentifier(); + + if (odInfo->isStr("NSBundle") && + ME->getSelector().getAsString() == "localizedStringForKey:value:table:") { + if (InMatchingStatement) { + reportPluralMisuseError(ME); + } + } + return true; +} + +/// Override TraverseIfStmt so we know when we are done traversing an IfStmt +bool PluralMisuseChecker::MethodCrawler::TraverseIfStmt(IfStmt *I) { + RecursiveASTVisitor<MethodCrawler>::TraverseIfStmt(I); + return EndVisitIfStmt(I); +} + +// EndVisit callbacks are not provided by the RecursiveASTVisitor +// so we override TraverseIfStmt and make a call to EndVisitIfStmt +// after traversing the IfStmt +bool PluralMisuseChecker::MethodCrawler::EndVisitIfStmt(IfStmt *I) { + MatchingStatements.pop_back(); + if (!MatchingStatements.empty()) { + if (MatchingStatements.back() != nullptr) { + InMatchingStatement = true; + return true; + } + } + InMatchingStatement = false; + return true; +} + +bool PluralMisuseChecker::MethodCrawler::VisitIfStmt(const IfStmt *I) { + const Expr *Condition = I->getCond()->IgnoreParenImpCasts(); + if (isCheckingPlurality(Condition)) { + MatchingStatements.push_back(I); + InMatchingStatement = true; + } else { + MatchingStatements.push_back(nullptr); + InMatchingStatement = false; + } + + return true; +} + +// Preliminary support for conditional operators. 
+bool PluralMisuseChecker::MethodCrawler::TraverseConditionalOperator( + ConditionalOperator *C) { + RecursiveASTVisitor<MethodCrawler>::TraverseConditionalOperator(C); + MatchingStatements.pop_back(); + if (!MatchingStatements.empty()) { + if (MatchingStatements.back() != nullptr) + InMatchingStatement = true; + else + InMatchingStatement = false; + } else { + InMatchingStatement = false; + } + return true; +} + +bool PluralMisuseChecker::MethodCrawler::VisitConditionalOperator( + const ConditionalOperator *C) { + const Expr *Condition = C->getCond()->IgnoreParenImpCasts(); + if (isCheckingPlurality(Condition)) { + MatchingStatements.push_back(C); + InMatchingStatement = true; + } else { + MatchingStatements.push_back(nullptr); + InMatchingStatement = false; + } + return true; +} + +void PluralMisuseChecker::MethodCrawler::reportPluralMisuseError( + const Stmt *S) const { + // Generate the bug report. + BR.EmitBasicReport(AC->getDecl(), Checker, "Plural Misuse", + "Localizability Issue (Apple)", + "Plural cases are not supported across all languages. " + "Use a .stringsdict file instead", + PathDiagnosticLocation(S, BR.getSourceManager(), AC)); +} + +//===----------------------------------------------------------------------===// +// Checker registration. 
+//===----------------------------------------------------------------------===// + +void ento::registerNonLocalizedStringChecker(CheckerManager &mgr) { + NonLocalizedStringChecker *checker = + mgr.registerChecker<NonLocalizedStringChecker>(); + checker->IsAggressive = + mgr.getAnalyzerOptions().getCheckerBooleanOption( + checker, "AggressiveReport"); +} + +bool ento::shouldRegisterNonLocalizedStringChecker(const LangOptions &LO) { + return true; +} + +void ento::registerEmptyLocalizationContextChecker(CheckerManager &mgr) { + mgr.registerChecker<EmptyLocalizationContextChecker>(); +} + +bool ento::shouldRegisterEmptyLocalizationContextChecker( + const LangOptions &LO) { + return true; +} + +void ento::registerPluralMisuseChecker(CheckerManager &mgr) { + mgr.registerChecker<PluralMisuseChecker>(); +} + +bool ento::shouldRegisterPluralMisuseChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp new file mode 100644 index 000000000000..d8fd125f4003 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp @@ -0,0 +1,296 @@ +//== MIGChecker.cpp - MIG calling convention checker ------------*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines MIGChecker, a Mach Interface Generator calling convention +// checker. Namely, in MIG callback implementation the following rules apply: +// - When a server routine returns an error code that represents success, it +// must take ownership of resources passed to it (and eventually release +// them). +// - Additionally, when returning success, all out-parameters must be +// initialized. 
+// - When it returns any other error code, it must not take ownership, +// because the message and its out-of-line parameters will be destroyed +// by the client that called the function. +// For now we only check the last rule, as its violations lead to dangerous +// use-after-free exploits. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/AnyCall.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class MIGChecker : public Checker<check::PostCall, check::PreStmt<ReturnStmt>, + check::EndFunction> { + BugType BT{this, "Use-after-free (MIG calling convention violation)", + categories::MemoryError}; + + // The checker knows that an out-of-line object is deallocated if it is + // passed as an argument to one of these functions. If this object is + // additionally an argument of a MIG routine, the checker keeps track of that + // information and issues a warning when an error is returned from the + // respective routine. + std::vector<std::pair<CallDescription, unsigned>> Deallocators = { +#define CALL(required_args, deallocated_arg, ...) \ + {{{__VA_ARGS__}, required_args}, deallocated_arg} + // E.g., if the checker sees a C function 'vm_deallocate' that has + // exactly 3 parameters, it knows that argument #1 (starting from 0, + // i.e. the second argument) is going to be consumed in the sense of + // the MIG consume-on-success convention. 
+ CALL(3, 1, "vm_deallocate"), + CALL(3, 1, "mach_vm_deallocate"), + CALL(2, 0, "mig_deallocate"), + CALL(2, 1, "mach_port_deallocate"), + CALL(1, 0, "device_deallocate"), + CALL(1, 0, "iokit_remove_connect_reference"), + CALL(1, 0, "iokit_remove_reference"), + CALL(1, 0, "iokit_release_port"), + CALL(1, 0, "ipc_port_release"), + CALL(1, 0, "ipc_port_release_sonce"), + CALL(1, 0, "ipc_voucher_attr_control_release"), + CALL(1, 0, "ipc_voucher_release"), + CALL(1, 0, "lock_set_dereference"), + CALL(1, 0, "memory_object_control_deallocate"), + CALL(1, 0, "pset_deallocate"), + CALL(1, 0, "semaphore_dereference"), + CALL(1, 0, "space_deallocate"), + CALL(1, 0, "space_inspect_deallocate"), + CALL(1, 0, "task_deallocate"), + CALL(1, 0, "task_inspect_deallocate"), + CALL(1, 0, "task_name_deallocate"), + CALL(1, 0, "thread_deallocate"), + CALL(1, 0, "thread_inspect_deallocate"), + CALL(1, 0, "upl_deallocate"), + CALL(1, 0, "vm_map_deallocate"), + // E.g., if the checker sees a method 'releaseAsyncReference64()' that is + // defined on class 'IOUserClient' that takes exactly 1 argument, it knows + // that the argument is going to be consumed in the sense of the MIG + // consume-on-success convention. + CALL(1, 0, "IOUserClient", "releaseAsyncReference64"), + CALL(1, 0, "IOUserClient", "releaseNotificationPort"), +#undef CALL + }; + + CallDescription OsRefRetain{"os_ref_retain", 1}; + + void checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const; + +public: + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + + // HACK: We're making two attempts to find the bug: checkEndFunction + // should normally be enough but it fails when the return value is a literal + // that never gets put into the Environment and ends of function with multiple + // returns get agglutinated across returns, preventing us from obtaining + // the return value. The problem is similar to https://reviews.llvm.org/D25326 + // but now we step into it in the top-level function. 
+  void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const {
+    checkReturnAux(RS, C);
+  }
+  // End-of-function callback; runs the same return check as checkPreStmt().
+  void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const {
+    checkReturnAux(RS, C);
+  }
+
+};
+} // end anonymous namespace
+
+// A flag that says that the programmer has called a MIG destructor
+// for at least one parameter.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(ReleasedParameter, bool)
+// A set of parameters for which the check is suppressed because
+// reference counting is being performed.
+REGISTER_SET_WITH_PROGRAMSTATE(RefCountedParameters, const ParmVarDecl *)
+
+/// Finds the parameter of the top-level function that the value \p V was
+/// loaded from, either directly or through a chain of symbolic regions.
+///
+/// \param V value whose origin is looked up
+/// \param C checker context (not used by the body at the moment)
+/// \param IncludeBaseRegions forwarded to SVal::getAsSymbol()
+/// \returns the declaration of the originating parameter, or nullptr when
+/// \p V has no symbol or the chain of origin regions does not end in a
+/// parameter of the top stack frame.
+static const ParmVarDecl *getOriginParam(SVal V, CheckerContext &C,
+                                         bool IncludeBaseRegions = false) {
+  // TODO: We should most likely always include base regions here.
+  SymbolRef Sym = V.getAsSymbol(IncludeBaseRegions);
+  if (!Sym)
+    return nullptr;
+
+  // If we optimistically assume that the MIG routine never re-uses the storage
+  // that was passed to it as arguments when it invalidates it (but at most when
+  // it assigns to parameter variables directly), this procedure correctly
+  // determines if the value was loaded from the transitive closure of MIG
+  // routine arguments in the heap.
+  while (const MemRegion *MR = Sym->getOriginRegion()) {
+    // Stop as soon as the origin is a parameter variable of the top frame.
+    const auto *VR = dyn_cast<VarRegion>(MR);
+    if (VR && VR->hasStackParametersStorage() &&
+        VR->getStackFrame()->inTopFrame())
+      return cast<ParmVarDecl>(VR->getDecl());
+
+    // Otherwise step to the symbol of the enclosing symbolic region, if any,
+    // and repeat the lookup for that symbol.
+    const SymbolicRegion *SR = MR->getSymbolicBase();
+    if (!SR)
+      return nullptr;
+
+    Sym = SR->getSymbol();
+  }
+
+  return nullptr;
+}
+
+/// Tells whether the top-level function under analysis is a MIG server
+/// routine, i.e. its declaration (or a method it overrides) carries
+/// MIGServerRoutineAttr. Returns false when AnyCall can describe the
+/// declaration and its canonical return type is not a signed integer type.
+static bool isInMIGCall(CheckerContext &C) {
+  const LocationContext *LC = C.getLocationContext();
+  assert(LC && "Unknown location context");
+
+  // Given that the assertion above holds, the loop below runs at least once,
+  // so SFC is assigned before it is read.
+  const StackFrameContext *SFC;
+  // Find the top frame.
+  while (LC) {
+    SFC = LC->getStackFrame();
+    LC = SFC->getParent();
+  }
+
+  const Decl *D = SFC->getDecl();
+
+  if (Optional<AnyCall> AC = AnyCall::forDecl(D)) {
+    // Even though there's a Sema warning when the return type of an annotated
+    // function is not a kern_return_t, this warning isn't an error, so we need
+    // an extra sanity check here.
+    // FIXME: AnyCall doesn't support blocks yet, so they remain unchecked
+    // for now.
+    if (!AC->getReturnType(C.getASTContext())
+             .getCanonicalType()->isSignedIntegerType())
+      return false;
+  }
+
+  if (D->hasAttr<MIGServerRoutineAttr>())
+    return true;
+
+  // See if there's an annotated method in the superclass.
+  if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
+    for (const auto *OMD: MD->overridden_methods())
+      if (OMD->hasAttr<MIGServerRoutineAttr>())
+        return true;
+
+  return false;
+}
+
+/// Post-call callback. A call to os_ref_retain() adds the parameter its first
+/// argument originates from to RefCountedParameters. Otherwise, inside a MIG
+/// routine, a call matching an entry of Deallocators sets the
+/// ReleasedParameter flag, provided the deallocated argument originates from
+/// a parameter that is not in RefCountedParameters.
+void MIGChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const {
+  if (Call.isCalled(OsRefRetain)) {
+    // If the code is doing reference counting over the parameter,
+    // it opens up an opportunity for safely calling a destructor function.
+    // TODO: We should still check for over-releases.
+    if (const ParmVarDecl *PVD =
+            getOriginParam(Call.getArgSVal(0), C, /*IncludeBaseRegions=*/true)) {
+      // We never need to clean up the program state because these are
+      // top-level parameters anyway, so they're always live.
+      C.addTransition(C.getState()->add<RefCountedParameters>(PVD));
+    }
+    return;
+  }
+
+  if (!isInMIGCall(C))
+    return;
+
+  // Look for the table entry that describes this call.
+  auto I = llvm::find_if(Deallocators,
+                         [&](const std::pair<CallDescription, unsigned> &Item) {
+                           return Call.isCalled(Item.first);
+                         });
+  if (I == Deallocators.end())
+    return;
+
+  ProgramStateRef State = C.getState();
+  // The second member of the entry is the index of the deallocated argument.
+  unsigned ArgIdx = I->second;
+  SVal Arg = Call.getArgSVal(ArgIdx);
+  const ParmVarDecl *PVD = getOriginParam(Arg, C);
+  if (!PVD || State->contains<RefCountedParameters>(PVD))
+    return;
+
+  const NoteTag *T = C.getNoteTag([this, PVD](BugReport &BR) -> std::string {
+    // Produce the note only for reports of this checker's own bug type.
+    if (&BR.getBugType() != &BT)
+      return "";
+    SmallString<64> Str;
+    llvm::raw_svector_ostream OS(Str);
+    OS << "Value passed through parameter '" << PVD->getName()
+       << "\' is deallocated";
+    return OS.str();
+  });
+  C.addTransition(State->set<ReleasedParameter>(true), T);
+}
+
+// Returns true if V can potentially represent a "successful" kern_return_t.
+static bool mayBeSuccess(SVal V, CheckerContext &C) {
+  ProgramStateRef State = C.getState();
+
+  // Can V represent KERN_SUCCESS?
+  if (!State->isNull(V).isConstrainedFalse())
+    return true;
+
+  SValBuilder &SVB = C.getSValBuilder();
+  ASTContext &ACtx = C.getASTContext();
+
+  // Can V represent MIG_NO_REPLY?
+  static const int MigNoReply = -305;
+  // From here on V holds the result of the comparison 'V == MigNoReply';
+  // unless that result is known to be zero, the match remains possible.
+  V = SVB.evalEQ(C.getState(), V, SVB.makeIntVal(MigNoReply, ACtx.IntTy));
+  if (!State->isNull(V).isConstrainedTrue())
+    return true;
+
+  // If none of the above, it's definitely an error.
+  return false;
+}
+
+/// Shared implementation of the return checks: emits a report when the
+/// top-frame MIG routine returns a value that cannot be a success code while
+/// the ReleasedParameter flag is set.
+void MIGChecker::checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const {
+  // It is very unlikely that a MIG callback will be called from anywhere
+  // within the project under analysis and the caller isn't itself a routine
+  // that follows the MIG calling convention. Therefore we're safe to believe
+  // that it's always the top frame that is of interest. There's a slight chance
+  // that the user would want to enforce the MIG calling convention upon
+  // a random routine in the middle of nowhere, but given that the convention is
+  // fairly weird and hard to follow in the first place, there's relatively
+  // little motivation to spread it this way.
+  if (!C.inTopFrame())
+    return;
+
+  if (!isInMIGCall(C))
+    return;
+
+  // We know that the function is non-void, but what if the return statement
+  // is not there in the code? It's not a compile error, we should not crash.
+  if (!RS)
+    return;
+
+  ProgramStateRef State = C.getState();
+  if (!State->get<ReleasedParameter>())
+    return;
+
+  SVal V = C.getSVal(RS);
+  if (mayBeSuccess(V, C))
+    return;
+
+  ExplodedNode *N = C.generateErrorNode();
+  if (!N)
+    return;
+
+  auto R = std::make_unique<PathSensitiveBugReport>(
+      BT,
+      "MIG callback fails with error after deallocating argument value. "
+      "This is a use-after-free vulnerability because the caller will try to "
+      "deallocate it again",
+      N);
+
+  R->addRange(RS->getSourceRange());
+  bugreporter::trackExpressionValue(N, RS->getRetValue(), *R,
+                                    bugreporter::TrackingKind::Thorough, false);
+  C.emitReport(std::move(R));
+}
+
+// Registers MIGChecker with the checker manager.
+void ento::registerMIGChecker(CheckerManager &Mgr) {
+  Mgr.registerChecker<MIGChecker>();
+}
+
+// The checker is registered regardless of the language options.
+bool ento::shouldRegisterMIGChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp
new file mode 100644
index 000000000000..bbf2ddec5762
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp
@@ -0,0 +1,117 @@
+//===-- MPIBugReporter.cpp - bug reporter -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines prefabricated reports which are emitted in +/// case of MPI related bugs, detected by path-sensitive analysis. +/// +//===----------------------------------------------------------------------===// + +#include "MPIBugReporter.h" +#include "MPIChecker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" + +namespace clang { +namespace ento { +namespace mpi { + +void MPIBugReporter::reportDoubleNonblocking( + const CallEvent &MPICallEvent, const ento::mpi::Request &Req, + const MemRegion *const RequestRegion, + const ExplodedNode *const ExplNode, + BugReporter &BReporter) const { + + std::string ErrorText; + ErrorText = "Double nonblocking on request " + + RequestRegion->getDescriptiveName() + ". "; + + auto Report = std::make_unique<PathSensitiveBugReport>( + *DoubleNonblockingBugType, ErrorText, ExplNode); + + Report->addRange(MPICallEvent.getSourceRange()); + SourceRange Range = RequestRegion->sourceRange(); + + if (Range.isValid()) + Report->addRange(Range); + + Report->addVisitor(std::make_unique<RequestNodeVisitor>( + RequestRegion, "Request is previously used by nonblocking call here. ")); + Report->markInteresting(RequestRegion); + + BReporter.emitReport(std::move(Report)); +} + +void MPIBugReporter::reportMissingWait( + const ento::mpi::Request &Req, const MemRegion *const RequestRegion, + const ExplodedNode *const ExplNode, + BugReporter &BReporter) const { + std::string ErrorText{"Request " + RequestRegion->getDescriptiveName() + + " has no matching wait. 
"}; + + auto Report = std::make_unique<PathSensitiveBugReport>(*MissingWaitBugType, + ErrorText, ExplNode); + + SourceRange Range = RequestRegion->sourceRange(); + if (Range.isValid()) + Report->addRange(Range); + Report->addVisitor(std::make_unique<RequestNodeVisitor>( + RequestRegion, "Request is previously used by nonblocking call here. ")); + Report->markInteresting(RequestRegion); + + BReporter.emitReport(std::move(Report)); +} + +void MPIBugReporter::reportUnmatchedWait( + const CallEvent &CE, const clang::ento::MemRegion *const RequestRegion, + const ExplodedNode *const ExplNode, + BugReporter &BReporter) const { + std::string ErrorText{"Request " + RequestRegion->getDescriptiveName() + + " has no matching nonblocking call. "}; + + auto Report = std::make_unique<PathSensitiveBugReport>(*UnmatchedWaitBugType, + ErrorText, ExplNode); + + Report->addRange(CE.getSourceRange()); + SourceRange Range = RequestRegion->sourceRange(); + if (Range.isValid()) + Report->addRange(Range); + + BReporter.emitReport(std::move(Report)); +} + +PathDiagnosticPieceRef +MPIBugReporter::RequestNodeVisitor::VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) { + + if (IsNodeFound) + return nullptr; + + const Request *const Req = N->getState()->get<RequestMap>(RequestRegion); + assert(Req && "The region must be tracked and alive, given that we've " + "just emitted a report against it"); + const Request *const PrevReq = + N->getFirstPred()->getState()->get<RequestMap>(RequestRegion); + + // Check if request was previously unused or in a different state. 
+ if (!PrevReq || (Req->CurrentState != PrevReq->CurrentState)) { + IsNodeFound = true; + + ProgramPoint P = N->getFirstPred()->getLocation(); + PathDiagnosticLocation L = + PathDiagnosticLocation::create(P, BRC.getSourceManager()); + + return std::make_shared<PathDiagnosticEventPiece>(L, ErrorText); + } + + return nullptr; +} + +} // end of namespace: mpi +} // end of namespace: ento +} // end of namespace: clang diff --git a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h new file mode 100644 index 000000000000..9871da026b04 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h @@ -0,0 +1,107 @@ +//===-- MPIBugReporter.h - bug reporter -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines prefabricated reports which are emitted in +/// case of MPI related bugs, detected by path-sensitive analysis. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPIBUGREPORTER_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPIBUGREPORTER_H + +#include "MPITypes.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" + +namespace clang { +namespace ento { +namespace mpi { + +class MPIBugReporter { +public: + MPIBugReporter(const CheckerBase &CB) { + UnmatchedWaitBugType.reset(new BugType(&CB, "Unmatched wait", MPIError)); + DoubleNonblockingBugType.reset( + new BugType(&CB, "Double nonblocking", MPIError)); + MissingWaitBugType.reset(new BugType(&CB, "Missing wait", MPIError)); + } + + /// Report duplicate request use by nonblocking calls without intermediate + /// wait. + /// + /// \param MPICallEvent MPI call that caused the double nonblocking + /// \param Req request that was used by two nonblocking calls in sequence + /// \param RequestRegion memory region of the request + /// \param ExplNode node in the graph the bug appeared at + /// \param BReporter bug reporter for current context + void reportDoubleNonblocking(const CallEvent &MPICallEvent, + const Request &Req, + const MemRegion *const RequestRegion, + const ExplodedNode *const ExplNode, + BugReporter &BReporter) const; + + /// Report a missing wait for a nonblocking call. + /// + /// \param Req request that is not matched by a wait + /// \param RequestRegion memory region of the request + /// \param ExplNode node in the graph the bug appeared at + /// \param BReporter bug reporter for current context + void reportMissingWait(const Request &Req, + const MemRegion *const RequestRegion, + const ExplodedNode *const ExplNode, + BugReporter &BReporter) const; + + /// Report a wait on a request that has not been used at all before. 
+ /// + /// \param CE wait call that uses the request + /// \param RequestRegion memory region of the request + /// \param ExplNode node in the graph the bug appeared at + /// \param BReporter bug reporter for current context + void reportUnmatchedWait(const CallEvent &CE, + const MemRegion *const RequestRegion, + const ExplodedNode *const ExplNode, + BugReporter &BReporter) const; + +private: + const std::string MPIError = "MPI Error"; + + // path-sensitive bug types + std::unique_ptr<BugType> UnmatchedWaitBugType; + std::unique_ptr<BugType> MissingWaitBugType; + std::unique_ptr<BugType> DoubleNonblockingBugType; + + /// Bug visitor class to find the node where the request region was previously + /// used in order to include it into the BugReport path. + class RequestNodeVisitor : public BugReporterVisitor { + public: + RequestNodeVisitor(const MemRegion *const MemoryRegion, + const std::string &ErrText) + : RequestRegion(MemoryRegion), ErrorText(ErrText) {} + + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int X = 0; + ID.AddPointer(&X); + ID.AddPointer(RequestRegion); + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + + private: + const MemRegion *const RequestRegion; + bool IsNodeFound = false; + std::string ErrorText; + }; +}; + +} // end of namespace: mpi +} // end of namespace: ento +} // end of namespace: clang + +#endif diff --git a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.cpp new file mode 100644 index 000000000000..7f9ba0de1dc2 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.cpp @@ -0,0 +1,193 @@ +//===-- MPIChecker.cpp - Checker Entry Point Class --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the main class of MPI-Checker which serves as an entry +/// point. It is created once for each translation unit analysed. +/// The checker defines path-sensitive checks, to verify correct usage of the +/// MPI API. +/// +//===----------------------------------------------------------------------===// + +#include "MPIChecker.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" + +namespace clang { +namespace ento { +namespace mpi { + +void MPIChecker::checkDoubleNonblocking(const CallEvent &PreCallEvent, + CheckerContext &Ctx) const { + if (!FuncClassifier->isNonBlockingType(PreCallEvent.getCalleeIdentifier())) { + return; + } + const MemRegion *const MR = + PreCallEvent.getArgSVal(PreCallEvent.getNumArgs() - 1).getAsRegion(); + if (!MR) + return; + const ElementRegion *const ER = dyn_cast<ElementRegion>(MR); + + // The region must be typed, in order to reason about it. 
+ if (!isa<TypedRegion>(MR) || (ER && !isa<TypedRegion>(ER->getSuperRegion()))) + return; + + ProgramStateRef State = Ctx.getState(); + const Request *const Req = State->get<RequestMap>(MR); + + // double nonblocking detected + if (Req && Req->CurrentState == Request::State::Nonblocking) { + ExplodedNode *ErrorNode = Ctx.generateNonFatalErrorNode(); + BReporter.reportDoubleNonblocking(PreCallEvent, *Req, MR, ErrorNode, + Ctx.getBugReporter()); + Ctx.addTransition(ErrorNode->getState(), ErrorNode); + } + // no error + else { + State = State->set<RequestMap>(MR, Request::State::Nonblocking); + Ctx.addTransition(State); + } +} + +void MPIChecker::checkUnmatchedWaits(const CallEvent &PreCallEvent, + CheckerContext &Ctx) const { + if (!FuncClassifier->isWaitType(PreCallEvent.getCalleeIdentifier())) + return; + const MemRegion *const MR = topRegionUsedByWait(PreCallEvent); + if (!MR) + return; + const ElementRegion *const ER = dyn_cast<ElementRegion>(MR); + + // The region must be typed, in order to reason about it. + if (!isa<TypedRegion>(MR) || (ER && !isa<TypedRegion>(ER->getSuperRegion()))) + return; + + llvm::SmallVector<const MemRegion *, 2> ReqRegions; + allRegionsUsedByWait(ReqRegions, MR, PreCallEvent, Ctx); + if (ReqRegions.empty()) + return; + + ProgramStateRef State = Ctx.getState(); + static CheckerProgramPointTag Tag("MPI-Checker", "UnmatchedWait"); + ExplodedNode *ErrorNode{nullptr}; + + // Check all request regions used by the wait function. + for (const auto &ReqRegion : ReqRegions) { + const Request *const Req = State->get<RequestMap>(ReqRegion); + State = State->set<RequestMap>(ReqRegion, Request::State::Wait); + if (!Req) { + if (!ErrorNode) { + ErrorNode = Ctx.generateNonFatalErrorNode(State, &Tag); + State = ErrorNode->getState(); + } + // A wait has no matching nonblocking call. 
+ BReporter.reportUnmatchedWait(PreCallEvent, ReqRegion, ErrorNode, + Ctx.getBugReporter()); + } + } + + if (!ErrorNode) { + Ctx.addTransition(State); + } else { + Ctx.addTransition(State, ErrorNode); + } +} + +void MPIChecker::checkMissingWaits(SymbolReaper &SymReaper, + CheckerContext &Ctx) const { + ProgramStateRef State = Ctx.getState(); + const auto &Requests = State->get<RequestMap>(); + if (Requests.isEmpty()) + return; + + static CheckerProgramPointTag Tag("MPI-Checker", "MissingWait"); + ExplodedNode *ErrorNode{nullptr}; + + auto ReqMap = State->get<RequestMap>(); + for (const auto &Req : ReqMap) { + if (!SymReaper.isLiveRegion(Req.first)) { + if (Req.second.CurrentState == Request::State::Nonblocking) { + + if (!ErrorNode) { + ErrorNode = Ctx.generateNonFatalErrorNode(State, &Tag); + State = ErrorNode->getState(); + } + BReporter.reportMissingWait(Req.second, Req.first, ErrorNode, + Ctx.getBugReporter()); + } + State = State->remove<RequestMap>(Req.first); + } + } + + // Transition to update the state regarding removed requests. 
+ if (!ErrorNode) { + Ctx.addTransition(State); + } else { + Ctx.addTransition(State, ErrorNode); + } +} + +const MemRegion *MPIChecker::topRegionUsedByWait(const CallEvent &CE) const { + + if (FuncClassifier->isMPI_Wait(CE.getCalleeIdentifier())) { + return CE.getArgSVal(0).getAsRegion(); + } else if (FuncClassifier->isMPI_Waitall(CE.getCalleeIdentifier())) { + return CE.getArgSVal(1).getAsRegion(); + } else { + return (const MemRegion *)nullptr; + } +} + +void MPIChecker::allRegionsUsedByWait( + llvm::SmallVector<const MemRegion *, 2> &ReqRegions, + const MemRegion *const MR, const CallEvent &CE, CheckerContext &Ctx) const { + + MemRegionManager *const RegionManager = MR->getMemRegionManager(); + + if (FuncClassifier->isMPI_Waitall(CE.getCalleeIdentifier())) { + const SubRegion *SuperRegion{nullptr}; + if (const ElementRegion *const ER = MR->getAs<ElementRegion>()) { + SuperRegion = cast<SubRegion>(ER->getSuperRegion()); + } + + // A single request is passed to MPI_Waitall. + if (!SuperRegion) { + ReqRegions.push_back(MR); + return; + } + + const auto &Size = Ctx.getStoreManager().getSizeInElements( + Ctx.getState(), SuperRegion, + CE.getArgExpr(1)->getType()->getPointeeType()); + const llvm::APSInt &ArrSize = Size.getAs<nonloc::ConcreteInt>()->getValue(); + + for (size_t i = 0; i < ArrSize; ++i) { + const NonLoc Idx = Ctx.getSValBuilder().makeArrayIndex(i); + + const ElementRegion *const ER = RegionManager->getElementRegion( + CE.getArgExpr(1)->getType()->getPointeeType(), Idx, SuperRegion, + Ctx.getASTContext()); + + ReqRegions.push_back(ER->getAs<MemRegion>()); + } + } else if (FuncClassifier->isMPI_Wait(CE.getCalleeIdentifier())) { + ReqRegions.push_back(MR); + } +} + +} // end of namespace: mpi +} // end of namespace: ento +} // end of namespace: clang + +// Registers the checker for static analysis. 
+void clang::ento::registerMPIChecker(CheckerManager &MGR) { + MGR.registerChecker<clang::ento::mpi::MPIChecker>(); +} + +bool clang::ento::shouldRegisterMPIChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.h b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.h new file mode 100644 index 000000000000..ce9f1afac209 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.h @@ -0,0 +1,104 @@ +//===-- MPIChecker.h - Verify MPI API usage- --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the main class of MPI-Checker which serves as an entry +/// point. It is created once for each translation unit analysed. +/// The checker defines path-sensitive checks, to verify correct usage of the +/// MPI API. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPICHECKER_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPICHECKER_H + +#include "MPIBugReporter.h" +#include "MPITypes.h" +#include "clang/StaticAnalyzer/Checkers/MPIFunctionClassifier.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +namespace clang { +namespace ento { +namespace mpi { + +class MPIChecker : public Checker<check::PreCall, check::DeadSymbols> { +public: + MPIChecker() : BReporter(*this) {} + + // path-sensitive callbacks + void checkPreCall(const CallEvent &CE, CheckerContext &Ctx) const { + dynamicInit(Ctx); + checkUnmatchedWaits(CE, Ctx); + checkDoubleNonblocking(CE, Ctx); + } + + void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &Ctx) const { + dynamicInit(Ctx); + checkMissingWaits(SymReaper, Ctx); + } + + void dynamicInit(CheckerContext &Ctx) const { + if (FuncClassifier) + return; + const_cast<std::unique_ptr<MPIFunctionClassifier> &>(FuncClassifier) + .reset(new MPIFunctionClassifier{Ctx.getASTContext()}); + } + + /// Checks if a request is used by nonblocking calls multiple times + /// in sequence without intermediate wait. The check contains a guard, + /// in order to only inspect nonblocking functions. + /// + /// \param PreCallEvent MPI call to verify + void checkDoubleNonblocking(const clang::ento::CallEvent &PreCallEvent, + clang::ento::CheckerContext &Ctx) const; + + /// Checks if the request used by the wait function was not used at all + /// before. The check contains a guard, in order to only inspect wait + /// functions. + /// + /// \param PreCallEvent MPI call to verify + void checkUnmatchedWaits(const clang::ento::CallEvent &PreCallEvent, + clang::ento::CheckerContext &Ctx) const; + + /// Check if a nonblocking call is not matched by a wait. 
+ /// If a memory region is not alive and the last function using the + /// request was a nonblocking call, this is rated as a missing wait. + void checkMissingWaits(clang::ento::SymbolReaper &SymReaper, + clang::ento::CheckerContext &Ctx) const; + +private: + /// Collects all memory regions of a request(array) used by a wait + /// function. If the wait function uses a single request, this is a single + /// region. For wait functions using multiple requests, multiple regions + /// representing elements in the array are collected. + /// + /// \param ReqRegions vector the regions get pushed into + /// \param MR top most region to iterate + /// \param CE MPI wait call using the request(s) + void allRegionsUsedByWait( + llvm::SmallVector<const clang::ento::MemRegion *, 2> &ReqRegions, + const clang::ento::MemRegion *const MR, const clang::ento::CallEvent &CE, + clang::ento::CheckerContext &Ctx) const; + + /// Returns the memory region used by a wait function. + /// Distinguishes between MPI_Wait and MPI_Waitall. + /// + /// \param CE MPI wait call + const clang::ento::MemRegion * + topRegionUsedByWait(const clang::ento::CallEvent &CE) const; + + const std::unique_ptr<MPIFunctionClassifier> FuncClassifier; + MPIBugReporter BReporter; +}; + +} // end of namespace: mpi +} // end of namespace: ento +} // end of namespace: clang + +#endif diff --git a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIFunctionClassifier.cpp b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIFunctionClassifier.cpp new file mode 100644 index 000000000000..277b3ed2e105 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIFunctionClassifier.cpp @@ -0,0 +1,283 @@ +//===-- MPIFunctionClassifier.cpp - classifies MPI functions ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functionality to identify and classify MPI functions. +/// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/MPIFunctionClassifier.h" +#include "llvm/ADT/STLExtras.h" + +namespace clang { +namespace ento { +namespace mpi { + +void MPIFunctionClassifier::identifierInit(ASTContext &ASTCtx) { + // Initialize function identifiers. + initPointToPointIdentifiers(ASTCtx); + initCollectiveIdentifiers(ASTCtx); + initAdditionalIdentifiers(ASTCtx); +} + +void MPIFunctionClassifier::initPointToPointIdentifiers(ASTContext &ASTCtx) { + // Copy identifiers into the correct classification containers. + IdentInfo_MPI_Send = &ASTCtx.Idents.get("MPI_Send"); + MPIPointToPointTypes.push_back(IdentInfo_MPI_Send); + MPIType.push_back(IdentInfo_MPI_Send); + assert(IdentInfo_MPI_Send); + + IdentInfo_MPI_Isend = &ASTCtx.Idents.get("MPI_Isend"); + MPIPointToPointTypes.push_back(IdentInfo_MPI_Isend); + MPINonBlockingTypes.push_back(IdentInfo_MPI_Isend); + MPIType.push_back(IdentInfo_MPI_Isend); + assert(IdentInfo_MPI_Isend); + + IdentInfo_MPI_Ssend = &ASTCtx.Idents.get("MPI_Ssend"); + MPIPointToPointTypes.push_back(IdentInfo_MPI_Ssend); + MPIType.push_back(IdentInfo_MPI_Ssend); + assert(IdentInfo_MPI_Ssend); + + IdentInfo_MPI_Issend = &ASTCtx.Idents.get("MPI_Issend"); + MPIPointToPointTypes.push_back(IdentInfo_MPI_Issend); + MPINonBlockingTypes.push_back(IdentInfo_MPI_Issend); + MPIType.push_back(IdentInfo_MPI_Issend); + assert(IdentInfo_MPI_Issend); + + IdentInfo_MPI_Bsend = &ASTCtx.Idents.get("MPI_Bsend"); + MPIPointToPointTypes.push_back(IdentInfo_MPI_Bsend); + MPIType.push_back(IdentInfo_MPI_Bsend); + assert(IdentInfo_MPI_Bsend); + + IdentInfo_MPI_Ibsend = &ASTCtx.Idents.get("MPI_Ibsend"); + 
MPIPointToPointTypes.push_back(IdentInfo_MPI_Ibsend); + MPINonBlockingTypes.push_back(IdentInfo_MPI_Ibsend); + MPIType.push_back(IdentInfo_MPI_Ibsend); + assert(IdentInfo_MPI_Ibsend); + + IdentInfo_MPI_Rsend = &ASTCtx.Idents.get("MPI_Rsend"); + MPIPointToPointTypes.push_back(IdentInfo_MPI_Rsend); + MPIType.push_back(IdentInfo_MPI_Rsend); + assert(IdentInfo_MPI_Rsend); + + IdentInfo_MPI_Irsend = &ASTCtx.Idents.get("MPI_Irsend"); + MPIPointToPointTypes.push_back(IdentInfo_MPI_Irsend); + MPIType.push_back(IdentInfo_MPI_Irsend); + assert(IdentInfo_MPI_Irsend); + + IdentInfo_MPI_Recv = &ASTCtx.Idents.get("MPI_Recv"); + MPIPointToPointTypes.push_back(IdentInfo_MPI_Recv); + MPIType.push_back(IdentInfo_MPI_Recv); + assert(IdentInfo_MPI_Recv); + + IdentInfo_MPI_Irecv = &ASTCtx.Idents.get("MPI_Irecv"); + MPIPointToPointTypes.push_back(IdentInfo_MPI_Irecv); + MPINonBlockingTypes.push_back(IdentInfo_MPI_Irecv); + MPIType.push_back(IdentInfo_MPI_Irecv); + assert(IdentInfo_MPI_Irecv); +} + +void MPIFunctionClassifier::initCollectiveIdentifiers(ASTContext &ASTCtx) { + // Copy identifiers into the correct classification containers. 
+ IdentInfo_MPI_Scatter = &ASTCtx.Idents.get("MPI_Scatter"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Scatter); + MPIPointToCollTypes.push_back(IdentInfo_MPI_Scatter); + MPIType.push_back(IdentInfo_MPI_Scatter); + assert(IdentInfo_MPI_Scatter); + + IdentInfo_MPI_Iscatter = &ASTCtx.Idents.get("MPI_Iscatter"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Iscatter); + MPIPointToCollTypes.push_back(IdentInfo_MPI_Iscatter); + MPINonBlockingTypes.push_back(IdentInfo_MPI_Iscatter); + MPIType.push_back(IdentInfo_MPI_Iscatter); + assert(IdentInfo_MPI_Iscatter); + + IdentInfo_MPI_Gather = &ASTCtx.Idents.get("MPI_Gather"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Gather); + MPICollToPointTypes.push_back(IdentInfo_MPI_Gather); + MPIType.push_back(IdentInfo_MPI_Gather); + assert(IdentInfo_MPI_Gather); + + IdentInfo_MPI_Igather = &ASTCtx.Idents.get("MPI_Igather"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Igather); + MPICollToPointTypes.push_back(IdentInfo_MPI_Igather); + MPINonBlockingTypes.push_back(IdentInfo_MPI_Igather); + MPIType.push_back(IdentInfo_MPI_Igather); + assert(IdentInfo_MPI_Igather); + + IdentInfo_MPI_Allgather = &ASTCtx.Idents.get("MPI_Allgather"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Allgather); + MPICollToCollTypes.push_back(IdentInfo_MPI_Allgather); + MPIType.push_back(IdentInfo_MPI_Allgather); + assert(IdentInfo_MPI_Allgather); + + IdentInfo_MPI_Iallgather = &ASTCtx.Idents.get("MPI_Iallgather"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Iallgather); + MPICollToCollTypes.push_back(IdentInfo_MPI_Iallgather); + MPINonBlockingTypes.push_back(IdentInfo_MPI_Iallgather); + MPIType.push_back(IdentInfo_MPI_Iallgather); + assert(IdentInfo_MPI_Iallgather); + + IdentInfo_MPI_Bcast = &ASTCtx.Idents.get("MPI_Bcast"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Bcast); + MPIPointToCollTypes.push_back(IdentInfo_MPI_Bcast); + MPIType.push_back(IdentInfo_MPI_Bcast); + assert(IdentInfo_MPI_Bcast); + + IdentInfo_MPI_Ibcast = 
&ASTCtx.Idents.get("MPI_Ibcast"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Ibcast); + MPIPointToCollTypes.push_back(IdentInfo_MPI_Ibcast); + MPINonBlockingTypes.push_back(IdentInfo_MPI_Ibcast); + MPIType.push_back(IdentInfo_MPI_Ibcast); + assert(IdentInfo_MPI_Ibcast); + + IdentInfo_MPI_Reduce = &ASTCtx.Idents.get("MPI_Reduce"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Reduce); + MPICollToPointTypes.push_back(IdentInfo_MPI_Reduce); + MPIType.push_back(IdentInfo_MPI_Reduce); + assert(IdentInfo_MPI_Reduce); + + IdentInfo_MPI_Ireduce = &ASTCtx.Idents.get("MPI_Ireduce"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Ireduce); + MPICollToPointTypes.push_back(IdentInfo_MPI_Ireduce); + MPINonBlockingTypes.push_back(IdentInfo_MPI_Ireduce); + MPIType.push_back(IdentInfo_MPI_Ireduce); + assert(IdentInfo_MPI_Ireduce); + + IdentInfo_MPI_Allreduce = &ASTCtx.Idents.get("MPI_Allreduce"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Allreduce); + MPICollToCollTypes.push_back(IdentInfo_MPI_Allreduce); + MPIType.push_back(IdentInfo_MPI_Allreduce); + assert(IdentInfo_MPI_Allreduce); + + IdentInfo_MPI_Iallreduce = &ASTCtx.Idents.get("MPI_Iallreduce"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Iallreduce); + MPICollToCollTypes.push_back(IdentInfo_MPI_Iallreduce); + MPINonBlockingTypes.push_back(IdentInfo_MPI_Iallreduce); + MPIType.push_back(IdentInfo_MPI_Iallreduce); + assert(IdentInfo_MPI_Iallreduce); + + IdentInfo_MPI_Alltoall = &ASTCtx.Idents.get("MPI_Alltoall"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Alltoall); + MPICollToCollTypes.push_back(IdentInfo_MPI_Alltoall); + MPIType.push_back(IdentInfo_MPI_Alltoall); + assert(IdentInfo_MPI_Alltoall); + + IdentInfo_MPI_Ialltoall = &ASTCtx.Idents.get("MPI_Ialltoall"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Ialltoall); + MPICollToCollTypes.push_back(IdentInfo_MPI_Ialltoall); + MPINonBlockingTypes.push_back(IdentInfo_MPI_Ialltoall); + MPIType.push_back(IdentInfo_MPI_Ialltoall); + assert(IdentInfo_MPI_Ialltoall); +} + 
+void MPIFunctionClassifier::initAdditionalIdentifiers(ASTContext &ASTCtx) { + IdentInfo_MPI_Comm_rank = &ASTCtx.Idents.get("MPI_Comm_rank"); + MPIType.push_back(IdentInfo_MPI_Comm_rank); + assert(IdentInfo_MPI_Comm_rank); + + IdentInfo_MPI_Comm_size = &ASTCtx.Idents.get("MPI_Comm_size"); + MPIType.push_back(IdentInfo_MPI_Comm_size); + assert(IdentInfo_MPI_Comm_size); + + IdentInfo_MPI_Wait = &ASTCtx.Idents.get("MPI_Wait"); + MPIType.push_back(IdentInfo_MPI_Wait); + assert(IdentInfo_MPI_Wait); + + IdentInfo_MPI_Waitall = &ASTCtx.Idents.get("MPI_Waitall"); + MPIType.push_back(IdentInfo_MPI_Waitall); + assert(IdentInfo_MPI_Waitall); + + IdentInfo_MPI_Barrier = &ASTCtx.Idents.get("MPI_Barrier"); + MPICollectiveTypes.push_back(IdentInfo_MPI_Barrier); + MPIType.push_back(IdentInfo_MPI_Barrier); + assert(IdentInfo_MPI_Barrier); +} + +// general identifiers +bool MPIFunctionClassifier::isMPIType(const IdentifierInfo *IdentInfo) const { + return llvm::is_contained(MPIType, IdentInfo); +} + +bool MPIFunctionClassifier::isNonBlockingType( + const IdentifierInfo *IdentInfo) const { + return llvm::is_contained(MPINonBlockingTypes, IdentInfo); +} + +// point-to-point identifiers +bool MPIFunctionClassifier::isPointToPointType( + const IdentifierInfo *IdentInfo) const { + return llvm::is_contained(MPIPointToPointTypes, IdentInfo); +} + +// collective identifiers +bool MPIFunctionClassifier::isCollectiveType( + const IdentifierInfo *IdentInfo) const { + return llvm::is_contained(MPICollectiveTypes, IdentInfo); +} + +bool MPIFunctionClassifier::isCollToColl( + const IdentifierInfo *IdentInfo) const { + return llvm::is_contained(MPICollToCollTypes, IdentInfo); +} + +bool MPIFunctionClassifier::isScatterType( + const IdentifierInfo *IdentInfo) const { + return IdentInfo == IdentInfo_MPI_Scatter || + IdentInfo == IdentInfo_MPI_Iscatter; +} + +bool MPIFunctionClassifier::isGatherType( + const IdentifierInfo *IdentInfo) const { + return IdentInfo == IdentInfo_MPI_Gather || + IdentInfo 
== IdentInfo_MPI_Igather || + IdentInfo == IdentInfo_MPI_Allgather || + IdentInfo == IdentInfo_MPI_Iallgather; +} + +bool MPIFunctionClassifier::isAllgatherType( + const IdentifierInfo *IdentInfo) const { + return IdentInfo == IdentInfo_MPI_Allgather || + IdentInfo == IdentInfo_MPI_Iallgather; +} + +bool MPIFunctionClassifier::isAlltoallType( + const IdentifierInfo *IdentInfo) const { + return IdentInfo == IdentInfo_MPI_Alltoall || + IdentInfo == IdentInfo_MPI_Ialltoall; +} + +bool MPIFunctionClassifier::isBcastType(const IdentifierInfo *IdentInfo) const { + return IdentInfo == IdentInfo_MPI_Bcast || IdentInfo == IdentInfo_MPI_Ibcast; +} + +bool MPIFunctionClassifier::isReduceType( + const IdentifierInfo *IdentInfo) const { + return IdentInfo == IdentInfo_MPI_Reduce || + IdentInfo == IdentInfo_MPI_Ireduce || + IdentInfo == IdentInfo_MPI_Allreduce || + IdentInfo == IdentInfo_MPI_Iallreduce; +} + +// additional identifiers +bool MPIFunctionClassifier::isMPI_Wait(const IdentifierInfo *IdentInfo) const { + return IdentInfo == IdentInfo_MPI_Wait; +} + +bool MPIFunctionClassifier::isMPI_Waitall( + const IdentifierInfo *IdentInfo) const { + return IdentInfo == IdentInfo_MPI_Waitall; +} + +bool MPIFunctionClassifier::isWaitType(const IdentifierInfo *IdentInfo) const { + return IdentInfo == IdentInfo_MPI_Wait || IdentInfo == IdentInfo_MPI_Waitall; +} + +} // end of namespace: mpi +} // end of namespace: ento +} // end of namespace: clang diff --git a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPITypes.h b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPITypes.h new file mode 100644 index 000000000000..fe0fb2a4d0e7 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPITypes.h @@ -0,0 +1,66 @@ +//===-- MPITypes.h - Functionality to model MPI concepts --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides definitions to model concepts of MPI. The mpi::Request +/// class defines a wrapper class, in order to make MPI requests trackable for +/// path-sensitive analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPITYPES_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPITYPES_H + +#include "clang/StaticAnalyzer/Checkers/MPIFunctionClassifier.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "llvm/ADT/SmallSet.h" + +namespace clang { +namespace ento { +namespace mpi { + +class Request { +public: + enum State : unsigned char { Nonblocking, Wait }; + + Request(State S) : CurrentState{S} {} + + void Profile(llvm::FoldingSetNodeID &Id) const { + Id.AddInteger(CurrentState); + } + + bool operator==(const Request &ToCompare) const { + return CurrentState == ToCompare.CurrentState; + } + + const State CurrentState; +}; + +// The RequestMap stores MPI requests which are identified by their memory +// region. Requests are used in MPI to complete nonblocking operations with wait +// operations. A custom map implementation is used, in order to make it +// available in an arbitrary amount of translation units. 
+struct RequestMap {}; +typedef llvm::ImmutableMap<const clang::ento::MemRegion *, + clang::ento::mpi::Request> + RequestMapImpl; + +} // end of namespace: mpi + +template <> +struct ProgramStateTrait<mpi::RequestMap> + : public ProgramStatePartialTrait<mpi::RequestMapImpl> { + static void *GDMIndex() { + static int index = 0; + return &index; + } +}; + +} // end of namespace: ento +} // end of namespace: clang +#endif diff --git a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp new file mode 100644 index 000000000000..e064ca6bd88f --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp @@ -0,0 +1,672 @@ +//==--- MacOSKeychainAPIChecker.cpp ------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This checker flags misuses of KeyChainAPI. In particular, the password data +// allocated/returned by SecKeychainItemCopyContent, +// SecKeychainFindGenericPassword, SecKeychainFindInternetPassword functions has +// to be freed using a call to SecKeychainItemFreeContent. 
+//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +class MacOSKeychainAPIChecker : public Checker<check::PreStmt<CallExpr>, + check::PostStmt<CallExpr>, + check::DeadSymbols, + check::PointerEscape, + eval::Assume> { + mutable std::unique_ptr<BugType> BT; + +public: + /// AllocationState is a part of the checker specific state together with the + /// MemRegion corresponding to the allocated data. + struct AllocationState { + /// The index of the allocator function. 
+ unsigned int AllocatorIdx; + SymbolRef Region; + + AllocationState(const Expr *E, unsigned int Idx, SymbolRef R) : + AllocatorIdx(Idx), + Region(R) {} + + bool operator==(const AllocationState &X) const { + return (AllocatorIdx == X.AllocatorIdx && + Region == X.Region); + } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(AllocatorIdx); + ID.AddPointer(Region); + } + }; + + void checkPreStmt(const CallExpr *S, CheckerContext &C) const; + void checkPostStmt(const CallExpr *S, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; + ProgramStateRef checkPointerEscape(ProgramStateRef State, + const InvalidatedSymbols &Escaped, + const CallEvent *Call, + PointerEscapeKind Kind) const; + ProgramStateRef evalAssume(ProgramStateRef state, SVal Cond, + bool Assumption) const; + void printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const; + +private: + typedef std::pair<SymbolRef, const AllocationState*> AllocationPair; + typedef SmallVector<AllocationPair, 2> AllocationPairVec; + + enum APIKind { + /// Denotes functions tracked by this checker. + ValidAPI = 0, + /// The functions commonly/mistakenly used in place of the given API. + ErrorAPI = 1, + /// The functions which may allocate the data. These are tracked to reduce + /// the false alarm rate. + PossibleAPI = 2 + }; + /// Stores the information about the allocator and deallocator functions - + /// these are the functions the checker is tracking. + struct ADFunctionInfo { + const char* Name; + unsigned int Param; + unsigned int DeallocatorIdx; + APIKind Kind; + }; + static const unsigned InvalidIdx = 100000; + static const unsigned FunctionsToTrackSize = 8; + static const ADFunctionInfo FunctionsToTrack[FunctionsToTrackSize]; + /// The value, which represents no error return value for allocator functions. 
+ static const unsigned NoErr = 0; + + /// Given the function name, returns the index of the allocator/deallocator + /// function. + static unsigned getTrackedFunctionIndex(StringRef Name, bool IsAllocator); + + inline void initBugType() const { + if (!BT) + BT.reset(new BugType(this, "Improper use of SecKeychain API", + "API Misuse (Apple)")); + } + + void generateDeallocatorMismatchReport(const AllocationPair &AP, + const Expr *ArgExpr, + CheckerContext &C) const; + + /// Find the allocation site for Sym on the path leading to the node N. + const ExplodedNode *getAllocationNode(const ExplodedNode *N, SymbolRef Sym, + CheckerContext &C) const; + + std::unique_ptr<PathSensitiveBugReport> + generateAllocatedDataNotReleasedReport(const AllocationPair &AP, + ExplodedNode *N, + CheckerContext &C) const; + + /// Mark an AllocationPair interesting for diagnostic reporting. + void markInteresting(PathSensitiveBugReport *R, + const AllocationPair &AP) const { + R->markInteresting(AP.first); + R->markInteresting(AP.second->Region); + } + + /// The bug visitor which allows us to print extra diagnostics along the + /// BugReport path. For example, showing the allocation site of the leaked + /// region. + class SecKeychainBugVisitor : public BugReporterVisitor { + protected: + // The allocated region symbol tracked by the main analysis. + SymbolRef Sym; + + public: + SecKeychainBugVisitor(SymbolRef S) : Sym(S) {} + + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int X = 0; + ID.AddPointer(&X); + ID.AddPointer(Sym); + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + }; +}; +} + +/// ProgramState traits to store the currently allocated (and not yet freed) +/// symbols. This is a map from the allocated content symbol to the +/// corresponding AllocationState. 
+REGISTER_MAP_WITH_PROGRAMSTATE(AllocatedData, + SymbolRef, + MacOSKeychainAPIChecker::AllocationState) + +static bool isEnclosingFunctionParam(const Expr *E) { + E = E->IgnoreParenCasts(); + if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) { + const ValueDecl *VD = DRE->getDecl(); + if (isa<ImplicitParamDecl>(VD) || isa<ParmVarDecl>(VD)) + return true; + } + return false; +} + +const MacOSKeychainAPIChecker::ADFunctionInfo + MacOSKeychainAPIChecker::FunctionsToTrack[FunctionsToTrackSize] = { + {"SecKeychainItemCopyContent", 4, 3, ValidAPI}, // 0 + {"SecKeychainFindGenericPassword", 6, 3, ValidAPI}, // 1 + {"SecKeychainFindInternetPassword", 13, 3, ValidAPI}, // 2 + {"SecKeychainItemFreeContent", 1, InvalidIdx, ValidAPI}, // 3 + {"SecKeychainItemCopyAttributesAndData", 5, 5, ValidAPI}, // 4 + {"SecKeychainItemFreeAttributesAndData", 1, InvalidIdx, ValidAPI}, // 5 + {"free", 0, InvalidIdx, ErrorAPI}, // 6 + {"CFStringCreateWithBytesNoCopy", 1, InvalidIdx, PossibleAPI}, // 7 +}; + +unsigned MacOSKeychainAPIChecker::getTrackedFunctionIndex(StringRef Name, + bool IsAllocator) { + for (unsigned I = 0; I < FunctionsToTrackSize; ++I) { + ADFunctionInfo FI = FunctionsToTrack[I]; + if (FI.Name != Name) + continue; + // Make sure the function is of the right type (allocator vs deallocator). + if (IsAllocator && (FI.DeallocatorIdx == InvalidIdx)) + return InvalidIdx; + if (!IsAllocator && (FI.DeallocatorIdx != InvalidIdx)) + return InvalidIdx; + + return I; + } + // The function is not tracked. + return InvalidIdx; +} + +static bool isBadDeallocationArgument(const MemRegion *Arg) { + if (!Arg) + return false; + return isa<AllocaRegion>(Arg) || isa<BlockDataRegion>(Arg) || + isa<TypedRegion>(Arg); +} + +/// Given the address expression, retrieve the value it's pointing to. Assume +/// that value is itself an address, and return the corresponding symbol. 
+static SymbolRef getAsPointeeSymbol(const Expr *Expr, + CheckerContext &C) { + ProgramStateRef State = C.getState(); + SVal ArgV = C.getSVal(Expr); + + if (Optional<loc::MemRegionVal> X = ArgV.getAs<loc::MemRegionVal>()) { + StoreManager& SM = C.getStoreManager(); + SymbolRef sym = SM.getBinding(State->getStore(), *X).getAsLocSymbol(); + if (sym) + return sym; + } + return nullptr; +} + +// Report deallocator mismatch. Remove the region from tracking - reporting a +// missing free error after this one is redundant. +void MacOSKeychainAPIChecker:: + generateDeallocatorMismatchReport(const AllocationPair &AP, + const Expr *ArgExpr, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + State = State->remove<AllocatedData>(AP.first); + ExplodedNode *N = C.generateNonFatalErrorNode(State); + + if (!N) + return; + initBugType(); + SmallString<80> sbuf; + llvm::raw_svector_ostream os(sbuf); + unsigned int PDeallocIdx = + FunctionsToTrack[AP.second->AllocatorIdx].DeallocatorIdx; + + os << "Deallocator doesn't match the allocator: '" + << FunctionsToTrack[PDeallocIdx].Name << "' should be used."; + auto Report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + Report->addVisitor(std::make_unique<SecKeychainBugVisitor>(AP.first)); + Report->addRange(ArgExpr->getSourceRange()); + markInteresting(Report.get(), AP); + C.emitReport(std::move(Report)); +} + +void MacOSKeychainAPIChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + unsigned idx = InvalidIdx; + ProgramStateRef State = C.getState(); + + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD || FD->getKind() != Decl::Function) + return; + + StringRef funName = C.getCalleeName(FD); + if (funName.empty()) + return; + + // If it is a call to an allocator function, it could be a double allocation. 
+ idx = getTrackedFunctionIndex(funName, true); + if (idx != InvalidIdx) { + unsigned paramIdx = FunctionsToTrack[idx].Param; + if (CE->getNumArgs() <= paramIdx) + return; + + const Expr *ArgExpr = CE->getArg(paramIdx); + if (SymbolRef V = getAsPointeeSymbol(ArgExpr, C)) + if (const AllocationState *AS = State->get<AllocatedData>(V)) { + // Remove the value from the state. The new symbol will be added for + // tracking when the second allocator is processed in checkPostStmt(). + State = State->remove<AllocatedData>(V); + ExplodedNode *N = C.generateNonFatalErrorNode(State); + if (!N) + return; + initBugType(); + SmallString<128> sbuf; + llvm::raw_svector_ostream os(sbuf); + unsigned int DIdx = FunctionsToTrack[AS->AllocatorIdx].DeallocatorIdx; + os << "Allocated data should be released before another call to " + << "the allocator: missing a call to '" + << FunctionsToTrack[DIdx].Name + << "'."; + auto Report = + std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + Report->addVisitor(std::make_unique<SecKeychainBugVisitor>(V)); + Report->addRange(ArgExpr->getSourceRange()); + Report->markInteresting(AS->Region); + C.emitReport(std::move(Report)); + } + return; + } + + // Is it a call to one of deallocator functions? + idx = getTrackedFunctionIndex(funName, false); + if (idx == InvalidIdx) + return; + + unsigned paramIdx = FunctionsToTrack[idx].Param; + if (CE->getNumArgs() <= paramIdx) + return; + + // Check the argument to the deallocator. + const Expr *ArgExpr = CE->getArg(paramIdx); + SVal ArgSVal = C.getSVal(ArgExpr); + + // Undef is reported by another checker. + if (ArgSVal.isUndef()) + return; + + SymbolRef ArgSM = ArgSVal.getAsLocSymbol(); + + // If the argument is coming from the heap, globals, or unknown, do not + // report it. + bool RegionArgIsBad = false; + if (!ArgSM) { + if (!isBadDeallocationArgument(ArgSVal.getAsRegion())) + return; + RegionArgIsBad = true; + } + + // Is the argument to the call being tracked? 
+ const AllocationState *AS = State->get<AllocatedData>(ArgSM); + if (!AS) + return; + + // TODO: We might want to report double free here. + // (that would involve tracking all the freed symbols in the checker state). + if (RegionArgIsBad) { + // It is possible that this is a false positive - the argument might + // have entered as an enclosing function parameter. + if (isEnclosingFunctionParam(ArgExpr)) + return; + + ExplodedNode *N = C.generateNonFatalErrorNode(State); + if (!N) + return; + initBugType(); + auto Report = std::make_unique<PathSensitiveBugReport>( + *BT, "Trying to free data which has not been allocated.", N); + Report->addRange(ArgExpr->getSourceRange()); + if (AS) + Report->markInteresting(AS->Region); + C.emitReport(std::move(Report)); + return; + } + + // Process functions which might deallocate. + if (FunctionsToTrack[idx].Kind == PossibleAPI) { + + if (funName == "CFStringCreateWithBytesNoCopy") { + const Expr *DeallocatorExpr = CE->getArg(5)->IgnoreParenCasts(); + // NULL ~ default deallocator, so warn. + if (DeallocatorExpr->isNullPointerConstant(C.getASTContext(), + Expr::NPC_ValueDependentIsNotNull)) { + const AllocationPair AP = std::make_pair(ArgSM, AS); + generateDeallocatorMismatchReport(AP, ArgExpr, C); + return; + } + // One of the default allocators, so warn. + if (const DeclRefExpr *DE = dyn_cast<DeclRefExpr>(DeallocatorExpr)) { + StringRef DeallocatorName = DE->getFoundDecl()->getName(); + if (DeallocatorName == "kCFAllocatorDefault" || + DeallocatorName == "kCFAllocatorSystemDefault" || + DeallocatorName == "kCFAllocatorMalloc") { + const AllocationPair AP = std::make_pair(ArgSM, AS); + generateDeallocatorMismatchReport(AP, ArgExpr, C); + return; + } + // If kCFAllocatorNull, which does not deallocate, we still have to + // find the deallocator. 
+ if (DE->getFoundDecl()->getName() == "kCFAllocatorNull") + return; + } + // In all other cases, assume the user supplied a correct deallocator + // that will free memory so stop tracking. + State = State->remove<AllocatedData>(ArgSM); + C.addTransition(State); + return; + } + + llvm_unreachable("We know of no other possible APIs."); + } + + // The call is deallocating a value we previously allocated, so remove it + // from the next state. + State = State->remove<AllocatedData>(ArgSM); + + // Check if the proper deallocator is used. + unsigned int PDeallocIdx = FunctionsToTrack[AS->AllocatorIdx].DeallocatorIdx; + if (PDeallocIdx != idx || (FunctionsToTrack[idx].Kind == ErrorAPI)) { + const AllocationPair AP = std::make_pair(ArgSM, AS); + generateDeallocatorMismatchReport(AP, ArgExpr, C); + return; + } + + C.addTransition(State); +} + +void MacOSKeychainAPIChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD || FD->getKind() != Decl::Function) + return; + + StringRef funName = C.getCalleeName(FD); + + // If a value has been allocated, add it to the set for tracking. + unsigned idx = getTrackedFunctionIndex(funName, true); + if (idx == InvalidIdx) + return; + + const Expr *ArgExpr = CE->getArg(FunctionsToTrack[idx].Param); + // If the argument entered as an enclosing function parameter, skip it to + // avoid false positives. 
+ if (isEnclosingFunctionParam(ArgExpr) && + C.getLocationContext()->getParent() == nullptr) + return; + + if (SymbolRef V = getAsPointeeSymbol(ArgExpr, C)) { + // If the argument points to something that's not a symbolic region, it + // can be: + // - unknown (cannot reason about it) + // - undefined (already reported by other checker) + // - constant (null - should not be tracked, + // other constant will generate a compiler warning) + // - goto (should be reported by other checker) + + // The call return value symbol should stay alive for as long as the + // allocated value symbol, since our diagnostics depend on the value + // returned by the call. Ex: Data should only be freed if noErr was + // returned during allocation.) + SymbolRef RetStatusSymbol = C.getSVal(CE).getAsSymbol(); + C.getSymbolManager().addSymbolDependency(V, RetStatusSymbol); + + // Track the allocated value in the checker state. + State = State->set<AllocatedData>(V, AllocationState(ArgExpr, idx, + RetStatusSymbol)); + assert(State); + C.addTransition(State); + } +} + +// TODO: This logic is the same as in Malloc checker. +const ExplodedNode * +MacOSKeychainAPIChecker::getAllocationNode(const ExplodedNode *N, + SymbolRef Sym, + CheckerContext &C) const { + const LocationContext *LeakContext = N->getLocationContext(); + // Walk the ExplodedGraph backwards and find the first node that referred to + // the tracked symbol. + const ExplodedNode *AllocNode = N; + + while (N) { + if (!N->getState()->get<AllocatedData>(Sym)) + break; + // Allocation node, is the last node in the current or parent context in + // which the symbol was tracked. + const LocationContext *NContext = N->getLocationContext(); + if (NContext == LeakContext || + NContext->isParentOf(LeakContext)) + AllocNode = N; + N = N->pred_empty() ? 
nullptr : *(N->pred_begin()); + } + + return AllocNode; +} + +std::unique_ptr<PathSensitiveBugReport> +MacOSKeychainAPIChecker::generateAllocatedDataNotReleasedReport( + const AllocationPair &AP, ExplodedNode *N, CheckerContext &C) const { + const ADFunctionInfo &FI = FunctionsToTrack[AP.second->AllocatorIdx]; + initBugType(); + SmallString<70> sbuf; + llvm::raw_svector_ostream os(sbuf); + os << "Allocated data is not released: missing a call to '" + << FunctionsToTrack[FI.DeallocatorIdx].Name << "'."; + + // Most bug reports are cached at the location where they occurred. + // With leaks, we want to unique them by the location where they were + // allocated, and only report a single path. + PathDiagnosticLocation LocUsedForUniqueing; + const ExplodedNode *AllocNode = getAllocationNode(N, AP.first, C); + const Stmt *AllocStmt = AllocNode->getStmtForDiagnostics(); + + if (AllocStmt) + LocUsedForUniqueing = PathDiagnosticLocation::createBegin(AllocStmt, + C.getSourceManager(), + AllocNode->getLocationContext()); + + auto Report = std::make_unique<PathSensitiveBugReport>( + *BT, os.str(), N, LocUsedForUniqueing, + AllocNode->getLocationContext()->getDecl()); + + Report->addVisitor(std::make_unique<SecKeychainBugVisitor>(AP.first)); + markInteresting(Report.get(), AP); + return Report; +} + +/// If the return symbol is assumed to be error, remove the allocated info +/// from consideration. +ProgramStateRef MacOSKeychainAPIChecker::evalAssume(ProgramStateRef State, + SVal Cond, + bool Assumption) const { + AllocatedDataTy AMap = State->get<AllocatedData>(); + if (AMap.isEmpty()) + return State; + + auto *CondBSE = dyn_cast_or_null<BinarySymExpr>(Cond.getAsSymExpr()); + if (!CondBSE) + return State; + BinaryOperator::Opcode OpCode = CondBSE->getOpcode(); + if (OpCode != BO_EQ && OpCode != BO_NE) + return State; + + // Match for a restricted set of patterns for cmparison of error codes. + // Note, the comparisons of type '0 == st' are transformed into SymIntExpr. 
+ SymbolRef ReturnSymbol = nullptr; + if (auto *SIE = dyn_cast<SymIntExpr>(CondBSE)) { + const llvm::APInt &RHS = SIE->getRHS(); + bool ErrorIsReturned = (OpCode == BO_EQ && RHS != NoErr) || + (OpCode == BO_NE && RHS == NoErr); + if (!Assumption) + ErrorIsReturned = !ErrorIsReturned; + if (ErrorIsReturned) + ReturnSymbol = SIE->getLHS(); + } + + if (ReturnSymbol) + for (auto I = AMap.begin(), E = AMap.end(); I != E; ++I) { + if (ReturnSymbol == I->second.Region) + State = State->remove<AllocatedData>(I->first); + } + + return State; +} + +void MacOSKeychainAPIChecker::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + AllocatedDataTy AMap = State->get<AllocatedData>(); + if (AMap.isEmpty()) + return; + + bool Changed = false; + AllocationPairVec Errors; + for (auto I = AMap.begin(), E = AMap.end(); I != E; ++I) { + if (!SR.isDead(I->first)) + continue; + + Changed = true; + State = State->remove<AllocatedData>(I->first); + // If the allocated symbol is null do not report. + ConstraintManager &CMgr = State->getConstraintManager(); + ConditionTruthVal AllocFailed = CMgr.isNull(State, I.getKey()); + if (AllocFailed.isConstrainedTrue()) + continue; + Errors.push_back(std::make_pair(I->first, &I->second)); + } + if (!Changed) { + // Generate the new, cleaned up state. + C.addTransition(State); + return; + } + + static CheckerProgramPointTag Tag(this, "DeadSymbolsLeak"); + ExplodedNode *N = C.generateNonFatalErrorNode(C.getState(), &Tag); + if (!N) + return; + + // Generate the error reports. + for (const auto &P : Errors) + C.emitReport(generateAllocatedDataNotReleasedReport(P, N, C)); + + // Generate the new, cleaned up state. 
+ C.addTransition(State, N); +} + +ProgramStateRef MacOSKeychainAPIChecker::checkPointerEscape( + ProgramStateRef State, const InvalidatedSymbols &Escaped, + const CallEvent *Call, PointerEscapeKind Kind) const { + // FIXME: This branch doesn't make any sense at all, but it is an overfitted + // replacement for a previous overfitted code that was making even less sense. + if (!Call || Call->getDecl()) + return State; + + for (auto I : State->get<AllocatedData>()) { + SymbolRef Sym = I.first; + if (Escaped.count(Sym)) + State = State->remove<AllocatedData>(Sym); + + // This checker is special. Most checkers in fact only track symbols of + // SymbolConjured type, eg. symbols returned from functions such as + // malloc(). This checker tracks symbols returned as out-parameters. + // + // When a function is evaluated conservatively, the out-parameter's pointee + // base region gets invalidated with a SymbolConjured. If the base region is + // larger than the region we're interested in, the value we're interested in + // would be SymbolDerived based on that SymbolConjured. However, such + // SymbolDerived will never be listed in the Escaped set when the base + // region is invalidated because ExprEngine doesn't know which symbols + // were derived from a given symbol, while there can be infinitely many + // valid symbols derived from any given symbol. + // + // Hence the extra boilerplate: remove the derived symbol when its parent + // symbol escapes. 
+ // + if (const auto *SD = dyn_cast<SymbolDerived>(Sym)) { + SymbolRef ParentSym = SD->getParentSymbol(); + if (Escaped.count(ParentSym)) + State = State->remove<AllocatedData>(Sym); + } + } + return State; +} + +PathDiagnosticPieceRef +MacOSKeychainAPIChecker::SecKeychainBugVisitor::VisitNode( + const ExplodedNode *N, BugReporterContext &BRC, + PathSensitiveBugReport &BR) { + const AllocationState *AS = N->getState()->get<AllocatedData>(Sym); + if (!AS) + return nullptr; + const AllocationState *ASPrev = + N->getFirstPred()->getState()->get<AllocatedData>(Sym); + if (ASPrev) + return nullptr; + + // (!ASPrev && AS) ~ We started tracking symbol in node N, it must be the + // allocation site. + const CallExpr *CE = + cast<CallExpr>(N->getLocation().castAs<StmtPoint>().getStmt()); + const FunctionDecl *funDecl = CE->getDirectCallee(); + assert(funDecl && "We do not support indirect function calls as of now."); + StringRef funName = funDecl->getName(); + + // Get the expression of the corresponding argument. 
+ unsigned Idx = getTrackedFunctionIndex(funName, true); + assert(Idx != InvalidIdx && "This should be a call to an allocator."); + const Expr *ArgExpr = CE->getArg(FunctionsToTrack[Idx].Param); + PathDiagnosticLocation Pos(ArgExpr, BRC.getSourceManager(), + N->getLocationContext()); + return std::make_shared<PathDiagnosticEventPiece>(Pos, + "Data is allocated here."); +} + +void MacOSKeychainAPIChecker::printState(raw_ostream &Out, + ProgramStateRef State, + const char *NL, + const char *Sep) const { + + AllocatedDataTy AMap = State->get<AllocatedData>(); + + if (!AMap.isEmpty()) { + Out << Sep << "KeychainAPIChecker :" << NL; + for (auto I = AMap.begin(), E = AMap.end(); I != E; ++I) { + I.getKey()->dumpToStream(Out); + } + } +} + + +void ento::registerMacOSKeychainAPIChecker(CheckerManager &mgr) { + mgr.registerChecker<MacOSKeychainAPIChecker>(); +} + +bool ento::shouldRegisterMacOSKeychainAPIChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp new file mode 100644 index 000000000000..d964a1668eaa --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp @@ -0,0 +1,180 @@ +// MacOSXAPIChecker.h - Checks proper use of various MacOS X APIs --*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines MacOSXAPIChecker, which is an assortment of checks on calls +// to various, widely used Apple APIs. +// +// FIXME: What's currently in BasicObjCFoundationChecks.cpp should be migrated +// to here, using the new Checker interface. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +class MacOSXAPIChecker : public Checker< check::PreStmt<CallExpr> > { + mutable std::unique_ptr<BugType> BT_dispatchOnce; + + static const ObjCIvarRegion *getParentIvarRegion(const MemRegion *R); + +public: + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + + void CheckDispatchOnce(CheckerContext &C, const CallExpr *CE, + StringRef FName) const; + + typedef void (MacOSXAPIChecker::*SubChecker)(CheckerContext &, + const CallExpr *, + StringRef FName) const; +}; +} //end anonymous namespace + +//===----------------------------------------------------------------------===// +// dispatch_once and dispatch_once_f +//===----------------------------------------------------------------------===// + +const ObjCIvarRegion * +MacOSXAPIChecker::getParentIvarRegion(const MemRegion *R) { + const SubRegion *SR = dyn_cast<SubRegion>(R); + while (SR) { + if (const ObjCIvarRegion *IR = dyn_cast<ObjCIvarRegion>(SR)) + return IR; + SR = dyn_cast<SubRegion>(SR->getSuperRegion()); + } + return nullptr; +} + +void MacOSXAPIChecker::CheckDispatchOnce(CheckerContext &C, const CallExpr *CE, + StringRef FName) const { + if (CE->getNumArgs() < 1) + return; + + // Check if the first argument is improperly allocated. If so, issue a + // warning because that's likely to be bad news. 
+ const MemRegion *R = C.getSVal(CE->getArg(0)).getAsRegion(); + if (!R) + return; + + // Global variables are fine. + const MemRegion *RB = R->getBaseRegion(); + const MemSpaceRegion *RS = RB->getMemorySpace(); + if (isa<GlobalsSpaceRegion>(RS)) + return; + + // Handle _dispatch_once. In some versions of the OS X SDK we have the case + // that dispatch_once is a macro that wraps a call to _dispatch_once. + // _dispatch_once is then a function which then calls the real dispatch_once. + // Users do not care; they just want the warning at the top-level call. + if (CE->getBeginLoc().isMacroID()) { + StringRef TrimmedFName = FName.ltrim('_'); + if (TrimmedFName != FName) + FName = TrimmedFName; + } + + SmallString<256> S; + llvm::raw_svector_ostream os(S); + bool SuggestStatic = false; + os << "Call to '" << FName << "' uses"; + if (const VarRegion *VR = dyn_cast<VarRegion>(RB)) { + const VarDecl *VD = VR->getDecl(); + // FIXME: These should have correct memory space and thus should be filtered + // out earlier. This branch only fires when we're looking from a block, + // which we analyze as a top-level declaration, onto a static local + // in a function that contains the block. + if (VD->isStaticLocal()) + return; + // We filtered out globals earlier, so it must be a local variable + // or a block variable which is under UnknownSpaceRegion. 
+ if (VR != R) + os << " memory within"; + if (VD->hasAttr<BlocksAttr>()) + os << " the block variable '"; + else + os << " the local variable '"; + os << VR->getDecl()->getName() << '\''; + SuggestStatic = true; + } else if (const ObjCIvarRegion *IVR = getParentIvarRegion(R)) { + if (IVR != R) + os << " memory within"; + os << " the instance variable '" << IVR->getDecl()->getName() << '\''; + } else if (isa<HeapSpaceRegion>(RS)) { + os << " heap-allocated memory"; + } else if (isa<UnknownSpaceRegion>(RS)) { + // Presence of an IVar superregion has priority over this branch, because + // ObjC objects are on the heap even if the core doesn't realize this. + // Presence of a block variable base region has priority over this branch, + // because block variables are known to be either on stack or on heap + // (might actually move between the two, hence UnknownSpace). + return; + } else { + os << " stack allocated memory"; + } + os << " for the predicate value. Using such transient memory for " + "the predicate is potentially dangerous."; + if (SuggestStatic) + os << " Perhaps you intended to declare the variable as 'static'?"; + + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + + if (!BT_dispatchOnce) + BT_dispatchOnce.reset(new BugType(this, "Improper use of 'dispatch_once'", + "API Misuse (Apple)")); + + auto report = + std::make_unique<PathSensitiveBugReport>(*BT_dispatchOnce, os.str(), N); + report->addRange(CE->getArg(0)->getSourceRange()); + C.emitReport(std::move(report)); +} + +//===----------------------------------------------------------------------===// +// Central dispatch function. 
+//===----------------------------------------------------------------------===// + +void MacOSXAPIChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + StringRef Name = C.getCalleeName(CE); + if (Name.empty()) + return; + + SubChecker SC = + llvm::StringSwitch<SubChecker>(Name) + .Cases("dispatch_once", + "_dispatch_once", + "dispatch_once_f", + &MacOSXAPIChecker::CheckDispatchOnce) + .Default(nullptr); + + if (SC) + (this->*SC)(C, CE, Name); +} + +//===----------------------------------------------------------------------===// +// Registration. +//===----------------------------------------------------------------------===// + +void ento::registerMacOSXAPIChecker(CheckerManager &mgr) { + mgr.registerChecker<MacOSXAPIChecker>(); +} + +bool ento::shouldRegisterMacOSXAPIChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp new file mode 100644 index 000000000000..a82449951873 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -0,0 +1,3426 @@ +//=== MallocChecker.cpp - A malloc/free checker -------------------*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a variety of memory management related checkers, such as +// leak, double free, and use-after-free. +// +// The following checkers are defined here: +// +// * MallocChecker +// Despite its name, it models all sorts of memory allocations and +// de- or reallocation, including but not limited to malloc, free, +// relloc, new, delete. It also reports on a variety of memory misuse +// errors. 
//       Many other checkers interact very closely with this checker; in fact,
//       most are merely options to this one. Other checkers may register
//       MallocChecker, but do not enable MallocChecker's reports (more details
//       to follow around its field, ChecksEnabled).
//       It also has a boolean "Optimistic" checker option, which if set to true
//       will cause the checker to model user defined memory management related
//       functions annotated via the attributes ownership_takes, ownership_holds
//       and ownership_returns.
//
//   * NewDeleteChecker
//       Enables the modeling of new, new[], delete, delete[] in MallocChecker,
//       and checks for related double-free and use-after-free errors.
//
//   * NewDeleteLeaksChecker
//       Checks for leaks related to new, new[], delete, delete[].
//       Depends on NewDeleteChecker.
//
//   * MismatchedDeallocatorChecker
//       Enables checking in MallocChecker that memory is deallocated with the
//       function corresponding to its allocation function, e.g. that malloc()
//       allocated regions are only freed by free(), new by delete, new[] by
//       delete[].
//
//  InnerPointerChecker interacts very closely with MallocChecker, but unlike
//  the above checkers, it has its own file, hence the many InnerPointerChecker
//  related headers and non-static functions.
//
//===----------------------------------------------------------------------===//

#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "InterCheckerAPI.h"
#include "clang/AST/Attr.h"
#include "clang/AST/ParentMap.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Lexer.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "AllocationState.h"
#include <climits>
#include <utility>

using namespace clang;
using namespace ento;

//===----------------------------------------------------------------------===//
// The types of allocation we're modeling.
//===----------------------------------------------------------------------===//

namespace {

// Used to check correspondence between allocators and deallocators.
enum AllocationFamily {
  // No family. RefState's constructor asserts that it is never given this
  // value.
  AF_None,
  // The malloc/free group of C functions (the names recognised for this
  // family are listed in MemFunctionInfoTy::isCMemFunction).
  AF_Malloc,
  // C++ new / delete. NOTE(review): presumably the non-array forms only --
  // confirm against getAllocationFamily.
  AF_CXXNew,
  // C++ new[] / delete[]. NOTE(review): presumably the array forms only --
  // confirm against getAllocationFamily.
  AF_CXXNewArray,
  // if_nameindex / if_freenameindex.
  AF_IfNameIndex,
  // alloca / _alloca.
  AF_Alloca,
  // Pointer to the inner buffer of a container object (see
  // AllocationState.h).
  AF_InnerBuffer
};

// Defined further below; forward-declared because the helper declarations
// that follow take it by reference.
struct MemFunctionInfoTy;

} // end of anonymous namespace

/// Determine family of a deallocation expression.
static AllocationFamily
getAllocationFamily(const MemFunctionInfoTy &MemFunctionInfo, CheckerContext &C,
                    const Stmt *S);

/// Print names of allocators and deallocators.
///
/// \returns true on success.
+static bool printAllocDeallocName(raw_ostream &os, CheckerContext &C, + const Expr *E); + +/// Print expected name of an allocator based on the deallocator's +/// family derived from the DeallocExpr. +static void printExpectedAllocName(raw_ostream &os, + const MemFunctionInfoTy &MemFunctionInfo, + CheckerContext &C, const Expr *E); + +/// Print expected name of a deallocator based on the allocator's +/// family. +static void printExpectedDeallocName(raw_ostream &os, AllocationFamily Family); + +//===----------------------------------------------------------------------===// +// The state of a symbol, in terms of memory management. +//===----------------------------------------------------------------------===// + +namespace { + +class RefState { + enum Kind { + // Reference to allocated memory. + Allocated, + // Reference to zero-allocated memory. + AllocatedOfSizeZero, + // Reference to released/freed memory. + Released, + // The responsibility for freeing resources has transferred from + // this reference. A relinquished symbol should not be freed. + Relinquished, + // We are no longer guaranteed to have observed all manipulations + // of this pointer/memory. For example, it could have been + // passed as a parameter to an opaque function. 
+ Escaped + }; + + const Stmt *S; + + Kind K; + AllocationFamily Family; + + RefState(Kind k, const Stmt *s, AllocationFamily family) + : S(s), K(k), Family(family) { + assert(family != AF_None); + } + +public: + bool isAllocated() const { return K == Allocated; } + bool isAllocatedOfSizeZero() const { return K == AllocatedOfSizeZero; } + bool isReleased() const { return K == Released; } + bool isRelinquished() const { return K == Relinquished; } + bool isEscaped() const { return K == Escaped; } + AllocationFamily getAllocationFamily() const { return Family; } + const Stmt *getStmt() const { return S; } + + bool operator==(const RefState &X) const { + return K == X.K && S == X.S && Family == X.Family; + } + + static RefState getAllocated(AllocationFamily family, const Stmt *s) { + return RefState(Allocated, s, family); + } + static RefState getAllocatedOfSizeZero(const RefState *RS) { + return RefState(AllocatedOfSizeZero, RS->getStmt(), + RS->getAllocationFamily()); + } + static RefState getReleased(AllocationFamily family, const Stmt *s) { + return RefState(Released, s, family); + } + static RefState getRelinquished(AllocationFamily family, const Stmt *s) { + return RefState(Relinquished, s, family); + } + static RefState getEscaped(const RefState *RS) { + return RefState(Escaped, RS->getStmt(), RS->getAllocationFamily()); + } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(K); + ID.AddPointer(S); + ID.AddInteger(Family); + } + + LLVM_DUMP_METHOD void dump(raw_ostream &OS) const { + switch (K) { +#define CASE(ID) case ID: OS << #ID; break; + CASE(Allocated) + CASE(AllocatedOfSizeZero) + CASE(Released) + CASE(Relinquished) + CASE(Escaped) + } + } + + LLVM_DUMP_METHOD void dump() const { dump(llvm::errs()); } +}; + +} // end of anonymous namespace + +REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, SymbolRef, RefState) + +/// Check if the memory associated with this symbol was released. 
+static bool isReleased(SymbolRef Sym, CheckerContext &C); + +/// Update the RefState to reflect the new memory allocation. +/// The optional \p RetVal parameter specifies the newly allocated pointer +/// value; if unspecified, the value of expression \p E is used. +static ProgramStateRef MallocUpdateRefState(CheckerContext &C, const Expr *E, + ProgramStateRef State, + AllocationFamily Family = AF_Malloc, + Optional<SVal> RetVal = None); + +//===----------------------------------------------------------------------===// +// The modeling of memory reallocation. +// +// The terminology 'toPtr' and 'fromPtr' will be used: +// toPtr = realloc(fromPtr, 20); +//===----------------------------------------------------------------------===// + +REGISTER_SET_WITH_PROGRAMSTATE(ReallocSizeZeroSymbols, SymbolRef) + +namespace { + +/// The state of 'fromPtr' after reallocation is known to have failed. +enum OwnershipAfterReallocKind { + // The symbol needs to be freed (e.g.: realloc) + OAR_ToBeFreedAfterFailure, + // The symbol has been freed (e.g.: reallocf) + OAR_FreeOnFailure, + // The symbol doesn't have to freed (e.g.: we aren't sure if, how and where + // 'fromPtr' was allocated: + // void Haha(int *ptr) { + // ptr = realloc(ptr, 67); + // // ... + // } + // ). + OAR_DoNotTrackAfterFailure +}; + +/// Stores information about the 'fromPtr' symbol after reallocation. +/// +/// This is important because realloc may fail, and that needs special modeling. +/// Whether reallocation failed or not will not be known until later, so we'll +/// store whether upon failure 'fromPtr' will be freed, or needs to be freed +/// later, etc. +struct ReallocPair { + + // The 'fromPtr'. 
+ SymbolRef ReallocatedSym; + OwnershipAfterReallocKind Kind; + + ReallocPair(SymbolRef S, OwnershipAfterReallocKind K) + : ReallocatedSym(S), Kind(K) {} + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(Kind); + ID.AddPointer(ReallocatedSym); + } + bool operator==(const ReallocPair &X) const { + return ReallocatedSym == X.ReallocatedSym && + Kind == X.Kind; + } +}; + +} // end of anonymous namespace + +REGISTER_MAP_WITH_PROGRAMSTATE(ReallocPairs, SymbolRef, ReallocPair) + +//===----------------------------------------------------------------------===// +// Kinds of memory operations, information about resource managing functions. +//===----------------------------------------------------------------------===// + +namespace { + +enum class MemoryOperationKind { MOK_Allocate, MOK_Free, MOK_Any }; + +struct MemFunctionInfoTy { + /// The value of the MallocChecker:Optimistic is stored in this variable. + /// + /// In pessimistic mode, the checker assumes that it does not know which + /// functions might free the memory. + /// In optimistic mode, the checker assumes that all user-defined functions + /// which might free a pointer are annotated. + DefaultBool ShouldIncludeOwnershipAnnotatedFunctions; + + // TODO: Change these to CallDescription, and get rid of lazy initialization. 
+ mutable IdentifierInfo *II_alloca = nullptr, *II_win_alloca = nullptr, + *II_malloc = nullptr, *II_free = nullptr, + *II_realloc = nullptr, *II_calloc = nullptr, + *II_valloc = nullptr, *II_reallocf = nullptr, + *II_strndup = nullptr, *II_strdup = nullptr, + *II_win_strdup = nullptr, *II_kmalloc = nullptr, + *II_if_nameindex = nullptr, + *II_if_freenameindex = nullptr, *II_wcsdup = nullptr, + *II_win_wcsdup = nullptr, *II_g_malloc = nullptr, + *II_g_malloc0 = nullptr, *II_g_realloc = nullptr, + *II_g_try_malloc = nullptr, + *II_g_try_malloc0 = nullptr, + *II_g_try_realloc = nullptr, *II_g_free = nullptr, + *II_g_memdup = nullptr, *II_g_malloc_n = nullptr, + *II_g_malloc0_n = nullptr, *II_g_realloc_n = nullptr, + *II_g_try_malloc_n = nullptr, + *II_g_try_malloc0_n = nullptr, *II_kfree = nullptr, + *II_g_try_realloc_n = nullptr; + + void initIdentifierInfo(ASTContext &C) const; + + ///@{ + /// Check if this is one of the functions which can allocate/reallocate + /// memory pointed to by one of its arguments. + bool isMemFunction(const FunctionDecl *FD, ASTContext &C) const; + bool isCMemFunction(const FunctionDecl *FD, ASTContext &C, + AllocationFamily Family, + MemoryOperationKind MemKind) const; + + /// Tells if the callee is one of the builtin new/delete operators, including + /// placement operators and other standard overloads. + bool isStandardNewDelete(const FunctionDecl *FD, ASTContext &C) const; + ///@} +}; + +} // end of anonymous namespace + +//===----------------------------------------------------------------------===// +// Definition of the MallocChecker class. 
//===----------------------------------------------------------------------===//

namespace {

/// The checker class itself. It subscribes to the callbacks listed in the
/// Checker<> template arguments below; the private section declares the
/// modeling helpers (Malloc*/Free*/Realloc*/Calloc*), the bug types and the
/// Report* functions.
class MallocChecker
    : public Checker<check::DeadSymbols, check::PointerEscape,
                     check::ConstPointerEscape, check::PreStmt<ReturnStmt>,
                     check::EndFunction, check::PreCall,
                     check::PostStmt<CallExpr>, check::PostStmt<CXXNewExpr>,
                     check::NewAllocator, check::PreStmt<CXXDeleteExpr>,
                     check::PostStmt<BlockExpr>, check::PostObjCMessage,
                     check::Location, eval::Assume> {
public:
  MemFunctionInfoTy MemFunctionInfo;

  /// Many checkers are essentially built into this one, so enabling them will
  /// make MallocChecker perform additional modeling and reporting.
  enum CheckKind {
    /// When a subchecker is enabled but MallocChecker isn't, model memory
    /// management but do not emit warnings emitted with MallocChecker only
    /// enabled.
    CK_MallocChecker,
    CK_NewDeleteChecker,
    CK_NewDeleteLeaksChecker,
    CK_MismatchedDeallocatorChecker,
    CK_InnerPointerChecker,
    /// Not a checker: the number of kinds above, used to size the per-kind
    /// arrays in this class.
    CK_NumCheckKinds
  };

  using LeakInfo = std::pair<const ExplodedNode *, const MemRegion *>;

  // Both arrays are indexed by CheckKind.
  DefaultBool ChecksEnabled[CK_NumCheckKinds];
  CheckerNameRef CheckNames[CK_NumCheckKinds];

  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
  void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
  void checkNewAllocator(const CXXNewExpr *NE, SVal Target,
                         CheckerContext &C) const;
  void checkPreStmt(const CXXDeleteExpr *DE, CheckerContext &C) const;
  void checkPostObjCMessage(const ObjCMethodCall &Call, CheckerContext &C) const;
  void checkPostStmt(const BlockExpr *BE, CheckerContext &C) const;
  void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;
  void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const;
  void checkEndFunction(const ReturnStmt *S, CheckerContext &C) const;
  ProgramStateRef evalAssume(ProgramStateRef state, SVal Cond,
                            bool Assumption) const;
  void checkLocation(SVal l, bool isLoad, const Stmt *S,
                     CheckerContext &C) const;

  ProgramStateRef checkPointerEscape(ProgramStateRef State,
                                    const InvalidatedSymbols &Escaped,
                                    const CallEvent *Call,
                                    PointerEscapeKind Kind) const;
  ProgramStateRef checkConstPointerEscape(ProgramStateRef State,
                                          const InvalidatedSymbols &Escaped,
                                          const CallEvent *Call,
                                          PointerEscapeKind Kind) const;

  void printState(raw_ostream &Out, ProgramStateRef State,
                  const char *NL, const char *Sep) const override;

private:
  // Bug types. The array-valued ones are indexed by CheckKind.
  mutable std::unique_ptr<BugType> BT_DoubleFree[CK_NumCheckKinds];
  mutable std::unique_ptr<BugType> BT_DoubleDelete;
  mutable std::unique_ptr<BugType> BT_Leak[CK_NumCheckKinds];
  mutable std::unique_ptr<BugType> BT_UseFree[CK_NumCheckKinds];
  mutable std::unique_ptr<BugType> BT_BadFree[CK_NumCheckKinds];
  mutable std::unique_ptr<BugType> BT_FreeAlloca[CK_NumCheckKinds];
  mutable std::unique_ptr<BugType> BT_MismatchedDealloc;
  mutable std::unique_ptr<BugType> BT_OffsetFree[CK_NumCheckKinds];
  mutable std::unique_ptr<BugType> BT_UseZerroAllocated[CK_NumCheckKinds];

  // TODO: Remove mutable by moving the initialization to the registry function.
  mutable Optional<uint64_t> KernelZeroFlagVal;

  /// Process C++ operator new()'s allocation, which is the part of C++
  /// new-expression that goes before the constructor.
  void processNewAllocation(const CXXNewExpr *NE, CheckerContext &C,
                            SVal Target) const;

  /// Perform a zero-allocation check.
  ///
  /// \param [in] E The expression that allocates memory.
  /// \param [in] IndexOfSizeArg Index of the argument that specifies the size
  ///   of the memory that needs to be allocated. E.g. for malloc, this would be
  ///   0.
  /// \param [in] RetVal Specifies the newly allocated pointer value;
  ///   if unspecified, the value of expression \p E is used.
  static ProgramStateRef ProcessZeroAllocCheck(CheckerContext &C, const Expr *E,
                                               const unsigned IndexOfSizeArg,
                                               ProgramStateRef State,
                                               Optional<SVal> RetVal = None);

  /// Model functions with the ownership_returns attribute.
  ///
  /// User-defined function may have the ownership_returns attribute, which
  /// annotates that the function returns with an object that was allocated on
  /// the heap, and passes the ownership to the caller.
  ///
  ///   void __attribute((ownership_returns(malloc, 1))) *my_malloc(size_t);
  ///
  /// It has two parameters:
  ///   - first: name of the resource (e.g. 'malloc')
  ///   - (OPTIONAL) second: size of the allocated region
  ///
  /// \param [in] CE The expression that allocates memory.
  /// \param [in] Att The ownership_returns attribute.
  /// \param [in] State The \c ProgramState right before allocation.
  /// \returns The ProgramState right after allocation.
  ProgramStateRef MallocMemReturnsAttr(CheckerContext &C,
                                       const CallExpr *CE,
                                       const OwnershipAttr* Att,
                                       ProgramStateRef State) const;

  /// Models memory allocation.
  ///
  /// \param [in] CE The expression that allocates memory.
  /// \param [in] SizeEx Size of the memory that needs to be allocated.
  /// \param [in] Init The value the allocated memory needs to be initialized
  /// with. For example, \c calloc initializes the allocated memory to 0,
  /// malloc leaves it undefined.
  /// \param [in] State The \c ProgramState right before allocation.
  /// \returns The ProgramState right after allocation.
  static ProgramStateRef MallocMemAux(CheckerContext &C, const CallExpr *CE,
                                      const Expr *SizeEx, SVal Init,
                                      ProgramStateRef State,
                                      AllocationFamily Family = AF_Malloc);

  /// Models memory allocation.
  ///
  /// \param [in] CE The expression that allocates memory.
  /// \param [in] Size Size of the memory that needs to be allocated.
  /// \param [in] Init The value the allocated memory needs to be initialized
  /// with. For example, \c calloc initializes the allocated memory to 0,
  /// malloc leaves it undefined.
  /// \param [in] State The \c ProgramState right before allocation.
  /// \returns The ProgramState right after allocation.
  static ProgramStateRef MallocMemAux(CheckerContext &C, const CallExpr *CE,
                                      SVal Size, SVal Init,
                                      ProgramStateRef State,
                                      AllocationFamily Family = AF_Malloc);

  static ProgramStateRef addExtentSize(CheckerContext &C, const CXXNewExpr *NE,
                                       ProgramStateRef State, SVal Target);

  // Check if this malloc() call has special flags. At present that means
  // M_ZERO or __GFP_ZERO (in which case, treat it like calloc).
  llvm::Optional<ProgramStateRef>
  performKernelMalloc(const CallExpr *CE, CheckerContext &C,
                      const ProgramStateRef &State) const;

  /// Model functions with the ownership_takes and ownership_holds attributes.
  ///
  /// User-defined function may have the ownership_takes and/or ownership_holds
  /// attributes, which annotate that the function frees the memory passed as a
  /// parameter.
  ///
  ///   void __attribute((ownership_takes(malloc, 1))) my_free(void *);
  ///   void __attribute((ownership_holds(malloc, 1))) my_hold(void *);
  ///
  /// They have two parameters:
  ///   - first: name of the resource (e.g. 'malloc')
  ///   - second: index of the parameter the attribute applies to
  ///
  /// \param [in] CE The expression that frees memory.
  /// \param [in] Att The ownership_takes or ownership_holds attribute.
  /// \param [in] State The \c ProgramState right before deallocation.
  /// \returns The ProgramState right after deallocation.
  ProgramStateRef FreeMemAttr(CheckerContext &C, const CallExpr *CE,
                              const OwnershipAttr* Att,
                              ProgramStateRef State) const;

  /// Models memory deallocation.
  ///
  /// \param [in] CE The expression that frees memory.
  /// \param [in] State The \c ProgramState right before deallocation.
  /// \param [in] Num Index of the argument that needs to be freed. This is
  ///   normally 0, but for custom free functions it may be different.
  /// \param [in] Hold Whether the parameter at \p Num has the ownership_holds
  ///   attribute.
  /// \param [out] IsKnownToBeAllocated Whether the memory to be freed is known
  ///   to have been allocated, or in other words, the symbol to be freed was
  ///   registered as allocated by this checker. In the following case, \c ptr
  ///   isn't known to be allocated.
  ///      void Haha(int *ptr) {
  ///        ptr = realloc(ptr, 67);
  ///        // ...
  ///      }
  /// \param [in] ReturnsNullOnFailure Whether the memory deallocation function
  ///   we're modeling returns with Null on failure.
  /// \returns The ProgramState right after deallocation.
  ProgramStateRef FreeMemAux(CheckerContext &C, const CallExpr *CE,
                             ProgramStateRef State, unsigned Num, bool Hold,
                             bool &IsKnownToBeAllocated,
                             bool ReturnsNullOnFailure = false) const;

  /// Models memory deallocation.
  ///
  /// \param [in] ArgExpr The variable whose pointee needs to be freed.
  /// \param [in] ParentExpr The expression that frees the memory.
  /// \param [in] State The \c ProgramState right before deallocation.
  /// \param [in] Hold Whether the parameter that \p ArgExpr is passed to has
  ///   the ownership_holds attribute.
  /// \param [out] IsKnownToBeAllocated Whether the memory to be freed is known
  ///   to have been allocated, or in other words, the symbol to be freed was
  ///   registered as allocated by this checker. In the following case, \c ptr
  ///   isn't known to be allocated.
  ///      void Haha(int *ptr) {
  ///        ptr = realloc(ptr, 67);
  ///        // ...
  ///      }
  /// \param [in] ReturnsNullOnFailure Whether the memory deallocation function
  ///   we're modeling returns with Null on failure.
  /// \returns The ProgramState right after deallocation.
  ProgramStateRef FreeMemAux(CheckerContext &C, const Expr *ArgExpr,
                             const Expr *ParentExpr, ProgramStateRef State,
                             bool Hold, bool &IsKnownToBeAllocated,
                             bool ReturnsNullOnFailure = false) const;

  // TODO: Needs some refactoring, as all other deallocation modeling
  // functions are suffering from out parameters and messy code due to how
  // realloc is handled.
  //
  /// Models memory reallocation.
  ///
  /// \param [in] CE The expression that reallocated memory
  /// \param [in] ShouldFreeOnFail Whether if reallocation fails, the supplied
  ///   memory should be freed.
  /// \param [in] State The \c ProgramState right before reallocation.
  /// \param [in] SuffixWithN Whether the reallocation function we're modeling
  ///   has an '_n' suffix, such as g_realloc_n.
  /// \returns The ProgramState right after reallocation.
  ProgramStateRef ReallocMemAux(CheckerContext &C, const CallExpr *CE,
                                bool ShouldFreeOnFail, ProgramStateRef State,
                                bool SuffixWithN = false) const;

  /// Evaluates the buffer size that needs to be allocated.
  ///
  /// \param [in] Blocks The number of blocks that need to be allocated.
  /// \param [in] BlockBytes The size of a block.
  /// \returns The symbolic value of \p Blocks * \p BlockBytes.
  static SVal evalMulForBufferSize(CheckerContext &C, const Expr *Blocks,
                                   const Expr *BlockBytes);

  /// Models zero initialized array allocation.
  ///
  /// \param [in] CE The expression that allocates memory.
  /// \param [in] State The \c ProgramState right before allocation.
  /// \returns The ProgramState right after allocation.
  static ProgramStateRef CallocMem(CheckerContext &C, const CallExpr *CE,
                                   ProgramStateRef State);

  /// See if deallocation happens in a suspicious context. If so, escape the
  /// pointers that otherwise would have been deallocated and return true.
  bool suppressDeallocationsInSuspiciousContexts(const CallExpr *CE,
                                                 CheckerContext &C) const;

  /// If in \p S \p Sym is used, check whether \p Sym was already freed.
  bool checkUseAfterFree(SymbolRef Sym, CheckerContext &C, const Stmt *S) const;

  /// If in \p S \p Sym is used, check whether \p Sym was allocated as a zero
  /// sized memory region.
  void checkUseZeroAllocated(SymbolRef Sym, CheckerContext &C,
                             const Stmt *S) const;

  /// If \p Sym is being freed, check whether \p Sym was already freed.
  bool checkDoubleDelete(SymbolRef Sym, CheckerContext &C) const;

  /// Check if the function is known to free memory, or if it is
  /// "interesting" and should be modeled explicitly.
  ///
  /// \param [out] EscapingSymbol A function might not free memory in general,
  ///   but could be known to free a particular symbol. In this case, false is
  ///   returned and the single escaping symbol is returned through the out
  ///   parameter.
  ///
  /// We assume that pointers do not escape through calls to system functions
  /// not handled by this checker.
  bool mayFreeAnyEscapedMemoryOrIsModeledExplicitly(const CallEvent *Call,
                                   ProgramStateRef State,
                                   SymbolRef &EscapingSymbol) const;

  /// Implementation of the checkPointerEscape callbacks.
  ProgramStateRef checkPointerEscapeAux(ProgramStateRef State,
                                        const InvalidatedSymbols &Escaped,
                                        const CallEvent *Call,
                                        PointerEscapeKind Kind,
                                        bool IsConstPointerEscape) const;

  // Implementation of the checkPreStmt and checkEndFunction callbacks.
  void checkEscapeOnReturn(const ReturnStmt *S, CheckerContext &C) const;

  ///@{
  /// Tells if a given family/call/symbol is tracked by the current checker.
  /// Returns the kind of the checker responsible for this
  /// family/call/symbol, or an empty Optional.
  /// NOTE(review): presumably empty means "not tracked by any enabled
  /// checker" -- confirm against the definitions.
  Optional<CheckKind> getCheckIfTracked(AllocationFamily Family,
                                        bool IsALeakCheck = false) const;
  Optional<CheckKind> getCheckIfTracked(CheckerContext &C,
                                        const Stmt *AllocDeallocStmt,
                                        bool IsALeakCheck = false) const;
  Optional<CheckKind> getCheckIfTracked(CheckerContext &C, SymbolRef Sym,
                                        bool IsALeakCheck = false) const;
  ///@}
  static bool SummarizeValue(raw_ostream &os, SVal V);
  static bool SummarizeRegion(raw_ostream &os, const MemRegion *MR);

  void ReportBadFree(CheckerContext &C, SVal ArgVal, SourceRange Range,
                     const Expr *DeallocExpr) const;
  void ReportFreeAlloca(CheckerContext &C, SVal ArgVal,
                        SourceRange Range) const;
  void ReportMismatchedDealloc(CheckerContext &C, SourceRange Range,
                               const Expr *DeallocExpr, const RefState *RS,
                               SymbolRef Sym, bool OwnershipTransferred) const;
  void ReportOffsetFree(CheckerContext &C, SVal ArgVal, SourceRange Range,
                        const Expr *DeallocExpr,
                        const Expr *AllocExpr = nullptr) const;
  void ReportUseAfterFree(CheckerContext &C, SourceRange Range,
                          SymbolRef Sym) const;
  void ReportDoubleFree(CheckerContext &C, SourceRange Range, bool Released,
                        SymbolRef Sym, SymbolRef PrevSym) const;

  void ReportDoubleDelete(CheckerContext &C, SymbolRef Sym) const;

  void ReportUseZeroAllocated(CheckerContext &C, SourceRange Range,
                              SymbolRef Sym) const;

  void ReportFunctionPointerFree(CheckerContext &C, SVal ArgVal,
                                 SourceRange Range, const Expr *FreeExpr) const;

  /// Find the location of the allocation for Sym on the path leading to the
  /// exploded node N.
  static LeakInfo getAllocationSite(const ExplodedNode *N, SymbolRef Sym,
                                    CheckerContext &C);

  void reportLeak(SymbolRef Sym, ExplodedNode *N, CheckerContext &C) const;
};

//===----------------------------------------------------------------------===//
// Definition of MallocBugVisitor.
+//===----------------------------------------------------------------------===// + +/// The bug visitor which allows us to print extra diagnostics along the +/// BugReport path. For example, showing the allocation site of the leaked +/// region. +class MallocBugVisitor final : public BugReporterVisitor { +protected: + enum NotificationMode { Normal, ReallocationFailed }; + + // The allocated region symbol tracked by the main analysis. + SymbolRef Sym; + + // The mode we are in, i.e. what kind of diagnostics will be emitted. + NotificationMode Mode; + + // A symbol from when the primary region should have been reallocated. + SymbolRef FailedReallocSymbol; + + // A C++ destructor stack frame in which memory was released. Used for + // miscellaneous false positive suppression. + const StackFrameContext *ReleaseDestructorLC; + + bool IsLeak; + +public: + MallocBugVisitor(SymbolRef S, bool isLeak = false) + : Sym(S), Mode(Normal), FailedReallocSymbol(nullptr), + ReleaseDestructorLC(nullptr), IsLeak(isLeak) {} + + static void *getTag() { + static int Tag = 0; + return &Tag; + } + + void Profile(llvm::FoldingSetNodeID &ID) const override { + ID.AddPointer(getTag()); + ID.AddPointer(Sym); + } + + /// Did not track -> allocated. Other state (released) -> allocated. + static inline bool isAllocated(const RefState *RSCurr, const RefState *RSPrev, + const Stmt *Stmt) { + return (Stmt && (isa<CallExpr>(Stmt) || isa<CXXNewExpr>(Stmt)) && + (RSCurr && + (RSCurr->isAllocated() || RSCurr->isAllocatedOfSizeZero())) && + (!RSPrev || + !(RSPrev->isAllocated() || RSPrev->isAllocatedOfSizeZero()))); + } + + /// Did not track -> released. Other state (allocated) -> released. + /// The statement associated with the release might be missing. 
+ static inline bool isReleased(const RefState *RSCurr, const RefState *RSPrev, + const Stmt *Stmt) { + bool IsReleased = + (RSCurr && RSCurr->isReleased()) && (!RSPrev || !RSPrev->isReleased()); + assert(!IsReleased || + (Stmt && (isa<CallExpr>(Stmt) || isa<CXXDeleteExpr>(Stmt))) || + (!Stmt && RSCurr->getAllocationFamily() == AF_InnerBuffer)); + return IsReleased; + } + + /// Did not track -> relinquished. Other state (allocated) -> relinquished. + static inline bool isRelinquished(const RefState *RSCurr, + const RefState *RSPrev, const Stmt *Stmt) { + return (Stmt && + (isa<CallExpr>(Stmt) || isa<ObjCMessageExpr>(Stmt) || + isa<ObjCPropertyRefExpr>(Stmt)) && + (RSCurr && RSCurr->isRelinquished()) && + (!RSPrev || !RSPrev->isRelinquished())); + } + + /// If the expression is not a call, and the state change is + /// released -> allocated, it must be the realloc return value + /// check. If we have to handle more cases here, it might be cleaner just + /// to track this extra bit in the state itself. + static inline bool hasReallocFailed(const RefState *RSCurr, + const RefState *RSPrev, + const Stmt *Stmt) { + return ((!Stmt || !isa<CallExpr>(Stmt)) && + (RSCurr && + (RSCurr->isAllocated() || RSCurr->isAllocatedOfSizeZero())) && + (RSPrev && + !(RSPrev->isAllocated() || RSPrev->isAllocatedOfSizeZero()))); + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + + PathDiagnosticPieceRef getEndPath(BugReporterContext &BRC, + const ExplodedNode *EndPathNode, + PathSensitiveBugReport &BR) override { + if (!IsLeak) + return nullptr; + + PathDiagnosticLocation L = BR.getLocation(); + // Do not add the statement itself as a range in case of leak. 
+ return std::make_shared<PathDiagnosticEventPiece>(L, BR.getDescription(), + false); + } + +private: + class StackHintGeneratorForReallocationFailed + : public StackHintGeneratorForSymbol { + public: + StackHintGeneratorForReallocationFailed(SymbolRef S, StringRef M) + : StackHintGeneratorForSymbol(S, M) {} + + std::string getMessageForArg(const Expr *ArgE, unsigned ArgIndex) override { + // Printed parameters start at 1, not 0. + ++ArgIndex; + + SmallString<200> buf; + llvm::raw_svector_ostream os(buf); + + os << "Reallocation of " << ArgIndex << llvm::getOrdinalSuffix(ArgIndex) + << " parameter failed"; + + return os.str(); + } + + std::string getMessageForReturn(const CallExpr *CallExpr) override { + return "Reallocation of returned value failed"; + } + }; +}; + +} // end anonymous namespace + +// A map from the freed symbol to the symbol representing the return value of +// the free function. +REGISTER_MAP_WITH_PROGRAMSTATE(FreeReturnValue, SymbolRef, SymbolRef) + +namespace { +class StopTrackingCallback final : public SymbolVisitor { + ProgramStateRef state; +public: + StopTrackingCallback(ProgramStateRef st) : state(std::move(st)) {} + ProgramStateRef getState() const { return state; } + + bool VisitSymbol(SymbolRef sym) override { + state = state->remove<RegionState>(sym); + return true; + } +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Methods of MemFunctionInfoTy. 
+//===----------------------------------------------------------------------===// + +void MemFunctionInfoTy::initIdentifierInfo(ASTContext &Ctx) const { + if (II_malloc) + return; + II_alloca = &Ctx.Idents.get("alloca"); + II_malloc = &Ctx.Idents.get("malloc"); + II_free = &Ctx.Idents.get("free"); + II_realloc = &Ctx.Idents.get("realloc"); + II_reallocf = &Ctx.Idents.get("reallocf"); + II_calloc = &Ctx.Idents.get("calloc"); + II_valloc = &Ctx.Idents.get("valloc"); + II_strdup = &Ctx.Idents.get("strdup"); + II_strndup = &Ctx.Idents.get("strndup"); + II_wcsdup = &Ctx.Idents.get("wcsdup"); + II_kmalloc = &Ctx.Idents.get("kmalloc"); + II_kfree = &Ctx.Idents.get("kfree"); + II_if_nameindex = &Ctx.Idents.get("if_nameindex"); + II_if_freenameindex = &Ctx.Idents.get("if_freenameindex"); + + //MSVC uses `_`-prefixed instead, so we check for them too. + II_win_strdup = &Ctx.Idents.get("_strdup"); + II_win_wcsdup = &Ctx.Idents.get("_wcsdup"); + II_win_alloca = &Ctx.Idents.get("_alloca"); + + // Glib + II_g_malloc = &Ctx.Idents.get("g_malloc"); + II_g_malloc0 = &Ctx.Idents.get("g_malloc0"); + II_g_realloc = &Ctx.Idents.get("g_realloc"); + II_g_try_malloc = &Ctx.Idents.get("g_try_malloc"); + II_g_try_malloc0 = &Ctx.Idents.get("g_try_malloc0"); + II_g_try_realloc = &Ctx.Idents.get("g_try_realloc"); + II_g_free = &Ctx.Idents.get("g_free"); + II_g_memdup = &Ctx.Idents.get("g_memdup"); + II_g_malloc_n = &Ctx.Idents.get("g_malloc_n"); + II_g_malloc0_n = &Ctx.Idents.get("g_malloc0_n"); + II_g_realloc_n = &Ctx.Idents.get("g_realloc_n"); + II_g_try_malloc_n = &Ctx.Idents.get("g_try_malloc_n"); + II_g_try_malloc0_n = &Ctx.Idents.get("g_try_malloc0_n"); + II_g_try_realloc_n = &Ctx.Idents.get("g_try_realloc_n"); +} + +bool MemFunctionInfoTy::isMemFunction(const FunctionDecl *FD, + ASTContext &C) const { + if (isCMemFunction(FD, C, AF_Malloc, MemoryOperationKind::MOK_Any)) + return true; + + if (isCMemFunction(FD, C, AF_IfNameIndex, MemoryOperationKind::MOK_Any)) + return true; + + if 
(isCMemFunction(FD, C, AF_Alloca, MemoryOperationKind::MOK_Any)) + return true; + + if (isStandardNewDelete(FD, C)) + return true; + + return false; +} + +bool MemFunctionInfoTy::isCMemFunction(const FunctionDecl *FD, ASTContext &C, + AllocationFamily Family, + MemoryOperationKind MemKind) const { + if (!FD) + return false; + + bool CheckFree = (MemKind == MemoryOperationKind::MOK_Any || + MemKind == MemoryOperationKind::MOK_Free); + bool CheckAlloc = (MemKind == MemoryOperationKind::MOK_Any || + MemKind == MemoryOperationKind::MOK_Allocate); + + if (FD->getKind() == Decl::Function) { + const IdentifierInfo *FunI = FD->getIdentifier(); + initIdentifierInfo(C); + + if (Family == AF_Malloc && CheckFree) { + if (FunI == II_free || FunI == II_realloc || FunI == II_reallocf || + FunI == II_g_free || FunI == II_kfree) + return true; + } + + if (Family == AF_Malloc && CheckAlloc) { + if (FunI == II_malloc || FunI == II_realloc || FunI == II_reallocf || + FunI == II_calloc || FunI == II_valloc || FunI == II_strdup || + FunI == II_win_strdup || FunI == II_strndup || FunI == II_wcsdup || + FunI == II_win_wcsdup || FunI == II_kmalloc || + FunI == II_g_malloc || FunI == II_g_malloc0 || + FunI == II_g_realloc || FunI == II_g_try_malloc || + FunI == II_g_try_malloc0 || FunI == II_g_try_realloc || + FunI == II_g_memdup || FunI == II_g_malloc_n || + FunI == II_g_malloc0_n || FunI == II_g_realloc_n || + FunI == II_g_try_malloc_n || FunI == II_g_try_malloc0_n || + FunI == II_g_try_realloc_n) + return true; + } + + if (Family == AF_IfNameIndex && CheckFree) { + if (FunI == II_if_freenameindex) + return true; + } + + if (Family == AF_IfNameIndex && CheckAlloc) { + if (FunI == II_if_nameindex) + return true; + } + + if (Family == AF_Alloca && CheckAlloc) { + if (FunI == II_alloca || FunI == II_win_alloca) + return true; + } + } + + if (Family != AF_Malloc) + return false; + + if (ShouldIncludeOwnershipAnnotatedFunctions && FD->hasAttrs()) { + for (const auto *I : 
FD->specific_attrs<OwnershipAttr>()) { + OwnershipAttr::OwnershipKind OwnKind = I->getOwnKind(); + if(OwnKind == OwnershipAttr::Takes || OwnKind == OwnershipAttr::Holds) { + if (CheckFree) + return true; + } else if (OwnKind == OwnershipAttr::Returns) { + if (CheckAlloc) + return true; + } + } + } + + return false; +} +bool MemFunctionInfoTy::isStandardNewDelete(const FunctionDecl *FD, + ASTContext &C) const { + if (!FD) + return false; + + OverloadedOperatorKind Kind = FD->getOverloadedOperator(); + if (Kind != OO_New && Kind != OO_Array_New && + Kind != OO_Delete && Kind != OO_Array_Delete) + return false; + + // This is standard if and only if it's not defined in a user file. + SourceLocation L = FD->getLocation(); + // If the header for operator delete is not included, it's still defined + // in an invalid source location. Check to make sure we don't crash. + return !L.isValid() || C.getSourceManager().isInSystemHeader(L); +} + +//===----------------------------------------------------------------------===// +// Methods of MallocChecker and MallocBugVisitor. +//===----------------------------------------------------------------------===// + +llvm::Optional<ProgramStateRef> MallocChecker::performKernelMalloc( + const CallExpr *CE, CheckerContext &C, const ProgramStateRef &State) const { + // 3-argument malloc(), as commonly used in {Free,Net,Open}BSD Kernels: + // + // void *malloc(unsigned long size, struct malloc_type *mtp, int flags); + // + // One of the possible flags is M_ZERO, which means 'give me back an + // allocation which is already zeroed', like calloc. + + // 2-argument kmalloc(), as used in the Linux kernel: + // + // void *kmalloc(size_t size, gfp_t flags); + // + // Has the similar flag value __GFP_ZERO. + + // This logic is largely cloned from O_CREAT in UnixAPIChecker, maybe some + // code could be shared. 
+ + ASTContext &Ctx = C.getASTContext(); + llvm::Triple::OSType OS = Ctx.getTargetInfo().getTriple().getOS(); + + if (!KernelZeroFlagVal.hasValue()) { + if (OS == llvm::Triple::FreeBSD) + KernelZeroFlagVal = 0x0100; + else if (OS == llvm::Triple::NetBSD) + KernelZeroFlagVal = 0x0002; + else if (OS == llvm::Triple::OpenBSD) + KernelZeroFlagVal = 0x0008; + else if (OS == llvm::Triple::Linux) + // __GFP_ZERO + KernelZeroFlagVal = 0x8000; + else + // FIXME: We need a more general way of getting the M_ZERO value. + // See also: O_CREAT in UnixAPIChecker.cpp. + + // Fall back to normal malloc behavior on platforms where we don't + // know M_ZERO. + return None; + } + + // We treat the last argument as the flags argument, and callers fall-back to + // normal malloc on a None return. This works for the FreeBSD kernel malloc + // as well as Linux kmalloc. + if (CE->getNumArgs() < 2) + return None; + + const Expr *FlagsEx = CE->getArg(CE->getNumArgs() - 1); + const SVal V = C.getSVal(FlagsEx); + if (!V.getAs<NonLoc>()) { + // The case where 'V' can be a location can only be due to a bad header, + // so in this case bail out. + return None; + } + + NonLoc Flags = V.castAs<NonLoc>(); + NonLoc ZeroFlag = C.getSValBuilder() + .makeIntVal(KernelZeroFlagVal.getValue(), FlagsEx->getType()) + .castAs<NonLoc>(); + SVal MaskedFlagsUC = C.getSValBuilder().evalBinOpNN(State, BO_And, + Flags, ZeroFlag, + FlagsEx->getType()); + if (MaskedFlagsUC.isUnknownOrUndef()) + return None; + DefinedSVal MaskedFlags = MaskedFlagsUC.castAs<DefinedSVal>(); + + // Check if maskedFlags is non-zero. + ProgramStateRef TrueState, FalseState; + std::tie(TrueState, FalseState) = State->assume(MaskedFlags); + + // If M_ZERO is set, treat this like calloc (initialized). 
+ if (TrueState && !FalseState) { + SVal ZeroVal = C.getSValBuilder().makeZeroVal(Ctx.CharTy); + return MallocMemAux(C, CE, CE->getArg(0), ZeroVal, TrueState); + } + + return None; +} + +SVal MallocChecker::evalMulForBufferSize(CheckerContext &C, const Expr *Blocks, + const Expr *BlockBytes) { + SValBuilder &SB = C.getSValBuilder(); + SVal BlocksVal = C.getSVal(Blocks); + SVal BlockBytesVal = C.getSVal(BlockBytes); + ProgramStateRef State = C.getState(); + SVal TotalSize = SB.evalBinOp(State, BO_Mul, BlocksVal, BlockBytesVal, + SB.getContext().getSizeType()); + return TotalSize; +} + +void MallocChecker::checkPostStmt(const CallExpr *CE, CheckerContext &C) const { + if (C.wasInlined) + return; + + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD) + return; + + ProgramStateRef State = C.getState(); + bool IsKnownToBeAllocatedMemory = false; + + if (FD->getKind() == Decl::Function) { + MemFunctionInfo.initIdentifierInfo(C.getASTContext()); + IdentifierInfo *FunI = FD->getIdentifier(); + + if (FunI == MemFunctionInfo.II_malloc || + FunI == MemFunctionInfo.II_g_malloc || + FunI == MemFunctionInfo.II_g_try_malloc) { + switch (CE->getNumArgs()) { + default: + return; + case 1: + State = MallocMemAux(C, CE, CE->getArg(0), UndefinedVal(), State); + State = ProcessZeroAllocCheck(C, CE, 0, State); + break; + case 2: + State = MallocMemAux(C, CE, CE->getArg(0), UndefinedVal(), State); + break; + case 3: + llvm::Optional<ProgramStateRef> MaybeState = + performKernelMalloc(CE, C, State); + if (MaybeState.hasValue()) + State = MaybeState.getValue(); + else + State = MallocMemAux(C, CE, CE->getArg(0), UndefinedVal(), State); + break; + } + } else if (FunI == MemFunctionInfo.II_kmalloc) { + if (CE->getNumArgs() < 1) + return; + llvm::Optional<ProgramStateRef> MaybeState = + performKernelMalloc(CE, C, State); + if (MaybeState.hasValue()) + State = MaybeState.getValue(); + else + State = MallocMemAux(C, CE, CE->getArg(0), UndefinedVal(), State); + } else if (FunI == 
MemFunctionInfo.II_valloc) { + if (CE->getNumArgs() < 1) + return; + State = MallocMemAux(C, CE, CE->getArg(0), UndefinedVal(), State); + State = ProcessZeroAllocCheck(C, CE, 0, State); + } else if (FunI == MemFunctionInfo.II_realloc || + FunI == MemFunctionInfo.II_g_realloc || + FunI == MemFunctionInfo.II_g_try_realloc) { + State = ReallocMemAux(C, CE, /*ShouldFreeOnFail*/ false, State); + State = ProcessZeroAllocCheck(C, CE, 1, State); + } else if (FunI == MemFunctionInfo.II_reallocf) { + State = ReallocMemAux(C, CE, /*ShouldFreeOnFail*/ true, State); + State = ProcessZeroAllocCheck(C, CE, 1, State); + } else if (FunI == MemFunctionInfo.II_calloc) { + State = CallocMem(C, CE, State); + State = ProcessZeroAllocCheck(C, CE, 0, State); + State = ProcessZeroAllocCheck(C, CE, 1, State); + } else if (FunI == MemFunctionInfo.II_free || + FunI == MemFunctionInfo.II_g_free || + FunI == MemFunctionInfo.II_kfree) { + if (suppressDeallocationsInSuspiciousContexts(CE, C)) + return; + + State = FreeMemAux(C, CE, State, 0, false, IsKnownToBeAllocatedMemory); + } else if (FunI == MemFunctionInfo.II_strdup || + FunI == MemFunctionInfo.II_win_strdup || + FunI == MemFunctionInfo.II_wcsdup || + FunI == MemFunctionInfo.II_win_wcsdup) { + State = MallocUpdateRefState(C, CE, State); + } else if (FunI == MemFunctionInfo.II_strndup) { + State = MallocUpdateRefState(C, CE, State); + } else if (FunI == MemFunctionInfo.II_alloca || + FunI == MemFunctionInfo.II_win_alloca) { + if (CE->getNumArgs() < 1) + return; + State = MallocMemAux(C, CE, CE->getArg(0), UndefinedVal(), State, + AF_Alloca); + State = ProcessZeroAllocCheck(C, CE, 0, State); + } else if (MemFunctionInfo.isStandardNewDelete(FD, C.getASTContext())) { + // Process direct calls to operator new/new[]/delete/delete[] functions + // as distinct from new/new[]/delete/delete[] expressions that are + // processed by the checkPostStmt callbacks for CXXNewExpr and + // CXXDeleteExpr. 
+ switch (FD->getOverloadedOperator()) { + case OO_New: + State = MallocMemAux(C, CE, CE->getArg(0), UndefinedVal(), State, + AF_CXXNew); + State = ProcessZeroAllocCheck(C, CE, 0, State); + break; + case OO_Array_New: + State = MallocMemAux(C, CE, CE->getArg(0), UndefinedVal(), State, + AF_CXXNewArray); + State = ProcessZeroAllocCheck(C, CE, 0, State); + break; + case OO_Delete: + case OO_Array_Delete: + State = FreeMemAux(C, CE, State, 0, false, IsKnownToBeAllocatedMemory); + break; + default: + llvm_unreachable("not a new/delete operator"); + } + } else if (FunI == MemFunctionInfo.II_if_nameindex) { + // Should we model this differently? We can allocate a fixed number of + // elements with zeros in the last one. + State = MallocMemAux(C, CE, UnknownVal(), UnknownVal(), State, + AF_IfNameIndex); + } else if (FunI == MemFunctionInfo.II_if_freenameindex) { + State = FreeMemAux(C, CE, State, 0, false, IsKnownToBeAllocatedMemory); + } else if (FunI == MemFunctionInfo.II_g_malloc0 || + FunI == MemFunctionInfo.II_g_try_malloc0) { + if (CE->getNumArgs() < 1) + return; + SValBuilder &svalBuilder = C.getSValBuilder(); + SVal zeroVal = svalBuilder.makeZeroVal(svalBuilder.getContext().CharTy); + State = MallocMemAux(C, CE, CE->getArg(0), zeroVal, State); + State = ProcessZeroAllocCheck(C, CE, 0, State); + } else if (FunI == MemFunctionInfo.II_g_memdup) { + if (CE->getNumArgs() < 2) + return; + State = MallocMemAux(C, CE, CE->getArg(1), UndefinedVal(), State); + State = ProcessZeroAllocCheck(C, CE, 1, State); + } else if (FunI == MemFunctionInfo.II_g_malloc_n || + FunI == MemFunctionInfo.II_g_try_malloc_n || + FunI == MemFunctionInfo.II_g_malloc0_n || + FunI == MemFunctionInfo.II_g_try_malloc0_n) { + if (CE->getNumArgs() < 2) + return; + SVal Init = UndefinedVal(); + if (FunI == MemFunctionInfo.II_g_malloc0_n || + FunI == MemFunctionInfo.II_g_try_malloc0_n) { + SValBuilder &SB = C.getSValBuilder(); + Init = SB.makeZeroVal(SB.getContext().CharTy); + } + SVal TotalSize = 
evalMulForBufferSize(C, CE->getArg(0), CE->getArg(1)); + State = MallocMemAux(C, CE, TotalSize, Init, State); + State = ProcessZeroAllocCheck(C, CE, 0, State); + State = ProcessZeroAllocCheck(C, CE, 1, State); + } else if (FunI == MemFunctionInfo.II_g_realloc_n || + FunI == MemFunctionInfo.II_g_try_realloc_n) { + if (CE->getNumArgs() < 3) + return; + State = ReallocMemAux(C, CE, /*ShouldFreeOnFail*/ false, State, + /*SuffixWithN*/ true); + State = ProcessZeroAllocCheck(C, CE, 1, State); + State = ProcessZeroAllocCheck(C, CE, 2, State); + } + } + + if (MemFunctionInfo.ShouldIncludeOwnershipAnnotatedFunctions || + ChecksEnabled[CK_MismatchedDeallocatorChecker]) { + // Check all the attributes, if there are any. + // There can be multiple of these attributes. + if (FD->hasAttrs()) + for (const auto *I : FD->specific_attrs<OwnershipAttr>()) { + switch (I->getOwnKind()) { + case OwnershipAttr::Returns: + State = MallocMemReturnsAttr(C, CE, I, State); + break; + case OwnershipAttr::Takes: + case OwnershipAttr::Holds: + State = FreeMemAttr(C, CE, I, State); + break; + } + } + } + C.addTransition(State); +} + +// Performs a 0-sized allocations check. +ProgramStateRef MallocChecker::ProcessZeroAllocCheck( + CheckerContext &C, const Expr *E, const unsigned IndexOfSizeArg, + ProgramStateRef State, Optional<SVal> RetVal) { + if (!State) + return nullptr; + + if (!RetVal) + RetVal = C.getSVal(E); + + const Expr *Arg = nullptr; + + if (const CallExpr *CE = dyn_cast<CallExpr>(E)) { + Arg = CE->getArg(IndexOfSizeArg); + } + else if (const CXXNewExpr *NE = dyn_cast<CXXNewExpr>(E)) { + if (NE->isArray()) + Arg = *NE->getArraySize(); + else + return State; + } + else + llvm_unreachable("not a CallExpr or CXXNewExpr"); + + assert(Arg); + + Optional<DefinedSVal> DefArgVal = C.getSVal(Arg).getAs<DefinedSVal>(); + + if (!DefArgVal) + return State; + + // Check if the allocation size is 0. 
+ ProgramStateRef TrueState, FalseState; + SValBuilder &SvalBuilder = C.getSValBuilder(); + DefinedSVal Zero = + SvalBuilder.makeZeroVal(Arg->getType()).castAs<DefinedSVal>(); + + std::tie(TrueState, FalseState) = + State->assume(SvalBuilder.evalEQ(State, *DefArgVal, Zero)); + + if (TrueState && !FalseState) { + SymbolRef Sym = RetVal->getAsLocSymbol(); + if (!Sym) + return State; + + const RefState *RS = State->get<RegionState>(Sym); + if (RS) { + if (RS->isAllocated()) + return TrueState->set<RegionState>(Sym, + RefState::getAllocatedOfSizeZero(RS)); + else + return State; + } else { + // Case of zero-size realloc. Historically 'realloc(ptr, 0)' is treated as + // 'free(ptr)' and the returned value from 'realloc(ptr, 0)' is not + // tracked. Add zero-reallocated Sym to the state to catch references + // to zero-allocated memory. + return TrueState->add<ReallocSizeZeroSymbols>(Sym); + } + } + + // Assume the value is non-zero going forward. + assert(FalseState); + return FalseState; +} + +static QualType getDeepPointeeType(QualType T) { + QualType Result = T, PointeeType = T->getPointeeType(); + while (!PointeeType.isNull()) { + Result = PointeeType; + PointeeType = PointeeType->getPointeeType(); + } + return Result; +} + +/// \returns true if the constructor invoked by \p NE has an argument of a +/// pointer/reference to a record type. +static bool hasNonTrivialConstructorCall(const CXXNewExpr *NE) { + + const CXXConstructExpr *ConstructE = NE->getConstructExpr(); + if (!ConstructE) + return false; + + if (!NE->getAllocatedType()->getAsCXXRecordDecl()) + return false; + + const CXXConstructorDecl *CtorD = ConstructE->getConstructor(); + + // Iterate over the constructor parameters. 
+ for (const auto *CtorParam : CtorD->parameters()) { + + QualType CtorParamPointeeT = CtorParam->getType()->getPointeeType(); + if (CtorParamPointeeT.isNull()) + continue; + + CtorParamPointeeT = getDeepPointeeType(CtorParamPointeeT); + + if (CtorParamPointeeT->getAsCXXRecordDecl()) + return true; + } + + return false; +} + +void MallocChecker::processNewAllocation(const CXXNewExpr *NE, + CheckerContext &C, + SVal Target) const { + if (!MemFunctionInfo.isStandardNewDelete(NE->getOperatorNew(), + C.getASTContext())) + return; + + const ParentMap &PM = C.getLocationContext()->getParentMap(); + + // Non-trivial constructors have a chance to escape 'this', but marking all + // invocations of trivial constructors as escaped would cause too great of + // reduction of true positives, so let's just do that for constructors that + // have an argument of a pointer-to-record type. + if (!PM.isConsumedExpr(NE) && hasNonTrivialConstructorCall(NE)) + return; + + ProgramStateRef State = C.getState(); + // The return value from operator new is bound to a specified initialization + // value (if any) and we don't want to loose this value. So we call + // MallocUpdateRefState() instead of MallocMemAux() which breaks the + // existing binding. + State = MallocUpdateRefState(C, NE, State, NE->isArray() ? AF_CXXNewArray + : AF_CXXNew, Target); + State = addExtentSize(C, NE, State, Target); + State = ProcessZeroAllocCheck(C, NE, 0, State, Target); + C.addTransition(State); +} + +void MallocChecker::checkPostStmt(const CXXNewExpr *NE, + CheckerContext &C) const { + if (!C.getAnalysisManager().getAnalyzerOptions().MayInlineCXXAllocator) + processNewAllocation(NE, C, C.getSVal(NE)); +} + +void MallocChecker::checkNewAllocator(const CXXNewExpr *NE, SVal Target, + CheckerContext &C) const { + if (!C.wasInlined) + processNewAllocation(NE, C, Target); +} + +// Sets the extent value of the MemRegion allocated by +// new expression NE to its size in Bytes. 
+// +ProgramStateRef MallocChecker::addExtentSize(CheckerContext &C, + const CXXNewExpr *NE, + ProgramStateRef State, + SVal Target) { + if (!State) + return nullptr; + SValBuilder &svalBuilder = C.getSValBuilder(); + SVal ElementCount; + const SubRegion *Region; + if (NE->isArray()) { + const Expr *SizeExpr = *NE->getArraySize(); + ElementCount = C.getSVal(SizeExpr); + // Store the extent size for the (symbolic)region + // containing the elements. + Region = Target.getAsRegion() + ->castAs<SubRegion>() + ->StripCasts() + ->castAs<SubRegion>(); + } else { + ElementCount = svalBuilder.makeIntVal(1, true); + Region = Target.getAsRegion()->castAs<SubRegion>(); + } + + // Set the region's extent equal to the Size in Bytes. + QualType ElementType = NE->getAllocatedType(); + ASTContext &AstContext = C.getASTContext(); + CharUnits TypeSize = AstContext.getTypeSizeInChars(ElementType); + + if (ElementCount.getAs<NonLoc>()) { + DefinedOrUnknownSVal Extent = Region->getExtent(svalBuilder); + // size in Bytes = ElementCount*TypeSize + SVal SizeInBytes = svalBuilder.evalBinOpNN( + State, BO_Mul, ElementCount.castAs<NonLoc>(), + svalBuilder.makeArrayIndex(TypeSize.getQuantity()), + svalBuilder.getArrayIndexType()); + DefinedOrUnknownSVal extentMatchesSize = svalBuilder.evalEQ( + State, Extent, SizeInBytes.castAs<DefinedOrUnknownSVal>()); + State = State->assume(extentMatchesSize, true); + } + return State; +} + +void MallocChecker::checkPreStmt(const CXXDeleteExpr *DE, + CheckerContext &C) const { + + if (!ChecksEnabled[CK_NewDeleteChecker]) + if (SymbolRef Sym = C.getSVal(DE->getArgument()).getAsSymbol()) + checkUseAfterFree(Sym, C, DE->getArgument()); + + if (!MemFunctionInfo.isStandardNewDelete(DE->getOperatorDelete(), + C.getASTContext())) + return; + + ProgramStateRef State = C.getState(); + bool IsKnownToBeAllocated; + State = FreeMemAux(C, DE->getArgument(), DE, State, + /*Hold*/ false, IsKnownToBeAllocated); + + C.addTransition(State); +} + +static bool 
isKnownDeallocObjCMethodName(const ObjCMethodCall &Call) { + // If the first selector piece is one of the names below, assume that the + // object takes ownership of the memory, promising to eventually deallocate it + // with free(). + // Ex: [NSData dataWithBytesNoCopy:bytes length:10]; + // (...unless a 'freeWhenDone' parameter is false, but that's checked later.) + StringRef FirstSlot = Call.getSelector().getNameForSlot(0); + return FirstSlot == "dataWithBytesNoCopy" || + FirstSlot == "initWithBytesNoCopy" || + FirstSlot == "initWithCharactersNoCopy"; +} + +static Optional<bool> getFreeWhenDoneArg(const ObjCMethodCall &Call) { + Selector S = Call.getSelector(); + + // FIXME: We should not rely on fully-constrained symbols being folded. + for (unsigned i = 1; i < S.getNumArgs(); ++i) + if (S.getNameForSlot(i).equals("freeWhenDone")) + return !Call.getArgSVal(i).isZeroConstant(); + + return None; +} + +void MallocChecker::checkPostObjCMessage(const ObjCMethodCall &Call, + CheckerContext &C) const { + if (C.wasInlined) + return; + + if (!isKnownDeallocObjCMethodName(Call)) + return; + + if (Optional<bool> FreeWhenDone = getFreeWhenDoneArg(Call)) + if (!*FreeWhenDone) + return; + + bool IsKnownToBeAllocatedMemory; + ProgramStateRef State = + FreeMemAux(C, Call.getArgExpr(0), Call.getOriginExpr(), C.getState(), + /*Hold=*/true, IsKnownToBeAllocatedMemory, + /*RetNullOnFailure=*/true); + + C.addTransition(State); +} + +ProgramStateRef +MallocChecker::MallocMemReturnsAttr(CheckerContext &C, const CallExpr *CE, + const OwnershipAttr *Att, + ProgramStateRef State) const { + if (!State) + return nullptr; + + if (Att->getModule() != MemFunctionInfo.II_malloc) + return nullptr; + + OwnershipAttr::args_iterator I = Att->args_begin(), E = Att->args_end(); + if (I != E) { + return MallocMemAux(C, CE, CE->getArg(I->getASTIndex()), UndefinedVal(), + State); + } + return MallocMemAux(C, CE, UnknownVal(), UndefinedVal(), State); +} + +ProgramStateRef 
MallocChecker::MallocMemAux(CheckerContext &C, + const CallExpr *CE, + const Expr *SizeEx, SVal Init, + ProgramStateRef State, + AllocationFamily Family) { + if (!State) + return nullptr; + + return MallocMemAux(C, CE, C.getSVal(SizeEx), Init, State, Family); +} + +ProgramStateRef MallocChecker::MallocMemAux(CheckerContext &C, + const CallExpr *CE, + SVal Size, SVal Init, + ProgramStateRef State, + AllocationFamily Family) { + if (!State) + return nullptr; + + // We expect the malloc functions to return a pointer. + if (!Loc::isLocType(CE->getType())) + return nullptr; + + // Bind the return value to the symbolic value from the heap region. + // TODO: We could rewrite post visit to eval call; 'malloc' does not have + // side effects other than what we model here. + unsigned Count = C.blockCount(); + SValBuilder &svalBuilder = C.getSValBuilder(); + const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); + DefinedSVal RetVal = svalBuilder.getConjuredHeapSymbolVal(CE, LCtx, Count) + .castAs<DefinedSVal>(); + State = State->BindExpr(CE, C.getLocationContext(), RetVal); + + // Fill the region with the initialization value. + State = State->bindDefaultInitial(RetVal, Init, LCtx); + + // Set the region's extent equal to the Size parameter. 
+ const SymbolicRegion *R = + dyn_cast_or_null<SymbolicRegion>(RetVal.getAsRegion()); + if (!R) + return nullptr; + if (Optional<DefinedOrUnknownSVal> DefinedSize = + Size.getAs<DefinedOrUnknownSVal>()) { + SValBuilder &svalBuilder = C.getSValBuilder(); + DefinedOrUnknownSVal Extent = R->getExtent(svalBuilder); + DefinedOrUnknownSVal extentMatchesSize = + svalBuilder.evalEQ(State, Extent, *DefinedSize); + + State = State->assume(extentMatchesSize, true); + assert(State); + } + + return MallocUpdateRefState(C, CE, State, Family); +} + +static ProgramStateRef MallocUpdateRefState(CheckerContext &C, const Expr *E, + ProgramStateRef State, + AllocationFamily Family, + Optional<SVal> RetVal) { + if (!State) + return nullptr; + + // Get the return value. + if (!RetVal) + RetVal = C.getSVal(E); + + // We expect the malloc functions to return a pointer. + if (!RetVal->getAs<Loc>()) + return nullptr; + + SymbolRef Sym = RetVal->getAsLocSymbol(); + // This is a return value of a function that was not inlined, such as malloc() + // or new(). We've checked that in the caller. Therefore, it must be a symbol. + assert(Sym); + + // Set the symbol's state to Allocated. 
+ return State->set<RegionState>(Sym, RefState::getAllocated(Family, E)); +} + +ProgramStateRef MallocChecker::FreeMemAttr(CheckerContext &C, + const CallExpr *CE, + const OwnershipAttr *Att, + ProgramStateRef State) const { + if (!State) + return nullptr; + + if (Att->getModule() != MemFunctionInfo.II_malloc) + return nullptr; + + bool IsKnownToBeAllocated = false; + + for (const auto &Arg : Att->args()) { + ProgramStateRef StateI = FreeMemAux( + C, CE, State, Arg.getASTIndex(), + Att->getOwnKind() == OwnershipAttr::Holds, IsKnownToBeAllocated); + if (StateI) + State = StateI; + } + return State; +} + +ProgramStateRef MallocChecker::FreeMemAux(CheckerContext &C, const CallExpr *CE, + ProgramStateRef State, unsigned Num, + bool Hold, bool &IsKnownToBeAllocated, + bool ReturnsNullOnFailure) const { + if (!State) + return nullptr; + + if (CE->getNumArgs() < (Num + 1)) + return nullptr; + + return FreeMemAux(C, CE->getArg(Num), CE, State, Hold, IsKnownToBeAllocated, + ReturnsNullOnFailure); +} + +/// Checks if the previous call to free on the given symbol failed - if free +/// failed, returns true. Also, returns the corresponding return value symbol. 
+static bool didPreviousFreeFail(ProgramStateRef State, + SymbolRef Sym, SymbolRef &RetStatusSymbol) { + const SymbolRef *Ret = State->get<FreeReturnValue>(Sym); + if (Ret) { + assert(*Ret && "We should not store the null return symbol"); + ConstraintManager &CMgr = State->getConstraintManager(); + ConditionTruthVal FreeFailed = CMgr.isNull(State, *Ret); + RetStatusSymbol = *Ret; + return FreeFailed.isConstrainedTrue(); + } + return false; +} + +static AllocationFamily +getAllocationFamily(const MemFunctionInfoTy &MemFunctionInfo, CheckerContext &C, + const Stmt *S) { + + if (!S) + return AF_None; + + if (const CallExpr *CE = dyn_cast<CallExpr>(S)) { + const FunctionDecl *FD = C.getCalleeDecl(CE); + + if (!FD) + FD = dyn_cast<FunctionDecl>(CE->getCalleeDecl()); + + ASTContext &Ctx = C.getASTContext(); + + if (MemFunctionInfo.isCMemFunction(FD, Ctx, AF_Malloc, + MemoryOperationKind::MOK_Any)) + return AF_Malloc; + + if (MemFunctionInfo.isStandardNewDelete(FD, Ctx)) { + OverloadedOperatorKind Kind = FD->getOverloadedOperator(); + if (Kind == OO_New || Kind == OO_Delete) + return AF_CXXNew; + else if (Kind == OO_Array_New || Kind == OO_Array_Delete) + return AF_CXXNewArray; + } + + if (MemFunctionInfo.isCMemFunction(FD, Ctx, AF_IfNameIndex, + MemoryOperationKind::MOK_Any)) + return AF_IfNameIndex; + + if (MemFunctionInfo.isCMemFunction(FD, Ctx, AF_Alloca, + MemoryOperationKind::MOK_Any)) + return AF_Alloca; + + return AF_None; + } + + if (const CXXNewExpr *NE = dyn_cast<CXXNewExpr>(S)) + return NE->isArray() ? AF_CXXNewArray : AF_CXXNew; + + if (const CXXDeleteExpr *DE = dyn_cast<CXXDeleteExpr>(S)) + return DE->isArrayForm() ? AF_CXXNewArray : AF_CXXNew; + + if (isa<ObjCMessageExpr>(S)) + return AF_Malloc; + + return AF_None; +} + +static bool printAllocDeallocName(raw_ostream &os, CheckerContext &C, + const Expr *E) { + if (const CallExpr *CE = dyn_cast<CallExpr>(E)) { + // FIXME: This doesn't handle indirect calls. 
+ const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return false; + + os << *FD; + if (!FD->isOverloadedOperator()) + os << "()"; + return true; + } + + if (const ObjCMessageExpr *Msg = dyn_cast<ObjCMessageExpr>(E)) { + if (Msg->isInstanceMessage()) + os << "-"; + else + os << "+"; + Msg->getSelector().print(os); + return true; + } + + if (const CXXNewExpr *NE = dyn_cast<CXXNewExpr>(E)) { + os << "'" + << getOperatorSpelling(NE->getOperatorNew()->getOverloadedOperator()) + << "'"; + return true; + } + + if (const CXXDeleteExpr *DE = dyn_cast<CXXDeleteExpr>(E)) { + os << "'" + << getOperatorSpelling(DE->getOperatorDelete()->getOverloadedOperator()) + << "'"; + return true; + } + + return false; +} + +static void printExpectedAllocName(raw_ostream &os, + const MemFunctionInfoTy &MemFunctionInfo, + CheckerContext &C, const Expr *E) { + AllocationFamily Family = getAllocationFamily(MemFunctionInfo, C, E); + + switch(Family) { + case AF_Malloc: os << "malloc()"; return; + case AF_CXXNew: os << "'new'"; return; + case AF_CXXNewArray: os << "'new[]'"; return; + case AF_IfNameIndex: os << "'if_nameindex()'"; return; + case AF_InnerBuffer: os << "container-specific allocator"; return; + case AF_Alloca: + case AF_None: llvm_unreachable("not a deallocation expression"); + } +} + +static void printExpectedDeallocName(raw_ostream &os, AllocationFamily Family) { + switch(Family) { + case AF_Malloc: os << "free()"; return; + case AF_CXXNew: os << "'delete'"; return; + case AF_CXXNewArray: os << "'delete[]'"; return; + case AF_IfNameIndex: os << "'if_freenameindex()'"; return; + case AF_InnerBuffer: os << "container-specific deallocator"; return; + case AF_Alloca: + case AF_None: llvm_unreachable("suspicious argument"); + } +} + +ProgramStateRef MallocChecker::FreeMemAux(CheckerContext &C, + const Expr *ArgExpr, + const Expr *ParentExpr, + ProgramStateRef State, bool Hold, + bool &IsKnownToBeAllocated, + bool ReturnsNullOnFailure) const { + + if (!State) + return 
nullptr; + + SVal ArgVal = C.getSVal(ArgExpr); + if (!ArgVal.getAs<DefinedOrUnknownSVal>()) + return nullptr; + DefinedOrUnknownSVal location = ArgVal.castAs<DefinedOrUnknownSVal>(); + + // Check for null dereferences. + if (!location.getAs<Loc>()) + return nullptr; + + // The explicit NULL case, no operation is performed. + ProgramStateRef notNullState, nullState; + std::tie(notNullState, nullState) = State->assume(location); + if (nullState && !notNullState) + return nullptr; + + // Unknown values could easily be okay + // Undefined values are handled elsewhere + if (ArgVal.isUnknownOrUndef()) + return nullptr; + + const MemRegion *R = ArgVal.getAsRegion(); + + // Nonlocs can't be freed, of course. + // Non-region locations (labels and fixed addresses) also shouldn't be freed. + if (!R) { + ReportBadFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr); + return nullptr; + } + + R = R->StripCasts(); + + // Blocks might show up as heap data, but should not be free()d + if (isa<BlockDataRegion>(R)) { + ReportBadFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr); + return nullptr; + } + + const MemSpaceRegion *MS = R->getMemorySpace(); + + // Parameters, locals, statics, globals, and memory returned by + // __builtin_alloca() shouldn't be freed. + if (!(isa<UnknownSpaceRegion>(MS) || isa<HeapSpaceRegion>(MS))) { + // FIXME: at the time this code was written, malloc() regions were + // represented by conjured symbols, which are all in UnknownSpaceRegion. + // This means that there isn't actually anything from HeapSpaceRegion + // that should be freed, even though we allow it here. + // Of course, free() can work on memory allocated outside the current + // function, so UnknownSpaceRegion is always a possibility. + // False negatives are better than false positives. 
+ + if (isa<AllocaRegion>(R)) + ReportFreeAlloca(C, ArgVal, ArgExpr->getSourceRange()); + else + ReportBadFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr); + + return nullptr; + } + + const SymbolicRegion *SrBase = dyn_cast<SymbolicRegion>(R->getBaseRegion()); + // Various cases could lead to non-symbol values here. + // For now, ignore them. + if (!SrBase) + return nullptr; + + SymbolRef SymBase = SrBase->getSymbol(); + const RefState *RsBase = State->get<RegionState>(SymBase); + SymbolRef PreviousRetStatusSymbol = nullptr; + + IsKnownToBeAllocated = + RsBase && (RsBase->isAllocated() || RsBase->isAllocatedOfSizeZero()); + + if (RsBase) { + + // Memory returned by alloca() shouldn't be freed. + if (RsBase->getAllocationFamily() == AF_Alloca) { + ReportFreeAlloca(C, ArgVal, ArgExpr->getSourceRange()); + return nullptr; + } + + // Check for double free first. + if ((RsBase->isReleased() || RsBase->isRelinquished()) && + !didPreviousFreeFail(State, SymBase, PreviousRetStatusSymbol)) { + ReportDoubleFree(C, ParentExpr->getSourceRange(), RsBase->isReleased(), + SymBase, PreviousRetStatusSymbol); + return nullptr; + + // If the pointer is allocated or escaped, but we are now trying to free it, + // check that the call to free is proper. + } else if (RsBase->isAllocated() || RsBase->isAllocatedOfSizeZero() || + RsBase->isEscaped()) { + + // Check if an expected deallocation function matches the real one. + bool DeallocMatchesAlloc = + RsBase->getAllocationFamily() == + getAllocationFamily(MemFunctionInfo, C, ParentExpr); + if (!DeallocMatchesAlloc) { + ReportMismatchedDealloc(C, ArgExpr->getSourceRange(), + ParentExpr, RsBase, SymBase, Hold); + return nullptr; + } + + // Check if the memory location being freed is the actual location + // allocated, or an offset. 
+ RegionOffset Offset = R->getAsOffset(); + if (Offset.isValid() && + !Offset.hasSymbolicOffset() && + Offset.getOffset() != 0) { + const Expr *AllocExpr = cast<Expr>(RsBase->getStmt()); + ReportOffsetFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr, + AllocExpr); + return nullptr; + } + } + } + + if (SymBase->getType()->isFunctionPointerType()) { + ReportFunctionPointerFree(C, ArgVal, ArgExpr->getSourceRange(), ParentExpr); + return nullptr; + } + + // Clean out the info on previous call to free return info. + State = State->remove<FreeReturnValue>(SymBase); + + // Keep track of the return value. If it is NULL, we will know that free + // failed. + if (ReturnsNullOnFailure) { + SVal RetVal = C.getSVal(ParentExpr); + SymbolRef RetStatusSymbol = RetVal.getAsSymbol(); + if (RetStatusSymbol) { + C.getSymbolManager().addSymbolDependency(SymBase, RetStatusSymbol); + State = State->set<FreeReturnValue>(SymBase, RetStatusSymbol); + } + } + + AllocationFamily Family = + RsBase ? RsBase->getAllocationFamily() + : getAllocationFamily(MemFunctionInfo, C, ParentExpr); + // Normal free. 
+ if (Hold) + return State->set<RegionState>(SymBase, + RefState::getRelinquished(Family, + ParentExpr)); + + return State->set<RegionState>(SymBase, + RefState::getReleased(Family, ParentExpr)); +} + +Optional<MallocChecker::CheckKind> +MallocChecker::getCheckIfTracked(AllocationFamily Family, + bool IsALeakCheck) const { + switch (Family) { + case AF_Malloc: + case AF_Alloca: + case AF_IfNameIndex: { + if (ChecksEnabled[CK_MallocChecker]) + return CK_MallocChecker; + return None; + } + case AF_CXXNew: + case AF_CXXNewArray: { + if (IsALeakCheck) { + if (ChecksEnabled[CK_NewDeleteLeaksChecker]) + return CK_NewDeleteLeaksChecker; + } + else { + if (ChecksEnabled[CK_NewDeleteChecker]) + return CK_NewDeleteChecker; + } + return None; + } + case AF_InnerBuffer: { + if (ChecksEnabled[CK_InnerPointerChecker]) + return CK_InnerPointerChecker; + return None; + } + case AF_None: { + llvm_unreachable("no family"); + } + } + llvm_unreachable("unhandled family"); +} + +Optional<MallocChecker::CheckKind> +MallocChecker::getCheckIfTracked(CheckerContext &C, + const Stmt *AllocDeallocStmt, + bool IsALeakCheck) const { + return getCheckIfTracked( + getAllocationFamily(MemFunctionInfo, C, AllocDeallocStmt), IsALeakCheck); +} + +Optional<MallocChecker::CheckKind> +MallocChecker::getCheckIfTracked(CheckerContext &C, SymbolRef Sym, + bool IsALeakCheck) const { + if (C.getState()->contains<ReallocSizeZeroSymbols>(Sym)) + return CK_MallocChecker; + + const RefState *RS = C.getState()->get<RegionState>(Sym); + assert(RS); + return getCheckIfTracked(RS->getAllocationFamily(), IsALeakCheck); +} + +bool MallocChecker::SummarizeValue(raw_ostream &os, SVal V) { + if (Optional<nonloc::ConcreteInt> IntVal = V.getAs<nonloc::ConcreteInt>()) + os << "an integer (" << IntVal->getValue() << ")"; + else if (Optional<loc::ConcreteInt> ConstAddr = V.getAs<loc::ConcreteInt>()) + os << "a constant address (" << ConstAddr->getValue() << ")"; + else if (Optional<loc::GotoLabel> Label = 
V.getAs<loc::GotoLabel>()) + os << "the address of the label '" << Label->getLabel()->getName() << "'"; + else + return false; + + return true; +} + +bool MallocChecker::SummarizeRegion(raw_ostream &os, + const MemRegion *MR) { + switch (MR->getKind()) { + case MemRegion::FunctionCodeRegionKind: { + const NamedDecl *FD = cast<FunctionCodeRegion>(MR)->getDecl(); + if (FD) + os << "the address of the function '" << *FD << '\''; + else + os << "the address of a function"; + return true; + } + case MemRegion::BlockCodeRegionKind: + os << "block text"; + return true; + case MemRegion::BlockDataRegionKind: + // FIXME: where the block came from? + os << "a block"; + return true; + default: { + const MemSpaceRegion *MS = MR->getMemorySpace(); + + if (isa<StackLocalsSpaceRegion>(MS)) { + const VarRegion *VR = dyn_cast<VarRegion>(MR); + const VarDecl *VD; + if (VR) + VD = VR->getDecl(); + else + VD = nullptr; + + if (VD) + os << "the address of the local variable '" << VD->getName() << "'"; + else + os << "the address of a local stack variable"; + return true; + } + + if (isa<StackArgumentsSpaceRegion>(MS)) { + const VarRegion *VR = dyn_cast<VarRegion>(MR); + const VarDecl *VD; + if (VR) + VD = VR->getDecl(); + else + VD = nullptr; + + if (VD) + os << "the address of the parameter '" << VD->getName() << "'"; + else + os << "the address of a parameter"; + return true; + } + + if (isa<GlobalsSpaceRegion>(MS)) { + const VarRegion *VR = dyn_cast<VarRegion>(MR); + const VarDecl *VD; + if (VR) + VD = VR->getDecl(); + else + VD = nullptr; + + if (VD) { + if (VD->isStaticLocal()) + os << "the address of the static variable '" << VD->getName() << "'"; + else + os << "the address of the global variable '" << VD->getName() << "'"; + } else + os << "the address of a global variable"; + return true; + } + + return false; + } + } +} + +void MallocChecker::ReportBadFree(CheckerContext &C, SVal ArgVal, + SourceRange Range, + const Expr *DeallocExpr) const { + + if 
(!ChecksEnabled[CK_MallocChecker] && + !ChecksEnabled[CK_NewDeleteChecker]) + return; + + Optional<MallocChecker::CheckKind> CheckKind = + getCheckIfTracked(C, DeallocExpr); + if (!CheckKind.hasValue()) + return; + + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT_BadFree[*CheckKind]) + BT_BadFree[*CheckKind].reset(new BugType( + CheckNames[*CheckKind], "Bad free", categories::MemoryError)); + + SmallString<100> buf; + llvm::raw_svector_ostream os(buf); + + const MemRegion *MR = ArgVal.getAsRegion(); + while (const ElementRegion *ER = dyn_cast_or_null<ElementRegion>(MR)) + MR = ER->getSuperRegion(); + + os << "Argument to "; + if (!printAllocDeallocName(os, C, DeallocExpr)) + os << "deallocator"; + + os << " is "; + bool Summarized = MR ? SummarizeRegion(os, MR) + : SummarizeValue(os, ArgVal); + if (Summarized) + os << ", which is not memory allocated by "; + else + os << "not memory allocated by "; + + printExpectedAllocName(os, MemFunctionInfo, C, DeallocExpr); + + auto R = std::make_unique<PathSensitiveBugReport>(*BT_BadFree[*CheckKind], + os.str(), N); + R->markInteresting(MR); + R->addRange(Range); + C.emitReport(std::move(R)); + } +} + +void MallocChecker::ReportFreeAlloca(CheckerContext &C, SVal ArgVal, + SourceRange Range) const { + + Optional<MallocChecker::CheckKind> CheckKind; + + if (ChecksEnabled[CK_MallocChecker]) + CheckKind = CK_MallocChecker; + else if (ChecksEnabled[CK_MismatchedDeallocatorChecker]) + CheckKind = CK_MismatchedDeallocatorChecker; + else + return; + + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT_FreeAlloca[*CheckKind]) + BT_FreeAlloca[*CheckKind].reset(new BugType( + CheckNames[*CheckKind], "Free alloca()", categories::MemoryError)); + + auto R = std::make_unique<PathSensitiveBugReport>( + *BT_FreeAlloca[*CheckKind], + "Memory allocated by alloca() should not be deallocated", N); + R->markInteresting(ArgVal.getAsRegion()); + R->addRange(Range); + C.emitReport(std::move(R)); + } +} + +void 
MallocChecker::ReportMismatchedDealloc(CheckerContext &C, + SourceRange Range, + const Expr *DeallocExpr, + const RefState *RS, + SymbolRef Sym, + bool OwnershipTransferred) const { + + if (!ChecksEnabled[CK_MismatchedDeallocatorChecker]) + return; + + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT_MismatchedDealloc) + BT_MismatchedDealloc.reset( + new BugType(CheckNames[CK_MismatchedDeallocatorChecker], + "Bad deallocator", categories::MemoryError)); + + SmallString<100> buf; + llvm::raw_svector_ostream os(buf); + + const Expr *AllocExpr = cast<Expr>(RS->getStmt()); + SmallString<20> AllocBuf; + llvm::raw_svector_ostream AllocOs(AllocBuf); + SmallString<20> DeallocBuf; + llvm::raw_svector_ostream DeallocOs(DeallocBuf); + + if (OwnershipTransferred) { + if (printAllocDeallocName(DeallocOs, C, DeallocExpr)) + os << DeallocOs.str() << " cannot"; + else + os << "Cannot"; + + os << " take ownership of memory"; + + if (printAllocDeallocName(AllocOs, C, AllocExpr)) + os << " allocated by " << AllocOs.str(); + } else { + os << "Memory"; + if (printAllocDeallocName(AllocOs, C, AllocExpr)) + os << " allocated by " << AllocOs.str(); + + os << " should be deallocated by "; + printExpectedDeallocName(os, RS->getAllocationFamily()); + + if (printAllocDeallocName(DeallocOs, C, DeallocExpr)) + os << ", not " << DeallocOs.str(); + } + + auto R = std::make_unique<PathSensitiveBugReport>(*BT_MismatchedDealloc, + os.str(), N); + R->markInteresting(Sym); + R->addRange(Range); + R->addVisitor(std::make_unique<MallocBugVisitor>(Sym)); + C.emitReport(std::move(R)); + } +} + +void MallocChecker::ReportOffsetFree(CheckerContext &C, SVal ArgVal, + SourceRange Range, const Expr *DeallocExpr, + const Expr *AllocExpr) const { + + + if (!ChecksEnabled[CK_MallocChecker] && + !ChecksEnabled[CK_NewDeleteChecker]) + return; + + Optional<MallocChecker::CheckKind> CheckKind = + getCheckIfTracked(C, AllocExpr); + if (!CheckKind.hasValue()) + return; + + ExplodedNode *N = 
C.generateErrorNode(); + if (!N) + return; + + if (!BT_OffsetFree[*CheckKind]) + BT_OffsetFree[*CheckKind].reset(new BugType( + CheckNames[*CheckKind], "Offset free", categories::MemoryError)); + + SmallString<100> buf; + llvm::raw_svector_ostream os(buf); + SmallString<20> AllocNameBuf; + llvm::raw_svector_ostream AllocNameOs(AllocNameBuf); + + const MemRegion *MR = ArgVal.getAsRegion(); + assert(MR && "Only MemRegion based symbols can have offset free errors"); + + RegionOffset Offset = MR->getAsOffset(); + assert((Offset.isValid() && + !Offset.hasSymbolicOffset() && + Offset.getOffset() != 0) && + "Only symbols with a valid offset can have offset free errors"); + + int offsetBytes = Offset.getOffset() / C.getASTContext().getCharWidth(); + + os << "Argument to "; + if (!printAllocDeallocName(os, C, DeallocExpr)) + os << "deallocator"; + os << " is offset by " + << offsetBytes + << " " + << ((abs(offsetBytes) > 1) ? "bytes" : "byte") + << " from the start of "; + if (AllocExpr && printAllocDeallocName(AllocNameOs, C, AllocExpr)) + os << "memory allocated by " << AllocNameOs.str(); + else + os << "allocated memory"; + + auto R = std::make_unique<PathSensitiveBugReport>(*BT_OffsetFree[*CheckKind], + os.str(), N); + R->markInteresting(MR->getBaseRegion()); + R->addRange(Range); + C.emitReport(std::move(R)); +} + +void MallocChecker::ReportUseAfterFree(CheckerContext &C, SourceRange Range, + SymbolRef Sym) const { + + if (!ChecksEnabled[CK_MallocChecker] && + !ChecksEnabled[CK_NewDeleteChecker] && + !ChecksEnabled[CK_InnerPointerChecker]) + return; + + Optional<MallocChecker::CheckKind> CheckKind = getCheckIfTracked(C, Sym); + if (!CheckKind.hasValue()) + return; + + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT_UseFree[*CheckKind]) + BT_UseFree[*CheckKind].reset(new BugType( + CheckNames[*CheckKind], "Use-after-free", categories::MemoryError)); + + AllocationFamily AF = + C.getState()->get<RegionState>(Sym)->getAllocationFamily(); + + auto R = 
std::make_unique<PathSensitiveBugReport>( + *BT_UseFree[*CheckKind], + AF == AF_InnerBuffer + ? "Inner pointer of container used after re/deallocation" + : "Use of memory after it is freed", + N); + + R->markInteresting(Sym); + R->addRange(Range); + R->addVisitor(std::make_unique<MallocBugVisitor>(Sym)); + + if (AF == AF_InnerBuffer) + R->addVisitor(allocation_state::getInnerPointerBRVisitor(Sym)); + + C.emitReport(std::move(R)); + } +} + +void MallocChecker::ReportDoubleFree(CheckerContext &C, SourceRange Range, + bool Released, SymbolRef Sym, + SymbolRef PrevSym) const { + + if (!ChecksEnabled[CK_MallocChecker] && + !ChecksEnabled[CK_NewDeleteChecker]) + return; + + Optional<MallocChecker::CheckKind> CheckKind = getCheckIfTracked(C, Sym); + if (!CheckKind.hasValue()) + return; + + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT_DoubleFree[*CheckKind]) + BT_DoubleFree[*CheckKind].reset(new BugType( + CheckNames[*CheckKind], "Double free", categories::MemoryError)); + + auto R = std::make_unique<PathSensitiveBugReport>( + *BT_DoubleFree[*CheckKind], + (Released ? 
"Attempt to free released memory" + : "Attempt to free non-owned memory"), + N); + R->addRange(Range); + R->markInteresting(Sym); + if (PrevSym) + R->markInteresting(PrevSym); + R->addVisitor(std::make_unique<MallocBugVisitor>(Sym)); + C.emitReport(std::move(R)); + } +} + +void MallocChecker::ReportDoubleDelete(CheckerContext &C, SymbolRef Sym) const { + + if (!ChecksEnabled[CK_NewDeleteChecker]) + return; + + Optional<MallocChecker::CheckKind> CheckKind = getCheckIfTracked(C, Sym); + if (!CheckKind.hasValue()) + return; + + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT_DoubleDelete) + BT_DoubleDelete.reset(new BugType(CheckNames[CK_NewDeleteChecker], + "Double delete", + categories::MemoryError)); + + auto R = std::make_unique<PathSensitiveBugReport>( + *BT_DoubleDelete, "Attempt to delete released memory", N); + + R->markInteresting(Sym); + R->addVisitor(std::make_unique<MallocBugVisitor>(Sym)); + C.emitReport(std::move(R)); + } +} + +void MallocChecker::ReportUseZeroAllocated(CheckerContext &C, + SourceRange Range, + SymbolRef Sym) const { + + if (!ChecksEnabled[CK_MallocChecker] && + !ChecksEnabled[CK_NewDeleteChecker]) + return; + + Optional<MallocChecker::CheckKind> CheckKind = getCheckIfTracked(C, Sym); + + if (!CheckKind.hasValue()) + return; + + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT_UseZerroAllocated[*CheckKind]) + BT_UseZerroAllocated[*CheckKind].reset( + new BugType(CheckNames[*CheckKind], "Use of zero allocated", + categories::MemoryError)); + + auto R = std::make_unique<PathSensitiveBugReport>( + *BT_UseZerroAllocated[*CheckKind], "Use of zero-allocated memory", N); + + R->addRange(Range); + if (Sym) { + R->markInteresting(Sym); + R->addVisitor(std::make_unique<MallocBugVisitor>(Sym)); + } + C.emitReport(std::move(R)); + } +} + +void MallocChecker::ReportFunctionPointerFree(CheckerContext &C, SVal ArgVal, + SourceRange Range, + const Expr *FreeExpr) const { + if (!ChecksEnabled[CK_MallocChecker]) + return; + + 
Optional<MallocChecker::CheckKind> CheckKind = getCheckIfTracked(C, FreeExpr); + if (!CheckKind.hasValue()) + return; + + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT_BadFree[*CheckKind]) + BT_BadFree[*CheckKind].reset(new BugType( + CheckNames[*CheckKind], "Bad free", categories::MemoryError)); + + SmallString<100> Buf; + llvm::raw_svector_ostream Os(Buf); + + const MemRegion *MR = ArgVal.getAsRegion(); + while (const ElementRegion *ER = dyn_cast_or_null<ElementRegion>(MR)) + MR = ER->getSuperRegion(); + + Os << "Argument to "; + if (!printAllocDeallocName(Os, C, FreeExpr)) + Os << "deallocator"; + + Os << " is a function pointer"; + + auto R = std::make_unique<PathSensitiveBugReport>(*BT_BadFree[*CheckKind], + Os.str(), N); + R->markInteresting(MR); + R->addRange(Range); + C.emitReport(std::move(R)); + } +} + +ProgramStateRef MallocChecker::ReallocMemAux(CheckerContext &C, + const CallExpr *CE, + bool ShouldFreeOnFail, + ProgramStateRef State, + bool SuffixWithN) const { + if (!State) + return nullptr; + + if (SuffixWithN && CE->getNumArgs() < 3) + return nullptr; + else if (CE->getNumArgs() < 2) + return nullptr; + + const Expr *arg0Expr = CE->getArg(0); + SVal Arg0Val = C.getSVal(arg0Expr); + if (!Arg0Val.getAs<DefinedOrUnknownSVal>()) + return nullptr; + DefinedOrUnknownSVal arg0Val = Arg0Val.castAs<DefinedOrUnknownSVal>(); + + SValBuilder &svalBuilder = C.getSValBuilder(); + + DefinedOrUnknownSVal PtrEQ = + svalBuilder.evalEQ(State, arg0Val, svalBuilder.makeNull()); + + // Get the size argument. + const Expr *Arg1 = CE->getArg(1); + + // Get the value of the size argument. + SVal TotalSize = C.getSVal(Arg1); + if (SuffixWithN) + TotalSize = evalMulForBufferSize(C, Arg1, CE->getArg(2)); + if (!TotalSize.getAs<DefinedOrUnknownSVal>()) + return nullptr; + + // Compare the size argument to 0. 
+ DefinedOrUnknownSVal SizeZero = + svalBuilder.evalEQ(State, TotalSize.castAs<DefinedOrUnknownSVal>(), + svalBuilder.makeIntValWithPtrWidth(0, false)); + + ProgramStateRef StatePtrIsNull, StatePtrNotNull; + std::tie(StatePtrIsNull, StatePtrNotNull) = State->assume(PtrEQ); + ProgramStateRef StateSizeIsZero, StateSizeNotZero; + std::tie(StateSizeIsZero, StateSizeNotZero) = State->assume(SizeZero); + // We only assume exceptional states if they are definitely true; if the + // state is under-constrained, assume regular realloc behavior. + bool PrtIsNull = StatePtrIsNull && !StatePtrNotNull; + bool SizeIsZero = StateSizeIsZero && !StateSizeNotZero; + + // If the ptr is NULL and the size is not 0, the call is equivalent to + // malloc(size). + if (PrtIsNull && !SizeIsZero) { + ProgramStateRef stateMalloc = MallocMemAux(C, CE, TotalSize, + UndefinedVal(), StatePtrIsNull); + return stateMalloc; + } + + if (PrtIsNull && SizeIsZero) + return State; + + // Get the from and to pointer symbols as in toPtr = realloc(fromPtr, size). + assert(!PrtIsNull); + SymbolRef FromPtr = arg0Val.getAsSymbol(); + SVal RetVal = C.getSVal(CE); + SymbolRef ToPtr = RetVal.getAsSymbol(); + if (!FromPtr || !ToPtr) + return nullptr; + + bool IsKnownToBeAllocated = false; + + // If the size is 0, free the memory. + if (SizeIsZero) + // The semantics of the return value are: + // If size was equal to 0, either NULL or a pointer suitable to be passed + // to free() is returned. We just free the input pointer and do not add + // any constrains on the output pointer. + if (ProgramStateRef stateFree = + FreeMemAux(C, CE, StateSizeIsZero, 0, false, IsKnownToBeAllocated)) + return stateFree; + + // Default behavior. 
+ if (ProgramStateRef stateFree = + FreeMemAux(C, CE, State, 0, false, IsKnownToBeAllocated)) { + + ProgramStateRef stateRealloc = MallocMemAux(C, CE, TotalSize, + UnknownVal(), stateFree); + if (!stateRealloc) + return nullptr; + + OwnershipAfterReallocKind Kind = OAR_ToBeFreedAfterFailure; + if (ShouldFreeOnFail) + Kind = OAR_FreeOnFailure; + else if (!IsKnownToBeAllocated) + Kind = OAR_DoNotTrackAfterFailure; + + // Record the info about the reallocated symbol so that we could properly + // process failed reallocation. + stateRealloc = stateRealloc->set<ReallocPairs>(ToPtr, + ReallocPair(FromPtr, Kind)); + // The reallocated symbol should stay alive for as long as the new symbol. + C.getSymbolManager().addSymbolDependency(ToPtr, FromPtr); + return stateRealloc; + } + return nullptr; +} + +ProgramStateRef MallocChecker::CallocMem(CheckerContext &C, const CallExpr *CE, + ProgramStateRef State) { + if (!State) + return nullptr; + + if (CE->getNumArgs() < 2) + return nullptr; + + SValBuilder &svalBuilder = C.getSValBuilder(); + SVal zeroVal = svalBuilder.makeZeroVal(svalBuilder.getContext().CharTy); + SVal TotalSize = evalMulForBufferSize(C, CE->getArg(0), CE->getArg(1)); + + return MallocMemAux(C, CE, TotalSize, zeroVal, State); +} + +MallocChecker::LeakInfo MallocChecker::getAllocationSite(const ExplodedNode *N, + SymbolRef Sym, + CheckerContext &C) { + const LocationContext *LeakContext = N->getLocationContext(); + // Walk the ExplodedGraph backwards and find the first node that referred to + // the tracked symbol. + const ExplodedNode *AllocNode = N; + const MemRegion *ReferenceRegion = nullptr; + + while (N) { + ProgramStateRef State = N->getState(); + if (!State->get<RegionState>(Sym)) + break; + + // Find the most recent expression bound to the symbol in the current + // context. 
+ if (!ReferenceRegion) { + if (const MemRegion *MR = C.getLocationRegionIfPostStore(N)) { + SVal Val = State->getSVal(MR); + if (Val.getAsLocSymbol() == Sym) { + const VarRegion* VR = MR->getBaseRegion()->getAs<VarRegion>(); + // Do not show local variables belonging to a function other than + // where the error is reported. + if (!VR || + (VR->getStackFrame() == LeakContext->getStackFrame())) + ReferenceRegion = MR; + } + } + } + + // Allocation node, is the last node in the current or parent context in + // which the symbol was tracked. + const LocationContext *NContext = N->getLocationContext(); + if (NContext == LeakContext || + NContext->isParentOf(LeakContext)) + AllocNode = N; + N = N->pred_empty() ? nullptr : *(N->pred_begin()); + } + + return LeakInfo(AllocNode, ReferenceRegion); +} + +void MallocChecker::reportLeak(SymbolRef Sym, ExplodedNode *N, + CheckerContext &C) const { + + if (!ChecksEnabled[CK_MallocChecker] && + !ChecksEnabled[CK_NewDeleteLeaksChecker]) + return; + + const RefState *RS = C.getState()->get<RegionState>(Sym); + assert(RS && "cannot leak an untracked symbol"); + AllocationFamily Family = RS->getAllocationFamily(); + + if (Family == AF_Alloca) + return; + + Optional<MallocChecker::CheckKind> + CheckKind = getCheckIfTracked(Family, true); + + if (!CheckKind.hasValue()) + return; + + assert(N); + if (!BT_Leak[*CheckKind]) { + // Leaks should not be reported if they are post-dominated by a sink: + // (1) Sinks are higher importance bugs. + // (2) NoReturnFunctionChecker uses sink nodes to represent paths ending + // with __noreturn functions such as assert() or exit(). We choose not + // to report leaks on such paths. + BT_Leak[*CheckKind].reset(new BugType(CheckNames[*CheckKind], "Memory leak", + categories::MemoryError, + /*SuppressOnSink=*/true)); + } + + // Most bug reports are cached at the location where they occurred. 
+ // With leaks, we want to unique them by the location where they were + // allocated, and only report a single path. + PathDiagnosticLocation LocUsedForUniqueing; + const ExplodedNode *AllocNode = nullptr; + const MemRegion *Region = nullptr; + std::tie(AllocNode, Region) = getAllocationSite(N, Sym, C); + + const Stmt *AllocationStmt = AllocNode->getStmtForDiagnostics(); + if (AllocationStmt) + LocUsedForUniqueing = PathDiagnosticLocation::createBegin(AllocationStmt, + C.getSourceManager(), + AllocNode->getLocationContext()); + + SmallString<200> buf; + llvm::raw_svector_ostream os(buf); + if (Region && Region->canPrintPretty()) { + os << "Potential leak of memory pointed to by "; + Region->printPretty(os); + } else { + os << "Potential memory leak"; + } + + auto R = std::make_unique<PathSensitiveBugReport>( + *BT_Leak[*CheckKind], os.str(), N, LocUsedForUniqueing, + AllocNode->getLocationContext()->getDecl()); + R->markInteresting(Sym); + R->addVisitor(std::make_unique<MallocBugVisitor>(Sym, true)); + C.emitReport(std::move(R)); +} + +void MallocChecker::checkDeadSymbols(SymbolReaper &SymReaper, + CheckerContext &C) const +{ + ProgramStateRef state = C.getState(); + RegionStateTy OldRS = state->get<RegionState>(); + RegionStateTy::Factory &F = state->get_context<RegionState>(); + + RegionStateTy RS = OldRS; + SmallVector<SymbolRef, 2> Errors; + for (RegionStateTy::iterator I = RS.begin(), E = RS.end(); I != E; ++I) { + if (SymReaper.isDead(I->first)) { + if (I->second.isAllocated() || I->second.isAllocatedOfSizeZero()) + Errors.push_back(I->first); + // Remove the dead symbol from the map. + RS = F.remove(RS, I->first); + } + } + + if (RS == OldRS) { + // We shouldn't have touched other maps yet. + assert(state->get<ReallocPairs>() == + C.getState()->get<ReallocPairs>()); + assert(state->get<FreeReturnValue>() == + C.getState()->get<FreeReturnValue>()); + return; + } + + // Cleanup the Realloc Pairs Map. 
+ ReallocPairsTy RP = state->get<ReallocPairs>(); + for (ReallocPairsTy::iterator I = RP.begin(), E = RP.end(); I != E; ++I) { + if (SymReaper.isDead(I->first) || + SymReaper.isDead(I->second.ReallocatedSym)) { + state = state->remove<ReallocPairs>(I->first); + } + } + + // Cleanup the FreeReturnValue Map. + FreeReturnValueTy FR = state->get<FreeReturnValue>(); + for (FreeReturnValueTy::iterator I = FR.begin(), E = FR.end(); I != E; ++I) { + if (SymReaper.isDead(I->first) || + SymReaper.isDead(I->second)) { + state = state->remove<FreeReturnValue>(I->first); + } + } + + // Generate leak node. + ExplodedNode *N = C.getPredecessor(); + if (!Errors.empty()) { + static CheckerProgramPointTag Tag("MallocChecker", "DeadSymbolsLeak"); + N = C.generateNonFatalErrorNode(C.getState(), &Tag); + if (N) { + for (SmallVectorImpl<SymbolRef>::iterator + I = Errors.begin(), E = Errors.end(); I != E; ++I) { + reportLeak(*I, N, C); + } + } + } + + C.addTransition(state->set<RegionState>(RS), N); +} + +void MallocChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + + if (const CXXDestructorCall *DC = dyn_cast<CXXDestructorCall>(&Call)) { + SymbolRef Sym = DC->getCXXThisVal().getAsSymbol(); + if (!Sym || checkDoubleDelete(Sym, C)) + return; + } + + // We will check for double free in the post visit. + if (const AnyFunctionCall *FC = dyn_cast<AnyFunctionCall>(&Call)) { + const FunctionDecl *FD = FC->getDecl(); + if (!FD) + return; + + ASTContext &Ctx = C.getASTContext(); + if (ChecksEnabled[CK_MallocChecker] && + (MemFunctionInfo.isCMemFunction(FD, Ctx, AF_Malloc, + MemoryOperationKind::MOK_Free) || + MemFunctionInfo.isCMemFunction(FD, Ctx, AF_IfNameIndex, + MemoryOperationKind::MOK_Free))) + return; + } + + // Check if the callee of a method is deleted. 
+ if (const CXXInstanceCall *CC = dyn_cast<CXXInstanceCall>(&Call)) { + SymbolRef Sym = CC->getCXXThisVal().getAsSymbol(); + if (!Sym || checkUseAfterFree(Sym, C, CC->getCXXThisExpr())) + return; + } + + // Check arguments for being used after free. + for (unsigned I = 0, E = Call.getNumArgs(); I != E; ++I) { + SVal ArgSVal = Call.getArgSVal(I); + if (ArgSVal.getAs<Loc>()) { + SymbolRef Sym = ArgSVal.getAsSymbol(); + if (!Sym) + continue; + if (checkUseAfterFree(Sym, C, Call.getArgExpr(I))) + return; + } + } +} + +void MallocChecker::checkPreStmt(const ReturnStmt *S, + CheckerContext &C) const { + checkEscapeOnReturn(S, C); +} + +// In the CFG, automatic destructors come after the return statement. +// This callback checks for returning memory that is freed by automatic +// destructors, as those cannot be reached in checkPreStmt(). +void MallocChecker::checkEndFunction(const ReturnStmt *S, + CheckerContext &C) const { + checkEscapeOnReturn(S, C); +} + +void MallocChecker::checkEscapeOnReturn(const ReturnStmt *S, + CheckerContext &C) const { + if (!S) + return; + + const Expr *E = S->getRetValue(); + if (!E) + return; + + // Check if we are returning a symbol. + ProgramStateRef State = C.getState(); + SVal RetVal = C.getSVal(E); + SymbolRef Sym = RetVal.getAsSymbol(); + if (!Sym) + // If we are returning a field of the allocated struct or an array element, + // the callee could still free the memory. + // TODO: This logic should be a part of generic symbol escape callback. + if (const MemRegion *MR = RetVal.getAsRegion()) + if (isa<FieldRegion>(MR) || isa<ElementRegion>(MR)) + if (const SymbolicRegion *BMR = + dyn_cast<SymbolicRegion>(MR->getBaseRegion())) + Sym = BMR->getSymbol(); + + // Check if we are returning freed memory. + if (Sym) + checkUseAfterFree(Sym, C, E); +} + +// TODO: Blocks should be either inlined or should call invalidate regions +// upon invocation. After that's in place, special casing here will not be +// needed. 
+/// Post-statement callback for block literals. Runs StopTrackingCallback over
+/// every symbol reachable from the regions the block captures.
+void MallocChecker::checkPostStmt(const BlockExpr *BE,
+                                  CheckerContext &C) const {
+
+  // Scan the captured variables for any symbol this checker may be tracking.
+  if (!BE->getBlockDecl()->hasCaptures())
+    return;
+
+  ProgramStateRef state = C.getState();
+  const BlockDataRegion *R =
+    cast<BlockDataRegion>(C.getSVal(BE).getAsRegion());
+
+  BlockDataRegion::referenced_vars_iterator I = R->referenced_vars_begin(),
+                                            E = R->referenced_vars_end();
+
+  if (I == E)
+    return;
+
+  SmallVector<const MemRegion*, 10> Regions;
+  const LocationContext *LC = C.getLocationContext();
+  MemRegionManager &MemMgr = C.getSValBuilder().getRegionManager();
+
+  for ( ; I != E; ++I) {
+    const VarRegion *VR = I.getCapturedRegion();
+    // A region whose super region is the block itself is re-resolved to the
+    // variable's region in the current location context.
+    if (VR->getSuperRegion() == R) {
+      VR = MemMgr.getVarRegion(VR->getDecl(), LC);
+    }
+    Regions.push_back(VR);
+  }
+
+  state =
+    state->scanReachableSymbols<StopTrackingCallback>(Regions).getState();
+  C.addTransition(state);
+}
+
+/// Returns true if \p Sym has a RefState in the current state and that state
+/// is "released".
+static bool isReleased(SymbolRef Sym, CheckerContext &C) {
+  assert(Sym);
+  const RefState *RS = C.getState()->get<RegionState>(Sym);
+  return (RS && RS->isReleased());
+}
+
+/// If the enclosing function's declaration text (everything before its body)
+/// mentions "__isl_", marks every tracked symbol passed to \p CE as escaped,
+/// adds the transition and returns true. Returns false otherwise, including
+/// when the call has no arguments.
+bool MallocChecker::suppressDeallocationsInSuspiciousContexts(
+    const CallExpr *CE, CheckerContext &C) const {
+  if (CE->getNumArgs() == 0)
+    return false;
+
+  StringRef FunctionStr = "";
+  if (const auto *FD = dyn_cast<FunctionDecl>(C.getStackFrame()->getDecl()))
+    if (const Stmt *Body = FD->getBody())
+      if (Body->getBeginLoc().isValid())
+        FunctionStr =
+            Lexer::getSourceText(CharSourceRange::getTokenRange(
+                                     {FD->getBeginLoc(), Body->getBeginLoc()}),
+                                 C.getSourceManager(), C.getLangOpts());
+
+  // We do not model the Integer Set Library's retain-count based allocation.
+  if (!FunctionStr.contains("__isl_"))
+    return false;
+
+  ProgramStateRef State = C.getState();
+
+  for (const Expr *Arg : CE->arguments())
+    if (SymbolRef Sym = C.getSVal(Arg).getAsSymbol())
+      if (const RefState *RS = State->get<RegionState>(Sym))
+        State = State->set<RegionState>(Sym, RefState::getEscaped(RS));
+
+  C.addTransition(State);
+  return true;
+}
+
+/// Reports a use-after-free at \p S if \p Sym is released.
+/// \returns true if a report was issued.
+bool MallocChecker::checkUseAfterFree(SymbolRef Sym, CheckerContext &C,
+                                      const Stmt *S) const {
+
+  if (isReleased(Sym, C)) {
+    ReportUseAfterFree(C, S->getSourceRange(), Sym);
+    return true;
+  }
+
+  return false;
+}
+
+/// Reports a use of zero-size memory. A symbol with a RefState is reported at
+/// the range of the statement stored in that RefState; a symbol without one
+/// that is in ReallocSizeZeroSymbols is reported at \p S.
+void MallocChecker::checkUseZeroAllocated(SymbolRef Sym, CheckerContext &C,
+                                          const Stmt *S) const {
+  assert(Sym);
+
+  if (const RefState *RS = C.getState()->get<RegionState>(Sym)) {
+    if (RS->isAllocatedOfSizeZero())
+      ReportUseZeroAllocated(C, RS->getStmt()->getSourceRange(), Sym);
+  }
+  else if (C.getState()->contains<ReallocSizeZeroSymbols>(Sym)) {
+    ReportUseZeroAllocated(C, S->getSourceRange(), Sym);
+  }
+}
+
+/// Reports a double delete if \p Sym is already released.
+/// \returns true if a report was issued.
+bool MallocChecker::checkDoubleDelete(SymbolRef Sym, CheckerContext &C) const {
+
+  if (isReleased(Sym, C)) {
+    ReportDoubleDelete(C, Sym);
+    return true;
+  }
+  return false;
+}
+
+// Check if the location is a freed symbolic region.
+// 'isLoad' is not consulted: loads and stores are checked the same way.
+void MallocChecker::checkLocation(SVal l, bool isLoad, const Stmt *S,
+                                  CheckerContext &C) const {
+  SymbolRef Sym = l.getLocSymbolInBase();
+  if (Sym) {
+    checkUseAfterFree(Sym, C, S);
+    checkUseZeroAllocated(Sym, C, S);
+  }
+}
+
+// If a symbolic region is assumed to NULL (or another constant), stop tracking
+// it - assuming that allocation failed on this path.
+ProgramStateRef MallocChecker::evalAssume(ProgramStateRef state,
+                                              SVal Cond,
+                                              bool Assumption) const {
+  RegionStateTy RS = state->get<RegionState>();
+  for (RegionStateTy::iterator I = RS.begin(), E = RS.end(); I != E; ++I) {
+    // If the symbol is assumed to be NULL, remove it from consideration.
+    ConstraintManager &CMgr = state->getConstraintManager();
+    ConditionTruthVal AllocFailed = CMgr.isNull(state, I.getKey());
+    if (AllocFailed.isConstrainedTrue())
+      state = state->remove<RegionState>(I.getKey());
+  }
+
+  // Realloc returns 0 when reallocation fails, which means that we should
+  // restore the state of the pointer being reallocated.
+  ReallocPairsTy RP = state->get<ReallocPairs>();
+  for (ReallocPairsTy::iterator I = RP.begin(), E = RP.end(); I != E; ++I) {
+    // If the symbol is assumed to be NULL, remove it from consideration.
+    ConstraintManager &CMgr = state->getConstraintManager();
+    ConditionTruthVal AllocFailed = CMgr.isNull(state, I.getKey());
+    if (!AllocFailed.isConstrainedTrue())
+      continue;
+
+    SymbolRef ReallocSym = I.getData().ReallocatedSym;
+    if (const RefState *RS = state->get<RegionState>(ReallocSym)) {
+      if (RS->isReleased()) {
+        switch (I.getData().Kind) {
+        case OAR_ToBeFreedAfterFailure:
+          // The original pointer becomes "allocated" again.
+          state = state->set<RegionState>(ReallocSym,
+              RefState::getAllocated(RS->getAllocationFamily(), RS->getStmt()));
+          break;
+        case OAR_DoNotTrackAfterFailure:
+          state = state->remove<RegionState>(ReallocSym);
+          break;
+        default:
+          assert(I.getData().Kind == OAR_FreeOnFailure);
+        }
+      }
+    }
+    state = state->remove<ReallocPairs>(I.getKey());
+  }
+
+  return state;
+}
+
+/// Decides whether \p Call may free memory that escapes into it, or is a call
+/// this checker handles explicitly elsewhere.
+///
+/// \param EscapingSymbol Reset to null on entry; set to the receiver's symbol
+///        for Objective-C 'init' family messages.
+/// \returns true if the call should be assumed to free escaped memory, false
+///          if it is assumed not to or is modeled explicitly.
+bool MallocChecker::mayFreeAnyEscapedMemoryOrIsModeledExplicitly(
+                                              const CallEvent *Call,
+                                              ProgramStateRef State,
+                                              SymbolRef &EscapingSymbol) const {
+  assert(Call);
+  EscapingSymbol = nullptr;
+
+  // For now, assume that any C++ or block call can free memory.
+  // TODO: If we want to be more optimistic here, we'll need to make sure that
+  // regions escape to C++ containers. They seem to do that even now, but for
+  // mysterious reasons.
+  if (!(isa<SimpleFunctionCall>(Call) || isa<ObjCMethodCall>(Call)))
+    return true;
+
+  // Check Objective-C messages by selector name.
+  if (const ObjCMethodCall *Msg = dyn_cast<ObjCMethodCall>(Call)) {
+    // If it's not a framework call, or if it takes a callback, assume it
+    // can free memory.
+    if (!Call->isInSystemHeader() || Call->argumentsMayEscape())
+      return true;
+
+    // If it's a method we know about, handle it explicitly post-call.
+    // This should happen before the "freeWhenDone" check below.
+    if (isKnownDeallocObjCMethodName(*Msg))
+      return false;
+
+    // If there's a "freeWhenDone" parameter, but the method isn't one we know
+    // about, we can't be sure that the object will use free() to deallocate the
+    // memory, so we can't model it explicitly. The best we can do is use it to
+    // decide whether the pointer escapes.
+    if (Optional<bool> FreeWhenDone = getFreeWhenDoneArg(*Msg))
+      return *FreeWhenDone;
+
+    // If the first selector piece ends with "NoCopy", and there is no
+    // "freeWhenDone" parameter set to zero, we know ownership is being
+    // transferred. Again, though, we can't be sure that the object will use
+    // free() to deallocate the memory, so we can't model it explicitly.
+    StringRef FirstSlot = Msg->getSelector().getNameForSlot(0);
+    if (FirstSlot.endswith("NoCopy"))
+      return true;
+
+    // If the first selector starts with addPointer, insertPointer,
+    // or replacePointer, assume we are dealing with NSPointerArray or similar.
+    // This is similar to C++ containers (vector); we still might want to check
+    // that the pointers get freed by following the container itself.
+    if (FirstSlot.startswith("addPointer") ||
+        FirstSlot.startswith("insertPointer") ||
+        FirstSlot.startswith("replacePointer") ||
+        FirstSlot.equals("valueWithPointer")) {
+      return true;
+    }
+
+    // We should escape receiver on call to 'init'. This is especially relevant
+    // to the receiver, as the corresponding symbol is usually not referenced
+    // after the call.
+    if (Msg->getMethodFamily() == OMF_init) {
+      EscapingSymbol = Msg->getReceiverSVal().getAsSymbol();
+      return true;
+    }
+
+    // Otherwise, assume that the method does not free memory.
+    // Most framework methods do not free memory.
+    return false;
+  }
+
+  // At this point the only thing left to handle is straight function calls.
+  const FunctionDecl *FD = cast<SimpleFunctionCall>(Call)->getDecl();
+  if (!FD)
+    return true;
+
+  ASTContext &ASTC = State->getStateManager().getContext();
+
+  // If it's one of the allocation functions we can reason about, we model
+  // its behavior explicitly.
+  if (MemFunctionInfo.isMemFunction(FD, ASTC))
+    return false;
+
+  // If it's not a system call, assume it frees memory.
+  if (!Call->isInSystemHeader())
+    return true;
+
+  // White list the system functions whose arguments escape.
+  const IdentifierInfo *II = FD->getIdentifier();
+  if (!II)
+    return true;
+  StringRef FName = II->getName();
+
+  // White list the 'XXXNoCopy' CoreFoundation functions.
+  // We specifically check these before the generic argumentsMayEscape()
+  // fallback at the end of this function.
+  if (FName.endswith("NoCopy")) {
+    // Look for the deallocator argument. We know that the memory ownership
+    // is not transferred only if the deallocator argument is
+    // 'kCFAllocatorNull'.
+    for (unsigned i = 1; i < Call->getNumArgs(); ++i) {
+      const Expr *ArgE = Call->getArgExpr(i)->IgnoreParenCasts();
+      if (const DeclRefExpr *DE = dyn_cast<DeclRefExpr>(ArgE)) {
+        StringRef DeallocatorName = DE->getFoundDecl()->getName();
+        if (DeallocatorName == "kCFAllocatorNull")
+          return false;
+      }
+    }
+    return true;
+  }
+
+  // Associating streams with malloced buffers. The pointer can escape if
+  // 'closefn' is specified (and if that function does free memory),
+  // but it will not if closefn is not specified.
+  // Currently, we do not inspect the 'closefn' function (PR12101).
+  // 'closefn' is the fifth argument (index 4), so at least five arguments
+  // must be present before it is read.
+  if (FName == "funopen")
+    if (Call->getNumArgs() >= 5 && Call->getArgSVal(4).isConstant(0))
+      return false;
+
+  // Do not warn on pointers passed to 'setbuf' when used with std streams,
+  // these leaks might be intentional when setting the buffer for stdio.
+  // http://stackoverflow.com/questions/2671151/who-frees-setvbuf-buffer
+  if (FName == "setbuf" || FName == "setbuffer" ||
+      FName == "setlinebuf" || FName == "setvbuf") {
+    if (Call->getNumArgs() >= 1) {
+      const Expr *ArgE = Call->getArgExpr(0)->IgnoreParenCasts();
+      if (const DeclRefExpr *ArgDRE = dyn_cast<DeclRefExpr>(ArgE))
+        if (const VarDecl *D = dyn_cast<VarDecl>(ArgDRE->getDecl()))
+          if (D->getCanonicalDecl()->getName().find("std") != StringRef::npos)
+            return true;
+    }
+  }
+
+  // A bunch of other functions which either take ownership of a pointer or
+  // wrap the result up in a struct or object, meaning it can be freed later.
+  // (See RetainCountChecker.) Not all the parameters here are invalidated,
+  // but the Malloc checker cannot differentiate between them. The right way
+  // of doing this would be to implement a pointer escapes callback.
+  if (FName == "CGBitmapContextCreate" ||
+      FName == "CGBitmapContextCreateWithData" ||
+      FName == "CVPixelBufferCreateWithBytes" ||
+      FName == "CVPixelBufferCreateWithPlanarBytes" ||
+      FName == "OSAtomicEnqueue") {
+    return true;
+  }
+
+  if (FName == "postEvent" &&
+      FD->getQualifiedNameAsString() == "QCoreApplication::postEvent") {
+    return true;
+  }
+
+  if (FName == "connectImpl" &&
+      FD->getQualifiedNameAsString() == "QObject::connectImpl") {
+    return true;
+  }
+
+  // Handle cases where we know a buffer's /address/ can escape.
+  // Note that the above checks handle some special cases where we know that
+  // even though the address escapes, it's still our responsibility to free the
+  // buffer.
+  if (Call->argumentsMayEscape())
+    return true;
+
+  // Otherwise, assume that the function does not free memory.
+  // Most system calls do not free the memory.
+  return false;
+}
+
+/// Pointer-escape callback for non-const pointers; forwards to
+/// checkPointerEscapeAux with IsConstPointerEscape = false.
+ProgramStateRef MallocChecker::checkPointerEscape(ProgramStateRef State,
+                                             const InvalidatedSymbols &Escaped,
+                                             const CallEvent *Call,
+                                             PointerEscapeKind Kind) const {
+  return checkPointerEscapeAux(State, Escaped, Call, Kind,
+                               /*IsConstPointerEscape*/ false);
+}
+
+/// Pointer-escape callback for const pointers; forwards to
+/// checkPointerEscapeAux with IsConstPointerEscape = true.
+ProgramStateRef MallocChecker::checkConstPointerEscape(ProgramStateRef State,
+                                              const InvalidatedSymbols &Escaped,
+                                              const CallEvent *Call,
+                                              PointerEscapeKind Kind) const {
+  // If a const pointer escapes, it may not be freed(), but it could be deleted.
+  return checkPointerEscapeAux(State, Escaped, Call, Kind,
+                               /*IsConstPointerEscape*/ true);
+}
+
+/// Returns true if \p RS belongs to the 'new' or 'new[]' allocation family.
+static bool checkIfNewOrNewArrayFamily(const RefState *RS) {
+  return (RS->getAllocationFamily() == AF_CXXNewArray ||
+          RS->getAllocationFamily() == AF_CXXNew);
+}
+
+/// Shared implementation of the two pointer-escape callbacks. Marks escaped
+/// symbols that are currently allocated (including zero-size) as "escaped".
+/// For a const pointer escape only the new/new[] families are marked.
+ProgramStateRef MallocChecker::checkPointerEscapeAux(
+    ProgramStateRef State, const InvalidatedSymbols &Escaped,
+    const CallEvent *Call, PointerEscapeKind Kind,
+    bool IsConstPointerEscape) const {
+  // If we know that the call does not free memory, or we want to process the
+  // call later, keep tracking the top level arguments.
+  SymbolRef EscapingSymbol = nullptr;
+  if (Kind == PSK_DirectEscapeOnCall &&
+      !mayFreeAnyEscapedMemoryOrIsModeledExplicitly(Call, State,
+                                                    EscapingSymbol) &&
+      !EscapingSymbol) {
+    return State;
+  }
+
+  for (InvalidatedSymbols::const_iterator I = Escaped.begin(),
+                                          E = Escaped.end();
+       I != E; ++I) {
+    SymbolRef sym = *I;
+
+    // When a single escaping symbol was singled out, skip every other one.
+    if (EscapingSymbol && EscapingSymbol != sym)
+      continue;
+
+    if (const RefState *RS = State->get<RegionState>(sym))
+      if (RS->isAllocated() || RS->isAllocatedOfSizeZero())
+        if (!IsConstPointerEscape || checkIfNewOrNewArrayFamily(RS))
+          State = State->set<RegionState>(sym, RefState::getEscaped(RS));
+  }
+  return State;
+}
+
+/// Returns a symbol that is a key of ReallocPairs in \p prevState but has no
+/// entry in \p currState, or null if there is none.
+static SymbolRef findFailedReallocSymbol(ProgramStateRef currState,
+                                         ProgramStateRef prevState) {
+  ReallocPairsTy currMap = currState->get<ReallocPairs>();
+  ReallocPairsTy prevMap = prevState->get<ReallocPairs>();
+
+  for (const ReallocPairsTy::value_type &Pair : prevMap) {
+    SymbolRef sym = Pair.first;
+    if (!currMap.lookup(sym))
+      return sym;
+  }
+
+  return nullptr;
+}
+
+/// Name-based heuristic: true if the destructor's class name contains "ptr"
+/// or "pointer" together with one of "ref", "cnt", "intrusive" or "shared"
+/// (all matched case-insensitively).
+static bool isReferenceCountingPointerDestructor(const CXXDestructorDecl *DD) {
+  if (const IdentifierInfo *II = DD->getParent()->getIdentifier()) {
+    StringRef N = II->getName();
+    if (N.contains_lower("ptr") || N.contains_lower("pointer")) {
+      if (N.contains_lower("ref") || N.contains_lower("cnt") ||
+          N.contains_lower("intrusive") || N.contains_lower("shared")) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// Produces the path note (if any) for node \p N by comparing the RefState of
+/// the tracked symbol in \p N with that in its first predecessor. Also
+/// invalidates the report when the release looks like it happened inside a
+/// reference-counting smart pointer destructor.
+/// \returns the event piece, or null if this node is not interesting.
+PathDiagnosticPieceRef MallocBugVisitor::VisitNode(const ExplodedNode *N,
+                                                   BugReporterContext &BRC,
+                                                   PathSensitiveBugReport &BR) {
+  ProgramStateRef state = N->getState();
+  ProgramStateRef statePrev = N->getFirstPred()->getState();
+
+  const RefState *RSCurr = state->get<RegionState>(Sym);
+  const RefState *RSPrev = statePrev->get<RegionState>(Sym);
+
+  const Stmt *S = N->getStmtForDiagnostics();
+  // When dealing with containers, we sometimes want to give a note
+  // even if the statement is missing.
+  if (!S && (!RSCurr || RSCurr->getAllocationFamily() != AF_InnerBuffer))
+    return nullptr;
+
+  const LocationContext *CurrentLC = N->getLocationContext();
+
+  // If we find an atomic fetch_add or fetch_sub within the destructor in which
+  // the pointer was released (before the release), this is likely a destructor
+  // of a shared pointer.
+  // Because we don't model atomics, and also because we don't know that the
+  // original reference count is positive, we should not report use-after-frees
+  // on objects deleted in such destructors. This can probably be improved
+  // through better shared pointer modeling.
+  if (ReleaseDestructorLC) {
+    // S may still be null here (inner buffer notes), hence the null-tolerant
+    // cast.
+    if (const auto *AE = dyn_cast_or_null<AtomicExpr>(S)) {
+      AtomicExpr::AtomicOp Op = AE->getOp();
+      if (Op == AtomicExpr::AO__c11_atomic_fetch_add ||
+          Op == AtomicExpr::AO__c11_atomic_fetch_sub) {
+        if (ReleaseDestructorLC == CurrentLC ||
+            ReleaseDestructorLC->isParentOf(CurrentLC)) {
+          BR.markInvalid(getTag(), S);
+        }
+      }
+    }
+  }
+
+  // FIXME: We will eventually need to handle non-statement-based events
+  // (__attribute__((cleanup))).
+
+  // Find out if this is an interesting point and what is the kind.
+  StringRef Msg;
+  std::unique_ptr<StackHintGeneratorForSymbol> StackHint = nullptr;
+  // Backing storage for messages built via OS; Msg may point into it.
+  SmallString<256> Buf;
+  llvm::raw_svector_ostream OS(Buf);
+
+  if (Mode == Normal) {
+    if (isAllocated(RSCurr, RSPrev, S)) {
+      Msg = "Memory is allocated";
+      StackHint = std::make_unique<StackHintGeneratorForSymbol>(
+          Sym, "Returned allocated memory");
+    } else if (isReleased(RSCurr, RSPrev, S)) {
+      const auto Family = RSCurr->getAllocationFamily();
+      switch (Family) {
+        case AF_Alloca:
+        case AF_Malloc:
+        case AF_CXXNew:
+        case AF_CXXNewArray:
+        case AF_IfNameIndex:
+          Msg = "Memory is released";
+          StackHint = std::make_unique<StackHintGeneratorForSymbol>(
+              Sym, "Returning; memory was released");
+          break;
+        case AF_InnerBuffer: {
+          const MemRegion *ObjRegion =
+              allocation_state::getContainerObjRegion(statePrev, Sym);
+          const auto *TypedRegion = cast<TypedValueRegion>(ObjRegion);
+          QualType ObjTy = TypedRegion->getValueType();
+          OS << "Inner buffer of '" << ObjTy.getAsString() << "' ";
+
+          if (N->getLocation().getKind() == ProgramPoint::PostImplicitCallKind) {
+            OS << "deallocated by call to destructor";
+            StackHint = std::make_unique<StackHintGeneratorForSymbol>(
+                Sym, "Returning; inner buffer was deallocated");
+          } else {
+            OS << "reallocated by call to '";
+            // Note: intentionally the statement recorded in the RefState, not
+            // the outer S.
+            const Stmt *S = RSCurr->getStmt();
+            if (const auto *MemCallE = dyn_cast<CXXMemberCallExpr>(S)) {
+              OS << MemCallE->getMethodDecl()->getNameAsString();
+            } else if (const auto *OpCallE = dyn_cast<CXXOperatorCallExpr>(S)) {
+              OS << OpCallE->getDirectCallee()->getNameAsString();
+            } else if (const auto *CallE = dyn_cast<CallExpr>(S)) {
+              auto &CEMgr = BRC.getStateManager().getCallEventManager();
+              CallEventRef<> Call = CEMgr.getSimpleCall(CallE, state, CurrentLC);
+              const auto *D = dyn_cast_or_null<NamedDecl>(Call->getDecl());
+              OS << (D ? D->getNameAsString() : "unknown");
+            }
+            OS << "'";
+            StackHint = std::make_unique<StackHintGeneratorForSymbol>(
+                Sym, "Returning; inner buffer was reallocated");
+          }
+          Msg = OS.str();
+          break;
+        }
+        case AF_None:
+          llvm_unreachable("Unhandled allocation family!");
+      }
+
+      // See if we're releasing memory while inlining a destructor
+      // (or one of its callees). This turns on various common
+      // false positive suppressions.
+      bool FoundAnyDestructor = false;
+      for (const LocationContext *LC = CurrentLC; LC; LC = LC->getParent()) {
+        if (const auto *DD = dyn_cast<CXXDestructorDecl>(LC->getDecl())) {
+          if (isReferenceCountingPointerDestructor(DD)) {
+            // This immediately looks like a reference-counting destructor.
+            // We're bad at guessing the original reference count of the object,
+            // so suppress the report for now.
+            BR.markInvalid(getTag(), DD);
+          } else if (!FoundAnyDestructor) {
+            assert(!ReleaseDestructorLC &&
+                   "There can be only one release point!");
+            // Suspect that it's a reference counting pointer destructor.
+            // On one of the next nodes might find out that it has atomic
+            // reference counting operations within it (see the code above),
+            // and if so, we'd conclude that it likely is a reference counting
+            // pointer destructor.
+            ReleaseDestructorLC = LC->getStackFrame();
+            // It is unlikely that releasing memory is delegated to a destructor
+            // inside a destructor of a shared pointer, because it's fairly hard
+            // to pass the information that the pointer indeed needs to be
+            // released into it. So we're only interested in the innermost
+            // destructor.
+            FoundAnyDestructor = true;
+          }
+        }
+      }
+    } else if (isRelinquished(RSCurr, RSPrev, S)) {
+      Msg = "Memory ownership is transferred";
+      StackHint = std::make_unique<StackHintGeneratorForSymbol>(Sym, "");
+    } else if (hasReallocFailed(RSCurr, RSPrev, S)) {
+      Mode = ReallocationFailed;
+      Msg = "Reallocation failed";
+      StackHint = std::make_unique<StackHintGeneratorForReallocationFailed>(
+          Sym, "Reallocation failed");
+
+      if (SymbolRef sym = findFailedReallocSymbol(state, statePrev)) {
+        // Is it possible to fail two reallocs WITHOUT testing in between?
+        assert((!FailedReallocSymbol || FailedReallocSymbol == sym) &&
+          "We only support one failed realloc at a time.");
+        BR.markInteresting(sym);
+        FailedReallocSymbol = sym;
+      }
+    }
+
+  // We are in a special mode if a reallocation failed later in the path.
+  } else if (Mode == ReallocationFailed) {
+    assert(FailedReallocSymbol && "No symbol to look for.");
+
+    // Is this the first appearance of the reallocated symbol?
+    if (!statePrev->get<RegionState>(FailedReallocSymbol)) {
+      // We're at the reallocation point.
+      Msg = "Attempt to reallocate memory";
+      StackHint = std::make_unique<StackHintGeneratorForSymbol>(
+          Sym, "Returned reallocated memory");
+      FailedReallocSymbol = nullptr;
+      Mode = Normal;
+    }
+  }
+
+  if (Msg.empty()) {
+    assert(!StackHint);
+    return nullptr;
+  }
+
+  assert(StackHint);
+
+  // Generate the extra diagnostic.
+  PathDiagnosticLocation Pos;
+  if (!S) {
+    assert(RSCurr->getAllocationFamily() == AF_InnerBuffer);
+    auto PostImplCall = N->getLocation().getAs<PostImplicitCall>();
+    if (!PostImplCall)
+      return nullptr;
+    Pos = PathDiagnosticLocation(PostImplCall->getLocation(),
+                                 BRC.getSourceManager());
+  } else {
+    Pos = PathDiagnosticLocation(S, BRC.getSourceManager(),
+                                 N->getLocationContext());
+  }
+
+  auto P = std::make_shared<PathDiagnosticEventPiece>(Pos, Msg, true);
+  BR.addCallStackHint(P, std::move(StackHint));
+  return P;
+}
+
+/// Dumps every tracked symbol with its RefState, followed by the name of the
+/// sub-checker tracking its allocation family when one is found.
+void MallocChecker::printState(raw_ostream &Out, ProgramStateRef State,
+                               const char *NL, const char *Sep) const {
+
+  RegionStateTy RS = State->get<RegionState>();
+
+  if (!RS.isEmpty()) {
+    Out << Sep << "MallocChecker :" << NL;
+    for (RegionStateTy::iterator I = RS.begin(), E = RS.end(); I != E; ++I) {
+      const RefState *RefS = State->get<RegionState>(I.getKey());
+      AllocationFamily Family = RefS->getAllocationFamily();
+      Optional<MallocChecker::CheckKind> CheckKind = getCheckIfTracked(Family);
+      // Retry with the second argument set to true if the first lookup found
+      // nothing.
+      if (!CheckKind.hasValue())
+         CheckKind = getCheckIfTracked(Family, true);
+
+      I.getKey()->dumpToStream(Out);
+      Out << " : ";
+      I.getData().dump(Out);
+      if (CheckKind.hasValue())
+        Out << " (" << CheckNames[*CheckKind].getName() << ")";
+      Out << NL;
+    }
+  }
+}
+
+namespace clang {
+namespace ento {
+namespace allocation_state {
+
+/// Records \p Sym as released by \p Origin, in the AF_InnerBuffer family.
+ProgramStateRef
+markReleased(ProgramStateRef State, SymbolRef Sym, const Expr *Origin) {
+  AllocationFamily Family = AF_InnerBuffer;
+  return State->set<RegionState>(Sym, RefState::getReleased(Family, Origin));
+}
+
+} // end namespace allocation_state
+} // end namespace ento
+} // end namespace clang
+
+// Intended to be used in InnerPointerChecker to register the part of
+// MallocChecker connected to it.
+// Enables the CK_InnerPointerChecker check on the already-registered
+// MallocChecker instance and records the current checker name for it.
+void ento::registerInnerPointerCheckerAux(CheckerManager &mgr) {
+  MallocChecker *checker = mgr.getChecker<MallocChecker>();
+  checker->ChecksEnabled[MallocChecker::CK_InnerPointerChecker] = true;
+  checker->CheckNames[MallocChecker::CK_InnerPointerChecker] =
+      mgr.getCurrentCheckerName();
+}
+
+// Registers the MallocChecker instance itself and initializes
+// ShouldIncludeOwnershipAnnotatedFunctions from the "Optimistic" checker
+// option.
+void ento::registerDynamicMemoryModeling(CheckerManager &mgr) {
+  auto *checker = mgr.registerChecker<MallocChecker>();
+  checker->MemFunctionInfo.ShouldIncludeOwnershipAnnotatedFunctions =
+      mgr.getAnalyzerOptions().getCheckerBooleanOption(checker, "Optimistic");
+}
+
+// The modeling part is registered regardless of language options.
+bool ento::shouldRegisterDynamicMemoryModeling(const LangOptions &LO) {
+  return true;
+}
+
+// Defines ento::register<name>() and ento::shouldRegister<name>() for one
+// sub-checker. The register function fetches the existing MallocChecker via
+// getChecker, turns on the CK_<name> flag and stores the current checker name;
+// the shouldRegister function always returns true.
+#define REGISTER_CHECKER(name)                                                 \
+  void ento::register##name(CheckerManager &mgr) {                             \
+    MallocChecker *checker = mgr.getChecker<MallocChecker>();                  \
+    checker->ChecksEnabled[MallocChecker::CK_##name] = true;                   \
+    checker->CheckNames[MallocChecker::CK_##name] =                            \
+        mgr.getCurrentCheckerName();                                           \
+  }                                                                            \
+                                                                               \
+  bool ento::shouldRegister##name(const LangOptions &LO) { return true; }
+
+REGISTER_CHECKER(MallocChecker)
+REGISTER_CHECKER(NewDeleteChecker)
+REGISTER_CHECKER(NewDeleteLeaksChecker)
+REGISTER_CHECKER(MismatchedDeallocatorChecker)
diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp
new file mode 100644
index 000000000000..4fd06f24c5bc
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp
@@ -0,0 +1,342 @@
+// MallocOverflowSecurityChecker.cpp - Check for malloc overflows -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This checker detects a common memory allocation security flaw.
+// Suppose 'unsigned int n' comes from an untrusted source.  If the
+// code looks like 'malloc (n * 4)', and an attacker can make 'n' be
+// say MAX_UINT/4+2, then instead of allocating the correct 'n' 4-byte
+// elements, this will actually allocate only two because of overflow.
+// Then when the rest of the program attempts to store values past the
+// second element, these values will actually overwrite other items in
+// the heap, probably allowing the attacker to execute arbitrary code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/AST/EvaluatedExprVisitor.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/SmallVector.h"
+#include <utility>
+
+using namespace clang;
+using namespace ento;
+using llvm::APSInt;
+
+namespace {
+// One candidate overflow found in a malloc argument.
+struct MallocOverflowCheck {
+  // The first multiplication encountered while walking the argument; used as
+  // the location of the eventual warning.
+  const BinaryOperator *mulop;
+  // The DeclRefExpr or MemberExpr the argument expression bottoms out at.
+  const Expr *variable;
+  // Constant recorded by CheckMallocArgument when the candidate was saved.
+  APSInt maxVal;
+
+  MallocOverflowCheck(const BinaryOperator *m, const Expr *v, APSInt val)
+      : mulop(m), variable(v), maxVal(std::move(val)) {}
+};
+
+// AST-level checker: runs once per code body (check::ASTCodeBody).
+class MallocOverflowSecurityChecker : public Checker<check::ASTCodeBody> {
+public:
+  // Entry point: collects candidate overflows from malloc calls in D's CFG
+  // and then reports the ones that survive filtering.
+  void checkASTCodeBody(const Decl *D, AnalysisManager &mgr,
+                        BugReporter &BR) const;
+
+  // Appends a candidate to PossibleMallocOverflows if TheArgument has the
+  // "single variable with at least one multiplication" shape.
+  void CheckMallocArgument(
+    SmallVectorImpl<MallocOverflowCheck> &PossibleMallocOverflows,
+    const Expr *TheArgument, ASTContext &Context) const;
+
+  // Drops candidates that appear guarded in D's body and emits a warning for
+  // each remaining one.
+  void OutputPossibleOverflows(
+    SmallVectorImpl<MallocOverflowCheck> &PossibleMallocOverflows,
+    const Decl *D, BugReporter &BR, AnalysisManager &mgr) const;
+
+};
+} // end anonymous namespace
+
+// Return true for computations which evaluate to zero: e.g., mult by 0.
+static inline bool EvaluatesToZero(APSInt &Val, BinaryOperatorKind op) {
+  return (op == BO_Mul) && (Val == 0);
+}
+
+// Walks TheArgument from the outside in, following the non-constant operand
+// of each *, +, - or << until it reaches a DeclRefExpr or MemberExpr. If at
+// least one multiplication was seen on the way, records a candidate overflow.
+void MallocOverflowSecurityChecker::CheckMallocArgument(
+  SmallVectorImpl<MallocOverflowCheck> &PossibleMallocOverflows,
+  const Expr *TheArgument,
+  ASTContext &Context) const {
+
+  /* Look for a linear combination with a single variable, and at least
+   one multiplication.
+   Reject anything that applies to the variable: an explicit cast,
+   conditional expression, an operation that could reduce the range
+   of the result, or anything too complicated :-).  */
+  const Expr *e = TheArgument;
+  const BinaryOperator * mulop = nullptr;
+  APSInt maxVal;
+
+  for (;;) {
+    // NOTE(review): maxVal is reset on every iteration, including the final
+    // one that breaks out on the variable, so the value saved below appears
+    // to always be 0 - confirm whether the last constant operand was intended.
+    maxVal = 0;
+    e = e->IgnoreParenImpCasts();
+    if (const BinaryOperator *binop = dyn_cast<BinaryOperator>(e)) {
+      BinaryOperatorKind opc = binop->getOpcode();
+      // TODO: ignore multiplications by 1, reject if multiplied by 0.
+      if (mulop == nullptr && opc == BO_Mul)
+        mulop = binop;
+      if (opc != BO_Mul && opc != BO_Add && opc != BO_Sub && opc != BO_Shl)
+        return;
+
+      const Expr *lhs = binop->getLHS();
+      const Expr *rhs = binop->getRHS();
+      // Descend into whichever operand is not a compile-time constant. A
+      // constant on the left is only accepted for the commutative + and *.
+      if (rhs->isEvaluatable(Context)) {
+        e = lhs;
+        maxVal = rhs->EvaluateKnownConstInt(Context);
+        if (EvaluatesToZero(maxVal, opc))
+          return;
+      } else if ((opc == BO_Add || opc == BO_Mul) &&
+                 lhs->isEvaluatable(Context)) {
+        maxVal = lhs->EvaluateKnownConstInt(Context);
+        if (EvaluatesToZero(maxVal, opc))
+          return;
+        e = rhs;
+      } else
+        return;
+    }
+    else if (isa<DeclRefExpr>(e) || isa<MemberExpr>(e))
+      break;
+    else
+      return;
+  }
+
+  if (mulop == nullptr)
+    return;
+
+  //  We've found the right structure of malloc argument, now save
+  // the data so when the body of the function is completely available
+  // we can check for comparisons.
+
+  // TODO: Could push this into the innermost scope where 'e' is
+  // defined, rather than the whole function.
+  PossibleMallocOverflows.push_back(MallocOverflowCheck(mulop, e, maxVal));
+}
+
+namespace {
+// A worker class for OutputPossibleOverflows.
+// Visits a function body and erases from 'toScanFor' every candidate whose
+// variable takes part in a comparison or is assigned a "safe" value.
+class CheckOverflowOps :
+  public EvaluatedExprVisitor<CheckOverflowOps> {
+public:
+  typedef SmallVectorImpl<MallocOverflowCheck> theVecType;
+
+private:
+    // Candidate list owned by the caller; entries are removed in place.
+    theVecType &toScanFor;
+    ASTContext &Context;
+
+    // True if E has integral or enumeration type and evaluates to 0.
+    bool isIntZeroExpr(const Expr *E) const {
+      if (!E->getType()->isIntegralOrEnumerationType())
+        return false;
+      Expr::EvalResult Result;
+      if (E->EvaluateAsInt(Result, Context))
+        return Result.Val.getInt() == 0;
+      return false;
+    }
+
+    static const Decl *getDecl(const DeclRefExpr *DR) { return DR->getDecl(); }
+    static const Decl *getDecl(const MemberExpr *ME) {
+      return ME->getMemberDecl();
+    }
+
+    // Removes every candidate whose variable is a T1 referring to the same
+    // declaration as DR and for which Pred returns true.
+    template <typename T1>
+    void Erase(const T1 *DR,
+               llvm::function_ref<bool(const MallocOverflowCheck &)> Pred) {
+      auto P = [DR, Pred](const MallocOverflowCheck &Check) {
+        if (const auto *CheckDR = dyn_cast<T1>(Check.variable))
+          return getDecl(CheckDR) == getDecl(DR) && Pred(Check);
+        return false;
+      };
+      toScanFor.erase(std::remove_if(toScanFor.begin(), toScanFor.end(), P),
+                      toScanFor.end());
+    }
+
+    // Unconditionally drops the candidates that refer to the variable or
+    // member named by E_p (called for comparison operands).
+    void CheckExpr(const Expr *E_p) {
+      auto PredTrue = [](const MallocOverflowCheck &) { return true; };
+      const Expr *E = E_p->IgnoreParenImpCasts();
+      if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E))
+        Erase<DeclRefExpr>(DR, PredTrue);
+      else if (const auto *ME = dyn_cast<MemberExpr>(E)) {
+        Erase<MemberExpr>(ME, PredTrue);
+      }
+    }
+
+    // Check if the argument to malloc is assigned a value
+    // which cannot cause an overflow.
+    // e.g., malloc (mul * x) and,
+    // case 1: mul = <constant value>
+    // case 2: mul = a/b, where b > x
+    void CheckAssignmentExpr(BinaryOperator *AssignEx) {
+      bool assignKnown = false;
+      bool numeratorKnown = false, denomKnown = false;
+      APSInt denomVal;
+      denomVal = 0;
+
+      // Erase if the multiplicand was assigned a constant value.
+      const Expr *rhs = AssignEx->getRHS();
+      if (rhs->isEvaluatable(Context))
+        assignKnown = true;
+
+      // Discard the report if the multiplicand was assigned a value,
+      // that can never overflow after multiplication. e.g., the assignment
+      // is a division operator and the denominator is > other multiplicand.
+      const Expr *rhse = rhs->IgnoreParenImpCasts();
+      if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(rhse)) {
+        if (BOp->getOpcode() == BO_Div) {
+          const Expr *denom = BOp->getRHS()->IgnoreParenImpCasts();
+          Expr::EvalResult Result;
+          if (denom->EvaluateAsInt(Result, Context)) {
+            denomVal = Result.Val.getInt();
+            denomKnown = true;
+          }
+          const Expr *numerator = BOp->getLHS()->IgnoreParenImpCasts();
+          if (numerator->isEvaluatable(Context))
+            numeratorKnown = true;
+        }
+      }
+      if (!assignKnown && !denomKnown)
+        return;
+      auto denomExtVal = denomVal.getExtValue();
+
+      // Ignore negative denominator.
+      if (denomExtVal < 0)
+        return;
+
+      const Expr *lhs = AssignEx->getLHS();
+      const Expr *E = lhs->IgnoreParenImpCasts();
+
+      // A candidate is dropped if the assigned value is a constant, or if the
+      // numerator is constant and the denominator is at least the candidate's
+      // recorded maxVal.
+      auto pred = [assignKnown, numeratorKnown,
+                   denomExtVal](const MallocOverflowCheck &Check) {
+        return assignKnown ||
+               (numeratorKnown && (denomExtVal >= Check.maxVal.getExtValue()));
+      };
+
+      if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E))
+        Erase<DeclRefExpr>(DR, pred);
+      else if (const auto *ME = dyn_cast<MemberExpr>(E))
+        Erase<MemberExpr>(ME, pred);
+    }
+
+  public:
+    // Handles comparisons and assignments, then continues into the operands.
+    void VisitBinaryOperator(BinaryOperator *E) {
+      if (E->isComparisonOp()) {
+        const Expr * lhs = E->getLHS();
+        const Expr * rhs = E->getRHS();
+        // Ignore comparisons against zero, since they generally don't
+        // protect against an overflow.
+        if (!isIntZeroExpr(lhs) && !isIntZeroExpr(rhs)) {
+          CheckExpr(lhs);
+          CheckExpr(rhs);
+        }
+      }
+      if (E->isAssignmentOp())
+        CheckAssignmentExpr(E);
+      EvaluatedExprVisitor<CheckOverflowOps>::VisitBinaryOperator(E);
+    }
+
+    /* We specifically ignore loop conditions, because they're typically
+     not error checks.  */
+    // Only the loop bodies are visited below.
+    void VisitWhileStmt(WhileStmt *S) {
+      return this->Visit(S->getBody());
+    }
+    void VisitForStmt(ForStmt *S) {
+      return this->Visit(S->getBody());
+    }
+    void VisitDoStmt(DoStmt *S) {
+      return this->Visit(S->getBody());
+    }
+
+    CheckOverflowOps(theVecType &v, ASTContext &ctx)
+    : EvaluatedExprVisitor<CheckOverflowOps>(ctx),
+      toScanFor(v), Context(ctx)
+    { }
+  };
+}
+
+// OutputPossibleOverflows - We've found a possible overflow earlier,
+// now check whether Body might contain a comparison which might be
+// preventing the overflow.
+// This doesn't do flow analysis, range analysis, or points-to analysis; it's
+// just a dumb "is there a comparison" scan.  The aim here is to
+// detect the most blatant cases of overflow and educate the
+// programmer.
+void MallocOverflowSecurityChecker::OutputPossibleOverflows(
+  SmallVectorImpl<MallocOverflowCheck> &PossibleMallocOverflows,
+  const Decl *D, BugReporter &BR, AnalysisManager &mgr) const {
+  // By far the most common case: nothing to check.
+  if (PossibleMallocOverflows.empty())
+    return;
+
+  // Delete any possible overflows which have a comparison.
+  CheckOverflowOps c(PossibleMallocOverflows, BR.getContext());
+  c.Visit(mgr.getAnalysisDeclContext(D)->getBody());
+
+  // Output warnings for all overflows that are left.
+  for (CheckOverflowOps::theVecType::iterator
+       i = PossibleMallocOverflows.begin(),
+       e = PossibleMallocOverflows.end();
+       i != e;
+       ++i) {
+    BR.EmitBasicReport(
+        D, this, "malloc() size overflow", categories::UnixAPI,
+        "the computation of the size of the memory allocation may overflow",
+        PathDiagnosticLocation::createOperatorLoc(i->mulop,
+                                                  BR.getSourceManager()),
+        i->mulop->getSourceRange());
+  }
+}
+
+// Scans every statement in D's CFG for direct calls to 'malloc' or '_MALLOC'
+// with exactly one argument, collects candidate overflows from those
+// arguments, and hands them to OutputPossibleOverflows.
+void MallocOverflowSecurityChecker::checkASTCodeBody(const Decl *D,
+                                             AnalysisManager &mgr,
+                                             BugReporter &BR) const {
+
+  CFG *cfg = mgr.getCFG(D);
+  if (!cfg)
+    return;
+
+  // A list of variables referenced in possibly overflowing malloc operands.
+  SmallVector<MallocOverflowCheck, 2> PossibleMallocOverflows;
+
+  for (CFG::iterator it = cfg->begin(), ei = cfg->end(); it != ei; ++it) {
+    CFGBlock *block = *it;
+    for (CFGBlock::iterator bi = block->begin(), be = block->end();
+         bi != be; ++bi) {
+      if (Optional<CFGStmt> CS = bi->getAs<CFGStmt>()) {
+        if (const CallExpr *TheCall = dyn_cast<CallExpr>(CS->getStmt())) {
+          // Get the callee.
+          const FunctionDecl *FD = TheCall->getDirectCallee();
+
+          if (!FD)
+            continue;
+
+          // Get the identifier of the callee; skip callees without one.
+          IdentifierInfo *FnInfo = FD->getIdentifier();
+          if (!FnInfo)
+            continue;
+
+          if (FnInfo->isStr ("malloc") || FnInfo->isStr ("_MALLOC")) {
+            if (TheCall->getNumArgs() == 1)
+              CheckMallocArgument(PossibleMallocOverflows, TheCall->getArg(0),
+                                  mgr.getASTContext());
+          }
+        }
+      }
+    }
+  }
+
+  OutputPossibleOverflows(PossibleMallocOverflows, D, BR, mgr);
+}
+
+void ento::registerMallocOverflowSecurityChecker(CheckerManager &mgr) {
+  mgr.registerChecker<MallocOverflowSecurityChecker>();
+}
+
+// This checker is registered regardless of language options.
+bool ento::shouldRegisterMallocOverflowSecurityChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp
new file mode 100644
index 000000000000..b5881a9e6533
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp
@@ -0,0 +1,255 @@
+// MallocSizeofChecker.cpp - Check for dubious malloc arguments ---*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Reports inconsistencies between the casted type of the return value of a
+// malloc/calloc/realloc call and the operand of any sizeof expressions
+// contained within its argument(s).
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/TypeLoc.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { + +typedef std::pair<const TypeSourceInfo *, const CallExpr *> TypeCallPair; +typedef llvm::PointerUnion<const Stmt *, const VarDecl *> ExprParent; + +class CastedAllocFinder + : public ConstStmtVisitor<CastedAllocFinder, TypeCallPair> { + IdentifierInfo *II_malloc, *II_calloc, *II_realloc; + +public: + struct CallRecord { + ExprParent CastedExprParent; + const Expr *CastedExpr; + const TypeSourceInfo *ExplicitCastType; + const CallExpr *AllocCall; + + CallRecord(ExprParent CastedExprParent, const Expr *CastedExpr, + const TypeSourceInfo *ExplicitCastType, + const CallExpr *AllocCall) + : CastedExprParent(CastedExprParent), CastedExpr(CastedExpr), + ExplicitCastType(ExplicitCastType), AllocCall(AllocCall) {} + }; + + typedef std::vector<CallRecord> CallVec; + CallVec Calls; + + CastedAllocFinder(ASTContext *Ctx) : + II_malloc(&Ctx->Idents.get("malloc")), + II_calloc(&Ctx->Idents.get("calloc")), + II_realloc(&Ctx->Idents.get("realloc")) {} + + void VisitChild(ExprParent Parent, const Stmt *S) { + TypeCallPair AllocCall = Visit(S); + if (AllocCall.second && AllocCall.second != S) + Calls.push_back(CallRecord(Parent, cast<Expr>(S), AllocCall.first, + AllocCall.second)); + } + + void VisitChildren(const Stmt *S) { + for (const Stmt *Child : S->children()) + if (Child) + VisitChild(S, Child); + } + + TypeCallPair VisitCastExpr(const CastExpr *E) { + return Visit(E->getSubExpr()); 
+ } + + TypeCallPair VisitExplicitCastExpr(const ExplicitCastExpr *E) { + return TypeCallPair(E->getTypeInfoAsWritten(), + Visit(E->getSubExpr()).second); + } + + TypeCallPair VisitParenExpr(const ParenExpr *E) { + return Visit(E->getSubExpr()); + } + + TypeCallPair VisitStmt(const Stmt *S) { + VisitChildren(S); + return TypeCallPair(); + } + + TypeCallPair VisitCallExpr(const CallExpr *E) { + VisitChildren(E); + const FunctionDecl *FD = E->getDirectCallee(); + if (FD) { + IdentifierInfo *II = FD->getIdentifier(); + if (II == II_malloc || II == II_calloc || II == II_realloc) + return TypeCallPair((const TypeSourceInfo *)nullptr, E); + } + return TypeCallPair(); + } + + TypeCallPair VisitDeclStmt(const DeclStmt *S) { + for (const auto *I : S->decls()) + if (const VarDecl *VD = dyn_cast<VarDecl>(I)) + if (const Expr *Init = VD->getInit()) + VisitChild(VD, Init); + return TypeCallPair(); + } +}; + +class SizeofFinder : public ConstStmtVisitor<SizeofFinder> { +public: + std::vector<const UnaryExprOrTypeTraitExpr *> Sizeofs; + + void VisitBinMul(const BinaryOperator *E) { + Visit(E->getLHS()); + Visit(E->getRHS()); + } + + void VisitImplicitCastExpr(const ImplicitCastExpr *E) { + return Visit(E->getSubExpr()); + } + + void VisitParenExpr(const ParenExpr *E) { + return Visit(E->getSubExpr()); + } + + void VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *E) { + if (E->getKind() != UETT_SizeOf) + return; + + Sizeofs.push_back(E); + } +}; + +// Determine if the pointee and sizeof types are compatible. Here +// we ignore constness of pointer types. +static bool typesCompatible(ASTContext &C, QualType A, QualType B) { + // sizeof(void*) is compatible with any other pointer. 
+ if (B->isVoidPointerType() && A->getAs<PointerType>()) + return true; + + while (true) { + A = A.getCanonicalType(); + B = B.getCanonicalType(); + + if (A.getTypePtr() == B.getTypePtr()) + return true; + + if (const PointerType *ptrA = A->getAs<PointerType>()) + if (const PointerType *ptrB = B->getAs<PointerType>()) { + A = ptrA->getPointeeType(); + B = ptrB->getPointeeType(); + continue; + } + + break; + } + + return false; +} + +static bool compatibleWithArrayType(ASTContext &C, QualType PT, QualType T) { + // Ex: 'int a[10][2]' is compatible with 'int', 'int[2]', 'int[10][2]'. + while (const ArrayType *AT = T->getAsArrayTypeUnsafe()) { + QualType ElemType = AT->getElementType(); + if (typesCompatible(C, PT, AT->getElementType())) + return true; + T = ElemType; + } + + return false; +} + +class MallocSizeofChecker : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, AnalysisManager& mgr, + BugReporter &BR) const { + AnalysisDeclContext *ADC = mgr.getAnalysisDeclContext(D); + CastedAllocFinder Finder(&BR.getContext()); + Finder.Visit(D->getBody()); + for (CastedAllocFinder::CallVec::iterator i = Finder.Calls.begin(), + e = Finder.Calls.end(); i != e; ++i) { + QualType CastedType = i->CastedExpr->getType(); + if (!CastedType->isPointerType()) + continue; + QualType PointeeType = CastedType->getPointeeType(); + if (PointeeType->isVoidType()) + continue; + + for (CallExpr::const_arg_iterator ai = i->AllocCall->arg_begin(), + ae = i->AllocCall->arg_end(); ai != ae; ++ai) { + if (!(*ai)->getType()->isIntegralOrUnscopedEnumerationType()) + continue; + + SizeofFinder SFinder; + SFinder.Visit(*ai); + if (SFinder.Sizeofs.size() != 1) + continue; + + QualType SizeofType = SFinder.Sizeofs[0]->getTypeOfArgument(); + + if (typesCompatible(BR.getContext(), PointeeType, SizeofType)) + continue; + + // If the argument to sizeof is an array, the result could be a + // pointer to any array element. 
+ if (compatibleWithArrayType(BR.getContext(), PointeeType, SizeofType)) + continue; + + const TypeSourceInfo *TSI = nullptr; + if (i->CastedExprParent.is<const VarDecl *>()) { + TSI = + i->CastedExprParent.get<const VarDecl *>()->getTypeSourceInfo(); + } else { + TSI = i->ExplicitCastType; + } + + SmallString<64> buf; + llvm::raw_svector_ostream OS(buf); + + OS << "Result of "; + const FunctionDecl *Callee = i->AllocCall->getDirectCallee(); + if (Callee && Callee->getIdentifier()) + OS << '\'' << Callee->getIdentifier()->getName() << '\''; + else + OS << "call"; + OS << " is converted to a pointer of type '" + << PointeeType.getAsString() << "', which is incompatible with " + << "sizeof operand type '" << SizeofType.getAsString() << "'"; + SmallVector<SourceRange, 4> Ranges; + Ranges.push_back(i->AllocCall->getCallee()->getSourceRange()); + Ranges.push_back(SFinder.Sizeofs[0]->getSourceRange()); + if (TSI) + Ranges.push_back(TSI->getTypeLoc().getSourceRange()); + + PathDiagnosticLocation L = + PathDiagnosticLocation::createBegin(i->AllocCall->getCallee(), + BR.getSourceManager(), ADC); + + BR.EmitBasicReport(D, this, "Allocator sizeof operand mismatch", + categories::UnixAPI, OS.str(), L, Ranges); + } + } + } +}; + +} + +void ento::registerMallocSizeofChecker(CheckerManager &mgr) { + mgr.registerChecker<MallocSizeofChecker>(); +} + +bool ento::shouldRegisterMallocSizeofChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp new file mode 100644 index 000000000000..ceea62160545 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp @@ -0,0 +1,93 @@ +// MmapWriteExecChecker.cpp - Check for the prot argument -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker tests the 3rd argument of mmap's calls to check if +// it is writable and executable in the same time. It's somehow +// an optional checker since for example in JIT libraries it is pretty common. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" + +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; +using llvm::APSInt; + +namespace { +class MmapWriteExecChecker : public Checker<check::PreCall> { + CallDescription MmapFn; + CallDescription MprotectFn; + static int ProtWrite; + static int ProtExec; + static int ProtRead; + mutable std::unique_ptr<BugType> BT; +public: + MmapWriteExecChecker() : MmapFn("mmap", 6), MprotectFn("mprotect", 3) {} + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + int ProtExecOv; + int ProtReadOv; +}; +} + +int MmapWriteExecChecker::ProtWrite = 0x02; +int MmapWriteExecChecker::ProtExec = 0x04; +int MmapWriteExecChecker::ProtRead = 0x01; + +void MmapWriteExecChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + if (Call.isCalled(MmapFn) || Call.isCalled(MprotectFn)) { + SVal ProtVal = Call.getArgSVal(2); + Optional<nonloc::ConcreteInt> ProtLoc = ProtVal.getAs<nonloc::ConcreteInt>(); + int64_t Prot = ProtLoc->getValue().getSExtValue(); + if (ProtExecOv != ProtExec) + ProtExec = ProtExecOv; + if (ProtReadOv != ProtRead) + ProtRead = ProtReadOv; + + // Wrong settings + if (ProtRead == ProtExec) + return; + + if ((Prot & (ProtWrite | ProtExec)) 
== (ProtWrite | ProtExec)) { + if (!BT) + BT.reset(new BugType(this, "W^X check fails, Write Exec prot flags set", "Security")); + + ExplodedNode *N = C.generateNonFatalErrorNode(); + if (!N) + return; + + auto Report = std::make_unique<PathSensitiveBugReport>( + *BT, "Both PROT_WRITE and PROT_EXEC flags are set. This can " + "lead to exploitable memory regions, which could be overwritten " + "with malicious code", N); + Report->addRange(Call.getArgSourceRange(2)); + C.emitReport(std::move(Report)); + } + } +} + +void ento::registerMmapWriteExecChecker(CheckerManager &mgr) { + MmapWriteExecChecker *Mwec = + mgr.registerChecker<MmapWriteExecChecker>(); + Mwec->ProtExecOv = + mgr.getAnalyzerOptions() + .getCheckerIntegerOption(Mwec, "MmapProtExec"); + Mwec->ProtReadOv = + mgr.getAnalyzerOptions() + .getCheckerIntegerOption(Mwec, "MmapProtRead"); +} + +bool ento::shouldRegisterMmapWriteExecChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/Move.h b/clang/lib/StaticAnalyzer/Checkers/Move.h new file mode 100644 index 000000000000..10644a8fcb37 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/Move.h @@ -0,0 +1,30 @@ +//=== Move.h - Tracking moved-from objects. ------------------------*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines inter-checker API for the use-after-move checker. It allows +// dependent checkers to figure out if an object is in a moved-from state. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MOVE_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MOVE_H + +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" + +namespace clang { +namespace ento { +namespace move { + +/// Returns true if the object is known to have been recently std::moved. +bool isMovedFrom(ProgramStateRef State, const MemRegion *Region); + +} // namespace move +} // namespace ento +} // namespace clang + +#endif // LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MOVE_H diff --git a/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp new file mode 100644 index 000000000000..1473c05d7e3f --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp @@ -0,0 +1,761 @@ +// MoveChecker.cpp - Check use of moved-from objects. - C++ ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines checker which checks for potential misuses of a moved-from +// object. That means method calls on the object or copying it in moved-from +// state. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ExprCXX.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/ADT/StringSet.h" + +using namespace clang; +using namespace ento; + +namespace { +struct RegionState { +private: + enum Kind { Moved, Reported } K; + RegionState(Kind InK) : K(InK) {} + +public: + bool isReported() const { return K == Reported; } + bool isMoved() const { return K == Moved; } + + static RegionState getReported() { return RegionState(Reported); } + static RegionState getMoved() { return RegionState(Moved); } + + bool operator==(const RegionState &X) const { return K == X.K; } + void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(K); } +}; +} // end of anonymous namespace + +namespace { +class MoveChecker + : public Checker<check::PreCall, check::PostCall, + check::DeadSymbols, check::RegionChanges> { +public: + void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const; + void checkPreCall(const CallEvent &MC, CheckerContext &C) const; + void checkPostCall(const CallEvent &MC, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; + ProgramStateRef + checkRegionChanges(ProgramStateRef State, + const InvalidatedSymbols *Invalidated, + ArrayRef<const MemRegion *> RequestedRegions, + ArrayRef<const MemRegion *> InvalidatedRegions, + const LocationContext *LCtx, const CallEvent *Call) const; + void printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const override; + +private: + enum MisuseKind { MK_FunCall, MK_Copy, 
MK_Move, MK_Dereference }; + enum StdObjectKind { SK_NonStd, SK_Unsafe, SK_Safe, SK_SmartPtr }; + + enum AggressivenessKind { // In any case, don't warn after a reset. + AK_Invalid = -1, + AK_KnownsOnly = 0, // Warn only about known move-unsafe classes. + AK_KnownsAndLocals = 1, // Also warn about all local objects. + AK_All = 2, // Warn on any use-after-move. + AK_NumKinds = AK_All + }; + + static bool misuseCausesCrash(MisuseKind MK) { + return MK == MK_Dereference; + } + + struct ObjectKind { + // Is this a local variable or a local rvalue reference? + bool IsLocal; + // Is this an STL object? If so, of what kind? + StdObjectKind StdKind; + }; + + // STL smart pointers are automatically re-initialized to null when moved + // from. So we can't warn on many methods, but we can warn when it is + // dereferenced, which is UB even if the resulting lvalue never gets read. + const llvm::StringSet<> StdSmartPtrClasses = { + "shared_ptr", + "unique_ptr", + "weak_ptr", + }; + + // Not all of these are entirely move-safe, but they do provide *some* + // guarantees, and it means that somebody is using them after move + // in a valid manner. + // TODO: We can still try to identify *unsafe* use after move, + // like we did with smart pointers. + const llvm::StringSet<> StdSafeClasses = { + "basic_filebuf", + "basic_ios", + "future", + "optional", + "packaged_task" + "promise", + "shared_future", + "shared_lock", + "thread", + "unique_lock", + }; + + // Should we bother tracking the state of the object? + bool shouldBeTracked(ObjectKind OK) const { + // In non-aggressive mode, only warn on use-after-move of local variables + // (or local rvalue references) and of STL objects. The former is possible + // because local variables (or local rvalue references) are not tempting + // their user to re-use the storage. 
The latter is possible because STL + // objects are known to end up in a valid but unspecified state after the + // move and their state-reset methods are also known, which allows us to + // predict precisely when use-after-move is invalid. + // Some STL objects are known to conform to additional contracts after move, + // so they are not tracked. However, smart pointers specifically are tracked + // because we can perform extra checking over them. + // In aggressive mode, warn on any use-after-move because the user has + // intentionally asked us to completely eliminate use-after-move + // in his code. + return (Aggressiveness == AK_All) || + (Aggressiveness >= AK_KnownsAndLocals && OK.IsLocal) || + OK.StdKind == SK_Unsafe || OK.StdKind == SK_SmartPtr; + } + + // Some objects only suffer from some kinds of misuses, but we need to track + // them anyway because we cannot know in advance what misuse will we find. + bool shouldWarnAbout(ObjectKind OK, MisuseKind MK) const { + // Additionally, only warn on smart pointers when they are dereferenced (or + // local or we are aggressive). + return shouldBeTracked(OK) && + ((Aggressiveness == AK_All) || + (Aggressiveness >= AK_KnownsAndLocals && OK.IsLocal) || + OK.StdKind != SK_SmartPtr || MK == MK_Dereference); + } + + // Obtains ObjectKind of an object. Because class declaration cannot always + // be easily obtained from the memory region, it is supplied separately. + ObjectKind classifyObject(const MemRegion *MR, const CXXRecordDecl *RD) const; + + // Classifies the object and dumps a user-friendly description string to + // the stream. 
+ void explainObject(llvm::raw_ostream &OS, const MemRegion *MR, + const CXXRecordDecl *RD, MisuseKind MK) const; + + bool belongsTo(const CXXRecordDecl *RD, const llvm::StringSet<> &Set) const; + + class MovedBugVisitor : public BugReporterVisitor { + public: + MovedBugVisitor(const MoveChecker &Chk, const MemRegion *R, + const CXXRecordDecl *RD, MisuseKind MK) + : Chk(Chk), Region(R), RD(RD), MK(MK), Found(false) {} + + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int X = 0; + ID.AddPointer(&X); + ID.AddPointer(Region); + // Don't add RD because it's, in theory, uniquely determined by + // the region. In practice though, it's not always possible to obtain + // the declaration directly from the region, that's why we store it + // in the first place. + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + + private: + const MoveChecker &Chk; + // The tracked region. + const MemRegion *Region; + // The class of the tracked object. + const CXXRecordDecl *RD; + // How exactly the object was misused. + const MisuseKind MK; + bool Found; + }; + + AggressivenessKind Aggressiveness; + +public: + void setAggressiveness(StringRef Str, CheckerManager &Mgr) { + Aggressiveness = + llvm::StringSwitch<AggressivenessKind>(Str) + .Case("KnownsOnly", AK_KnownsOnly) + .Case("KnownsAndLocals", AK_KnownsAndLocals) + .Case("All", AK_All) + .Default(AK_Invalid); + + if (Aggressiveness == AK_Invalid) + Mgr.reportInvalidCheckerOptionValue(this, "WarnOn", + "either \"KnownsOnly\", \"KnownsAndLocals\" or \"All\" string value"); + }; + +private: + mutable std::unique_ptr<BugType> BT; + + // Check if the given form of potential misuse of a given object + // should be reported. If so, get it reported. The callback from which + // this function was called should immediately return after the call + // because this function adds one or two transitions. 
+ void modelUse(ProgramStateRef State, const MemRegion *Region, + const CXXRecordDecl *RD, MisuseKind MK, + CheckerContext &C) const; + + // Returns the exploded node against which the report was emitted. + // The caller *must* add any further transitions against this node. + ExplodedNode *reportBug(const MemRegion *Region, const CXXRecordDecl *RD, + CheckerContext &C, MisuseKind MK) const; + + bool isInMoveSafeContext(const LocationContext *LC) const; + bool isStateResetMethod(const CXXMethodDecl *MethodDec) const; + bool isMoveSafeMethod(const CXXMethodDecl *MethodDec) const; + const ExplodedNode *getMoveLocation(const ExplodedNode *N, + const MemRegion *Region, + CheckerContext &C) const; +}; +} // end anonymous namespace + +REGISTER_MAP_WITH_PROGRAMSTATE(TrackedRegionMap, const MemRegion *, RegionState) + +// Define the inter-checker API. +namespace clang { +namespace ento { +namespace move { +bool isMovedFrom(ProgramStateRef State, const MemRegion *Region) { + const RegionState *RS = State->get<TrackedRegionMap>(Region); + return RS && (RS->isMoved() || RS->isReported()); +} +} // namespace move +} // namespace ento +} // namespace clang + +// If a region is removed all of the subregions needs to be removed too. 
+static ProgramStateRef removeFromState(ProgramStateRef State, + const MemRegion *Region) { + if (!Region) + return State; + for (auto &E : State->get<TrackedRegionMap>()) { + if (E.first->isSubRegionOf(Region)) + State = State->remove<TrackedRegionMap>(E.first); + } + return State; +} + +static bool isAnyBaseRegionReported(ProgramStateRef State, + const MemRegion *Region) { + for (auto &E : State->get<TrackedRegionMap>()) { + if (Region->isSubRegionOf(E.first) && E.second.isReported()) + return true; + } + return false; +} + +static const MemRegion *unwrapRValueReferenceIndirection(const MemRegion *MR) { + if (const auto *SR = dyn_cast_or_null<SymbolicRegion>(MR)) { + SymbolRef Sym = SR->getSymbol(); + if (Sym->getType()->isRValueReferenceType()) + if (const MemRegion *OriginMR = Sym->getOriginRegion()) + return OriginMR; + } + return MR; +} + +PathDiagnosticPieceRef +MoveChecker::MovedBugVisitor::VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) { + // We need only the last move of the reported object's region. + // The visitor walks the ExplodedGraph backwards. + if (Found) + return nullptr; + ProgramStateRef State = N->getState(); + ProgramStateRef StatePrev = N->getFirstPred()->getState(); + const RegionState *TrackedObject = State->get<TrackedRegionMap>(Region); + const RegionState *TrackedObjectPrev = + StatePrev->get<TrackedRegionMap>(Region); + if (!TrackedObject) + return nullptr; + if (TrackedObjectPrev && TrackedObject) + return nullptr; + + // Retrieve the associated statement. 
+ const Stmt *S = N->getStmtForDiagnostics(); + if (!S) + return nullptr; + Found = true; + + SmallString<128> Str; + llvm::raw_svector_ostream OS(Str); + + ObjectKind OK = Chk.classifyObject(Region, RD); + switch (OK.StdKind) { + case SK_SmartPtr: + if (MK == MK_Dereference) { + OS << "Smart pointer"; + Chk.explainObject(OS, Region, RD, MK); + OS << " is reset to null when moved from"; + break; + } + + // If it's not a dereference, we don't care if it was reset to null + // or that it is even a smart pointer. + LLVM_FALLTHROUGH; + case SK_NonStd: + case SK_Safe: + OS << "Object"; + Chk.explainObject(OS, Region, RD, MK); + OS << " is moved"; + break; + case SK_Unsafe: + OS << "Object"; + Chk.explainObject(OS, Region, RD, MK); + OS << " is left in a valid but unspecified state after move"; + break; + } + + // Generate the extra diagnostic. + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true); +} + +const ExplodedNode *MoveChecker::getMoveLocation(const ExplodedNode *N, + const MemRegion *Region, + CheckerContext &C) const { + // Walk the ExplodedGraph backwards and find the first node that referred to + // the tracked region. + const ExplodedNode *MoveNode = N; + + while (N) { + ProgramStateRef State = N->getState(); + if (!State->get<TrackedRegionMap>(Region)) + break; + MoveNode = N; + N = N->pred_empty() ? nullptr : *(N->pred_begin()); + } + return MoveNode; +} + +void MoveChecker::modelUse(ProgramStateRef State, const MemRegion *Region, + const CXXRecordDecl *RD, MisuseKind MK, + CheckerContext &C) const { + assert(!C.isDifferent() && "No transitions should have been made by now"); + const RegionState *RS = State->get<TrackedRegionMap>(Region); + ObjectKind OK = classifyObject(Region, RD); + + // Just in case: if it's not a smart pointer but it does have operator *, + // we shouldn't call the bug a dereference. 
+ if (MK == MK_Dereference && OK.StdKind != SK_SmartPtr) + MK = MK_FunCall; + + if (!RS || !shouldWarnAbout(OK, MK) + || isInMoveSafeContext(C.getLocationContext())) { + // Finalize changes made by the caller. + C.addTransition(State); + return; + } + + // Don't report it in case if any base region is already reported. + // But still generate a sink in case of UB. + // And still finalize changes made by the caller. + if (isAnyBaseRegionReported(State, Region)) { + if (misuseCausesCrash(MK)) { + C.generateSink(State, C.getPredecessor()); + } else { + C.addTransition(State); + } + return; + } + + ExplodedNode *N = reportBug(Region, RD, C, MK); + + // If the program has already crashed on this path, don't bother. + if (N->isSink()) + return; + + State = State->set<TrackedRegionMap>(Region, RegionState::getReported()); + C.addTransition(State, N); +} + +ExplodedNode *MoveChecker::reportBug(const MemRegion *Region, + const CXXRecordDecl *RD, CheckerContext &C, + MisuseKind MK) const { + if (ExplodedNode *N = misuseCausesCrash(MK) ? C.generateErrorNode() + : C.generateNonFatalErrorNode()) { + + if (!BT) + BT.reset(new BugType(this, "Use-after-move", + "C++ move semantics")); + + // Uniqueing report to the same object. + PathDiagnosticLocation LocUsedForUniqueing; + const ExplodedNode *MoveNode = getMoveLocation(N, Region, C); + + if (const Stmt *MoveStmt = MoveNode->getStmtForDiagnostics()) + LocUsedForUniqueing = PathDiagnosticLocation::createBegin( + MoveStmt, C.getSourceManager(), MoveNode->getLocationContext()); + + // Creating the error message. 
+ llvm::SmallString<128> Str; + llvm::raw_svector_ostream OS(Str); + switch(MK) { + case MK_FunCall: + OS << "Method called on moved-from object"; + explainObject(OS, Region, RD, MK); + break; + case MK_Copy: + OS << "Moved-from object"; + explainObject(OS, Region, RD, MK); + OS << " is copied"; + break; + case MK_Move: + OS << "Moved-from object"; + explainObject(OS, Region, RD, MK); + OS << " is moved"; + break; + case MK_Dereference: + OS << "Dereference of null smart pointer"; + explainObject(OS, Region, RD, MK); + break; + } + + auto R = std::make_unique<PathSensitiveBugReport>( + *BT, OS.str(), N, LocUsedForUniqueing, + MoveNode->getLocationContext()->getDecl()); + R->addVisitor(std::make_unique<MovedBugVisitor>(*this, Region, RD, MK)); + C.emitReport(std::move(R)); + return N; + } + return nullptr; +} + +void MoveChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + const auto *AFC = dyn_cast<AnyFunctionCall>(&Call); + if (!AFC) + return; + + ProgramStateRef State = C.getState(); + const auto MethodDecl = dyn_cast_or_null<CXXMethodDecl>(AFC->getDecl()); + if (!MethodDecl) + return; + + // Check if an object became moved-from. + // Object can become moved from after a call to move assignment operator or + // move constructor . + const auto *ConstructorDecl = dyn_cast<CXXConstructorDecl>(MethodDecl); + if (ConstructorDecl && !ConstructorDecl->isMoveConstructor()) + return; + + if (!ConstructorDecl && !MethodDecl->isMoveAssignmentOperator()) + return; + + const auto ArgRegion = AFC->getArgSVal(0).getAsRegion(); + if (!ArgRegion) + return; + + // Skip moving the object to itself. 
+ const auto *CC = dyn_cast_or_null<CXXConstructorCall>(&Call); + if (CC && CC->getCXXThisVal().getAsRegion() == ArgRegion) + return; + + if (const auto *IC = dyn_cast<CXXInstanceCall>(AFC)) + if (IC->getCXXThisVal().getAsRegion() == ArgRegion) + return; + + const MemRegion *BaseRegion = ArgRegion->getBaseRegion(); + // Skip temp objects because of their short lifetime. + if (BaseRegion->getAs<CXXTempObjectRegion>() || + AFC->getArgExpr(0)->isRValue()) + return; + // If it has already been reported do not need to modify the state. + + if (State->get<TrackedRegionMap>(ArgRegion)) + return; + + const CXXRecordDecl *RD = MethodDecl->getParent(); + ObjectKind OK = classifyObject(ArgRegion, RD); + if (shouldBeTracked(OK)) { + // Mark object as moved-from. + State = State->set<TrackedRegionMap>(ArgRegion, RegionState::getMoved()); + C.addTransition(State); + return; + } + assert(!C.isDifferent() && "Should not have made transitions on this path!"); +} + +bool MoveChecker::isMoveSafeMethod(const CXXMethodDecl *MethodDec) const { + // We abandon the cases where bool/void/void* conversion happens. + if (const auto *ConversionDec = + dyn_cast_or_null<CXXConversionDecl>(MethodDec)) { + const Type *Tp = ConversionDec->getConversionType().getTypePtrOrNull(); + if (!Tp) + return false; + if (Tp->isBooleanType() || Tp->isVoidType() || Tp->isVoidPointerType()) + return true; + } + // Function call `empty` can be skipped. 
+ return (MethodDec && MethodDec->getDeclName().isIdentifier() && + (MethodDec->getName().lower() == "empty" || + MethodDec->getName().lower() == "isempty")); +} + +bool MoveChecker::isStateResetMethod(const CXXMethodDecl *MethodDec) const { + if (!MethodDec) + return false; + if (MethodDec->hasAttr<ReinitializesAttr>()) + return true; + if (MethodDec->getDeclName().isIdentifier()) { + std::string MethodName = MethodDec->getName().lower(); + // TODO: Some of these methods (eg., resize) are not always resetting + // the state, so we should consider looking at the arguments. + if (MethodName == "assign" || MethodName == "clear" || + MethodName == "destroy" || MethodName == "reset" || + MethodName == "resize" || MethodName == "shrink") + return true; + } + return false; +} + +// Don't report an error inside a move related operation. +// We assume that the programmer knows what she does. +bool MoveChecker::isInMoveSafeContext(const LocationContext *LC) const { + do { + const auto *CtxDec = LC->getDecl(); + auto *CtorDec = dyn_cast_or_null<CXXConstructorDecl>(CtxDec); + auto *DtorDec = dyn_cast_or_null<CXXDestructorDecl>(CtxDec); + auto *MethodDec = dyn_cast_or_null<CXXMethodDecl>(CtxDec); + if (DtorDec || (CtorDec && CtorDec->isCopyOrMoveConstructor()) || + (MethodDec && MethodDec->isOverloadedOperator() && + MethodDec->getOverloadedOperator() == OO_Equal) || + isStateResetMethod(MethodDec) || isMoveSafeMethod(MethodDec)) + return true; + } while ((LC = LC->getParent())); + return false; +} + +bool MoveChecker::belongsTo(const CXXRecordDecl *RD, + const llvm::StringSet<> &Set) const { + const IdentifierInfo *II = RD->getIdentifier(); + return II && Set.count(II->getName()); +} + +MoveChecker::ObjectKind +MoveChecker::classifyObject(const MemRegion *MR, + const CXXRecordDecl *RD) const { + // Local variables and local rvalue references are classified as "Local". 
+ // For the purposes of this checker, we classify move-safe STL types + // as not-"STL" types, because that's how the checker treats them. + MR = unwrapRValueReferenceIndirection(MR); + bool IsLocal = + MR && isa<VarRegion>(MR) && isa<StackSpaceRegion>(MR->getMemorySpace()); + + if (!RD || !RD->getDeclContext()->isStdNamespace()) + return { IsLocal, SK_NonStd }; + + if (belongsTo(RD, StdSmartPtrClasses)) + return { IsLocal, SK_SmartPtr }; + + if (belongsTo(RD, StdSafeClasses)) + return { IsLocal, SK_Safe }; + + return { IsLocal, SK_Unsafe }; +} + +void MoveChecker::explainObject(llvm::raw_ostream &OS, const MemRegion *MR, + const CXXRecordDecl *RD, MisuseKind MK) const { + // We may need a leading space every time we actually explain anything, + // and we never know if we are to explain anything until we try. + if (const auto DR = + dyn_cast_or_null<DeclRegion>(unwrapRValueReferenceIndirection(MR))) { + const auto *RegionDecl = cast<NamedDecl>(DR->getDecl()); + OS << " '" << RegionDecl->getNameAsString() << "'"; + } + + ObjectKind OK = classifyObject(MR, RD); + switch (OK.StdKind) { + case SK_NonStd: + case SK_Safe: + break; + case SK_SmartPtr: + if (MK != MK_Dereference) + break; + + // We only care about the type if it's a dereference. + LLVM_FALLTHROUGH; + case SK_Unsafe: + OS << " of type '" << RD->getQualifiedNameAsString() << "'"; + break; + }; +} + +void MoveChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // Remove the MemRegions from the map on which a ctor/dtor call or assignment + // happened. + + // Checking constructor calls. + if (const auto *CC = dyn_cast<CXXConstructorCall>(&Call)) { + State = removeFromState(State, CC->getCXXThisVal().getAsRegion()); + auto CtorDec = CC->getDecl(); + // Check for copying a moved-from object and report the bug. 
+ if (CtorDec && CtorDec->isCopyOrMoveConstructor()) { + const MemRegion *ArgRegion = CC->getArgSVal(0).getAsRegion(); + const CXXRecordDecl *RD = CtorDec->getParent(); + MisuseKind MK = CtorDec->isMoveConstructor() ? MK_Move : MK_Copy; + modelUse(State, ArgRegion, RD, MK, C); + return; + } + } + + const auto IC = dyn_cast<CXXInstanceCall>(&Call); + if (!IC) + return; + + // Calling a destructor on a moved object is fine. + if (isa<CXXDestructorCall>(IC)) + return; + + const MemRegion *ThisRegion = IC->getCXXThisVal().getAsRegion(); + if (!ThisRegion) + return; + + // The remaining part is check only for method call on a moved-from object. + const auto MethodDecl = dyn_cast_or_null<CXXMethodDecl>(IC->getDecl()); + if (!MethodDecl) + return; + + // We want to investigate the whole object, not only sub-object of a parent + // class in which the encountered method defined. + ThisRegion = ThisRegion->getMostDerivedObjectRegion(); + + if (isStateResetMethod(MethodDecl)) { + State = removeFromState(State, ThisRegion); + C.addTransition(State); + return; + } + + if (isMoveSafeMethod(MethodDecl)) + return; + + // Store class declaration as well, for bug reporting purposes. + const CXXRecordDecl *RD = MethodDecl->getParent(); + + if (MethodDecl->isOverloadedOperator()) { + OverloadedOperatorKind OOK = MethodDecl->getOverloadedOperator(); + + if (OOK == OO_Equal) { + // Remove the tracked object for every assignment operator, but report bug + // only for move or copy assignment's argument. + State = removeFromState(State, ThisRegion); + + if (MethodDecl->isCopyAssignmentOperator() || + MethodDecl->isMoveAssignmentOperator()) { + const MemRegion *ArgRegion = IC->getArgSVal(0).getAsRegion(); + MisuseKind MK = + MethodDecl->isMoveAssignmentOperator() ? 
MK_Move : MK_Copy; + modelUse(State, ArgRegion, RD, MK, C); + return; + } + C.addTransition(State); + return; + } + + if (OOK == OO_Star || OOK == OO_Arrow) { + modelUse(State, ThisRegion, RD, MK_Dereference, C); + return; + } + } + + modelUse(State, ThisRegion, RD, MK_FunCall, C); +} + +void MoveChecker::checkDeadSymbols(SymbolReaper &SymReaper, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + TrackedRegionMapTy TrackedRegions = State->get<TrackedRegionMap>(); + for (TrackedRegionMapTy::value_type E : TrackedRegions) { + const MemRegion *Region = E.first; + bool IsRegDead = !SymReaper.isLiveRegion(Region); + + // Remove the dead regions from the region map. + if (IsRegDead) { + State = State->remove<TrackedRegionMap>(Region); + } + } + C.addTransition(State); +} + +ProgramStateRef MoveChecker::checkRegionChanges( + ProgramStateRef State, const InvalidatedSymbols *Invalidated, + ArrayRef<const MemRegion *> RequestedRegions, + ArrayRef<const MemRegion *> InvalidatedRegions, + const LocationContext *LCtx, const CallEvent *Call) const { + if (Call) { + // Relax invalidation upon function calls: only invalidate parameters + // that are passed directly via non-const pointers or non-const references + // or rvalue references. + // In case of an InstanceCall don't invalidate the this-region since + // it is fully handled in checkPreCall and checkPostCall. + const MemRegion *ThisRegion = nullptr; + if (const auto *IC = dyn_cast<CXXInstanceCall>(Call)) + ThisRegion = IC->getCXXThisVal().getAsRegion(); + + // Requested ("explicit") regions are the regions passed into the call + // directly, but not all of them end up being invalidated. + // But when they do, they appear in the InvalidatedRegions array as well. 
+ for (const auto *Region : RequestedRegions) { + if (ThisRegion != Region) { + if (llvm::find(InvalidatedRegions, Region) != + std::end(InvalidatedRegions)) { + State = removeFromState(State, Region); + } + } + } + } else { + // For invalidations that aren't caused by calls, assume nothing. In + // particular, direct write into an object's field invalidates the status. + for (const auto *Region : InvalidatedRegions) + State = removeFromState(State, Region->getBaseRegion()); + } + + return State; +} + +void MoveChecker::printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const { + + TrackedRegionMapTy RS = State->get<TrackedRegionMap>(); + + if (!RS.isEmpty()) { + Out << Sep << "Moved-from objects :" << NL; + for (auto I: RS) { + I.first->dumpToStream(Out); + if (I.second.isMoved()) + Out << ": moved"; + else + Out << ": moved and reported"; + Out << NL; + } + } +} +void ento::registerMoveChecker(CheckerManager &mgr) { + MoveChecker *chk = mgr.registerChecker<MoveChecker>(); + chk->setAggressiveness( + mgr.getAnalyzerOptions().getCheckerStringOption(chk, "WarnOn"), mgr); +} + +bool ento::shouldRegisterMoveChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp new file mode 100644 index 000000000000..41b7fe5e43b6 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp @@ -0,0 +1,85 @@ +//=- NSAutoreleasePoolChecker.cpp --------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a NSAutoreleasePoolChecker, a small checker that warns +// about subpar uses of NSAutoreleasePool. Note that while the check itself +// (in its current form) could be written as a flow-insensitive check, in +// can be potentially enhanced in the future with flow-sensitive information. +// It is also a good example of the CheckerVisitor interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" + +using namespace clang; +using namespace ento; + +namespace { +class NSAutoreleasePoolChecker + : public Checker<check::PreObjCMessage> { + mutable std::unique_ptr<BugType> BT; + mutable Selector releaseS; + +public: + void checkPreObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const; +}; + +} // end anonymous namespace + +void NSAutoreleasePoolChecker::checkPreObjCMessage(const ObjCMethodCall &msg, + CheckerContext &C) const { + if (!msg.isInstanceMessage()) + return; + + const ObjCInterfaceDecl *OD = msg.getReceiverInterface(); + if (!OD) + return; + if (!OD->getIdentifier()->isStr("NSAutoreleasePool")) + return; + + if (releaseS.isNull()) + releaseS = GetNullarySelector("release", C.getASTContext()); + // Sending 'release' message? 
+ if (msg.getSelector() != releaseS) + return; + + if (!BT) + BT.reset(new BugType(this, "Use -drain instead of -release", + "API Upgrade (Apple)")); + + ExplodedNode *N = C.generateNonFatalErrorNode(); + if (!N) { + assert(0); + return; + } + + auto Report = std::make_unique<PathSensitiveBugReport>( + *BT, + "Use -drain instead of -release when using NSAutoreleasePool and " + "garbage collection", + N); + Report->addRange(msg.getSourceRange()); + C.emitReport(std::move(Report)); +} + +void ento::registerNSAutoreleasePoolChecker(CheckerManager &mgr) { + mgr.registerChecker<NSAutoreleasePoolChecker>(); +} + +bool ento::shouldRegisterNSAutoreleasePoolChecker(const LangOptions &LO) { + return LO.getGC() != LangOptions::NonGC; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp new file mode 100644 index 000000000000..85370bf133cd --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp @@ -0,0 +1,337 @@ +//=- NSErrorChecker.cpp - Coding conventions for uses of NSError -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a CheckNSError, a flow-insenstive check +// that determines if an Objective-C class interface correctly returns +// a non-void return type. +// +// File under feature request PR 2600. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +static bool IsNSError(QualType T, IdentifierInfo *II); +static bool IsCFError(QualType T, IdentifierInfo *II); + +//===----------------------------------------------------------------------===// +// NSErrorMethodChecker +//===----------------------------------------------------------------------===// + +namespace { +class NSErrorMethodChecker + : public Checker< check::ASTDecl<ObjCMethodDecl> > { + mutable IdentifierInfo *II; + +public: + NSErrorMethodChecker() : II(nullptr) {} + + void checkASTDecl(const ObjCMethodDecl *D, + AnalysisManager &mgr, BugReporter &BR) const; +}; +} + +void NSErrorMethodChecker::checkASTDecl(const ObjCMethodDecl *D, + AnalysisManager &mgr, + BugReporter &BR) const { + if (!D->isThisDeclarationADefinition()) + return; + if (!D->getReturnType()->isVoidType()) + return; + + if (!II) + II = &D->getASTContext().Idents.get("NSError"); + + bool hasNSError = false; + for (const auto *I : D->parameters()) { + if (IsNSError(I->getType(), II)) { + hasNSError = true; + break; + } + } + + if (hasNSError) { + const char *err = "Method accepting NSError** " + "should have a non-void return value to indicate whether or not an " + "error occurred"; + PathDiagnosticLocation L = + PathDiagnosticLocation::create(D, BR.getSourceManager()); + BR.EmitBasicReport(D, this, "Bad return type when passing NSError**", + 
"Coding conventions (Apple)", err, L); + } +} + +//===----------------------------------------------------------------------===// +// CFErrorFunctionChecker +//===----------------------------------------------------------------------===// + +namespace { +class CFErrorFunctionChecker + : public Checker< check::ASTDecl<FunctionDecl> > { + mutable IdentifierInfo *II; + +public: + CFErrorFunctionChecker() : II(nullptr) {} + + void checkASTDecl(const FunctionDecl *D, + AnalysisManager &mgr, BugReporter &BR) const; +}; +} + +void CFErrorFunctionChecker::checkASTDecl(const FunctionDecl *D, + AnalysisManager &mgr, + BugReporter &BR) const { + if (!D->doesThisDeclarationHaveABody()) + return; + if (!D->getReturnType()->isVoidType()) + return; + + if (!II) + II = &D->getASTContext().Idents.get("CFErrorRef"); + + bool hasCFError = false; + for (auto I : D->parameters()) { + if (IsCFError(I->getType(), II)) { + hasCFError = true; + break; + } + } + + if (hasCFError) { + const char *err = "Function accepting CFErrorRef* " + "should have a non-void return value to indicate whether or not an " + "error occurred"; + PathDiagnosticLocation L = + PathDiagnosticLocation::create(D, BR.getSourceManager()); + BR.EmitBasicReport(D, this, "Bad return type when passing CFErrorRef*", + "Coding conventions (Apple)", err, L); + } +} + +//===----------------------------------------------------------------------===// +// NSOrCFErrorDerefChecker +//===----------------------------------------------------------------------===// + +namespace { + +class NSErrorDerefBug : public BugType { +public: + NSErrorDerefBug(const CheckerBase *Checker) + : BugType(Checker, "NSError** null dereference", + "Coding conventions (Apple)") {} +}; + +class CFErrorDerefBug : public BugType { +public: + CFErrorDerefBug(const CheckerBase *Checker) + : BugType(Checker, "CFErrorRef* null dereference", + "Coding conventions (Apple)") {} +}; + +} + +namespace { +class NSOrCFErrorDerefChecker + : public Checker< 
check::Location, + check::Event<ImplicitNullDerefEvent> > { + mutable IdentifierInfo *NSErrorII, *CFErrorII; + mutable std::unique_ptr<NSErrorDerefBug> NSBT; + mutable std::unique_ptr<CFErrorDerefBug> CFBT; +public: + bool ShouldCheckNSError, ShouldCheckCFError; + NSOrCFErrorDerefChecker() : NSErrorII(nullptr), CFErrorII(nullptr), + ShouldCheckNSError(0), ShouldCheckCFError(0) { } + + void checkLocation(SVal loc, bool isLoad, const Stmt *S, + CheckerContext &C) const; + void checkEvent(ImplicitNullDerefEvent event) const; +}; +} + +typedef llvm::ImmutableMap<SymbolRef, unsigned> ErrorOutFlag; +REGISTER_TRAIT_WITH_PROGRAMSTATE(NSErrorOut, ErrorOutFlag) +REGISTER_TRAIT_WITH_PROGRAMSTATE(CFErrorOut, ErrorOutFlag) + +template <typename T> +static bool hasFlag(SVal val, ProgramStateRef state) { + if (SymbolRef sym = val.getAsSymbol()) + if (const unsigned *attachedFlags = state->get<T>(sym)) + return *attachedFlags; + return false; +} + +template <typename T> +static void setFlag(ProgramStateRef state, SVal val, CheckerContext &C) { + // We tag the symbol that the SVal wraps. 
+ if (SymbolRef sym = val.getAsSymbol()) + C.addTransition(state->set<T>(sym, true)); +} + +static QualType parameterTypeFromSVal(SVal val, CheckerContext &C) { + const StackFrameContext * SFC = C.getStackFrame(); + if (Optional<loc::MemRegionVal> X = val.getAs<loc::MemRegionVal>()) { + const MemRegion* R = X->getRegion(); + if (const VarRegion *VR = R->getAs<VarRegion>()) + if (const StackArgumentsSpaceRegion * + stackReg = dyn_cast<StackArgumentsSpaceRegion>(VR->getMemorySpace())) + if (stackReg->getStackFrame() == SFC) + return VR->getValueType(); + } + + return QualType(); +} + +void NSOrCFErrorDerefChecker::checkLocation(SVal loc, bool isLoad, + const Stmt *S, + CheckerContext &C) const { + if (!isLoad) + return; + if (loc.isUndef() || !loc.getAs<Loc>()) + return; + + ASTContext &Ctx = C.getASTContext(); + ProgramStateRef state = C.getState(); + + // If we are loading from NSError**/CFErrorRef* parameter, mark the resulting + // SVal so that we can later check it when handling the + // ImplicitNullDerefEvent event. + // FIXME: Cumbersome! Maybe add hook at construction of SVals at start of + // function ? 
+ + QualType parmT = parameterTypeFromSVal(loc, C); + if (parmT.isNull()) + return; + + if (!NSErrorII) + NSErrorII = &Ctx.Idents.get("NSError"); + if (!CFErrorII) + CFErrorII = &Ctx.Idents.get("CFErrorRef"); + + if (ShouldCheckNSError && IsNSError(parmT, NSErrorII)) { + setFlag<NSErrorOut>(state, state->getSVal(loc.castAs<Loc>()), C); + return; + } + + if (ShouldCheckCFError && IsCFError(parmT, CFErrorII)) { + setFlag<CFErrorOut>(state, state->getSVal(loc.castAs<Loc>()), C); + return; + } +} + +void NSOrCFErrorDerefChecker::checkEvent(ImplicitNullDerefEvent event) const { + if (event.IsLoad) + return; + + SVal loc = event.Location; + ProgramStateRef state = event.SinkNode->getState(); + BugReporter &BR = *event.BR; + + bool isNSError = hasFlag<NSErrorOut>(loc, state); + bool isCFError = false; + if (!isNSError) + isCFError = hasFlag<CFErrorOut>(loc, state); + + if (!(isNSError || isCFError)) + return; + + // Storing to possible null NSError/CFErrorRef out parameter. + SmallString<128> Buf; + llvm::raw_svector_ostream os(Buf); + + os << "Potential null dereference. According to coding standards "; + os << (isNSError + ? "in 'Creating and Returning NSError Objects' the parameter" + : "documented in CoreFoundation/CFError.h the parameter"); + + os << " may be null"; + + BugType *bug = nullptr; + if (isNSError) { + if (!NSBT) + NSBT.reset(new NSErrorDerefBug(this)); + bug = NSBT.get(); + } + else { + if (!CFBT) + CFBT.reset(new CFErrorDerefBug(this)); + bug = CFBT.get(); + } + BR.emitReport( + std::make_unique<PathSensitiveBugReport>(*bug, os.str(), event.SinkNode)); +} + +static bool IsNSError(QualType T, IdentifierInfo *II) { + + const PointerType* PPT = T->getAs<PointerType>(); + if (!PPT) + return false; + + const ObjCObjectPointerType* PT = + PPT->getPointeeType()->getAs<ObjCObjectPointerType>(); + + if (!PT) + return false; + + const ObjCInterfaceDecl *ID = PT->getInterfaceDecl(); + + // FIXME: Can ID ever be NULL? 
+ if (ID) + return II == ID->getIdentifier(); + + return false; +} + +static bool IsCFError(QualType T, IdentifierInfo *II) { + const PointerType* PPT = T->getAs<PointerType>(); + if (!PPT) return false; + + const TypedefType* TT = PPT->getPointeeType()->getAs<TypedefType>(); + if (!TT) return false; + + return TT->getDecl()->getIdentifier() == II; +} + +void ento::registerNSOrCFErrorDerefChecker(CheckerManager &mgr) { + mgr.registerChecker<NSOrCFErrorDerefChecker>(); +} + +bool ento::shouldRegisterNSOrCFErrorDerefChecker(const LangOptions &LO) { + return true; +} + +void ento::registerNSErrorChecker(CheckerManager &mgr) { + mgr.registerChecker<NSErrorMethodChecker>(); + NSOrCFErrorDerefChecker *checker = mgr.getChecker<NSOrCFErrorDerefChecker>(); + checker->ShouldCheckNSError = true; +} + +bool ento::shouldRegisterNSErrorChecker(const LangOptions &LO) { + return true; +} + +void ento::registerCFErrorChecker(CheckerManager &mgr) { + mgr.registerChecker<CFErrorFunctionChecker>(); + NSOrCFErrorDerefChecker *checker = mgr.getChecker<NSOrCFErrorDerefChecker>(); + checker->ShouldCheckCFError = true; +} + +bool ento::shouldRegisterCFErrorChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/NoReturnFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NoReturnFunctionChecker.cpp new file mode 100644 index 000000000000..fc34255bf6c9 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/NoReturnFunctionChecker.cpp @@ -0,0 +1,148 @@ +//=== NoReturnFunctionChecker.cpp -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines NoReturnFunctionChecker, which evaluates functions that do not +// return to the caller. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/Attr.h" +#include "clang/Analysis/SelectorExtras.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/ADT/StringSwitch.h" +#include <cstdarg> + +using namespace clang; +using namespace ento; + +namespace { + +class NoReturnFunctionChecker : public Checker< check::PostCall, + check::PostObjCMessage > { + mutable Selector HandleFailureInFunctionSel; + mutable Selector HandleFailureInMethodSel; +public: + void checkPostCall(const CallEvent &CE, CheckerContext &C) const; + void checkPostObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const; +}; + +} + +void NoReturnFunctionChecker::checkPostCall(const CallEvent &CE, + CheckerContext &C) const { + bool BuildSinks = false; + + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CE.getDecl())) + BuildSinks = FD->hasAttr<AnalyzerNoReturnAttr>() || FD->isNoReturn(); + + const Expr *Callee = CE.getOriginExpr(); + if (!BuildSinks && Callee) + BuildSinks = getFunctionExtInfo(Callee->getType()).getNoReturn(); + + if (!BuildSinks && CE.isGlobalCFunction()) { + if (const IdentifierInfo *II = CE.getCalleeIdentifier()) { + // HACK: Some functions are not marked noreturn, and don't return. + // Here are a few hardwired ones. If this takes too long, we can + // potentially cache these results. + BuildSinks + = llvm::StringSwitch<bool>(StringRef(II->getName())) + .Case("exit", true) + .Case("panic", true) + .Case("error", true) + .Case("Assert", true) + // FIXME: This is just a wrapper around throwing an exception. + // Eventually inter-procedural analysis should handle this easily. 
+ .Case("ziperr", true) + .Case("assfail", true) + .Case("db_error", true) + .Case("__assert", true) + .Case("__assert2", true) + // For the purpose of static analysis, we do not care that + // this MSVC function will return if the user decides to continue. + .Case("_wassert", true) + .Case("__assert_rtn", true) + .Case("__assert_fail", true) + .Case("dtrace_assfail", true) + .Case("yy_fatal_error", true) + .Case("_XCAssertionFailureHandler", true) + .Case("_DTAssertionFailureHandler", true) + .Case("_TSAssertionFailureHandler", true) + .Default(false); + } + } + + if (BuildSinks) + C.generateSink(C.getState(), C.getPredecessor()); +} + +void NoReturnFunctionChecker::checkPostObjCMessage(const ObjCMethodCall &Msg, + CheckerContext &C) const { + // Check if the method is annotated with analyzer_noreturn. + if (const ObjCMethodDecl *MD = Msg.getDecl()) { + MD = MD->getCanonicalDecl(); + if (MD->hasAttr<AnalyzerNoReturnAttr>()) { + C.generateSink(C.getState(), C.getPredecessor()); + return; + } + } + + // HACK: This entire check is to handle two messages in the Cocoa frameworks: + // -[NSAssertionHandler + // handleFailureInMethod:object:file:lineNumber:description:] + // -[NSAssertionHandler + // handleFailureInFunction:file:lineNumber:description:] + // Eventually these should be annotated with __attribute__((noreturn)). + // Because ObjC messages use dynamic dispatch, it is not generally safe to + // assume certain methods can't return. In cases where it is definitely valid, + // see if you can mark the methods noreturn or analyzer_noreturn instead of + // adding more explicit checks to this method. 
+ + if (!Msg.isInstanceMessage()) + return; + + const ObjCInterfaceDecl *Receiver = Msg.getReceiverInterface(); + if (!Receiver) + return; + if (!Receiver->getIdentifier()->isStr("NSAssertionHandler")) + return; + + Selector Sel = Msg.getSelector(); + switch (Sel.getNumArgs()) { + default: + return; + case 4: + lazyInitKeywordSelector(HandleFailureInFunctionSel, C.getASTContext(), + "handleFailureInFunction", "file", "lineNumber", + "description"); + if (Sel != HandleFailureInFunctionSel) + return; + break; + case 5: + lazyInitKeywordSelector(HandleFailureInMethodSel, C.getASTContext(), + "handleFailureInMethod", "object", "file", + "lineNumber", "description"); + if (Sel != HandleFailureInMethodSel) + return; + break; + } + + // If we got here, it's one of the messages we care about. + C.generateSink(C.getState(), C.getPredecessor()); +} + +void ento::registerNoReturnFunctionChecker(CheckerManager &mgr) { + mgr.registerChecker<NoReturnFunctionChecker>(); +} + +bool ento::shouldRegisterNoReturnFunctionChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp new file mode 100644 index 000000000000..6ffc89745365 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp @@ -0,0 +1,231 @@ +//===--- NonNullParamChecker.cpp - Undefined arguments checker -*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines NonNullParamChecker, which checks for arguments expected not to +// be null due to: +// - the corresponding parameters being declared to have nonnull attribute +// - the corresponding parameters being references; since the call would form +// a reference to a null pointer +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/Attr.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class NonNullParamChecker + : public Checker< check::PreCall, EventDispatcher<ImplicitNullDerefEvent> > { + mutable std::unique_ptr<BugType> BTAttrNonNull; + mutable std::unique_ptr<BugType> BTNullRefArg; + +public: + + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + + std::unique_ptr<PathSensitiveBugReport> + genReportNullAttrNonNull(const ExplodedNode *ErrorN, + const Expr *ArgE, + unsigned IdxOfArg) const; + std::unique_ptr<PathSensitiveBugReport> + genReportReferenceToNullPointer(const ExplodedNode *ErrorN, + const Expr *ArgE) const; +}; +} // end anonymous namespace + +/// \return Bitvector marking non-null attributes. 
+static llvm::SmallBitVector getNonNullAttrs(const CallEvent &Call) { + const Decl *FD = Call.getDecl(); + unsigned NumArgs = Call.getNumArgs(); + llvm::SmallBitVector AttrNonNull(NumArgs); + for (const auto *NonNull : FD->specific_attrs<NonNullAttr>()) { + if (!NonNull->args_size()) { + AttrNonNull.set(0, NumArgs); + break; + } + for (const ParamIdx &Idx : NonNull->args()) { + unsigned IdxAST = Idx.getASTIndex(); + if (IdxAST >= NumArgs) + continue; + AttrNonNull.set(IdxAST); + } + } + return AttrNonNull; +} + +void NonNullParamChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + if (!Call.getDecl()) + return; + + llvm::SmallBitVector AttrNonNull = getNonNullAttrs(Call); + unsigned NumArgs = Call.getNumArgs(); + + ProgramStateRef state = C.getState(); + ArrayRef<ParmVarDecl*> parms = Call.parameters(); + + for (unsigned idx = 0; idx < NumArgs; ++idx) { + // For vararg functions, a corresponding parameter decl may not exist. + bool HasParam = idx < parms.size(); + + // Check if the parameter is a reference. We want to report when reference + // to a null pointer is passed as a parameter. + bool haveRefTypeParam = + HasParam ? parms[idx]->getType()->isReferenceType() : false; + bool haveAttrNonNull = AttrNonNull[idx]; + + // Check if the parameter is also marked 'nonnull'. + if (!haveAttrNonNull && HasParam) + haveAttrNonNull = parms[idx]->hasAttr<NonNullAttr>(); + + if (!haveAttrNonNull && !haveRefTypeParam) + continue; + + // If the value is unknown or undefined, we can't perform this check. + const Expr *ArgE = Call.getArgExpr(idx); + SVal V = Call.getArgSVal(idx); + auto DV = V.getAs<DefinedSVal>(); + if (!DV) + continue; + + assert(!haveRefTypeParam || DV->getAs<Loc>()); + + // Process the case when the argument is not a location. + if (haveAttrNonNull && !DV->getAs<Loc>()) { + // If the argument is a union type, we want to handle a potential + // transparent_union GCC extension. 
+ if (!ArgE) + continue; + + QualType T = ArgE->getType(); + const RecordType *UT = T->getAsUnionType(); + if (!UT || !UT->getDecl()->hasAttr<TransparentUnionAttr>()) + continue; + + auto CSV = DV->getAs<nonloc::CompoundVal>(); + + // FIXME: Handle LazyCompoundVals? + if (!CSV) + continue; + + V = *(CSV->begin()); + DV = V.getAs<DefinedSVal>(); + assert(++CSV->begin() == CSV->end()); + // FIXME: Handle (some_union){ some_other_union_val }, which turns into + // a LazyCompoundVal inside a CompoundVal. + if (!V.getAs<Loc>()) + continue; + + // Retrieve the corresponding expression. + if (const auto *CE = dyn_cast<CompoundLiteralExpr>(ArgE)) + if (const auto *IE = dyn_cast<InitListExpr>(CE->getInitializer())) + ArgE = dyn_cast<Expr>(*(IE->begin())); + } + + ConstraintManager &CM = C.getConstraintManager(); + ProgramStateRef stateNotNull, stateNull; + std::tie(stateNotNull, stateNull) = CM.assumeDual(state, *DV); + + // Generate an error node. Check for a null node in case + // we cache out. + if (stateNull && !stateNotNull) { + if (ExplodedNode *errorNode = C.generateErrorNode(stateNull)) { + + std::unique_ptr<BugReport> R; + if (haveAttrNonNull) + R = genReportNullAttrNonNull(errorNode, ArgE, idx + 1); + else if (haveRefTypeParam) + R = genReportReferenceToNullPointer(errorNode, ArgE); + + // Highlight the range of the argument that was null. + R->addRange(Call.getArgSourceRange(idx)); + + // Emit the bug report. + C.emitReport(std::move(R)); + } + + // Always return. Either we cached out or we just emitted an error. + return; + } + + if (stateNull) { + if (ExplodedNode *N = C.generateSink(stateNull, C.getPredecessor())) { + ImplicitNullDerefEvent event = { + V, false, N, &C.getBugReporter(), + /*IsDirectDereference=*/haveRefTypeParam}; + dispatchEvent(event); + } + } + + // If a pointer value passed the check we should assume that it is + // indeed not null from this point forward. 
+ state = stateNotNull; + } + + // If we reach here all of the arguments passed the nonnull check. + // If 'state' has been updated generated a new node. + C.addTransition(state); +} + +std::unique_ptr<PathSensitiveBugReport> +NonNullParamChecker::genReportNullAttrNonNull(const ExplodedNode *ErrorNode, + const Expr *ArgE, + unsigned IdxOfArg) const { + // Lazily allocate the BugType object if it hasn't already been + // created. Ownership is transferred to the BugReporter object once + // the BugReport is passed to 'EmitWarning'. + if (!BTAttrNonNull) + BTAttrNonNull.reset(new BugType( + this, "Argument with 'nonnull' attribute passed null", "API")); + + llvm::SmallString<256> SBuf; + llvm::raw_svector_ostream OS(SBuf); + OS << "Null pointer passed to " + << IdxOfArg << llvm::getOrdinalSuffix(IdxOfArg) + << " parameter expecting 'nonnull'"; + + auto R = + std::make_unique<PathSensitiveBugReport>(*BTAttrNonNull, SBuf, ErrorNode); + if (ArgE) + bugreporter::trackExpressionValue(ErrorNode, ArgE, *R); + + return R; +} + +std::unique_ptr<PathSensitiveBugReport> +NonNullParamChecker::genReportReferenceToNullPointer( + const ExplodedNode *ErrorNode, const Expr *ArgE) const { + if (!BTNullRefArg) + BTNullRefArg.reset(new BuiltinBug(this, "Dereference of null pointer")); + + auto R = std::make_unique<PathSensitiveBugReport>( + *BTNullRefArg, "Forming reference to null pointer", ErrorNode); + if (ArgE) { + const Expr *ArgEDeref = bugreporter::getDerefExpr(ArgE); + if (!ArgEDeref) + ArgEDeref = ArgE; + bugreporter::trackExpressionValue(ErrorNode, ArgEDeref, *R); + } + return R; + +} + +void ento::registerNonNullParamChecker(CheckerManager &mgr) { + mgr.registerChecker<NonNullParamChecker>(); +} + +bool ento::shouldRegisterNonNullParamChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/NonnullGlobalConstantsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NonnullGlobalConstantsChecker.cpp new file mode 100644 index 
000000000000..43dbe57b8432 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/NonnullGlobalConstantsChecker.cpp @@ -0,0 +1,150 @@ +//==- NonnullGlobalConstantsChecker.cpp ---------------------------*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker adds an assumption that constant globals of certain types* are +// non-null, as otherwise they generally do not convey any useful information. +// The assumption is useful, as many framework use e. g. global const strings, +// and the analyzer might not be able to infer the global value if the +// definition is in a separate translation unit. +// The following types (and their typedef aliases) are considered to be +// non-null: +// - `char* const` +// - `const CFStringRef` from CoreFoundation +// - `NSString* const` from Foundation +// - `CFBooleanRef` from Foundation +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" + +using namespace clang; +using namespace ento; + +namespace { + +class NonnullGlobalConstantsChecker : public Checker<check::Location> { + mutable IdentifierInfo *NSStringII = nullptr; + mutable IdentifierInfo *CFStringRefII = nullptr; + mutable IdentifierInfo *CFBooleanRefII = nullptr; + +public: + NonnullGlobalConstantsChecker() {} + + void checkLocation(SVal l, bool isLoad, const Stmt *S, + CheckerContext &C) const; + +private: + 
void initIdentifierInfo(ASTContext &Ctx) const; + + bool isGlobalConstString(SVal V) const; + + bool isNonnullType(QualType Ty) const; +}; + +} // namespace + +/// Lazily initialize cache for required identifier information. +void NonnullGlobalConstantsChecker::initIdentifierInfo(ASTContext &Ctx) const { + if (NSStringII) + return; + + NSStringII = &Ctx.Idents.get("NSString"); + CFStringRefII = &Ctx.Idents.get("CFStringRef"); + CFBooleanRefII = &Ctx.Idents.get("CFBooleanRef"); +} + +/// Add an assumption that const string-like globals are non-null. +void NonnullGlobalConstantsChecker::checkLocation(SVal location, bool isLoad, + const Stmt *S, + CheckerContext &C) const { + initIdentifierInfo(C.getASTContext()); + if (!isLoad || !location.isValid()) + return; + + ProgramStateRef State = C.getState(); + + if (isGlobalConstString(location)) { + SVal V = State->getSVal(location.castAs<Loc>()); + Optional<DefinedOrUnknownSVal> Constr = V.getAs<DefinedOrUnknownSVal>(); + + if (Constr) { + + // Assume that the variable is non-null. + ProgramStateRef OutputState = State->assume(*Constr, true); + C.addTransition(OutputState); + } + } +} + +/// \param V loaded lvalue. +/// \return whether {@code val} is a string-like const global. +bool NonnullGlobalConstantsChecker::isGlobalConstString(SVal V) const { + Optional<loc::MemRegionVal> RegionVal = V.getAs<loc::MemRegionVal>(); + if (!RegionVal) + return false; + auto *Region = dyn_cast<VarRegion>(RegionVal->getAsRegion()); + if (!Region) + return false; + const VarDecl *Decl = Region->getDecl(); + + if (!Decl->hasGlobalStorage()) + return false; + + QualType Ty = Decl->getType(); + bool HasConst = Ty.isConstQualified(); + if (isNonnullType(Ty) && HasConst) + return true; + + // Look through the typedefs. + while (const Type *T = Ty.getTypePtr()) { + if (const auto *TT = dyn_cast<TypedefType>(T)) { + Ty = TT->getDecl()->getUnderlyingType(); + // It is sufficient for any intermediate typedef + // to be classified const. 
+ HasConst = HasConst || Ty.isConstQualified(); + if (isNonnullType(Ty) && HasConst) + return true; + } else if (const auto *AT = dyn_cast<AttributedType>(T)) { + if (AT->getAttrKind() == attr::TypeNonNull) + return true; + Ty = AT->getModifiedType(); + } else { + return false; + } + } + return false; +} + +/// \return whether {@code type} is extremely unlikely to be null +bool NonnullGlobalConstantsChecker::isNonnullType(QualType Ty) const { + + if (Ty->isPointerType() && Ty->getPointeeType()->isCharType()) + return true; + + if (auto *T = dyn_cast<ObjCObjectPointerType>(Ty)) { + return T->getInterfaceDecl() && + T->getInterfaceDecl()->getIdentifier() == NSStringII; + } else if (auto *T = dyn_cast<TypedefType>(Ty)) { + IdentifierInfo* II = T->getDecl()->getIdentifier(); + return II == CFStringRefII || II == CFBooleanRefII; + } + return false; +} + +void ento::registerNonnullGlobalConstantsChecker(CheckerManager &Mgr) { + Mgr.registerChecker<NonnullGlobalConstantsChecker>(); +} + +bool ento::shouldRegisterNonnullGlobalConstantsChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp new file mode 100644 index 000000000000..4322ac207112 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp @@ -0,0 +1,1225 @@ +//===-- NullabilityChecker.cpp - Nullability checker ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker tries to find nullability violations. There are several kinds of +// possible violations: +// * Null pointer is passed to a pointer which has a _Nonnull type. 
+// * Null pointer is returned from a function which has a _Nonnull return type. +// * Nullable pointer is passed to a pointer which has a _Nonnull type. +// * Nullable pointer is returned from a function which has a _Nonnull return +// type. +// * Nullable pointer is dereferenced. +// +// This checker propagates the nullability information of the pointers and looks +// for the patterns that are described above. Explicit casts are trusted and are +// considered a way to suppress false positives for this checker. The other way +// to suppress warnings would be to add asserts or guarding if statements to the +// code. In addition to the nullability propagation this checker also uses some +// heuristics to suppress potential false positives. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" + +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" + +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Path.h" + +using namespace clang; +using namespace ento; + +namespace { + +/// Returns the most nullable nullability. This is used for message expressions +/// like [receiver method], where the nullability of this expression is either +/// the nullability of the receiver or the nullability of the return type of the +/// method, depending on which is more nullable. Contradicted is considered to +/// be the most nullable, to avoid false positive results. 
+Nullability getMostNullable(Nullability Lhs, Nullability Rhs) { + return static_cast<Nullability>( + std::min(static_cast<char>(Lhs), static_cast<char>(Rhs))); +} + +const char *getNullabilityString(Nullability Nullab) { + switch (Nullab) { + case Nullability::Contradicted: + return "contradicted"; + case Nullability::Nullable: + return "nullable"; + case Nullability::Unspecified: + return "unspecified"; + case Nullability::Nonnull: + return "nonnull"; + } + llvm_unreachable("Unexpected enumeration."); + return ""; +} + +// These enums are used as an index to ErrorMessages array. +enum class ErrorKind : int { + NilAssignedToNonnull, + NilPassedToNonnull, + NilReturnedToNonnull, + NullableAssignedToNonnull, + NullableReturnedToNonnull, + NullableDereferenced, + NullablePassedToNonnull +}; + +class NullabilityChecker + : public Checker<check::Bind, check::PreCall, check::PreStmt<ReturnStmt>, + check::PostCall, check::PostStmt<ExplicitCastExpr>, + check::PostObjCMessage, check::DeadSymbols, + check::Event<ImplicitNullDerefEvent>> { + mutable std::unique_ptr<BugType> BT; + +public: + // If true, the checker will not diagnose nullabilility issues for calls + // to system headers. This option is motivated by the observation that large + // projects may have many nullability warnings. These projects may + // find warnings about nullability annotations that they have explicitly + // added themselves higher priority to fix than warnings on calls to system + // libraries. 
+ DefaultBool NoDiagnoseCallsToSystemHeaders; + + void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; + void checkPostStmt(const ExplicitCastExpr *CE, CheckerContext &C) const; + void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const; + void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const; + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; + void checkEvent(ImplicitNullDerefEvent Event) const; + + void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, + const char *Sep) const override; + + struct NullabilityChecksFilter { + DefaultBool CheckNullPassedToNonnull; + DefaultBool CheckNullReturnedFromNonnull; + DefaultBool CheckNullableDereferenced; + DefaultBool CheckNullablePassedToNonnull; + DefaultBool CheckNullableReturnedFromNonnull; + + CheckerNameRef CheckNameNullPassedToNonnull; + CheckerNameRef CheckNameNullReturnedFromNonnull; + CheckerNameRef CheckNameNullableDereferenced; + CheckerNameRef CheckNameNullablePassedToNonnull; + CheckerNameRef CheckNameNullableReturnedFromNonnull; + }; + + NullabilityChecksFilter Filter; + // When set to false no nullability information will be tracked in + // NullabilityMap. It is possible to catch errors like passing a null pointer + // to a callee that expects nonnull argument without the information that is + // stroed in the NullabilityMap. This is an optimization. 
+ DefaultBool NeedTracking; + +private: + class NullabilityBugVisitor : public BugReporterVisitor { + public: + NullabilityBugVisitor(const MemRegion *M) : Region(M) {} + + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int X = 0; + ID.AddPointer(&X); + ID.AddPointer(Region); + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + + private: + // The tracked region. + const MemRegion *Region; + }; + + /// When any of the nonnull arguments of the analyzed function is null, do not + /// report anything and turn off the check. + /// + /// When \p SuppressPath is set to true, no more bugs will be reported on this + /// path by this checker. + void reportBugIfInvariantHolds(StringRef Msg, ErrorKind Error, + ExplodedNode *N, const MemRegion *Region, + CheckerContext &C, + const Stmt *ValueExpr = nullptr, + bool SuppressPath = false) const; + + void reportBug(StringRef Msg, ErrorKind Error, ExplodedNode *N, + const MemRegion *Region, BugReporter &BR, + const Stmt *ValueExpr = nullptr) const { + if (!BT) + BT.reset(new BugType(this, "Nullability", categories::MemoryError)); + + auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + if (Region) { + R->markInteresting(Region); + R->addVisitor(std::make_unique<NullabilityBugVisitor>(Region)); + } + if (ValueExpr) { + R->addRange(ValueExpr->getSourceRange()); + if (Error == ErrorKind::NilAssignedToNonnull || + Error == ErrorKind::NilPassedToNonnull || + Error == ErrorKind::NilReturnedToNonnull) + if (const auto *Ex = dyn_cast<Expr>(ValueExpr)) + bugreporter::trackExpressionValue(N, Ex, *R); + } + BR.emitReport(std::move(R)); + } + + /// If an SVal wraps a region that should be tracked, it will return a pointer + /// to the wrapped region. Otherwise it will return a nullptr. 
+ const SymbolicRegion *getTrackRegion(SVal Val, + bool CheckSuperRegion = false) const; + + /// Returns true if the call is diagnosable in the current analyzer + /// configuration. + bool isDiagnosableCall(const CallEvent &Call) const { + if (NoDiagnoseCallsToSystemHeaders && Call.isInSystemHeader()) + return false; + + return true; + } +}; + +class NullabilityState { +public: + NullabilityState(Nullability Nullab, const Stmt *Source = nullptr) + : Nullab(Nullab), Source(Source) {} + + const Stmt *getNullabilitySource() const { return Source; } + + Nullability getValue() const { return Nullab; } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(static_cast<char>(Nullab)); + ID.AddPointer(Source); + } + + void print(raw_ostream &Out) const { + Out << getNullabilityString(Nullab) << "\n"; + } + +private: + Nullability Nullab; + // Source is the expression which determined the nullability. For example in a + // message like [nullable nonnull_returning] has nullable nullability, because + // the receiver is nullable. Here the receiver will be the source of the + // nullability. This is useful information when the diagnostics are generated. + const Stmt *Source; +}; + +bool operator==(NullabilityState Lhs, NullabilityState Rhs) { + return Lhs.getValue() == Rhs.getValue() && + Lhs.getNullabilitySource() == Rhs.getNullabilitySource(); +} + +} // end anonymous namespace + +REGISTER_MAP_WITH_PROGRAMSTATE(NullabilityMap, const MemRegion *, + NullabilityState) + +// We say "the nullability type invariant is violated" when a location with a +// non-null type contains NULL or a function with a non-null return type returns +// NULL. Violations of the nullability type invariant can be detected either +// directly (for example, when NULL is passed as an argument to a nonnull +// parameter) or indirectly (for example, when, inside a function, the +// programmer defensively checks whether a nonnull parameter contains NULL and +// finds that it does). 
+// +// As a matter of policy, the nullability checker typically warns on direct +// violations of the nullability invariant (although it uses various +// heuristics to suppress warnings in some cases) but will not warn if the +// invariant has already been violated along the path (either directly or +// indirectly). As a practical matter, this prevents the analyzer from +// (1) warning on defensive code paths where a nullability precondition is +// determined to have been violated, (2) warning additional times after an +// initial direct violation has been discovered, and (3) warning after a direct +// violation that has been implicitly or explicitly suppressed (for +// example, with a cast of NULL to _Nonnull). In essence, once an invariant +// violation is detected on a path, this checker will be turned off for the +// rest of the analysis. +// +// The analyzer takes this approach (rather than generating a sink node) to +// ensure coverage of defensive paths, which may be important for backwards +// compatibility in codebases that were developed without nullability in mind. 
+REGISTER_TRAIT_WITH_PROGRAMSTATE(InvariantViolated, bool) + +enum class NullConstraint { IsNull, IsNotNull, Unknown }; + +static NullConstraint getNullConstraint(DefinedOrUnknownSVal Val, + ProgramStateRef State) { + ConditionTruthVal Nullness = State->isNull(Val); + if (Nullness.isConstrainedFalse()) + return NullConstraint::IsNotNull; + if (Nullness.isConstrainedTrue()) + return NullConstraint::IsNull; + return NullConstraint::Unknown; +} + +const SymbolicRegion * +NullabilityChecker::getTrackRegion(SVal Val, bool CheckSuperRegion) const { + if (!NeedTracking) + return nullptr; + + auto RegionSVal = Val.getAs<loc::MemRegionVal>(); + if (!RegionSVal) + return nullptr; + + const MemRegion *Region = RegionSVal->getRegion(); + + if (CheckSuperRegion) { + if (auto FieldReg = Region->getAs<FieldRegion>()) + return dyn_cast<SymbolicRegion>(FieldReg->getSuperRegion()); + if (auto ElementReg = Region->getAs<ElementRegion>()) + return dyn_cast<SymbolicRegion>(ElementReg->getSuperRegion()); + } + + return dyn_cast<SymbolicRegion>(Region); +} + +PathDiagnosticPieceRef NullabilityChecker::NullabilityBugVisitor::VisitNode( + const ExplodedNode *N, BugReporterContext &BRC, + PathSensitiveBugReport &BR) { + ProgramStateRef State = N->getState(); + ProgramStateRef StatePrev = N->getFirstPred()->getState(); + + const NullabilityState *TrackedNullab = State->get<NullabilityMap>(Region); + const NullabilityState *TrackedNullabPrev = + StatePrev->get<NullabilityMap>(Region); + if (!TrackedNullab) + return nullptr; + + if (TrackedNullabPrev && + TrackedNullabPrev->getValue() == TrackedNullab->getValue()) + return nullptr; + + // Retrieve the associated statement. 
+ const Stmt *S = TrackedNullab->getNullabilitySource(); + if (!S || S->getBeginLoc().isInvalid()) { + S = N->getStmtForDiagnostics(); + } + + if (!S) + return nullptr; + + std::string InfoText = + (llvm::Twine("Nullability '") + + getNullabilityString(TrackedNullab->getValue()) + "' is inferred") + .str(); + + // Generate the extra diagnostic. + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + return std::make_shared<PathDiagnosticEventPiece>(Pos, InfoText, true); +} + +/// Returns true when the value stored at the given location has been +/// constrained to null after being passed through an object of nonnnull type. +static bool checkValueAtLValForInvariantViolation(ProgramStateRef State, + SVal LV, QualType T) { + if (getNullabilityAnnotation(T) != Nullability::Nonnull) + return false; + + auto RegionVal = LV.getAs<loc::MemRegionVal>(); + if (!RegionVal) + return false; + + // If the value was constrained to null *after* it was passed through that + // location, it could not have been a concrete pointer *when* it was passed. + // In that case we would have handled the situation when the value was + // bound to that location, by emitting (or not emitting) a report. + // Therefore we are only interested in symbolic regions that can be either + // null or non-null depending on the value of their respective symbol. 
+ auto StoredVal = State->getSVal(*RegionVal).getAs<loc::MemRegionVal>(); + if (!StoredVal || !isa<SymbolicRegion>(StoredVal->getRegion())) + return false; + + if (getNullConstraint(*StoredVal, State) == NullConstraint::IsNull) + return true; + + return false; +} + +static bool +checkParamsForPreconditionViolation(ArrayRef<ParmVarDecl *> Params, + ProgramStateRef State, + const LocationContext *LocCtxt) { + for (const auto *ParamDecl : Params) { + if (ParamDecl->isParameterPack()) + break; + + SVal LV = State->getLValue(ParamDecl, LocCtxt); + if (checkValueAtLValForInvariantViolation(State, LV, + ParamDecl->getType())) { + return true; + } + } + return false; +} + +static bool +checkSelfIvarsForInvariantViolation(ProgramStateRef State, + const LocationContext *LocCtxt) { + auto *MD = dyn_cast<ObjCMethodDecl>(LocCtxt->getDecl()); + if (!MD || !MD->isInstanceMethod()) + return false; + + const ImplicitParamDecl *SelfDecl = LocCtxt->getSelfDecl(); + if (!SelfDecl) + return false; + + SVal SelfVal = State->getSVal(State->getRegion(SelfDecl, LocCtxt)); + + const ObjCObjectPointerType *SelfType = + dyn_cast<ObjCObjectPointerType>(SelfDecl->getType()); + if (!SelfType) + return false; + + const ObjCInterfaceDecl *ID = SelfType->getInterfaceDecl(); + if (!ID) + return false; + + for (const auto *IvarDecl : ID->ivars()) { + SVal LV = State->getLValue(IvarDecl, SelfVal); + if (checkValueAtLValForInvariantViolation(State, LV, IvarDecl->getType())) { + return true; + } + } + return false; +} + +static bool checkInvariantViolation(ProgramStateRef State, ExplodedNode *N, + CheckerContext &C) { + if (State->get<InvariantViolated>()) + return true; + + const LocationContext *LocCtxt = C.getLocationContext(); + const Decl *D = LocCtxt->getDecl(); + if (!D) + return false; + + ArrayRef<ParmVarDecl*> Params; + if (const auto *BD = dyn_cast<BlockDecl>(D)) + Params = BD->parameters(); + else if (const auto *FD = dyn_cast<FunctionDecl>(D)) + Params = FD->parameters(); + else if (const 
auto *MD = dyn_cast<ObjCMethodDecl>(D)) + Params = MD->parameters(); + else + return false; + + if (checkParamsForPreconditionViolation(Params, State, LocCtxt) || + checkSelfIvarsForInvariantViolation(State, LocCtxt)) { + if (!N->isSink()) + C.addTransition(State->set<InvariantViolated>(true), N); + return true; + } + return false; +} + +void NullabilityChecker::reportBugIfInvariantHolds(StringRef Msg, + ErrorKind Error, ExplodedNode *N, const MemRegion *Region, + CheckerContext &C, const Stmt *ValueExpr, bool SuppressPath) const { + ProgramStateRef OriginalState = N->getState(); + + if (checkInvariantViolation(OriginalState, N, C)) + return; + if (SuppressPath) { + OriginalState = OriginalState->set<InvariantViolated>(true); + N = C.addTransition(OriginalState, N); + } + + reportBug(Msg, Error, N, Region, C.getBugReporter(), ValueExpr); +} + +/// Cleaning up the program state. +void NullabilityChecker::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + NullabilityMapTy Nullabilities = State->get<NullabilityMap>(); + for (NullabilityMapTy::iterator I = Nullabilities.begin(), + E = Nullabilities.end(); + I != E; ++I) { + const auto *Region = I->first->getAs<SymbolicRegion>(); + assert(Region && "Non-symbolic region is tracked."); + if (SR.isDead(Region->getSymbol())) { + State = State->remove<NullabilityMap>(I->first); + } + } + // When one of the nonnull arguments are constrained to be null, nullability + // preconditions are violated. It is not enough to check this only when we + // actually report an error, because at that time interesting symbols might be + // reaped. + if (checkInvariantViolation(State, C.getPredecessor(), C)) + return; + C.addTransition(State); +} + +/// This callback triggers when a pointer is dereferenced and the analyzer does +/// not know anything about the value of that pointer. When that pointer is +/// nullable, this code emits a warning. 
+void NullabilityChecker::checkEvent(ImplicitNullDerefEvent Event) const { + if (Event.SinkNode->getState()->get<InvariantViolated>()) + return; + + const MemRegion *Region = + getTrackRegion(Event.Location, /*CheckSuperRegion=*/true); + if (!Region) + return; + + ProgramStateRef State = Event.SinkNode->getState(); + const NullabilityState *TrackedNullability = + State->get<NullabilityMap>(Region); + + if (!TrackedNullability) + return; + + if (Filter.CheckNullableDereferenced && + TrackedNullability->getValue() == Nullability::Nullable) { + BugReporter &BR = *Event.BR; + // Do not suppress errors on defensive code paths, because dereferencing + // a nullable pointer is always an error. + if (Event.IsDirectDereference) + reportBug("Nullable pointer is dereferenced", + ErrorKind::NullableDereferenced, Event.SinkNode, Region, BR); + else { + reportBug("Nullable pointer is passed to a callee that requires a " + "non-null", ErrorKind::NullablePassedToNonnull, + Event.SinkNode, Region, BR); + } + } +} + +/// Find the outermost subexpression of E that is not an implicit cast. +/// This looks through the implicit casts to _Nonnull that ARC adds to +/// return expressions of ObjC types when the return type of the function or +/// method is non-null but the express is not. +static const Expr *lookThroughImplicitCasts(const Expr *E) { + assert(E); + + while (auto *ICE = dyn_cast<ImplicitCastExpr>(E)) { + E = ICE->getSubExpr(); + } + + return E; +} + +/// This method check when nullable pointer or null value is returned from a +/// function that has nonnull return type. 
+void NullabilityChecker::checkPreStmt(const ReturnStmt *S, + CheckerContext &C) const { + auto RetExpr = S->getRetValue(); + if (!RetExpr) + return; + + if (!RetExpr->getType()->isAnyPointerType()) + return; + + ProgramStateRef State = C.getState(); + if (State->get<InvariantViolated>()) + return; + + auto RetSVal = C.getSVal(S).getAs<DefinedOrUnknownSVal>(); + if (!RetSVal) + return; + + bool InSuppressedMethodFamily = false; + + QualType RequiredRetType; + AnalysisDeclContext *DeclCtxt = + C.getLocationContext()->getAnalysisDeclContext(); + const Decl *D = DeclCtxt->getDecl(); + if (auto *MD = dyn_cast<ObjCMethodDecl>(D)) { + // HACK: This is a big hammer to avoid warning when there are defensive + // nil checks in -init and -copy methods. We should add more sophisticated + // logic here to suppress on common defensive idioms but still + // warn when there is a likely problem. + ObjCMethodFamily Family = MD->getMethodFamily(); + if (OMF_init == Family || OMF_copy == Family || OMF_mutableCopy == Family) + InSuppressedMethodFamily = true; + + RequiredRetType = MD->getReturnType(); + } else if (auto *FD = dyn_cast<FunctionDecl>(D)) { + RequiredRetType = FD->getReturnType(); + } else { + return; + } + + NullConstraint Nullness = getNullConstraint(*RetSVal, State); + + Nullability RequiredNullability = getNullabilityAnnotation(RequiredRetType); + + // If the returned value is null but the type of the expression + // generating it is nonnull then we will suppress the diagnostic. 
+ // This enables explicit suppression when returning a nil literal in a + // function with a _Nonnull return type: + // return (NSString * _Nonnull)0; + Nullability RetExprTypeLevelNullability = + getNullabilityAnnotation(lookThroughImplicitCasts(RetExpr)->getType()); + + bool NullReturnedFromNonNull = (RequiredNullability == Nullability::Nonnull && + Nullness == NullConstraint::IsNull); + if (Filter.CheckNullReturnedFromNonnull && + NullReturnedFromNonNull && + RetExprTypeLevelNullability != Nullability::Nonnull && + !InSuppressedMethodFamily && + C.getLocationContext()->inTopFrame()) { + static CheckerProgramPointTag Tag(this, "NullReturnedFromNonnull"); + ExplodedNode *N = C.generateErrorNode(State, &Tag); + if (!N) + return; + + SmallString<256> SBuf; + llvm::raw_svector_ostream OS(SBuf); + OS << (RetExpr->getType()->isObjCObjectPointerType() ? "nil" : "Null"); + OS << " returned from a " << C.getDeclDescription(D) << + " that is expected to return a non-null value"; + reportBugIfInvariantHolds(OS.str(), + ErrorKind::NilReturnedToNonnull, N, nullptr, C, + RetExpr); + return; + } + + // If null was returned from a non-null function, mark the nullability + // invariant as violated even if the diagnostic was suppressed. 
+ if (NullReturnedFromNonNull) { + State = State->set<InvariantViolated>(true); + C.addTransition(State); + return; + } + + const MemRegion *Region = getTrackRegion(*RetSVal); + if (!Region) + return; + + const NullabilityState *TrackedNullability = + State->get<NullabilityMap>(Region); + if (TrackedNullability) { + Nullability TrackedNullabValue = TrackedNullability->getValue(); + if (Filter.CheckNullableReturnedFromNonnull && + Nullness != NullConstraint::IsNotNull && + TrackedNullabValue == Nullability::Nullable && + RequiredNullability == Nullability::Nonnull) { + static CheckerProgramPointTag Tag(this, "NullableReturnedFromNonnull"); + ExplodedNode *N = C.addTransition(State, C.getPredecessor(), &Tag); + + SmallString<256> SBuf; + llvm::raw_svector_ostream OS(SBuf); + OS << "Nullable pointer is returned from a " << C.getDeclDescription(D) << + " that is expected to return a non-null value"; + + reportBugIfInvariantHolds(OS.str(), + ErrorKind::NullableReturnedToNonnull, N, + Region, C); + } + return; + } + if (RequiredNullability == Nullability::Nullable) { + State = State->set<NullabilityMap>(Region, + NullabilityState(RequiredNullability, + S)); + C.addTransition(State); + } +} + +/// This callback warns when a nullable pointer or a null value is passed to a +/// function that expects its argument to be nonnull. 
+void NullabilityChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + if (!Call.getDecl()) + return; + + ProgramStateRef State = C.getState(); + if (State->get<InvariantViolated>()) + return; + + ProgramStateRef OrigState = State; + + unsigned Idx = 0; + for (const ParmVarDecl *Param : Call.parameters()) { + if (Param->isParameterPack()) + break; + + if (Idx >= Call.getNumArgs()) + break; + + const Expr *ArgExpr = Call.getArgExpr(Idx); + auto ArgSVal = Call.getArgSVal(Idx++).getAs<DefinedOrUnknownSVal>(); + if (!ArgSVal) + continue; + + if (!Param->getType()->isAnyPointerType() && + !Param->getType()->isReferenceType()) + continue; + + NullConstraint Nullness = getNullConstraint(*ArgSVal, State); + + Nullability RequiredNullability = + getNullabilityAnnotation(Param->getType()); + Nullability ArgExprTypeLevelNullability = + getNullabilityAnnotation(ArgExpr->getType()); + + unsigned ParamIdx = Param->getFunctionScopeIndex() + 1; + + if (Filter.CheckNullPassedToNonnull && Nullness == NullConstraint::IsNull && + ArgExprTypeLevelNullability != Nullability::Nonnull && + RequiredNullability == Nullability::Nonnull && + isDiagnosableCall(Call)) { + ExplodedNode *N = C.generateErrorNode(State); + if (!N) + return; + + SmallString<256> SBuf; + llvm::raw_svector_ostream OS(SBuf); + OS << (Param->getType()->isObjCObjectPointerType() ? 
"nil" : "Null"); + OS << " passed to a callee that requires a non-null " << ParamIdx + << llvm::getOrdinalSuffix(ParamIdx) << " parameter"; + reportBugIfInvariantHolds(OS.str(), ErrorKind::NilPassedToNonnull, N, + nullptr, C, + ArgExpr, /*SuppressPath=*/false); + return; + } + + const MemRegion *Region = getTrackRegion(*ArgSVal); + if (!Region) + continue; + + const NullabilityState *TrackedNullability = + State->get<NullabilityMap>(Region); + + if (TrackedNullability) { + if (Nullness == NullConstraint::IsNotNull || + TrackedNullability->getValue() != Nullability::Nullable) + continue; + + if (Filter.CheckNullablePassedToNonnull && + RequiredNullability == Nullability::Nonnull && + isDiagnosableCall(Call)) { + ExplodedNode *N = C.addTransition(State); + SmallString<256> SBuf; + llvm::raw_svector_ostream OS(SBuf); + OS << "Nullable pointer is passed to a callee that requires a non-null " + << ParamIdx << llvm::getOrdinalSuffix(ParamIdx) << " parameter"; + reportBugIfInvariantHolds(OS.str(), + ErrorKind::NullablePassedToNonnull, N, + Region, C, ArgExpr, /*SuppressPath=*/true); + return; + } + if (Filter.CheckNullableDereferenced && + Param->getType()->isReferenceType()) { + ExplodedNode *N = C.addTransition(State); + reportBugIfInvariantHolds("Nullable pointer is dereferenced", + ErrorKind::NullableDereferenced, N, Region, + C, ArgExpr, /*SuppressPath=*/true); + return; + } + continue; + } + // No tracked nullability yet. + if (ArgExprTypeLevelNullability != Nullability::Nullable) + continue; + State = State->set<NullabilityMap>( + Region, NullabilityState(ArgExprTypeLevelNullability, ArgExpr)); + } + if (State != OrigState) + C.addTransition(State); +} + +/// Suppress the nullability warnings for some functions. +void NullabilityChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + auto Decl = Call.getDecl(); + if (!Decl) + return; + // ObjC Messages handles in a different callback. 
+ if (Call.getKind() == CE_ObjCMessage) + return; + const FunctionType *FuncType = Decl->getFunctionType(); + if (!FuncType) + return; + QualType ReturnType = FuncType->getReturnType(); + if (!ReturnType->isAnyPointerType()) + return; + ProgramStateRef State = C.getState(); + if (State->get<InvariantViolated>()) + return; + + const MemRegion *Region = getTrackRegion(Call.getReturnValue()); + if (!Region) + return; + + // CG headers are misannotated. Do not warn for symbols that are the results + // of CG calls. + const SourceManager &SM = C.getSourceManager(); + StringRef FilePath = SM.getFilename(SM.getSpellingLoc(Decl->getBeginLoc())); + if (llvm::sys::path::filename(FilePath).startswith("CG")) { + State = State->set<NullabilityMap>(Region, Nullability::Contradicted); + C.addTransition(State); + return; + } + + const NullabilityState *TrackedNullability = + State->get<NullabilityMap>(Region); + + if (!TrackedNullability && + getNullabilityAnnotation(ReturnType) == Nullability::Nullable) { + State = State->set<NullabilityMap>(Region, Nullability::Nullable); + C.addTransition(State); + } +} + +static Nullability getReceiverNullability(const ObjCMethodCall &M, + ProgramStateRef State) { + if (M.isReceiverSelfOrSuper()) { + // For super and super class receivers we assume that the receiver is + // nonnull. + return Nullability::Nonnull; + } + // Otherwise look up nullability in the state. + SVal Receiver = M.getReceiverSVal(); + if (auto DefOrUnknown = Receiver.getAs<DefinedOrUnknownSVal>()) { + // If the receiver is constrained to be nonnull, assume that it is nonnull + // regardless of its type. 
+ NullConstraint Nullness = getNullConstraint(*DefOrUnknown, State); + if (Nullness == NullConstraint::IsNotNull) + return Nullability::Nonnull; + } + auto ValueRegionSVal = Receiver.getAs<loc::MemRegionVal>(); + if (ValueRegionSVal) { + const MemRegion *SelfRegion = ValueRegionSVal->getRegion(); + assert(SelfRegion); + + const NullabilityState *TrackedSelfNullability = + State->get<NullabilityMap>(SelfRegion); + if (TrackedSelfNullability) + return TrackedSelfNullability->getValue(); + } + return Nullability::Unspecified; +} + +/// Calculate the nullability of the result of a message expr based on the +/// nullability of the receiver, the nullability of the return value, and the +/// constraints. +void NullabilityChecker::checkPostObjCMessage(const ObjCMethodCall &M, + CheckerContext &C) const { + auto Decl = M.getDecl(); + if (!Decl) + return; + QualType RetType = Decl->getReturnType(); + if (!RetType->isAnyPointerType()) + return; + + ProgramStateRef State = C.getState(); + if (State->get<InvariantViolated>()) + return; + + const MemRegion *ReturnRegion = getTrackRegion(M.getReturnValue()); + if (!ReturnRegion) + return; + + auto Interface = Decl->getClassInterface(); + auto Name = Interface ? Interface->getName() : ""; + // In order to reduce the noise in the diagnostics generated by this checker, + // some framework and programming style based heuristics are used. These + // heuristics are for Cocoa APIs which have NS prefix. + if (Name.startswith("NS")) { + // Developers rely on dynamic invariants such as an item should be available + // in a collection, or a collection is not empty often. Those invariants can + // not be inferred by any static analysis tool. To not to bother the users + // with too many false positives, every item retrieval function should be + // ignored for collections. The instance methods of dictionaries in Cocoa + // are either item retrieval related or not interesting nullability wise. 
+ // Using this fact, to keep the code easier to read just ignore the return + // value of every instance method of dictionaries. + if (M.isInstanceMessage() && Name.contains("Dictionary")) { + State = + State->set<NullabilityMap>(ReturnRegion, Nullability::Contradicted); + C.addTransition(State); + return; + } + // For similar reasons ignore some methods of Cocoa arrays. + StringRef FirstSelectorSlot = M.getSelector().getNameForSlot(0); + if (Name.contains("Array") && + (FirstSelectorSlot == "firstObject" || + FirstSelectorSlot == "lastObject")) { + State = + State->set<NullabilityMap>(ReturnRegion, Nullability::Contradicted); + C.addTransition(State); + return; + } + + // Encoding related methods of string should not fail when lossless + // encodings are used. Using lossless encodings is so frequent that ignoring + // this class of methods reduced the emitted diagnostics by about 30% on + // some projects (and all of that was false positives). + if (Name.contains("String")) { + for (auto Param : M.parameters()) { + if (Param->getName() == "encoding") { + State = State->set<NullabilityMap>(ReturnRegion, + Nullability::Contradicted); + C.addTransition(State); + return; + } + } + } + } + + const ObjCMessageExpr *Message = M.getOriginExpr(); + Nullability SelfNullability = getReceiverNullability(M, State); + + const NullabilityState *NullabilityOfReturn = + State->get<NullabilityMap>(ReturnRegion); + + if (NullabilityOfReturn) { + // When we have a nullability tracked for the return value, the nullability + // of the expression will be the most nullable of the receiver and the + // return value. + Nullability RetValTracked = NullabilityOfReturn->getValue(); + Nullability ComputedNullab = + getMostNullable(RetValTracked, SelfNullability); + if (ComputedNullab != RetValTracked && + ComputedNullab != Nullability::Unspecified) { + const Stmt *NullabilitySource = + ComputedNullab == RetValTracked + ? 
NullabilityOfReturn->getNullabilitySource() + : Message->getInstanceReceiver(); + State = State->set<NullabilityMap>( + ReturnRegion, NullabilityState(ComputedNullab, NullabilitySource)); + C.addTransition(State); + } + return; + } + + // No tracked information. Use static type information for return value. + Nullability RetNullability = getNullabilityAnnotation(RetType); + + // Properties might be computed. For this reason the static analyzer creates a + // new symbol each time an unknown property is read. To avoid false pozitives + // do not treat unknown properties as nullable, even when they explicitly + // marked nullable. + if (M.getMessageKind() == OCM_PropertyAccess && !C.wasInlined) + RetNullability = Nullability::Nonnull; + + Nullability ComputedNullab = getMostNullable(RetNullability, SelfNullability); + if (ComputedNullab == Nullability::Nullable) { + const Stmt *NullabilitySource = ComputedNullab == RetNullability + ? Message + : Message->getInstanceReceiver(); + State = State->set<NullabilityMap>( + ReturnRegion, NullabilityState(ComputedNullab, NullabilitySource)); + C.addTransition(State); + } +} + +/// Explicit casts are trusted. If there is a disagreement in the nullability +/// annotations in the destination and the source or '0' is casted to nonnull +/// track the value as having contraditory nullability. This will allow users to +/// suppress warnings. +void NullabilityChecker::checkPostStmt(const ExplicitCastExpr *CE, + CheckerContext &C) const { + QualType OriginType = CE->getSubExpr()->getType(); + QualType DestType = CE->getType(); + if (!OriginType->isAnyPointerType()) + return; + if (!DestType->isAnyPointerType()) + return; + + ProgramStateRef State = C.getState(); + if (State->get<InvariantViolated>()) + return; + + Nullability DestNullability = getNullabilityAnnotation(DestType); + + // No explicit nullability in the destination type, so this cast does not + // change the nullability. 
+ if (DestNullability == Nullability::Unspecified) + return; + + auto RegionSVal = C.getSVal(CE).getAs<DefinedOrUnknownSVal>(); + const MemRegion *Region = getTrackRegion(*RegionSVal); + if (!Region) + return; + + // When 0 is converted to nonnull mark it as contradicted. + if (DestNullability == Nullability::Nonnull) { + NullConstraint Nullness = getNullConstraint(*RegionSVal, State); + if (Nullness == NullConstraint::IsNull) { + State = State->set<NullabilityMap>(Region, Nullability::Contradicted); + C.addTransition(State); + return; + } + } + + const NullabilityState *TrackedNullability = + State->get<NullabilityMap>(Region); + + if (!TrackedNullability) { + if (DestNullability != Nullability::Nullable) + return; + State = State->set<NullabilityMap>(Region, + NullabilityState(DestNullability, CE)); + C.addTransition(State); + return; + } + + if (TrackedNullability->getValue() != DestNullability && + TrackedNullability->getValue() != Nullability::Contradicted) { + State = State->set<NullabilityMap>(Region, Nullability::Contradicted); + C.addTransition(State); + } +} + +/// For a given statement performing a bind, attempt to syntactically +/// match the expression resulting in the bound value. +static const Expr * matchValueExprForBind(const Stmt *S) { + // For `x = e` the value expression is the right-hand side. + if (auto *BinOp = dyn_cast<BinaryOperator>(S)) { + if (BinOp->getOpcode() == BO_Assign) + return BinOp->getRHS(); + } + + // For `int x = e` the value expression is the initializer. + if (auto *DS = dyn_cast<DeclStmt>(S)) { + if (DS->isSingleDecl()) { + auto *VD = dyn_cast<VarDecl>(DS->getSingleDecl()); + if (!VD) + return nullptr; + + if (const Expr *Init = VD->getInit()) + return Init; + } + } + + return nullptr; +} + +/// Returns true if \param S is a DeclStmt for a local variable that +/// ObjC automated reference counting initialized with zero. 
+static bool isARCNilInitializedLocal(CheckerContext &C, const Stmt *S) { + // We suppress diagnostics for ARC zero-initialized _Nonnull locals. This + // prevents false positives when a _Nonnull local variable cannot be + // initialized with an initialization expression: + // NSString * _Nonnull s; // no-warning + // @autoreleasepool { + // s = ... + // } + // + // FIXME: We should treat implicitly zero-initialized _Nonnull locals as + // uninitialized in Sema's UninitializedValues analysis to warn when a use of + // the zero-initialized definition will unexpectedly yield nil. + + // Locals are only zero-initialized when automated reference counting + // is turned on. + if (!C.getASTContext().getLangOpts().ObjCAutoRefCount) + return false; + + auto *DS = dyn_cast<DeclStmt>(S); + if (!DS || !DS->isSingleDecl()) + return false; + + auto *VD = dyn_cast<VarDecl>(DS->getSingleDecl()); + if (!VD) + return false; + + // Sema only zero-initializes locals with ObjCLifetimes. + if(!VD->getType().getQualifiers().hasObjCLifetime()) + return false; + + const Expr *Init = VD->getInit(); + assert(Init && "ObjC local under ARC without initializer"); + + // Return false if the local is explicitly initialized (e.g., with '= nil'). + if (!isa<ImplicitValueInitExpr>(Init)) + return false; + + return true; +} + +/// Propagate the nullability information through binds and warn when nullable +/// pointer or null symbol is assigned to a pointer with a nonnull type. 
+void NullabilityChecker::checkBind(SVal L, SVal V, const Stmt *S, + CheckerContext &C) const { + const TypedValueRegion *TVR = + dyn_cast_or_null<TypedValueRegion>(L.getAsRegion()); + if (!TVR) + return; + + QualType LocType = TVR->getValueType(); + if (!LocType->isAnyPointerType()) + return; + + ProgramStateRef State = C.getState(); + if (State->get<InvariantViolated>()) + return; + + auto ValDefOrUnknown = V.getAs<DefinedOrUnknownSVal>(); + if (!ValDefOrUnknown) + return; + + NullConstraint RhsNullness = getNullConstraint(*ValDefOrUnknown, State); + + Nullability ValNullability = Nullability::Unspecified; + if (SymbolRef Sym = ValDefOrUnknown->getAsSymbol()) + ValNullability = getNullabilityAnnotation(Sym->getType()); + + Nullability LocNullability = getNullabilityAnnotation(LocType); + + // If the type of the RHS expression is nonnull, don't warn. This + // enables explicit suppression with a cast to nonnull. + Nullability ValueExprTypeLevelNullability = Nullability::Unspecified; + const Expr *ValueExpr = matchValueExprForBind(S); + if (ValueExpr) { + ValueExprTypeLevelNullability = + getNullabilityAnnotation(lookThroughImplicitCasts(ValueExpr)->getType()); + } + + bool NullAssignedToNonNull = (LocNullability == Nullability::Nonnull && + RhsNullness == NullConstraint::IsNull); + if (Filter.CheckNullPassedToNonnull && + NullAssignedToNonNull && + ValNullability != Nullability::Nonnull && + ValueExprTypeLevelNullability != Nullability::Nonnull && + !isARCNilInitializedLocal(C, S)) { + static CheckerProgramPointTag Tag(this, "NullPassedToNonnull"); + ExplodedNode *N = C.generateErrorNode(State, &Tag); + if (!N) + return; + + + const Stmt *ValueStmt = S; + if (ValueExpr) + ValueStmt = ValueExpr; + + SmallString<256> SBuf; + llvm::raw_svector_ostream OS(SBuf); + OS << (LocType->isObjCObjectPointerType() ? 
"nil" : "Null"); + OS << " assigned to a pointer which is expected to have non-null value"; + reportBugIfInvariantHolds(OS.str(), + ErrorKind::NilAssignedToNonnull, N, nullptr, C, + ValueStmt); + return; + } + + // If null was returned from a non-null function, mark the nullability + // invariant as violated even if the diagnostic was suppressed. + if (NullAssignedToNonNull) { + State = State->set<InvariantViolated>(true); + C.addTransition(State); + return; + } + + // Intentionally missing case: '0' is bound to a reference. It is handled by + // the DereferenceChecker. + + const MemRegion *ValueRegion = getTrackRegion(*ValDefOrUnknown); + if (!ValueRegion) + return; + + const NullabilityState *TrackedNullability = + State->get<NullabilityMap>(ValueRegion); + + if (TrackedNullability) { + if (RhsNullness == NullConstraint::IsNotNull || + TrackedNullability->getValue() != Nullability::Nullable) + return; + if (Filter.CheckNullablePassedToNonnull && + LocNullability == Nullability::Nonnull) { + static CheckerProgramPointTag Tag(this, "NullablePassedToNonnull"); + ExplodedNode *N = C.addTransition(State, C.getPredecessor(), &Tag); + reportBugIfInvariantHolds("Nullable pointer is assigned to a pointer " + "which is expected to have non-null value", + ErrorKind::NullableAssignedToNonnull, N, + ValueRegion, C); + } + return; + } + + const auto *BinOp = dyn_cast<BinaryOperator>(S); + + if (ValNullability == Nullability::Nullable) { + // Trust the static information of the value more than the static + // information on the location. + const Stmt *NullabilitySource = BinOp ? BinOp->getRHS() : S; + State = State->set<NullabilityMap>( + ValueRegion, NullabilityState(ValNullability, NullabilitySource)); + C.addTransition(State); + return; + } + + if (LocNullability == Nullability::Nullable) { + const Stmt *NullabilitySource = BinOp ? 
BinOp->getLHS() : S; + State = State->set<NullabilityMap>( + ValueRegion, NullabilityState(LocNullability, NullabilitySource)); + C.addTransition(State); + } +} + +void NullabilityChecker::printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const { + + NullabilityMapTy B = State->get<NullabilityMap>(); + + if (State->get<InvariantViolated>()) + Out << Sep << NL + << "Nullability invariant was violated, warnings suppressed." << NL; + + if (B.isEmpty()) + return; + + if (!State->get<InvariantViolated>()) + Out << Sep << NL; + + for (NullabilityMapTy::iterator I = B.begin(), E = B.end(); I != E; ++I) { + Out << I->first << " : "; + I->second.print(Out); + Out << NL; + } +} + +void ento::registerNullabilityBase(CheckerManager &mgr) { + mgr.registerChecker<NullabilityChecker>(); +} + +bool ento::shouldRegisterNullabilityBase(const LangOptions &LO) { + return true; +} + +#define REGISTER_CHECKER(name, trackingRequired) \ + void ento::register##name##Checker(CheckerManager &mgr) { \ + NullabilityChecker *checker = mgr.getChecker<NullabilityChecker>(); \ + checker->Filter.Check##name = true; \ + checker->Filter.CheckName##name = mgr.getCurrentCheckerName(); \ + checker->NeedTracking = checker->NeedTracking || trackingRequired; \ + checker->NoDiagnoseCallsToSystemHeaders = \ + checker->NoDiagnoseCallsToSystemHeaders || \ + mgr.getAnalyzerOptions().getCheckerBooleanOption( \ + checker, "NoDiagnoseCallsToSystemHeaders", true); \ + } \ + \ + bool ento::shouldRegister##name##Checker(const LangOptions &LO) { \ + return true; \ + } + +// The checks are likely to be turned on by default and it is possible to do +// them without tracking any nullability related information. As an optimization +// no nullability information will be tracked when only these two checks are +// enables. 
+REGISTER_CHECKER(NullPassedToNonnull, false) +REGISTER_CHECKER(NullReturnedFromNonnull, false) + +REGISTER_CHECKER(NullableDereferenced, true) +REGISTER_CHECKER(NullablePassedToNonnull, true) +REGISTER_CHECKER(NullableReturnedFromNonnull, true) diff --git a/clang/lib/StaticAnalyzer/Checkers/NumberObjectConversionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NumberObjectConversionChecker.cpp new file mode 100644 index 000000000000..1053424ae6fa --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/NumberObjectConversionChecker.cpp @@ -0,0 +1,354 @@ +//===- NumberObjectConversionChecker.cpp -------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines NumberObjectConversionChecker, which checks for a +// particular common mistake when dealing with numbers represented as objects +// passed around by pointers. Namely, the language allows to reinterpret the +// pointer as a number directly, often without throwing any warnings, +// but in most cases the result of such conversion is clearly unexpected, +// as pointer value, rather than number value represented by the pointee object, +// becomes the result of such operation. +// +// Currently the checker supports the Objective-C NSNumber class, +// and the OSBoolean class found in macOS low-level code; the latter +// can only hold boolean values. +// +// This checker has an option "Pedantic" (boolean), which enables detection of +// more conversion patterns (which are most likely more harmless, and therefore +// are more likely to produce false positives) - disabled by default, +// enabled with `-analyzer-config osx.NumberObjectConversion:Pedantic=true'. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/APSInt.h" + +using namespace clang; +using namespace ento; +using namespace ast_matchers; + +namespace { + +class NumberObjectConversionChecker : public Checker<check::ASTCodeBody> { +public: + bool Pedantic; + + void checkASTCodeBody(const Decl *D, AnalysisManager &AM, + BugReporter &BR) const; +}; + +class Callback : public MatchFinder::MatchCallback { + const NumberObjectConversionChecker *C; + BugReporter &BR; + AnalysisDeclContext *ADC; + +public: + Callback(const NumberObjectConversionChecker *C, + BugReporter &BR, AnalysisDeclContext *ADC) + : C(C), BR(BR), ADC(ADC) {} + virtual void run(const MatchFinder::MatchResult &Result); +}; +} // end of anonymous namespace + +void Callback::run(const MatchFinder::MatchResult &Result) { + bool IsPedanticMatch = + (Result.Nodes.getNodeAs<Stmt>("pedantic") != nullptr); + if (IsPedanticMatch && !C->Pedantic) + return; + + ASTContext &ACtx = ADC->getASTContext(); + + if (const Expr *CheckIfNull = + Result.Nodes.getNodeAs<Expr>("check_if_null")) { + // Unless the macro indicates that the intended type is clearly not + // a pointer type, we should avoid warning on comparing pointers + // to zero literals in non-pedantic mode. + // FIXME: Introduce an AST matcher to implement the macro-related logic? 
+ bool MacroIndicatesWeShouldSkipTheCheck = false; + SourceLocation Loc = CheckIfNull->getBeginLoc(); + if (Loc.isMacroID()) { + StringRef MacroName = Lexer::getImmediateMacroName( + Loc, ACtx.getSourceManager(), ACtx.getLangOpts()); + if (MacroName == "NULL" || MacroName == "nil") + return; + if (MacroName == "YES" || MacroName == "NO") + MacroIndicatesWeShouldSkipTheCheck = true; + } + if (!MacroIndicatesWeShouldSkipTheCheck) { + Expr::EvalResult EVResult; + if (CheckIfNull->IgnoreParenCasts()->EvaluateAsInt( + EVResult, ACtx, Expr::SE_AllowSideEffects)) { + llvm::APSInt Result = EVResult.Val.getInt(); + if (Result == 0) { + if (!C->Pedantic) + return; + IsPedanticMatch = true; + } + } + } + } + + const Stmt *Conv = Result.Nodes.getNodeAs<Stmt>("conv"); + assert(Conv); + + const Expr *ConvertedCObject = Result.Nodes.getNodeAs<Expr>("c_object"); + const Expr *ConvertedCppObject = Result.Nodes.getNodeAs<Expr>("cpp_object"); + const Expr *ConvertedObjCObject = Result.Nodes.getNodeAs<Expr>("objc_object"); + bool IsCpp = (ConvertedCppObject != nullptr); + bool IsObjC = (ConvertedObjCObject != nullptr); + const Expr *Obj = IsObjC ? ConvertedObjCObject + : IsCpp ? ConvertedCppObject + : ConvertedCObject; + assert(Obj); + + bool IsComparison = + (Result.Nodes.getNodeAs<Stmt>("comparison") != nullptr); + + bool IsOSNumber = + (Result.Nodes.getNodeAs<Decl>("osnumber") != nullptr); + + bool IsInteger = + (Result.Nodes.getNodeAs<QualType>("int_type") != nullptr); + bool IsObjCBool = + (Result.Nodes.getNodeAs<QualType>("objc_bool_type") != nullptr); + bool IsCppBool = + (Result.Nodes.getNodeAs<QualType>("cpp_bool_type") != nullptr); + + llvm::SmallString<64> Msg; + llvm::raw_svector_ostream OS(Msg); + + // Remove ObjC ARC qualifiers. + QualType ObjT = Obj->getType().getUnqualifiedType(); + + // Remove consts from pointers. 
+ if (IsCpp) { + assert(ObjT.getCanonicalType()->isPointerType()); + ObjT = ACtx.getPointerType( + ObjT->getPointeeType().getCanonicalType().getUnqualifiedType()); + } + + if (IsComparison) + OS << "Comparing "; + else + OS << "Converting "; + + OS << "a pointer value of type '" << ObjT.getAsString() << "' to a "; + + std::string EuphemismForPlain = "primitive"; + std::string SuggestedApi = IsObjC ? (IsInteger ? "" : "-boolValue") + : IsCpp ? (IsOSNumber ? "" : "getValue()") + : "CFNumberGetValue()"; + if (SuggestedApi.empty()) { + // A generic message if we're not sure what API should be called. + // FIXME: Pattern-match the integer type to make a better guess? + SuggestedApi = + "a method on '" + ObjT.getAsString() + "' to get the scalar value"; + // "scalar" is not quite correct or common, but some documentation uses it + // when describing object methods we suggest. For consistency, we use + // "scalar" in the whole sentence when we need to use this word in at least + // one place, otherwise we use "primitive". + EuphemismForPlain = "scalar"; + } + + if (IsInteger) + OS << EuphemismForPlain << " integer value"; + else if (IsObjCBool) + OS << EuphemismForPlain << " BOOL value"; + else if (IsCppBool) + OS << EuphemismForPlain << " bool value"; + else // Branch condition? + OS << EuphemismForPlain << " boolean value"; + + + if (IsPedanticMatch) + OS << "; instead, either compare the pointer to " + << (IsObjC ? "nil" : IsCpp ? 
"nullptr" : "NULL") << " or "; + else + OS << "; did you mean to "; + + if (IsComparison) + OS << "compare the result of calling " << SuggestedApi; + else + OS << "call " << SuggestedApi; + + if (!IsPedanticMatch) + OS << "?"; + + BR.EmitBasicReport( + ADC->getDecl(), C, "Suspicious number object conversion", "Logic error", + OS.str(), + PathDiagnosticLocation::createBegin(Obj, BR.getSourceManager(), ADC), + Conv->getSourceRange()); +} + +void NumberObjectConversionChecker::checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const { + // Currently this matches CoreFoundation opaque pointer typedefs. + auto CSuspiciousNumberObjectExprM = + expr(ignoringParenImpCasts( + expr(hasType( + typedefType(hasDeclaration(anyOf( + typedefDecl(hasName("CFNumberRef")), + typedefDecl(hasName("CFBooleanRef"))))))) + .bind("c_object"))); + + // Currently this matches XNU kernel number-object pointers. + auto CppSuspiciousNumberObjectExprM = + expr(ignoringParenImpCasts( + expr(hasType(hasCanonicalType( + pointerType(pointee(hasCanonicalType( + recordType(hasDeclaration( + anyOf( + cxxRecordDecl(hasName("OSBoolean")), + cxxRecordDecl(hasName("OSNumber")) + .bind("osnumber")))))))))) + .bind("cpp_object"))); + + // Currently this matches NeXTSTEP number objects. + auto ObjCSuspiciousNumberObjectExprM = + expr(ignoringParenImpCasts( + expr(hasType(hasCanonicalType( + objcObjectPointerType(pointee( + qualType(hasCanonicalType( + qualType(hasDeclaration( + objcInterfaceDecl(hasName("NSNumber"))))))))))) + .bind("objc_object"))); + + auto SuspiciousNumberObjectExprM = anyOf( + CSuspiciousNumberObjectExprM, + CppSuspiciousNumberObjectExprM, + ObjCSuspiciousNumberObjectExprM); + + // Useful for predicates like "Unless we've seen the same object elsewhere". 
+ auto AnotherSuspiciousNumberObjectExprM = + expr(anyOf( + equalsBoundNode("c_object"), + equalsBoundNode("objc_object"), + equalsBoundNode("cpp_object"))); + + // The .bind here is in order to compose the error message more accurately. + auto ObjCSuspiciousScalarBooleanTypeM = + qualType(typedefType(hasDeclaration( + typedefDecl(hasName("BOOL"))))).bind("objc_bool_type"); + + // The .bind here is in order to compose the error message more accurately. + auto SuspiciousScalarBooleanTypeM = + qualType(anyOf(qualType(booleanType()).bind("cpp_bool_type"), + ObjCSuspiciousScalarBooleanTypeM)); + + // The .bind here is in order to compose the error message more accurately. + // Also avoid intptr_t and uintptr_t because they were specifically created + // for storing pointers. + auto SuspiciousScalarNumberTypeM = + qualType(hasCanonicalType(isInteger()), + unless(typedefType(hasDeclaration( + typedefDecl(matchesName("^::u?intptr_t$")))))) + .bind("int_type"); + + auto SuspiciousScalarTypeM = + qualType(anyOf(SuspiciousScalarBooleanTypeM, + SuspiciousScalarNumberTypeM)); + + auto SuspiciousScalarExprM = + expr(ignoringParenImpCasts(expr(hasType(SuspiciousScalarTypeM)))); + + auto ConversionThroughAssignmentM = + binaryOperator(allOf(hasOperatorName("="), + hasLHS(SuspiciousScalarExprM), + hasRHS(SuspiciousNumberObjectExprM))); + + auto ConversionThroughBranchingM = + ifStmt(allOf( + hasCondition(SuspiciousNumberObjectExprM), + unless(hasConditionVariableStatement(declStmt()) + ))).bind("pedantic"); + + auto ConversionThroughCallM = + callExpr(hasAnyArgument(allOf(hasType(SuspiciousScalarTypeM), + ignoringParenImpCasts( + SuspiciousNumberObjectExprM)))); + + // We bind "check_if_null" to modify the warning message + // in case it was intended to compare a pointer to 0 with a relatively-ok + // construct "x == 0" or "x != 0". 
+ auto ConversionThroughEquivalenceM = + binaryOperator(allOf(anyOf(hasOperatorName("=="), hasOperatorName("!=")), + hasEitherOperand(SuspiciousNumberObjectExprM), + hasEitherOperand(SuspiciousScalarExprM + .bind("check_if_null")))) + .bind("comparison"); + + auto ConversionThroughComparisonM = + binaryOperator(allOf(anyOf(hasOperatorName(">="), hasOperatorName(">"), + hasOperatorName("<="), hasOperatorName("<")), + hasEitherOperand(SuspiciousNumberObjectExprM), + hasEitherOperand(SuspiciousScalarExprM))) + .bind("comparison"); + + auto ConversionThroughConditionalOperatorM = + conditionalOperator(allOf( + hasCondition(SuspiciousNumberObjectExprM), + unless(hasTrueExpression( + hasDescendant(AnotherSuspiciousNumberObjectExprM))), + unless(hasFalseExpression( + hasDescendant(AnotherSuspiciousNumberObjectExprM))))) + .bind("pedantic"); + + auto ConversionThroughExclamationMarkM = + unaryOperator(allOf(hasOperatorName("!"), + has(expr(SuspiciousNumberObjectExprM)))) + .bind("pedantic"); + + auto ConversionThroughExplicitBooleanCastM = + explicitCastExpr(allOf(hasType(SuspiciousScalarBooleanTypeM), + has(expr(SuspiciousNumberObjectExprM)))); + + auto ConversionThroughExplicitNumberCastM = + explicitCastExpr(allOf(hasType(SuspiciousScalarNumberTypeM), + has(expr(SuspiciousNumberObjectExprM)))); + + auto ConversionThroughInitializerM = + declStmt(hasSingleDecl( + varDecl(hasType(SuspiciousScalarTypeM), + hasInitializer(SuspiciousNumberObjectExprM)))); + + auto FinalM = stmt(anyOf(ConversionThroughAssignmentM, + ConversionThroughBranchingM, + ConversionThroughCallM, + ConversionThroughComparisonM, + ConversionThroughConditionalOperatorM, + ConversionThroughEquivalenceM, + ConversionThroughExclamationMarkM, + ConversionThroughExplicitBooleanCastM, + ConversionThroughExplicitNumberCastM, + ConversionThroughInitializerM)).bind("conv"); + + MatchFinder F; + Callback CB(this, BR, AM.getAnalysisDeclContext(D)); + + F.addMatcher(stmt(forEachDescendant(FinalM)), &CB); + 
F.match(*D->getBody(), AM.getASTContext()); +} + +void ento::registerNumberObjectConversionChecker(CheckerManager &Mgr) { + NumberObjectConversionChecker *Chk = + Mgr.registerChecker<NumberObjectConversionChecker>(); + Chk->Pedantic = + Mgr.getAnalyzerOptions().getCheckerBooleanOption(Chk, "Pedantic"); +} + +bool ento::shouldRegisterNumberObjectConversionChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp b/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp new file mode 100644 index 000000000000..5b9895c338d8 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp @@ -0,0 +1,90 @@ +//===- OSObjectCStyleCast.cpp ------------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines OSObjectCStyleCast checker, which checks for C-style casts +// of OSObjects. Such casts almost always indicate a code smell, +// as an explicit static or dynamic cast should be used instead. 
+//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "llvm/Support/Debug.h" + +using namespace clang; +using namespace ento; +using namespace ast_matchers; + +namespace { + +const char *WarnAtNode = "OSObjCast"; + +class OSObjectCStyleCastChecker : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const; +}; + +static void emitDiagnostics(const BoundNodes &Nodes, + BugReporter &BR, + AnalysisDeclContext *ADC, + const OSObjectCStyleCastChecker *Checker) { + const auto *CE = Nodes.getNodeAs<CastExpr>(WarnAtNode); + assert(CE); + + std::string Diagnostics; + llvm::raw_string_ostream OS(Diagnostics); + OS << "C-style cast of OSObject. 
Use OSDynamicCast instead."; + + BR.EmitBasicReport( + ADC->getDecl(), + Checker, + /*Name=*/"OSObject C-Style Cast", + /*BugCategory=*/"Security", + OS.str(), + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), ADC), + CE->getSourceRange()); +} + +static auto hasTypePointingTo(DeclarationMatcher DeclM) + -> decltype(hasType(pointerType())) { + return hasType(pointerType(pointee(hasDeclaration(DeclM)))); +} + +void OSObjectCStyleCastChecker::checkASTCodeBody(const Decl *D, AnalysisManager &AM, + BugReporter &BR) const { + + AnalysisDeclContext *ADC = AM.getAnalysisDeclContext(D); + + auto DynamicCastM = callExpr(callee(functionDecl(hasName("safeMetaCast")))); + + auto OSObjTypeM = hasTypePointingTo(cxxRecordDecl(isDerivedFrom("OSMetaClassBase"))); + auto OSObjSubclassM = hasTypePointingTo( + cxxRecordDecl(isDerivedFrom("OSObject"))); + + auto CastM = cStyleCastExpr( + allOf(hasSourceExpression(allOf(OSObjTypeM, unless(DynamicCastM))), + OSObjSubclassM)).bind(WarnAtNode); + + auto Matches = match(stmt(forEachDescendant(CastM)), *D->getBody(), AM.getASTContext()); + for (BoundNodes Match : Matches) + emitDiagnostics(Match, BR, ADC, this); +} +} + +void ento::registerOSObjectCStyleCast(CheckerManager &Mgr) { + Mgr.registerChecker<OSObjectCStyleCastChecker>(); +} + +bool ento::shouldRegisterOSObjectCStyleCast(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp new file mode 100644 index 000000000000..0e25817c8793 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp @@ -0,0 +1,96 @@ +//== ObjCAtSyncChecker.cpp - nil mutex checker for @synchronized -*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines ObjCAtSyncChecker, a builtin check that checks for null pointers +// used as mutexes for @synchronized. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/StmtObjC.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" + +using namespace clang; +using namespace ento; + +namespace { +class ObjCAtSyncChecker + : public Checker< check::PreStmt<ObjCAtSynchronizedStmt> > { + mutable std::unique_ptr<BuiltinBug> BT_null; + mutable std::unique_ptr<BuiltinBug> BT_undef; + +public: + void checkPreStmt(const ObjCAtSynchronizedStmt *S, CheckerContext &C) const; +}; +} // end anonymous namespace + +void ObjCAtSyncChecker::checkPreStmt(const ObjCAtSynchronizedStmt *S, + CheckerContext &C) const { + + const Expr *Ex = S->getSynchExpr(); + ProgramStateRef state = C.getState(); + SVal V = C.getSVal(Ex); + + // Uninitialized value used for the mutex? + if (V.getAs<UndefinedVal>()) { + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT_undef) + BT_undef.reset(new BuiltinBug(this, "Uninitialized value used as mutex " + "for @synchronized")); + auto report = std::make_unique<PathSensitiveBugReport>( + *BT_undef, BT_undef->getDescription(), N); + bugreporter::trackExpressionValue(N, Ex, *report); + C.emitReport(std::move(report)); + } + return; + } + + if (V.isUnknown()) + return; + + // Check for null mutexes. 
+ ProgramStateRef notNullState, nullState; + std::tie(notNullState, nullState) = state->assume(V.castAs<DefinedSVal>()); + + if (nullState) { + if (!notNullState) { + // Generate an error node. This isn't a sink since + // a null mutex just means no synchronization occurs. + if (ExplodedNode *N = C.generateNonFatalErrorNode(nullState)) { + if (!BT_null) + BT_null.reset(new BuiltinBug( + this, "Nil value used as mutex for @synchronized() " + "(no synchronization will occur)")); + auto report = std::make_unique<PathSensitiveBugReport>( + *BT_null, BT_null->getDescription(), N); + bugreporter::trackExpressionValue(N, Ex, *report); + + C.emitReport(std::move(report)); + return; + } + } + // Don't add a transition for 'nullState'. If the value is + // under-constrained to be null or non-null, assume it is non-null + // afterwards. + } + + if (notNullState) + C.addTransition(notNullState); +} + +void ento::registerObjCAtSyncChecker(CheckerManager &mgr) { + mgr.registerChecker<ObjCAtSyncChecker>(); +} + +bool ento::shouldRegisterObjCAtSyncChecker(const LangOptions &LO) { + return LO.ObjC; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp new file mode 100644 index 000000000000..d2371fe60d21 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp @@ -0,0 +1,212 @@ +//===- ObjCAutoreleaseWriteChecker.cpp ----------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines ObjCAutoreleaseWriteChecker which warns against writes +// into autoreleased out parameters which cause crashes. 
+// An example of a problematic write is a write to {@code error} in the example +// below: +// +// - (BOOL) mymethod:(NSError *__autoreleasing *)error list:(NSArray*) list { +// [list enumerateObjectsUsingBlock:^(id obj, NSUInteger idx, BOOL *stop) { +// NSString *myString = obj; +// if ([myString isEqualToString:@"error"] && error) +// *error = [NSError errorWithDomain:@"MyDomain" code:-1]; +// }]; +// return false; +// } +// +// Such code will crash on read from `*error` due to the autorelease pool +// in `enumerateObjectsUsingBlock` implementation freeing the error object +// on exit from the function. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "llvm/ADT/Twine.h" + +using namespace clang; +using namespace ento; +using namespace ast_matchers; + +namespace { + +const char *ProblematicWriteBind = "problematicwrite"; +const char *CapturedBind = "capturedbind"; +const char *ParamBind = "parambind"; +const char *IsMethodBind = "ismethodbind"; + +class ObjCAutoreleaseWriteChecker : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const; +private: + std::vector<std::string> SelectorsWithAutoreleasingPool = { + // Common to NSArray, NSSet, NSOrderedSet + "enumerateObjectsUsingBlock:", + "enumerateObjectsWithOptions:usingBlock:", + + // Common to NSArray and NSOrderedSet + "enumerateObjectsAtIndexes:options:usingBlock:", + "indexOfObjectAtIndexes:options:passingTest:", + "indexesOfObjectsAtIndexes:options:passingTest:", + "indexOfObjectPassingTest:", + 
"indexOfObjectWithOptions:passingTest:", + "indexesOfObjectsPassingTest:", + "indexesOfObjectsWithOptions:passingTest:", + + // NSDictionary + "enumerateKeysAndObjectsUsingBlock:", + "enumerateKeysAndObjectsWithOptions:usingBlock:", + "keysOfEntriesPassingTest:", + "keysOfEntriesWithOptions:passingTest:", + + // NSSet + "objectsPassingTest:", + "objectsWithOptions:passingTest:", + "enumerateIndexPathsWithOptions:usingBlock:", + + // NSIndexSet + "enumerateIndexesWithOptions:usingBlock:", + "enumerateIndexesUsingBlock:", + "enumerateIndexesInRange:options:usingBlock:", + "enumerateRangesUsingBlock:", + "enumerateRangesWithOptions:usingBlock:", + "enumerateRangesInRange:options:usingBlock:", + "indexPassingTest:", + "indexesPassingTest:", + "indexWithOptions:passingTest:", + "indexesWithOptions:passingTest:", + "indexInRange:options:passingTest:", + "indexesInRange:options:passingTest:" + }; + + std::vector<std::string> FunctionsWithAutoreleasingPool = { + "dispatch_async", "dispatch_group_async", "dispatch_barrier_async"}; +}; +} + +static inline std::vector<llvm::StringRef> toRefs(std::vector<std::string> V) { + return std::vector<llvm::StringRef>(V.begin(), V.end()); +} + +static auto callsNames(std::vector<std::string> FunctionNames) + -> decltype(callee(functionDecl())) { + return callee(functionDecl(hasAnyName(toRefs(FunctionNames)))); +} + +static void emitDiagnostics(BoundNodes &Match, const Decl *D, BugReporter &BR, + AnalysisManager &AM, + const ObjCAutoreleaseWriteChecker *Checker) { + AnalysisDeclContext *ADC = AM.getAnalysisDeclContext(D); + + const auto *PVD = Match.getNodeAs<ParmVarDecl>(ParamBind); + QualType Ty = PVD->getType(); + if (Ty->getPointeeType().getObjCLifetime() != Qualifiers::OCL_Autoreleasing) + return; + const char *ActionMsg = "Write to"; + const auto *MarkedStmt = Match.getNodeAs<Expr>(ProblematicWriteBind); + bool IsCapture = false; + + // Prefer to warn on write, but if not available, warn on capture. 
+ if (!MarkedStmt) { + MarkedStmt = Match.getNodeAs<Expr>(CapturedBind); + assert(MarkedStmt); + ActionMsg = "Capture of"; + IsCapture = true; + } + + SourceRange Range = MarkedStmt->getSourceRange(); + PathDiagnosticLocation Location = PathDiagnosticLocation::createBegin( + MarkedStmt, BR.getSourceManager(), ADC); + bool IsMethod = Match.getNodeAs<ObjCMethodDecl>(IsMethodBind) != nullptr; + const char *Name = IsMethod ? "method" : "function"; + + BR.EmitBasicReport( + ADC->getDecl(), Checker, + /*Name=*/(llvm::Twine(ActionMsg) + + " autoreleasing out parameter inside autorelease pool").str(), + /*BugCategory=*/"Memory", + (llvm::Twine(ActionMsg) + " autoreleasing out parameter " + + (IsCapture ? "'" + PVD->getName() + "'" + " " : "") + "inside " + + "autorelease pool that may exit before " + Name + " returns; consider " + "writing first to a strong local variable declared outside of the block") + .str(), + Location, + Range); +} + +void ObjCAutoreleaseWriteChecker::checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const { + + auto DoublePointerParamM = + parmVarDecl(hasType(hasCanonicalType(pointerType( + pointee(hasCanonicalType(objcObjectPointerType())))))) + .bind(ParamBind); + + auto ReferencedParamM = + declRefExpr(to(parmVarDecl(DoublePointerParamM))).bind(CapturedBind); + + // Write into a binded object, e.g. *ParamBind = X. + auto WritesIntoM = binaryOperator( + hasLHS(unaryOperator( + hasOperatorName("*"), + hasUnaryOperand( + ignoringParenImpCasts(ReferencedParamM)) + )), + hasOperatorName("=") + ).bind(ProblematicWriteBind); + + auto ArgumentCaptureM = hasAnyArgument( + ignoringParenImpCasts(ReferencedParamM)); + auto CapturedInParamM = stmt(anyOf( + callExpr(ArgumentCaptureM), + objcMessageExpr(ArgumentCaptureM))); + + // WritesIntoM happens inside a block passed as an argument. 
+ auto WritesOrCapturesInBlockM = hasAnyArgument(allOf( + hasType(hasCanonicalType(blockPointerType())), + forEachDescendant( + stmt(anyOf(WritesIntoM, CapturedInParamM)) + ))); + + auto BlockPassedToMarkedFuncM = stmt(anyOf( + callExpr(allOf( + callsNames(FunctionsWithAutoreleasingPool), WritesOrCapturesInBlockM)), + objcMessageExpr(allOf( + hasAnySelector(toRefs(SelectorsWithAutoreleasingPool)), + WritesOrCapturesInBlockM)) + )); + + auto HasParamAndWritesInMarkedFuncM = allOf( + hasAnyParameter(DoublePointerParamM), + forEachDescendant(BlockPassedToMarkedFuncM)); + + auto MatcherM = decl(anyOf( + objcMethodDecl(HasParamAndWritesInMarkedFuncM).bind(IsMethodBind), + functionDecl(HasParamAndWritesInMarkedFuncM), + blockDecl(HasParamAndWritesInMarkedFuncM))); + + auto Matches = match(MatcherM, *D, AM.getASTContext()); + for (BoundNodes Match : Matches) + emitDiagnostics(Match, D, BR, AM, this); +} + +void ento::registerAutoreleaseWriteChecker(CheckerManager &Mgr) { + Mgr.registerChecker<ObjCAutoreleaseWriteChecker>(); +} + +bool ento::shouldRegisterAutoreleaseWriteChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCContainersASTChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersASTChecker.cpp new file mode 100644 index 000000000000..4450c464f89d --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersASTChecker.cpp @@ -0,0 +1,177 @@ +//== ObjCContainersASTChecker.cpp - CoreFoundation containers API *- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// An AST checker that looks for common pitfalls when using 'CFArray', +// 'CFDictionary', 'CFSet' APIs. 
+// +//===----------------------------------------------------------------------===// +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +class WalkAST : public StmtVisitor<WalkAST> { + BugReporter &BR; + const CheckerBase *Checker; + AnalysisDeclContext* AC; + ASTContext &ASTC; + uint64_t PtrWidth; + + /// Check if the type has pointer size (very conservative). + inline bool isPointerSize(const Type *T) { + if (!T) + return true; + if (T->isIncompleteType()) + return true; + return (ASTC.getTypeSize(T) == PtrWidth); + } + + /// Check if the type is a pointer/array to pointer sized values. + inline bool hasPointerToPointerSizedType(const Expr *E) { + QualType T = E->getType(); + + // The type could be either a pointer or array. + const Type *TP = T.getTypePtr(); + QualType PointeeT = TP->getPointeeType(); + if (!PointeeT.isNull()) { + // If the type is a pointer to an array, check the size of the array + // elements. To avoid false positives coming from assumption that the + // values x and &x are equal when x is an array. + if (const Type *TElem = PointeeT->getArrayElementTypeNoTypeQual()) + if (isPointerSize(TElem)) + return true; + + // Else, check the pointee size. + return isPointerSize(PointeeT.getTypePtr()); + } + + if (const Type *TElem = TP->getArrayElementTypeNoTypeQual()) + return isPointerSize(TElem); + + // The type must be an array/pointer type. + + // This could be a null constant, which is allowed. 
+ return static_cast<bool>( + E->isNullPointerConstant(ASTC, Expr::NPC_ValueDependentIsNull)); + } + +public: + WalkAST(BugReporter &br, const CheckerBase *checker, AnalysisDeclContext *ac) + : BR(br), Checker(checker), AC(ac), ASTC(AC->getASTContext()), + PtrWidth(ASTC.getTargetInfo().getPointerWidth(0)) {} + + // Statement visitor methods. + void VisitChildren(Stmt *S); + void VisitStmt(Stmt *S) { VisitChildren(S); } + void VisitCallExpr(CallExpr *CE); +}; +} // end anonymous namespace + +static StringRef getCalleeName(CallExpr *CE) { + const FunctionDecl *FD = CE->getDirectCallee(); + if (!FD) + return StringRef(); + + IdentifierInfo *II = FD->getIdentifier(); + if (!II) // if no identifier, not a simple C function + return StringRef(); + + return II->getName(); +} + +void WalkAST::VisitCallExpr(CallExpr *CE) { + StringRef Name = getCalleeName(CE); + if (Name.empty()) + return; + + const Expr *Arg = nullptr; + unsigned ArgNum; + + if (Name.equals("CFArrayCreate") || Name.equals("CFSetCreate")) { + if (CE->getNumArgs() != 4) + return; + ArgNum = 1; + Arg = CE->getArg(ArgNum)->IgnoreParenCasts(); + if (hasPointerToPointerSizedType(Arg)) + return; + } else if (Name.equals("CFDictionaryCreate")) { + if (CE->getNumArgs() != 6) + return; + // Check first argument. + ArgNum = 1; + Arg = CE->getArg(ArgNum)->IgnoreParenCasts(); + if (hasPointerToPointerSizedType(Arg)) { + // Check second argument. + ArgNum = 2; + Arg = CE->getArg(ArgNum)->IgnoreParenCasts(); + if (hasPointerToPointerSizedType(Arg)) + // Both are good, return. + return; + } + } + + if (Arg) { + assert(ArgNum == 1 || ArgNum == 2); + + SmallString<64> BufName; + llvm::raw_svector_ostream OsName(BufName); + OsName << " Invalid use of '" << Name << "'" ; + + SmallString<256> Buf; + llvm::raw_svector_ostream Os(Buf); + // Use "second" and "third" since users will expect 1-based indexing + // for parameter names when mentioned in prose. + Os << " The "<< ((ArgNum == 1) ? 
"second" : "third") << " argument to '" + << Name << "' must be a C array of pointer-sized values, not '" + << Arg->getType().getAsString() << "'"; + + PathDiagnosticLocation CELoc = + PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), AC); + BR.EmitBasicReport(AC->getDecl(), Checker, OsName.str(), + categories::CoreFoundationObjectiveC, Os.str(), CELoc, + Arg->getSourceRange()); + } + + // Recurse and check children. + VisitChildren(CE); +} + +void WalkAST::VisitChildren(Stmt *S) { + for (Stmt *Child : S->children()) + if (Child) + Visit(Child); +} + +namespace { +class ObjCContainersASTChecker : public Checker<check::ASTCodeBody> { +public: + + void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr, + BugReporter &BR) const { + WalkAST walker(BR, this, Mgr.getAnalysisDeclContext(D)); + walker.Visit(D->getBody()); + } +}; +} + +void ento::registerObjCContainersASTChecker(CheckerManager &mgr) { + mgr.registerChecker<ObjCContainersASTChecker>(); +} + +bool ento::shouldRegisterObjCContainersASTChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp new file mode 100644 index 000000000000..8abb926d4862 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp @@ -0,0 +1,193 @@ +//== ObjCContainersChecker.cpp - Path sensitive checker for CFArray *- C++ -*=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Performs path sensitive checks of Core Foundation static containers like +// CFArray. 
+// 1) Check for buffer overflows: +// In CFArrayGetArrayAtIndex( myArray, index), if the index is outside the +// index space of theArray (0 to N-1 inclusive (where N is the count of +// theArray), the behavior is undefined. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/ParentMap.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" + +using namespace clang; +using namespace ento; + +namespace { +class ObjCContainersChecker : public Checker< check::PreStmt<CallExpr>, + check::PostStmt<CallExpr>, + check::PointerEscape> { + mutable std::unique_ptr<BugType> BT; + inline void initBugType() const { + if (!BT) + BT.reset(new BugType(this, "CFArray API", + categories::CoreFoundationObjectiveC)); + } + + inline SymbolRef getArraySym(const Expr *E, CheckerContext &C) const { + SVal ArrayRef = C.getSVal(E); + SymbolRef ArraySym = ArrayRef.getAsSymbol(); + return ArraySym; + } + + void addSizeInfo(const Expr *Array, const Expr *Size, + CheckerContext &C) const; + +public: + /// A tag to id this checker. + static void *getTag() { static int Tag; return &Tag; } + + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + ProgramStateRef checkPointerEscape(ProgramStateRef State, + const InvalidatedSymbols &Escaped, + const CallEvent *Call, + PointerEscapeKind Kind) const; + + void printState(raw_ostream &OS, ProgramStateRef State, + const char *NL, const char *Sep) const; +}; +} // end anonymous namespace + +// ProgramState trait - a map from array symbol to its state. 
+REGISTER_MAP_WITH_PROGRAMSTATE(ArraySizeMap, SymbolRef, DefinedSVal) + +void ObjCContainersChecker::addSizeInfo(const Expr *Array, const Expr *Size, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + SVal SizeV = C.getSVal(Size); + // Undefined is reported by another checker. + if (SizeV.isUnknownOrUndef()) + return; + + // Get the ArrayRef symbol. + SVal ArrayRef = C.getSVal(Array); + SymbolRef ArraySym = ArrayRef.getAsSymbol(); + if (!ArraySym) + return; + + C.addTransition( + State->set<ArraySizeMap>(ArraySym, SizeV.castAs<DefinedSVal>())); +} + +void ObjCContainersChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + StringRef Name = C.getCalleeName(CE); + if (Name.empty() || CE->getNumArgs() < 1) + return; + + // Add array size information to the state. + if (Name.equals("CFArrayCreate")) { + if (CE->getNumArgs() < 3) + return; + // Note, we can visit the Create method in the post-visit because + // the CFIndex parameter is passed in by value and will not be invalidated + // by the call. + addSizeInfo(CE, CE->getArg(2), C); + return; + } + + if (Name.equals("CFArrayGetCount")) { + addSizeInfo(CE->getArg(0), CE, C); + return; + } +} + +void ObjCContainersChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + StringRef Name = C.getCalleeName(CE); + if (Name.empty() || CE->getNumArgs() < 2) + return; + + // Check the array access. + if (Name.equals("CFArrayGetValueAtIndex")) { + ProgramStateRef State = C.getState(); + // Retrieve the size. + // Find out if we saw this array symbol before and have information about + // it. + const Expr *ArrayExpr = CE->getArg(0); + SymbolRef ArraySym = getArraySym(ArrayExpr, C); + if (!ArraySym) + return; + + const DefinedSVal *Size = State->get<ArraySizeMap>(ArraySym); + + if (!Size) + return; + + // Get the index. 
+ const Expr *IdxExpr = CE->getArg(1); + SVal IdxVal = C.getSVal(IdxExpr); + if (IdxVal.isUnknownOrUndef()) + return; + DefinedSVal Idx = IdxVal.castAs<DefinedSVal>(); + + // Now, check if 'Idx in [0, Size-1]'. + const QualType T = IdxExpr->getType(); + ProgramStateRef StInBound = State->assumeInBound(Idx, *Size, true, T); + ProgramStateRef StOutBound = State->assumeInBound(Idx, *Size, false, T); + if (StOutBound && !StInBound) { + ExplodedNode *N = C.generateErrorNode(StOutBound); + if (!N) + return; + initBugType(); + auto R = std::make_unique<PathSensitiveBugReport>( + *BT, "Index is out of bounds", N); + R->addRange(IdxExpr->getSourceRange()); + bugreporter::trackExpressionValue( + N, IdxExpr, *R, bugreporter::TrackingKind::Thorough, false); + C.emitReport(std::move(R)); + return; + } + } +} + +ProgramStateRef +ObjCContainersChecker::checkPointerEscape(ProgramStateRef State, + const InvalidatedSymbols &Escaped, + const CallEvent *Call, + PointerEscapeKind Kind) const { + for (const auto &Sym : Escaped) { + // When a symbol for a mutable array escapes, we can't reason precisely + // about its size any more -- so remove it from the map. + // Note that we aren't notified here when a CFMutableArrayRef escapes as a + // CFArrayRef. This is because CFArrayRef is typedef'd as a pointer to a + // const-qualified type. + State = State->remove<ArraySizeMap>(Sym); + } + return State; +} + +void ObjCContainersChecker::printState(raw_ostream &OS, ProgramStateRef State, + const char *NL, const char *Sep) const { + ArraySizeMapTy Map = State->get<ArraySizeMap>(); + if (Map.isEmpty()) + return; + + OS << Sep << "ObjC container sizes :" << NL; + for (auto I : Map) { + OS << I.first << " : " << I.second << NL; + } +} + +/// Register checker. 
// Registers ObjCContainersChecker with the given checker manager.
void ento::registerObjCContainersChecker(CheckerManager &mgr) {
  mgr.registerChecker<ObjCContainersChecker>();
}

// Always returns true: the language options in LO are not consulted.
bool ento::shouldRegisterObjCContainersChecker(const LangOptions &LO) {
  return true;
}
diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCMissingSuperCallChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCMissingSuperCallChecker.cpp
new file mode 100644
index 000000000000..1870c08432de
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/ObjCMissingSuperCallChecker.cpp
@@ -0,0 +1,265 @@
//==- ObjCMissingSuperCallChecker.cpp - Check missing super-calls in ObjC --==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file defines an ObjCMissingSuperCallChecker, a checker that
//  analyzes a UIViewController implementation to determine if it
//  correctly calls super in the methods where this is mandatory.
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/Analysis/PathDiagnostic.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprObjC.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +struct SelectorDescriptor { + const char *SelectorName; + unsigned ArgumentCount; +}; + +//===----------------------------------------------------------------------===// +// FindSuperCallVisitor - Identify specific calls to the superclass. +//===----------------------------------------------------------------------===// + +class FindSuperCallVisitor : public RecursiveASTVisitor<FindSuperCallVisitor> { +public: + explicit FindSuperCallVisitor(Selector S) : DoesCallSuper(false), Sel(S) {} + + bool VisitObjCMessageExpr(ObjCMessageExpr *E) { + if (E->getSelector() == Sel) + if (E->getReceiverKind() == ObjCMessageExpr::SuperInstance) + DoesCallSuper = true; + + // Recurse if we didn't find the super call yet. 
+ return !DoesCallSuper; + } + + bool DoesCallSuper; + +private: + Selector Sel; +}; + +//===----------------------------------------------------------------------===// +// ObjCSuperCallChecker +//===----------------------------------------------------------------------===// + +class ObjCSuperCallChecker : public Checker< + check::ASTDecl<ObjCImplementationDecl> > { +public: + ObjCSuperCallChecker() : IsInitialized(false) {} + + void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager &Mgr, + BugReporter &BR) const; +private: + bool isCheckableClass(const ObjCImplementationDecl *D, + StringRef &SuperclassName) const; + void initializeSelectors(ASTContext &Ctx) const; + void fillSelectors(ASTContext &Ctx, ArrayRef<SelectorDescriptor> Sel, + StringRef ClassName) const; + mutable llvm::StringMap<llvm::SmallSet<Selector, 16> > SelectorsForClass; + mutable bool IsInitialized; +}; + +} + +/// Determine whether the given class has a superclass that we want +/// to check. The name of the found superclass is stored in SuperclassName. +/// +/// \param D The declaration to check for superclasses. +/// \param[out] SuperclassName On return, the found superclass name. +bool ObjCSuperCallChecker::isCheckableClass(const ObjCImplementationDecl *D, + StringRef &SuperclassName) const { + const ObjCInterfaceDecl *ID = D->getClassInterface()->getSuperClass(); + for ( ; ID ; ID = ID->getSuperClass()) + { + SuperclassName = ID->getIdentifier()->getName(); + if (SelectorsForClass.count(SuperclassName)) + return true; + } + return false; +} + +void ObjCSuperCallChecker::fillSelectors(ASTContext &Ctx, + ArrayRef<SelectorDescriptor> Sel, + StringRef ClassName) const { + llvm::SmallSet<Selector, 16> &ClassSelectors = SelectorsForClass[ClassName]; + // Fill the Selectors SmallSet with all selectors we want to check. 
+ for (ArrayRef<SelectorDescriptor>::iterator I = Sel.begin(), E = Sel.end(); + I != E; ++I) { + SelectorDescriptor Descriptor = *I; + assert(Descriptor.ArgumentCount <= 1); // No multi-argument selectors yet. + + // Get the selector. + IdentifierInfo *II = &Ctx.Idents.get(Descriptor.SelectorName); + + Selector Sel = Ctx.Selectors.getSelector(Descriptor.ArgumentCount, &II); + ClassSelectors.insert(Sel); + } +} + +void ObjCSuperCallChecker::initializeSelectors(ASTContext &Ctx) const { + + { // Initialize selectors for: UIViewController + const SelectorDescriptor Selectors[] = { + { "addChildViewController", 1 }, + { "viewDidAppear", 1 }, + { "viewDidDisappear", 1 }, + { "viewWillAppear", 1 }, + { "viewWillDisappear", 1 }, + { "removeFromParentViewController", 0 }, + { "didReceiveMemoryWarning", 0 }, + { "viewDidUnload", 0 }, + { "viewDidLoad", 0 }, + { "viewWillUnload", 0 }, + { "updateViewConstraints", 0 }, + { "encodeRestorableStateWithCoder", 1 }, + { "restoreStateWithCoder", 1 }}; + + fillSelectors(Ctx, Selectors, "UIViewController"); + } + + { // Initialize selectors for: UIResponder + const SelectorDescriptor Selectors[] = { + { "resignFirstResponder", 0 }}; + + fillSelectors(Ctx, Selectors, "UIResponder"); + } + + { // Initialize selectors for: NSResponder + const SelectorDescriptor Selectors[] = { + { "encodeRestorableStateWithCoder", 1 }, + { "restoreStateWithCoder", 1 }}; + + fillSelectors(Ctx, Selectors, "NSResponder"); + } + + { // Initialize selectors for: NSDocument + const SelectorDescriptor Selectors[] = { + { "encodeRestorableStateWithCoder", 1 }, + { "restoreStateWithCoder", 1 }}; + + fillSelectors(Ctx, Selectors, "NSDocument"); + } + + IsInitialized = true; +} + +void ObjCSuperCallChecker::checkASTDecl(const ObjCImplementationDecl *D, + AnalysisManager &Mgr, + BugReporter &BR) const { + ASTContext &Ctx = BR.getContext(); + + // We need to initialize the selector table once. 
+ if (!IsInitialized) + initializeSelectors(Ctx); + + // Find out whether this class has a superclass that we are supposed to check. + StringRef SuperclassName; + if (!isCheckableClass(D, SuperclassName)) + return; + + + // Iterate over all instance methods. + for (auto *MD : D->instance_methods()) { + Selector S = MD->getSelector(); + // Find out whether this is a selector that we want to check. + if (!SelectorsForClass[SuperclassName].count(S)) + continue; + + // Check if the method calls its superclass implementation. + if (MD->getBody()) + { + FindSuperCallVisitor Visitor(S); + Visitor.TraverseDecl(MD); + + // It doesn't call super, emit a diagnostic. + if (!Visitor.DoesCallSuper) { + PathDiagnosticLocation DLoc = + PathDiagnosticLocation::createEnd(MD->getBody(), + BR.getSourceManager(), + Mgr.getAnalysisDeclContext(D)); + + const char *Name = "Missing call to superclass"; + SmallString<320> Buf; + llvm::raw_svector_ostream os(Buf); + + os << "The '" << S.getAsString() + << "' instance method in " << SuperclassName.str() << " subclass '" + << *D << "' is missing a [super " << S.getAsString() << "] call"; + + BR.EmitBasicReport(MD, this, Name, categories::CoreFoundationObjectiveC, + os.str(), DLoc); + } + } + } +} + + +//===----------------------------------------------------------------------===// +// Check registration. +//===----------------------------------------------------------------------===// + +void ento::registerObjCSuperCallChecker(CheckerManager &Mgr) { + Mgr.registerChecker<ObjCSuperCallChecker>(); +} + +bool ento::shouldRegisterObjCSuperCallChecker(const LangOptions &LO) { + return true; +} + +/* + ToDo list for expanding this check in the future, the list is not exhaustive. + There are also cases where calling super is suggested but not "mandatory". + In addition to be able to check the classes and methods below, architectural + improvements like being able to allow for the super-call to be done in a called + method would be good too. 
+ +UIDocument subclasses +- finishedHandlingError:recovered: (is multi-arg) +- finishedHandlingError:recovered: (is multi-arg) + +UIViewController subclasses +- loadView (should *never* call super) +- transitionFromViewController:toViewController: + duration:options:animations:completion: (is multi-arg) + +UICollectionViewController subclasses +- loadView (take care because UIViewController subclasses should NOT call super + in loadView, but UICollectionViewController subclasses should) + +NSObject subclasses +- doesNotRecognizeSelector (it only has to call super if it doesn't throw) + +UIPopoverBackgroundView subclasses (some of those are class methods) +- arrowDirection (should *never* call super) +- arrowOffset (should *never* call super) +- arrowBase (should *never* call super) +- arrowHeight (should *never* call super) +- contentViewInsets (should *never* call super) + +UITextSelectionRect subclasses (some of those are properties) +- rect (should *never* call super) +- range (should *never* call super) +- writingDirection (should *never* call super) +- isVertical (should *never* call super) +- containsStart (should *never* call super) +- containsEnd (should *never* call super) +*/ diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCPropertyChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCPropertyChecker.cpp new file mode 100644 index 000000000000..9a49200545e3 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCPropertyChecker.cpp @@ -0,0 +1,84 @@ +//==- ObjCPropertyChecker.cpp - Check ObjC properties ------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker finds issues with Objective-C properties. 
+// Currently finds only one kind of issue: +// - Find synthesized properties with copy attribute of mutable NS collection +// types. Calling -copy on such collections produces an immutable copy, +// which contradicts the type of the property. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" + +using namespace clang; +using namespace ento; + +namespace { +class ObjCPropertyChecker + : public Checker<check::ASTDecl<ObjCPropertyDecl>> { + void checkCopyMutable(const ObjCPropertyDecl *D, BugReporter &BR) const; + +public: + void checkASTDecl(const ObjCPropertyDecl *D, AnalysisManager &Mgr, + BugReporter &BR) const; +}; +} // end anonymous namespace. + +void ObjCPropertyChecker::checkASTDecl(const ObjCPropertyDecl *D, + AnalysisManager &Mgr, + BugReporter &BR) const { + checkCopyMutable(D, BR); +} + +void ObjCPropertyChecker::checkCopyMutable(const ObjCPropertyDecl *D, + BugReporter &BR) const { + if (D->isReadOnly() || D->getSetterKind() != ObjCPropertyDecl::Copy) + return; + + QualType T = D->getType(); + if (!T->isObjCObjectPointerType()) + return; + + const std::string &PropTypeName(T->getPointeeType().getCanonicalType() + .getUnqualifiedType() + .getAsString()); + if (!StringRef(PropTypeName).startswith("NSMutable")) + return; + + const ObjCImplDecl *ImplD = nullptr; + if (const ObjCInterfaceDecl *IntD = + dyn_cast<ObjCInterfaceDecl>(D->getDeclContext())) { + ImplD = IntD->getImplementation(); + } else if (auto *CatD = dyn_cast<ObjCCategoryDecl>(D->getDeclContext())) { + ImplD = CatD->getClassInterface()->getImplementation(); + } + + if (!ImplD || ImplD->HasUserDeclaredSetterMethod(D)) + return; + + SmallString<128> Str; + llvm::raw_svector_ostream OS(Str); + OS << "Property of mutable type '" << PropTypeName + << "' has 'copy' attribute; an 
immutable object will be stored instead"; + + BR.EmitBasicReport( + D, this, "Objective-C property misuse", "Logic error", OS.str(), + PathDiagnosticLocation::createBegin(D, BR.getSourceManager()), + D->getSourceRange()); +} + +void ento::registerObjCPropertyChecker(CheckerManager &Mgr) { + Mgr.registerChecker<ObjCPropertyChecker>(); +} + +bool ento::shouldRegisterObjCPropertyChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp new file mode 100644 index 000000000000..344285750f0e --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp @@ -0,0 +1,442 @@ +//== ObjCSelfInitChecker.cpp - Checker for 'self' initialization -*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines ObjCSelfInitChecker, a builtin check that checks for uses of +// 'self' before proper initialization. +// +//===----------------------------------------------------------------------===// + +// This checks initialization methods to verify that they assign 'self' to the +// result of an initialization call (e.g. [super init], or [self initWith..]) +// before using 'self' or any instance variable. +// +// To perform the required checking, values are tagged with flags that indicate +// 1) if the object is the one pointed to by 'self', and 2) if the object +// is the result of an initializer (e.g. [super init]). +// +// Uses of an object that is true for 1) but not 2) trigger a diagnostic. +// The uses that are currently checked are: +// - Using instance variables. +// - Returning the object. 
+//
+// Note that we don't check for an invalid 'self' that is the receiver of an
+// obj-c message expression to cut down false positives where logging functions
+// get information from self (like its class) or doing "invalidation" on self
+// when the initialization fails.
+//
+// Because the object that 'self' points to gets invalidated when a call
+// receives a reference to 'self', the checker keeps track and passes the flags
+// for 1) and 2) to the new object that 'self' points to after the call.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/AST/ParentMap.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace ento;
+
+// Helpers defined at the end of this file.
+static bool shouldRunOnFunctionOrMethod(const NamedDecl *ND);
+static bool isInitializationMethod(const ObjCMethodDecl *MD);
+static bool isInitMessage(const ObjCMethodCall &Msg);
+static bool isSelfVar(SVal location, CheckerContext &C);
+
+namespace {
+/// Path-sensitive checker that reports ivar accesses and returns of 'self'
+/// while 'self' has not been assigned the result of an initializer call.
+class ObjCSelfInitChecker : public Checker<  check::PostObjCMessage,
+                                             check::PostStmt<ObjCIvarRefExpr>,
+                                             check::PreStmt<ReturnStmt>,
+                                             check::PreCall,
+                                             check::PostCall,
+                                             check::Location,
+                                             check::Bind > {
+  /// Bug type shared by all reports; created lazily on the first report.
+  mutable std::unique_ptr<BugType> BT;
+
+  /// Emits a report with message \p errorStr if \p E evaluates to a value
+  /// that is tagged as 'self' but not as an initializer result.
+  void checkForInvalidSelf(const Expr *E, CheckerContext &C,
+                           const char *errorStr) const;
+
+public:
+  ObjCSelfInitChecker() {}
+  void checkPostObjCMessage(const ObjCMethodCall &Msg, CheckerContext &C) const;
+  void checkPostStmt(const ObjCIvarRefExpr *E, CheckerContext &C) const;
+  void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const;
+  void checkLocation(SVal location, bool isLoad, const Stmt *S,
+                     CheckerContext &C) const;
+  void checkBind(SVal loc, SVal val, const Stmt *S, CheckerContext &C) const;
+
+  void checkPreCall(const CallEvent &CE, CheckerContext &C) const;
+  void checkPostCall(const CallEvent &CE, CheckerContext &C) const;
+
+  void printState(raw_ostream &Out, ProgramStateRef State,
+                  const char *NL, const char *Sep) const override;
+};
+} // end anonymous namespace
+
+namespace {
+/// Bit flags attached to symbols; values are OR-ed together in the SelfFlag
+/// map below.
+enum SelfFlagEnum {
+  /// No flag set.
+  SelfFlag_None = 0x0,
+  /// Value came from 'self'.
+  SelfFlag_Self    = 0x1,
+  /// Value came from the result of an initializer (e.g. [super init]).
+  SelfFlag_InitRes = 0x2
+};
+}
+
+// SelfFlag maps a symbol to its SelfFlagEnum bits; CalledInit records that an
+// init-family message has been seen on the current path.
+REGISTER_MAP_WITH_PROGRAMSTATE(SelfFlag, SymbolRef, unsigned)
+REGISTER_TRAIT_WITH_PROGRAMSTATE(CalledInit, bool)
+
+/// A call receiving a reference to 'self' invalidates the object that
+/// 'self' contains. This keeps the "self flags" assigned to the 'self'
+/// object before the call so we can assign them to the new object that 'self'
+/// points to after the call.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(PreCallSelfFlags, unsigned)
+
+/// Returns the flags recorded for the symbol wrapped by \p val in \p state,
+/// or SelfFlag_None if \p val has no symbol or the symbol is untracked.
+static SelfFlagEnum getSelfFlags(SVal val, ProgramStateRef state) {
+  if (SymbolRef sym = val.getAsSymbol())
+    if (const unsigned *attachedFlags = state->get<SelfFlag>(sym))
+      return (SelfFlagEnum)*attachedFlags;
+  return SelfFlag_None;
+}
+
+/// Convenience overload that reads the flags from the context's current state.
+static SelfFlagEnum getSelfFlags(SVal val, CheckerContext &C) {
+  return getSelfFlags(val, C.getState());
+}
+
+/// ORs \p flag into the flags of the symbol wrapped by \p val and commits the
+/// resulting state as a transition. If \p val wraps no symbol, nothing is
+/// recorded and \p state is NOT transitioned to.
+static void addSelfFlag(ProgramStateRef state, SVal val,
+                        SelfFlagEnum flag, CheckerContext &C) {
+  // We tag the symbol that the SVal wraps.
+  if (SymbolRef sym = val.getAsSymbol()) {
+    state = state->set<SelfFlag>(sym, getSelfFlags(val, state) | flag);
+    C.addTransition(state);
+  }
+}
+
+/// Returns true if \p flag is set on the symbol wrapped by \p val in the
+/// current state.
+static bool hasSelfFlag(SVal val, SelfFlagEnum flag, CheckerContext &C) {
+  return getSelfFlags(val, C) & flag;
+}
+
+/// Returns true if the value of the expression is the object that 'self'
+/// points to and is an object that did not come from the result of calling
+/// an initializer.
+static bool isInvalidSelf(const Expr *E, CheckerContext &C) {
+  SVal exprVal = C.getSVal(E);
+  if (!hasSelfFlag(exprVal, SelfFlag_Self, C))
+    return false; // value did not come from 'self'.
+  if (hasSelfFlag(exprVal, SelfFlag_InitRes, C))
+    return false; // 'self' is properly initialized.
+
+  return true;
+}
+
+void ObjCSelfInitChecker::checkForInvalidSelf(const Expr *E, CheckerContext &C,
+                                              const char *errorStr) const {
+  if (!E)
+    return;
+
+  // Stay silent until an init message has been seen on this path (and after
+  // checkBind has cleared the trait).
+  if (!C.getState()->get<CalledInit>())
+    return;
+
+  if (!isInvalidSelf(E, C))
+    return;
+
+  // Generate an error node.
+  ExplodedNode *N = C.generateErrorNode();
+  if (!N)
+    return;
+
+  if (!BT)
+    BT.reset(new BugType(this, "Missing \"self = [(super or self) init...]\"",
+                         categories::CoreFoundationObjectiveC));
+  C.emitReport(std::make_unique<PathSensitiveBugReport>(*BT, errorStr, N));
+}
+
+void ObjCSelfInitChecker::checkPostObjCMessage(const ObjCMethodCall &Msg,
+                                               CheckerContext &C) const {
+  // When encountering a message that does initialization (init rule),
+  // tag the return value so that we know later on that if self has this value
+  // then it is properly initialized.
+
+  // FIXME: A callback should disable checkers at the start of functions.
+  if (!shouldRunOnFunctionOrMethod(dyn_cast<NamedDecl>(
+                                C.getCurrentAnalysisDeclContext()->getDecl())))
+    return;
+
+  if (isInitMessage(Msg)) {
+    // Tag the return value as the result of an initializer.
+    ProgramStateRef state = C.getState();
+
+    // FIXME this really should be context sensitive, where we record
+    // the current stack frame (for IPA).  Also, we need to clean this
+    // value out when we return from this method.
+    state = state->set<CalledInit>(true);
+
+    // The CalledInit update is committed by addSelfFlag together with the
+    // flag, i.e. only when the message result wraps a symbol.
+    SVal V = C.getSVal(Msg.getOriginExpr());
+    addSelfFlag(state, V, SelfFlag_InitRes, C);
+    return;
+  }
+
+  // We don't check for an invalid 'self' in an obj-c message expression to cut
+  // down false positives where logging functions get information from self
+  // (like its class) or doing "invalidation" on self when the initialization
+  // fails.
+}
+
+/// Checks the base object of every ivar reference.
+void ObjCSelfInitChecker::checkPostStmt(const ObjCIvarRefExpr *E,
+                                        CheckerContext &C) const {
+  // FIXME: A callback should disable checkers at the start of functions.
+  if (!shouldRunOnFunctionOrMethod(dyn_cast<NamedDecl>(
+                                 C.getCurrentAnalysisDeclContext()->getDecl())))
+    return;
+
+  checkForInvalidSelf(
+      E->getBase(), C,
+      "Instance variable used while 'self' is not set to the result of "
+      "'[(super or self) init...]'");
+}
+
+/// Checks the returned expression, if any, of every return statement.
+void ObjCSelfInitChecker::checkPreStmt(const ReturnStmt *S,
+                                       CheckerContext &C) const {
+  // FIXME: A callback should disable checkers at the start of functions.
+  if (!shouldRunOnFunctionOrMethod(dyn_cast<NamedDecl>(
+                                 C.getCurrentAnalysisDeclContext()->getDecl())))
+    return;
+
+  checkForInvalidSelf(S->getRetValue(), C,
+                      "Returning 'self' while it is not set to the result of "
+                      "'[(super or self) init...]'");
+}
+
+// When a call receives a reference to 'self', [Pre/Post]Call pass
+// the SelfFlags from the object 'self' points to before the call to the new
+// object after the call. This is to avoid invalidation of 'self' by logging
+// functions.
+// Another common pattern in classes with multiple initializers is to put the
+// subclass's common initialization bits into a static function that receives
+// the value of 'self', e.g:
+// @code
+//   if (!(self = [super init]))
+//     return nil;
+//   if (!(self = _commonInit(self)))
+//     return nil;
+// @endcode
+// Until we can use inter-procedural analysis, in such a call, transfer the
+// SelfFlags to the result of the call.
+
+/// Records, in PreCallSelfFlags, the flags of the first argument that is
+/// either the 'self' variable itself or a value tagged SelfFlag_Self.
+void ObjCSelfInitChecker::checkPreCall(const CallEvent &CE,
+                                       CheckerContext &C) const {
+  // FIXME: A callback should disable checkers at the start of functions.
+  if (!shouldRunOnFunctionOrMethod(dyn_cast<NamedDecl>(
+                                 C.getCurrentAnalysisDeclContext()->getDecl())))
+    return;
+
+  ProgramStateRef state = C.getState();
+  unsigned NumArgs = CE.getNumArgs();
+  // If we passed 'self' as an argument to the call, record it in the state
+  // to be propagated after the call.
+  // Note, we could have just given up, but try to be more optimistic here and
+  // assume that the functions are going to continue initialization or will not
+  // modify self.
+  for (unsigned i = 0; i < NumArgs; ++i) {
+    SVal argV = CE.getArgSVal(i);
+    if (isSelfVar(argV, C)) {
+      // The argument is the location of 'self'; read the flags of the value
+      // currently stored there.
+      unsigned selfFlags = getSelfFlags(state->getSVal(argV.castAs<Loc>()), C);
+      C.addTransition(state->set<PreCallSelfFlags>(selfFlags));
+      return;
+    } else if (hasSelfFlag(argV, SelfFlag_Self, C)) {
+      unsigned selfFlags = getSelfFlags(argV, C);
+      C.addTransition(state->set<PreCallSelfFlags>(selfFlags));
+      return;
+    }
+  }
+}
+
+/// Re-applies the flags saved by checkPreCall: to the value stored in 'self'
+/// when the location of 'self' was passed, or to the call's return value when
+/// a 'self'-tagged value was passed.
+void ObjCSelfInitChecker::checkPostCall(const CallEvent &CE,
+                                        CheckerContext &C) const {
+  // FIXME: A callback should disable checkers at the start of functions.
+  if (!shouldRunOnFunctionOrMethod(dyn_cast<NamedDecl>(
+                                 C.getCurrentAnalysisDeclContext()->getDecl())))
+    return;
+
+  ProgramStateRef state = C.getState();
+  SelfFlagEnum prevFlags = (SelfFlagEnum)state->get<PreCallSelfFlags>();
+  if (!prevFlags)
+    return;
+  state = state->remove<PreCallSelfFlags>();
+
+  // NOTE(review): on the two addSelfFlag paths below the state with
+  // PreCallSelfFlags removed is only committed if the tagged value wraps a
+  // symbol (addSelfFlag adds no transition otherwise) - confirm that leaving
+  // the trait set in that case is intended.
+  unsigned NumArgs = CE.getNumArgs();
+  for (unsigned i = 0; i < NumArgs; ++i) {
+    SVal argV = CE.getArgSVal(i);
+    if (isSelfVar(argV, C)) {
+      // If the address of 'self' is being passed to the call, assume that the
+      // 'self' after the call will have the same flags.
+      // EX: log(&self)
+      addSelfFlag(state, state->getSVal(argV.castAs<Loc>()), prevFlags, C);
+      return;
+    } else if (hasSelfFlag(argV, SelfFlag_Self, C)) {
+      // If 'self' is passed to the call by value, assume that the function
+      // returns 'self'. So assign the flags, which were set on 'self' to the
+      // return value.
+      // EX: self = performMoreInitialization(self)
+      addSelfFlag(state, CE.getReturnValue(), prevFlags, C);
+      return;
+    }
+  }
+
+  // No argument matched: just commit the removal of the saved flags.
+  C.addTransition(state);
+}
+
+/// Tags the value stored in the 'self' variable with SelfFlag_Self whenever
+/// that variable's location is accessed. \p isLoad and \p S are not consulted.
+void ObjCSelfInitChecker::checkLocation(SVal location, bool isLoad,
+                                        const Stmt *S,
+                                        CheckerContext &C) const {
+  if (!shouldRunOnFunctionOrMethod(dyn_cast<NamedDecl>(
+        C.getCurrentAnalysisDeclContext()->getDecl())))
+    return;
+
+  // Tag the result of a load from 'self' so that we can easily know that the
+  // value is the object that 'self' points to.
+  ProgramStateRef state = C.getState();
+  if (isSelfVar(location, C))
+    addSelfFlag(state, state->getSVal(location.castAs<Loc>()), SelfFlag_Self,
+                C);
+}
+
+
+void ObjCSelfInitChecker::checkBind(SVal loc, SVal val, const Stmt *S,
+                                    CheckerContext &C) const {
+  // Allow assignment of anything to self. Self is a local variable in the
+  // initializer, so it is legal to assign anything to it, like results of
+  // static functions/method calls. After self is assigned something we cannot
+  // reason about, stop enforcing the rules.
+  // (Only continue checking if the assigned value should be treated as self.)
+  if ((isSelfVar(loc, C)) &&
+      !hasSelfFlag(val, SelfFlag_InitRes, C) &&
+      !hasSelfFlag(val, SelfFlag_Self, C) &&
+      !isSelfVar(val, C)) {
+
+    // Stop tracking the checker-specific state in the state.
+    // Clearing CalledInit makes checkForInvalidSelf return early from here on.
+    ProgramStateRef State = C.getState();
+    State = State->remove<CalledInit>();
+    if (SymbolRef sym = loc.getAsSymbol())
+      State = State->remove<SelfFlag>(sym);
+    C.addTransition(State);
+  }
+}
+
+/// Dumps the checker's part of \p State: whether an init method was called,
+/// the saved pre-call flags, and the flags of every tracked symbol. Prints
+/// nothing when all three are empty/unset.
+void ObjCSelfInitChecker::printState(raw_ostream &Out, ProgramStateRef State,
+                                     const char *NL, const char *Sep) const {
+  SelfFlagTy FlagMap = State->get<SelfFlag>();
+  bool DidCallInit = State->get<CalledInit>();
+  SelfFlagEnum PreCallFlags = (SelfFlagEnum)State->get<PreCallSelfFlags>();
+
+  if (FlagMap.isEmpty() && !DidCallInit && !PreCallFlags)
+    return;
+
+  Out << Sep << NL << *this << " :" << NL;
+
+  if (DidCallInit)
+    Out << "  An init method has been called." << NL;
+
+  if (PreCallFlags != SelfFlag_None) {
+    if (PreCallFlags & SelfFlag_Self) {
+      Out << "  An argument of the current call came from the 'self' variable."
+          << NL;
+    }
+    if (PreCallFlags & SelfFlag_InitRes) {
+      Out << "  An argument of the current call came from an init method."
+          << NL;
+    }
+  }
+
+  Out << NL;
+  for (SelfFlagTy::iterator I = FlagMap.begin(), E = FlagMap.end();
+       I != E; ++I) {
+    Out << I->first << " : ";
+
+    if (I->second == SelfFlag_None)
+      Out << "none";
+
+    if (I->second & SelfFlag_Self)
+      Out << "self variable";
+
+    if (I->second & SelfFlag_InitRes) {
+      // Both flags are set: separate the two descriptions.
+      if (I->second != SelfFlag_InitRes)
+        Out << " | ";
+      Out << "result of init method";
+    }
+
+    Out << NL;
+  }
+}
+
+
+// FIXME: A callback should disable checkers at the start of functions.
+/// Returns true only if \p ND is an Objective-C method of the 'init' family
+/// whose class has NSObject somewhere among its superclasses.
+static bool shouldRunOnFunctionOrMethod(const NamedDecl *ND) {
+  const auto *Method = dyn_cast_or_null<ObjCMethodDecl>(ND);
+  if (!Method || !isInitializationMethod(Method))
+    return false;
+
+  // self = [super init] applies only to NSObject subclasses.
+  // For instance, NSProxy doesn't implement -init.
+  const IdentifierInfo *NSObjectName =
+      &Method->getASTContext().Idents.get("NSObject");
+  const ObjCInterfaceDecl *Ancestor =
+      Method->getClassInterface()->getSuperClass();
+  while (Ancestor) {
+    if (Ancestor->getIdentifier() == NSObjectName)
+      return true;
+    Ancestor = Ancestor->getSuperClass();
+  }
+  return false;
+}
+
+/// Returns true if the location is 'self'.
+static bool isSelfVar(SVal location, CheckerContext &C) {
+  // Without a 'self' declaration in the analyzed context nothing can match.
+  const auto *SelfDecl = C.getCurrentAnalysisDeclContext()->getSelfDecl();
+  if (!SelfDecl || !location.getAs<loc::MemRegionVal>())
+    return false;
+
+  // Compare the declaration behind the cast-stripped region with 'self'.
+  const MemRegion *Stripped =
+      location.castAs<loc::MemRegionVal>().stripCasts();
+  const auto *DeclReg = dyn_cast<DeclRegion>(Stripped);
+  return DeclReg && DeclReg->getDecl() == SelfDecl;
+}
+
+/// Returns true if \p MD belongs to the 'init' method family.
+static bool isInitializationMethod(const ObjCMethodDecl *MD) {
+  const ObjCMethodFamily Family = MD->getMethodFamily();
+  return Family == OMF_init;
+}
+
+/// Returns true if the message \p Call belongs to the 'init' method family.
+static bool isInitMessage(const ObjCMethodCall &Call) {
+  const ObjCMethodFamily Family = Call.getMethodFamily();
+  return Family == OMF_init;
+}
+
+//===----------------------------------------------------------------------===//
+// Registration.
+//===----------------------------------------------------------------------===//
+
+/// Registers ObjCSelfInitChecker with the checker manager.
+void ento::registerObjCSelfInitChecker(CheckerManager &mgr) {
+  mgr.registerChecker<ObjCSelfInitChecker>();
+}
+
+/// Always returns true; \p LO is not consulted.
+bool ento::shouldRegisterObjCSelfInitChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp
new file mode 100644
index 000000000000..0575be845374
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp
@@ -0,0 +1,289 @@
+//===- ObjCSuperDeallocChecker.cpp - Check correct use of [super dealloc] -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines ObjCSuperDeallocChecker, a builtin check that warns when
+// self is used after a call to [super dealloc] in MRR mode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+/// Path-sensitive checker that tracks receivers on which [super dealloc] has
+/// been called and reports later uses of them (messages, call arguments,
+/// memory accesses).
+class ObjCSuperDeallocChecker
+    : public Checker<check::PostObjCMessage, check::PreObjCMessage,
+                     check::PreCall, check::Location> {
+
+  // Identifier/selector cache, filled in lazily by
+  // initIdentifierInfoAndSelectors(). IINSObject is assigned there but not
+  // read anywhere else in the visible code.
+  mutable IdentifierInfo *IIdealloc, *IINSObject;
+  mutable Selector SELdealloc;
+
+  /// Bug type used for every report of this checker; created in the
+  /// constructor.
+  std::unique_ptr<BugType> DoubleSuperDeallocBugType;
+
+  void initIdentifierInfoAndSelectors(ASTContext &Ctx) const;
+
+  /// Returns true if \p M is a 'dealloc' message sent to 'super'.
+  bool isSuperDeallocMessage(const ObjCMethodCall &M) const;
+
+public:
+  ObjCSuperDeallocChecker();
+  void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const;
+  void checkPreObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const;
+
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+
+  void checkLocation(SVal l, bool isLoad, const Stmt *S,
+                     CheckerContext &C) const;
+
+private:
+
+  /// Reports the first argument of \p CE whose symbol has already been
+  /// dealloc'd.
+  void diagnoseCallArguments(const CallEvent &CE, CheckerContext &C) const;
+
+  /// Emits a use-after-dealloc report for \p Sym; an empty \p Desc selects a
+  /// default message.
+  void reportUseAfterDealloc(SymbolRef Sym, StringRef Desc, const Stmt *S,
+                             CheckerContext &C) const;
+};
+
+} // End anonymous namespace.
+
+// Remember whether [super dealloc] has previously been called on the
+// SymbolRef for the receiver.
+REGISTER_SET_WITH_PROGRAMSTATE(CalledSuperDealloc, SymbolRef)
+
+namespace {
+/// Bug report visitor that adds a "[super dealloc] called here" note at the
+/// node where the receiver symbol first entered the CalledSuperDealloc set.
+class SuperDeallocBRVisitor final : public BugReporterVisitor {
+  SymbolRef ReceiverSymbol;
+  /// Set once the note has been produced so later nodes are skipped.
+  bool Satisfied;
+
+public:
+  explicit SuperDeallocBRVisitor(SymbolRef ReceiverSymbol)
+      : ReceiverSymbol(ReceiverSymbol), Satisfied(false) {}
+
+  PathDiagnosticPieceRef VisitNode(const ExplodedNode *Succ,
+                                   BugReporterContext &BRC,
+                                   PathSensitiveBugReport &BR) override;
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override {
+    ID.Add(ReceiverSymbol);
+  }
+};
+} // End anonymous namespace.
+
+/// Reports a message sent to a receiver that has already been dealloc'd; for
+/// receivers without a symbol only the message arguments are checked.
+void ObjCSuperDeallocChecker::checkPreObjCMessage(const ObjCMethodCall &M,
+                                                  CheckerContext &C) const {
+
+  ProgramStateRef State = C.getState();
+  SymbolRef ReceiverSymbol = M.getReceiverSVal().getAsSymbol();
+  if (!ReceiverSymbol) {
+    diagnoseCallArguments(M, C);
+    return;
+  }
+
+  bool AlreadyCalled = State->contains<CalledSuperDealloc>(ReceiverSymbol);
+  if (!AlreadyCalled)
+    return;
+
+  // An empty description makes reportUseAfterDealloc use its default text.
+  StringRef Desc;
+  if (isSuperDeallocMessage(M))
+    Desc = "[super dealloc] should not be called multiple times";
+
+  reportUseAfterDealloc(ReceiverSymbol, Desc, M.getOriginExpr(), C);
+}
+
+/// Checks the arguments of every call for already dealloc'd symbols.
+void ObjCSuperDeallocChecker::checkPreCall(const CallEvent &Call,
+                                           CheckerContext &C) const {
+  diagnoseCallArguments(Call, C);
+}
+
+/// Records the 'self' symbol in CalledSuperDealloc after a [super dealloc]
+/// message.
+void ObjCSuperDeallocChecker::checkPostObjCMessage(const ObjCMethodCall &M,
+                                                   CheckerContext &C) const {
+  // Check for [super dealloc] method call.
+  if (!isSuperDeallocMessage(M))
+    return;
+
+  ProgramStateRef State = C.getState();
+  SymbolRef ReceiverSymbol = M.getSelfSVal().getAsSymbol();
+  assert(ReceiverSymbol && "No receiver symbol at call to [super dealloc]?");
+
+  // We add this transition in checkPostObjCMessage to avoid warning when
+  // we inline a call to [super dealloc] where the inlined call itself
+  // calls [super dealloc].
+  State = State->add<CalledSuperDealloc>(ReceiverSymbol);
+  C.addTransition(State);
+}
+
+/// Reports memory accesses whose base symbol has already been dealloc'd,
+/// naming the instance variable when the access goes through one.
+/// \p IsLoad is not consulted.
+void ObjCSuperDeallocChecker::checkLocation(SVal L, bool IsLoad, const Stmt *S,
+                                            CheckerContext &C) const {
+  SymbolRef BaseSym = L.getLocSymbolInBase();
+  if (!BaseSym)
+    return;
+
+  ProgramStateRef State = C.getState();
+
+  if (!State->contains<CalledSuperDealloc>(BaseSym))
+    return;
+
+  const MemRegion *R = L.getAsRegion();
+  if (!R)
+    return;
+
+  // Climb the super regions to find the base symbol while recording
+  // the second-to-last region for error reporting.
+  const MemRegion *PriorSubRegion = nullptr;
+  while (const SubRegion *SR = dyn_cast<SubRegion>(R)) {
+    if (const SymbolicRegion *SymR = dyn_cast<SymbolicRegion>(SR)) {
+      BaseSym = SymR->getSymbol();
+      break;
+    } else {
+      R = SR->getSuperRegion();
+      PriorSubRegion = SR;
+    }
+  }
+
+  // Desc stays empty (default message) unless the access went through an ivar.
+  StringRef Desc;
+  auto *IvarRegion = dyn_cast_or_null<ObjCIvarRegion>(PriorSubRegion);
+
+  // Buf backs Desc and must stay alive until the report has been emitted.
+  std::string Buf;
+  llvm::raw_string_ostream OS(Buf);
+  if (IvarRegion) {
+    OS << "Use of instance variable '" << *IvarRegion->getDecl() <<
+          "' after 'self' has been deallocated";
+    Desc = OS.str();
+  }
+
+  reportUseAfterDealloc(BaseSym, Desc, S, C);
+}
+
+/// Report a use-after-dealloc on Sym. If not empty,
+/// Desc will be used to describe the error; otherwise,
+/// a default warning will be used.
+/// \p S, when non-null, supplies the source range highlighted by the report.
+void ObjCSuperDeallocChecker::reportUseAfterDealloc(SymbolRef Sym,
+                                                    StringRef Desc,
+                                                    const Stmt *S,
+                                                    CheckerContext &C) const {
+  // We have a use of self after free.
+  // This likely causes a crash, so stop exploring the
+  // path by generating a sink.
+  ExplodedNode *ErrNode = C.generateErrorNode();
+  // If we've already reached this node on another path, return.
+  if (!ErrNode)
+    return;
+
+  if (Desc.empty())
+    Desc = "Use of 'self' after it has been deallocated";
+
+  // Generate the report.
+  auto BR = std::make_unique<PathSensitiveBugReport>(*DoubleSuperDeallocBugType,
+                                                     Desc, ErrNode);
+  // Callers may pass a null statement (diagnoseCallArguments forwards
+  // CallEvent::getArgExpr, which can be null for arguments that do not appear
+  // in the source), so only add a range when there is one.
+  if (S)
+    BR->addRange(S->getSourceRange());
+  BR->addVisitor(std::make_unique<SuperDeallocBRVisitor>(Sym));
+  C.emitReport(std::move(BR));
+}
+
+/// Diagnose if any of the arguments to CE have already been
+/// dealloc'd.
+void ObjCSuperDeallocChecker::diagnoseCallArguments(const CallEvent &CE,
+                                                    CheckerContext &C) const {
+  ProgramStateRef State = C.getState();
+  unsigned ArgCount = CE.getNumArgs();
+  for (unsigned I = 0; I < ArgCount; I++) {
+    SymbolRef Sym = CE.getArgSVal(I).getAsSymbol();
+    if (!Sym)
+      continue;
+
+    // Only the first offending argument is reported.
+    if (State->contains<CalledSuperDealloc>(Sym)) {
+      reportUseAfterDealloc(Sym, StringRef(), CE.getArgExpr(I), C);
+      return;
+    }
+  }
+}
+
+ObjCSuperDeallocChecker::ObjCSuperDeallocChecker()
+    : IIdealloc(nullptr), IINSObject(nullptr) {
+
+  DoubleSuperDeallocBugType.reset(
+      new BugType(this, "[super dealloc] should not be called more than once",
+                  categories::CoreFoundationObjectiveC));
+}
+
+/// Looks up the 'dealloc' and 'NSObject' identifiers and the zero-argument
+/// 'dealloc' selector once; later calls return immediately.
+void
+ObjCSuperDeallocChecker::initIdentifierInfoAndSelectors(ASTContext &Ctx) const {
+  if (IIdealloc)
+    return;
+
+  IIdealloc = &Ctx.Idents.get("dealloc");
+  IINSObject = &Ctx.Idents.get("NSObject");
+
+  SELdealloc = Ctx.Selectors.getSelector(0, &IIdealloc);
+}
+
+/// Returns true if \p M is a message to 'super' (instance receiver) whose
+/// selector is 'dealloc'.
+bool
+ObjCSuperDeallocChecker::isSuperDeallocMessage(const ObjCMethodCall &M) const {
+  if (M.getOriginExpr()->getReceiverKind() != ObjCMessageExpr::SuperInstance)
+    return false;
+
+  ASTContext &Ctx = M.getState()->getStateManager().getContext();
+  initIdentifierInfoAndSelectors(Ctx);
+
+  return M.getSelector() == SELdealloc;
+}
+
+/// Produces the "[super dealloc] called here" note for the node at which
+/// ReceiverSymbol entered the CalledSuperDealloc set; returns nullptr for
+/// every other node and after the note has been produced.
+PathDiagnosticPieceRef
+SuperDeallocBRVisitor::VisitNode(const ExplodedNode *Succ,
+                                 BugReporterContext &BRC,
+                                 PathSensitiveBugReport &) {
+  if (Satisfied)
+    return nullptr;
+
+  ProgramStateRef State = Succ->getState();
+
+  bool CalledNow = State->contains<CalledSuperDealloc>(ReceiverSymbol);
+  bool CalledBefore =
+      Succ->getFirstPred()->getState()->contains<CalledSuperDealloc>(
+          ReceiverSymbol);
+
+  // Is Succ the node on which the analyzer noted that [super dealloc] was
+  // called on ReceiverSymbol?
+  if (CalledNow && !CalledBefore) {
+    Satisfied = true;
+
+    ProgramPoint P = Succ->getLocation();
+    PathDiagnosticLocation L =
+        PathDiagnosticLocation::create(P, BRC.getSourceManager());
+
+    if (!L.isValid() || !L.asLocation().isValid())
+      return nullptr;
+
+    return std::make_shared<PathDiagnosticEventPiece>(
+        L, "[super dealloc] called here");
+  }
+
+  return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Checker Registration.
+//===----------------------------------------------------------------------===//
+
+void ento::registerObjCSuperDeallocChecker(CheckerManager &Mgr) {
+  Mgr.registerChecker<ObjCSuperDeallocChecker>();
+}
+
+bool ento::shouldRegisterObjCSuperDeallocChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp
new file mode 100644
index 000000000000..cb4770451572
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp
@@ -0,0 +1,191 @@
+//==- ObjCUnusedIVarsChecker.cpp - Check for unused ivars --------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a CheckObjCUnusedIvars, a checker that
+// analyzes an Objective-C class's interface/implementation to determine if it
+// has any ivars that are never accessed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/Analysis/PathDiagnostic.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprObjC.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+
+using namespace clang;
+using namespace ento;
+
+enum IVarState { Unused, Used };
+using IvarUsageMap = llvm::DenseMap<const ObjCIvarDecl *, IVarState>;
+
+/// Flags \p Ivar as used if it is one of the tracked ivars; ivars that are
+/// not in the map are ignored.
+static void markUsed(IvarUsageMap &M, const ObjCIvarDecl *Ivar) {
+  IvarUsageMap::iterator Pos = M.find(Ivar);
+  if (Pos != M.end())
+    Pos->second = Used;
+}
+
+/// Recursively walks the statement \p S and marks every tracked ivar that is
+/// referenced in it.
+static void Scan(IvarUsageMap& M, const Stmt *S) {
+  if (!S)
+    return;
+
+  if (const auto *IvarRef = dyn_cast<ObjCIvarRefExpr>(S)) {
+    markUsed(M, IvarRef->getDecl());
+    return;
+  }
+
+  // Blocks can reference an instance variable of a class.
+  if (const auto *Block = dyn_cast<BlockExpr>(S)) {
+    Scan(M, Block->getBody());
+    return;
+  }
+
+  // For pseudo-object expressions also scan the semantic expressions, looking
+  // through opaque values to their source expression.
+  if (const auto *Pseudo = dyn_cast<PseudoObjectExpr>(S)) {
+    for (auto It = Pseudo->semantics_begin(), End = Pseudo->semantics_end();
+         It != End; ++It) {
+      const Expr *Sub = *It;
+      if (const auto *Opaque = dyn_cast<OpaqueValueExpr>(Sub))
+        Sub = Opaque->getSourceExpr();
+      Scan(M, Sub);
+    }
+  }
+
+  for (const Stmt *Child : S->children())
+    Scan(M, Child);
+}
+
+/// Marks the ivar backing the property implementation \p D, if any, as used.
+static void Scan(IvarUsageMap& M, const ObjCPropertyImplDecl *D) {
+  if (!D)
+    return;
+
+  if (const ObjCIvarDecl *Backing = D->getPropertyIvarDecl())
+    markUsed(M, Backing);
+}
+
+/// Scans the instance methods of \p D; for an @implementation additionally
+/// scans its property implementations and the implementations of the visible
+/// categories of its class.
+static void Scan(IvarUsageMap& M, const ObjCContainerDecl *D) {
+  // Scan the methods for accesses.
+  for (const auto *Method : D->instance_methods())
+    Scan(M, Method->getBody());
+
+  const auto *Impl = dyn_cast<ObjCImplementationDecl>(D);
+  if (!Impl)
+    return;
+
+  // Scan for @synthesized property methods that act as setters/getters
+  // to an ivar.
+  for (const auto *PropImpl : Impl->property_impls())
+    Scan(M, PropImpl);
+
+  // Scan the associated categories as well.
+  for (const auto *Category : Impl->getClassInterface()->visible_categories())
+    if (const ObjCCategoryImplDecl *CatImpl = Category->getImplementation())
+      Scan(M, CatImpl);
+}
+
+/// Scans the bodies of all functions declared directly in \p C whose begin
+/// location lies in the file \p FID.
+static void Scan(IvarUsageMap &M, const DeclContext *C, const FileID FID,
+                 const SourceManager &SM) {
+  for (const auto *Child : C->decls()) {
+    const auto *Func = dyn_cast<FunctionDecl>(Child);
+    if (!Func)
+      continue;
+    if (SM.getFileID(Func->getBeginLoc()) == FID)
+      Scan(M, Func->getBody());
+  }
+}
+
+/// Emits one "Unused instance variable" report for every tracked ivar of the
+/// class implemented by \p D that none of the scans found a reference to.
+static void checkObjCUnusedIvar(const ObjCImplementationDecl *D,
+                                BugReporter &BR,
+                                const CheckerBase *Checker) {
+
+  const ObjCInterfaceDecl *ID = D->getClassInterface();
+  IvarUsageMap M;
+
+  // Iterate over the ivars.
+  for (const auto *Ivar : ID->ivars()) {
+    // Track only ivars that...
+    // (a) are private
+    // (b) are not explicitly marked unused
+    // (c) are not iboutlets
+    // (d) are not unnamed bitfields
+    const bool Tracked =
+        Ivar->getAccessControl() == ObjCIvarDecl::Private &&
+        !Ivar->hasAttr<UnusedAttr>() && !Ivar->hasAttr<IBOutletAttr>() &&
+        !Ivar->hasAttr<IBOutletCollectionAttr>() &&
+        !Ivar->isUnnamedBitfield();
+    if (Tracked)
+      M[Ivar] = Unused;
+  }
+
+  if (M.empty())
+    return;
+
+  // Now scan the implementation declaration.
+  Scan(M, D);
+
+  // Any potentially unused ivars?
+  bool AnyUnused = false;
+  for (const auto &Entry : M) {
+    if (Entry.second == Unused) {
+      AnyUnused = true;
+      break;
+    }
+  }
+
+  if (!AnyUnused)
+    return;
+
+  // We found some potentially unused ivars.  Scan the entire translation unit
+  // for functions inside the @implementation that reference these ivars.
+  // FIXME: In the future hopefully we can just use the lexical DeclContext
+  // to go from the ObjCImplementationDecl to the lexically "nested"
+  // C functions.
+  const SourceManager &SM = BR.getSourceManager();
+  Scan(M, D->getDeclContext(), SM.getFileID(D->getLocation()), SM);
+
+  // Find ivars that are unused.
+  for (const auto &Entry : M) {
+    if (Entry.second != Unused)
+      continue;
+
+    std::string Text;
+    llvm::raw_string_ostream Msg(Text);
+    Msg << "Instance variable '" << *Entry.first << "' in class '" << *ID
+        << "' is never used by the methods in its @implementation "
+           "(although it may be used by category methods).";
+
+    PathDiagnosticLocation Loc =
+        PathDiagnosticLocation::create(Entry.first, BR.getSourceManager());
+    BR.EmitBasicReport(D, Checker, "Unused instance variable", "Optimization",
+                       Msg.str(), Loc);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// ObjCUnusedIvarsChecker
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// AST-level checker that runs the unused-ivar analysis on every
+/// @implementation.
+class ObjCUnusedIvarsChecker : public Checker<
+                                      check::ASTDecl<ObjCImplementationDecl> > {
+public:
+  void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager& mgr,
+                    BugReporter &BR) const {
+    checkObjCUnusedIvar(D, BR, this);
+  }
+};
+}
+
+void ento::registerObjCUnusedIvarsChecker(CheckerManager &mgr) {
+  mgr.registerChecker<ObjCUnusedIvarsChecker>();
+}
+
+bool ento::shouldRegisterObjCUnusedIvarsChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
new file mode 100644
index 000000000000..4a3c2b8cd40e
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
@@ -0,0 +1,358 @@
+//=======- PaddingChecker.cpp ------------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
namespace {
/// AST-level checker that looks for records whose fields could be reordered
/// to reduce padding. The whole translation unit is walked once from
/// checkASTDecl(); every record and every variable found is handed to
/// visitRecord() / visitVariable().
class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> {
private:
  /// Created on first use by reportRecord().
  mutable std::unique_ptr<BugType> PaddingBug;
  /// Assigned at the start of checkASTDecl(); shouldSkipDecl() and
  /// reportRecord() dereference it, so they rely on checkASTDecl() having run.
  mutable BugReporter *BR;

public:
  /// Threshold, in bytes, of removable padding that is tolerated before a
  /// report is generated. Assigned in registerPaddingChecker() from the
  /// "AllowedPad" checker option.
  int64_t AllowedPad;

  /// Entry point: remembers the bug reporter and traverses \p TUD with a
  /// local RecursiveASTVisitor. \p MGR is not used here.
  void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
                    BugReporter &BRArg) const {
    BR = &BRArg;

    // The calls to checkAST* from AnalysisConsumer don't
    // visit template instantiations or lambda classes. We
    // want to visit those, so we make our own RecursiveASTVisitor.
    struct LocalVisitor : public RecursiveASTVisitor<LocalVisitor> {
      const PaddingChecker *Checker;
      // Opt in to traversing template instantiations and implicit code.
      bool shouldVisitTemplateInstantiations() const { return true; }
      bool shouldVisitImplicitCode() const { return true; }
      explicit LocalVisitor(const PaddingChecker *Checker) : Checker(Checker) {}
      // Both visit callbacks always return true so traversal continues.
      bool VisitRecordDecl(const RecordDecl *RD) {
        Checker->visitRecord(RD);
        return true;
      }
      bool VisitVarDecl(const VarDecl *VD) {
        Checker->visitVariable(VD);
        return true;
      }
      // TODO: Visit array new and mallocs for arrays.
    };

    LocalVisitor visitor(this);
    // TraverseDecl takes a non-const pointer, hence the const_cast.
    visitor.TraverseDecl(const_cast<TranslationUnitDecl *>(TUD));
  }

  /// Look for records of overly padded types. If padding *
  /// PadMultiplier exceeds AllowedPad, then generate a report.
  /// PadMultiplier is used to share code with the array padding
  /// checker.
  void visitRecord(const RecordDecl *RD, uint64_t PadMultiplier = 1) const {
    if (shouldSkipDecl(RD))
      return;

    // TODO: Figure out why we are going through declarations and not only
    // definitions.
    if (!(RD = RD->getDefinition()))
      return;

    // This is the simplest correct case: a class with no fields and one base
    // class. Other cases are more complicated because of how the base classes
    // & fields might interact, so we don't bother dealing with them.
    // TODO: Support other combinations of base classes and fields.
    if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
      if (CXXRD->field_empty() && CXXRD->getNumBases() == 1)
        return visitRecord(CXXRD->bases().begin()->getType()->getAsRecordDecl(),
                           PadMultiplier);

    auto &ASTContext = RD->getASTContext();
    const ASTRecordLayout &RL = ASTContext.getASTRecordLayout(RD);
    assert(llvm::isPowerOf2_64(RL.getAlignment().getQuantity()));

    // Padding of the layout as declared; nothing to improve if it is zero.
    CharUnits BaselinePad = calculateBaselinePad(RD, ASTContext, RL);
    if (BaselinePad.isZero())
      return;

    // Padding (and the field order achieving it) found by the greedy
    // reordering in calculateOptimalPad().
    CharUnits OptimalPad;
    SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
    std::tie(OptimalPad, OptimalFieldsOrder) =
        calculateOptimalPad(RD, ASTContext, RL);

    // Removable padding, scaled by the number of elements when called for an
    // array (PadMultiplier is 1 otherwise).
    CharUnits DiffPad = PadMultiplier * (BaselinePad - OptimalPad);
    if (DiffPad.getQuantity() <= AllowedPad) {
      assert(!DiffPad.isNegative() && "DiffPad should not be negative");
      // There is not enough excess padding to trigger a warning.
      return;
    }
    reportRecord(RD, BaselinePad, OptimalPad, OptimalFieldsOrder);
  }

  /// Look for arrays of overly padded types. If the padding of the
  /// array type exceeds AllowedPad, then generate a report.
  void visitVariable(const VarDecl *VD) const {
    const ArrayType *ArrTy = VD->getType()->getAsArrayTypeUnsafe();
    if (ArrTy == nullptr)
      return;
    // Only constant-size arrays contribute an element count; any other array
    // kind leaves Elts at 0 and is ignored.
    uint64_t Elts = 0;
    if (const ConstantArrayType *CArrTy = dyn_cast<ConstantArrayType>(ArrTy))
      Elts = CArrTy->getSize().getZExtValue();
    if (Elts == 0)
      return;
    // Only arrays whose element type is directly a record are checked.
    const RecordType *RT = ArrTy->getElementType()->getAs<RecordType>();
    if (RT == nullptr)
      return;

    // TODO: Recurse into the fields to see if they have excess padding.
    visitRecord(RT->getDecl(), Elts);
  }

  /// Returns true when \p RD should not be analyzed: no definition, invalid
  /// location, not in user code, a union, an unsupported base/field
  /// combination, virtual bases, a dependent type, no fields (for non-C++
  /// records), or any bit-field / incomplete-array field.
  bool shouldSkipDecl(const RecordDecl *RD) const {
    // TODO: Figure out why we are going through declarations and not only
    // definitions.
    if (!(RD = RD->getDefinition()))
      return true;
    auto Location = RD->getLocation();
    // If the construct doesn't have a source file, then it's not something
    // we want to diagnose.
    if (!Location.isValid())
      return true;
    SrcMgr::CharacteristicKind Kind =
        BR->getSourceManager().getFileCharacteristic(Location);
    // Throw out all records that come from system headers.
    if (Kind != SrcMgr::C_User)
      return true;

    // Not going to attempt to optimize unions.
    if (RD->isUnion())
      return true;
    if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
      // Tail padding with base classes ends up being very complicated.
      // We will skip objects with base classes for now, unless they do not
      // have fields.
      // TODO: Handle more base class scenarios.
      if (!CXXRD->field_empty() && CXXRD->getNumBases() != 0)
        return true;
      if (CXXRD->field_empty() && CXXRD->getNumBases() != 1)
        return true;
      // Virtual bases are complicated, skipping those for now.
      if (CXXRD->getNumVBases() != 0)
        return true;
      // Can't layout a template, so skip it. We do still layout the
      // instantiations though.
      if (CXXRD->getTypeForDecl()->isDependentType())
        return true;
      if (CXXRD->getTypeForDecl()->isInstantiationDependentType())
        return true;
    }
    // How do you reorder fields if you haven't got any?
    else if (RD->field_empty())
      return true;

    auto IsTrickyField = [](const FieldDecl *FD) -> bool {
      // Bitfield layout is hard.
      if (FD->isBitField())
        return true;

      // Variable length arrays are tricky too.
      QualType Ty = FD->getType();
      if (Ty->isIncompleteArrayType())
        return true;
      return false;
    };

    if (std::any_of(RD->field_begin(), RD->field_end(), IsTrickyField))
      return true;
    return false;
  }

  /// Sums the padding present in the declared layout \p RL of \p RD: the gap
  /// before each field (measured from the end of the previous field) plus the
  /// gap between the end of the last field and the record size. Measurement
  /// starts at the offset of field 0, so bytes before the first field are not
  /// counted. Calls RL.getFieldOffset(0) unconditionally, so \p RD is expected
  /// to have at least one field.
  static CharUnits calculateBaselinePad(const RecordDecl *RD,
                                        const ASTContext &ASTContext,
                                        const ASTRecordLayout &RL) {
    CharUnits PaddingSum;
    CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
    for (const FieldDecl *FD : RD->fields()) {
      // This checker only cares about the padded size of the
      // field, and not the data size. If the field is a record
      // with tail padding, then we won't put that number in our
      // total because reordering fields won't fix that problem.
      CharUnits FieldSize = ASTContext.getTypeSizeInChars(FD->getType());
      auto FieldOffsetBits = RL.getFieldOffset(FD->getFieldIndex());
      CharUnits FieldOffset = ASTContext.toCharUnitsFromBits(FieldOffsetBits);
      PaddingSum += (FieldOffset - Offset);
      Offset = FieldOffset + FieldSize;
    }
    PaddingSum += RL.getSize() - Offset;
    return PaddingSum;
  }

  /// Optimal padding overview:
  /// 1.  Find a close approximation to where we can place our first field.
  ///     This will usually be at offset 0.
  /// 2.  Try to find the best field that can legally be placed at the current
  ///     offset.
  ///   a.  "Best" is the largest alignment that is legal, but smallest size.
  ///       This is to account for overly aligned types.
  /// 3.  If no fields can fit, pad by rounding the current offset up to the
  ///     smallest alignment requirement of our fields. Measure and track the
  ///     amount of padding added. Go back to 2.
  /// 4.  Increment the current offset by the size of the chosen field.
  /// 5.  Remove the chosen field from the set of future possibilities.
  /// 6.  Go back to 2 if there are still unplaced fields.
  /// 7.  Add tail padding by rounding the current offset up to the structure
  ///     alignment. Track the amount of padding added.
  ///
  /// Returns the padding of that layout together with the fields in the order
  /// in which they were placed.

  static std::pair<CharUnits, SmallVector<const FieldDecl *, 20>>
  calculateOptimalPad(const RecordDecl *RD, const ASTContext &ASTContext,
                      const ASTRecordLayout &RL) {
    struct FieldInfo {
      CharUnits Align;
      CharUnits Size;
      const FieldDecl *Field;
      bool operator<(const FieldInfo &RHS) const {
        // Order from small alignments to large alignments,
        // then large sizes to small sizes.
        // then large field indices to small field indices
        // A null Field (the search key built below) contributes index 0.
        return std::make_tuple(Align, -Size,
                               Field ? -static_cast<int>(Field->getFieldIndex())
                                     : 0) <
               std::make_tuple(
                   RHS.Align, -RHS.Size,
                   RHS.Field ? -static_cast<int>(RHS.Field->getFieldIndex())
                             : 0);
      }
    };
    SmallVector<FieldInfo, 20> Fields;
    // Builds a FieldInfo from the field's type size/alignment, raising the
    // alignment to getMaxAlignment() when that is non-zero and larger.
    auto GatherSizesAndAlignments = [](const FieldDecl *FD) {
      FieldInfo RetVal;
      RetVal.Field = FD;
      auto &Ctx = FD->getASTContext();
      std::tie(RetVal.Size, RetVal.Align) =
          Ctx.getTypeInfoInChars(FD->getType());
      assert(llvm::isPowerOf2_64(RetVal.Align.getQuantity()));
      if (auto Max = FD->getMaxAlignment())
        RetVal.Align = std::max(Ctx.toCharUnitsFromBits(Max), RetVal.Align);
      return RetVal;
    };
    std::transform(RD->field_begin(), RD->field_end(),
                   std::back_inserter(Fields), GatherSizesAndAlignments);
    llvm::sort(Fields);
    // This lets us skip over vptrs and non-virtual bases,
    // so that we can just worry about the fields in our object.
    // Note that this does cause us to miss some cases where we
    // could pack more bytes in to a base class's tail padding.
    CharUnits NewOffset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
    CharUnits NewPad;
    SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
    while (!Fields.empty()) {
      // The largest power of two dividing NewOffset is the strictest
      // alignment the current offset already satisfies.
      unsigned TrailingZeros =
          llvm::countTrailingZeros((unsigned long long)NewOffset.getQuantity());
      // If NewOffset is zero, then countTrailingZeros will be 64. Shifting
      // 64 will overflow our unsigned long long. Shifting 63 will turn
      // our long long (and CharUnits internal type) negative. So shift 62.
      long long CurAlignmentBits = 1ull << (std::min)(TrailingZeros, 62u);
      CharUnits CurAlignment = CharUnits::fromQuantity(CurAlignmentBits);
      // Search key: zero size and null field, with the current alignment.
      FieldInfo InsertPoint = {CurAlignment, CharUnits::Zero(), nullptr};

      // In the typical case, this will find the last element
      // of the vector. We won't find a middle element unless
      // we started on a poorly aligned address or have an overly
      // aligned field.
      auto Iter = llvm::upper_bound(Fields, InsertPoint);
      if (Iter != Fields.begin()) {
        // We found a field that we can layout with the current alignment.
        --Iter;
        NewOffset += Iter->Size;
        OptimalFieldsOrder.push_back(Iter->Field);
        Fields.erase(Iter);
      } else {
        // We are poorly aligned, and we need to pad in order to layout another
        // field. Round up to at least the smallest field alignment that we
        // currently have.
        CharUnits NextOffset = NewOffset.alignTo(Fields[0].Align);
        NewPad += NextOffset - NewOffset;
        NewOffset = NextOffset;
      }
    }
    // Calculate tail padding.
    CharUnits NewSize = NewOffset.alignTo(RL.getAlignment());
    NewPad += NewSize - NewOffset;
    return {NewPad, std::move(OptimalFieldsOrder)};
  }

  /// Emits a basic bug report for \p RD. The message names the record type,
  /// the point of instantiation for class template specializations (when
  /// valid), the current and optimal padding in bytes, and the suggested
  /// field order.
  void reportRecord(
      const RecordDecl *RD, CharUnits BaselinePad, CharUnits OptimalPad,
      const SmallVector<const FieldDecl *, 20> &OptimalFieldsOrder) const {
    // Lazily create the bug type the first time a report is needed.
    if (!PaddingBug)
      PaddingBug =
          std::make_unique<BugType>(this, "Excessive Padding", "Performance");

    SmallString<100> Buf;
    llvm::raw_svector_ostream Os(Buf);
    Os << "Excessive padding in '";
    Os << QualType::getAsString(RD->getTypeForDecl(), Qualifiers(),
                                LangOptions())
       << "'";

    if (auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
      // TODO: make this show up better in the console output and in
      // the HTML. Maybe just make it show up in HTML like the path
      // diagnostics show.
      SourceLocation ILoc = TSD->getPointOfInstantiation();
      if (ILoc.isValid())
        Os << " instantiated here: "
           << ILoc.printToString(BR->getSourceManager());
    }

    Os << " (" << BaselinePad.getQuantity() << " padding bytes, where "
       << OptimalPad.getQuantity() << " is optimal). \n"
       << "Optimal fields order: \n";
    for (const auto *FD : OptimalFieldsOrder)
      Os << FD->getName() << ", \n";
    Os << "consider reordering the fields or adding explicit padding "
          "members.";

    PathDiagnosticLocation CELoc =
        PathDiagnosticLocation::create(RD, BR->getSourceManager());
    auto Report =
        std::make_unique<BasicBugReport>(*PaddingBug, Os.str(), CELoc);
    Report->setDeclWithIssue(RD);
    Report->addRange(RD->getSourceRange());
    BR->emitReport(std::move(Report));
  }
};
} // namespace

/// Registers the checker and reads its "AllowedPad" integer option; a negative
/// value is reported through reportInvalidCheckerOptionValue().
void ento::registerPaddingChecker(CheckerManager &Mgr) {
  auto *Checker = Mgr.registerChecker<PaddingChecker>();
  Checker->AllowedPad = Mgr.getAnalyzerOptions()
          .getCheckerIntegerOption(Checker, "AllowedPad");
  if (Checker->AllowedPad < 0)
    Mgr.reportInvalidCheckerOptionValue(
        Checker, "AllowedPad", "a non-negative value");
}

/// Always returns true; \p LO is not consulted.
bool ento::shouldRegisterPaddingChecker(const LangOptions &LO) {
  return true;
}
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/ExprCXX.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +enum class AllocKind { + SingleObject, + Array, + Unknown, + Reinterpreted // Single object interpreted as an array. +}; +} // end namespace + +namespace llvm { +template <> struct FoldingSetTrait<AllocKind> { + static inline void Profile(AllocKind X, FoldingSetNodeID &ID) { + ID.AddInteger(static_cast<int>(X)); + } +}; +} // end namespace llvm + +namespace { +class PointerArithChecker + : public Checker< + check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>, + check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>, + check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>, + check::PostStmt<CallExpr>, check::DeadSymbols> { + AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const; + const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic, + AllocKind &AKind, CheckerContext &C) const; + const MemRegion *getPointedRegion(const MemRegion *Region, + CheckerContext &C) const; + void reportPointerArithMisuse(const Expr *E, CheckerContext &C, + bool PointedNeeded = false) const; + void initAllocIdentifiers(ASTContext &C) const; + + mutable std::unique_ptr<BuiltinBug> BT_pointerArith; + mutable std::unique_ptr<BuiltinBug> BT_polyArray; + mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions; + +public: + void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const; + void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const; + void checkPreStmt(const ArraySubscriptExpr *SubExpr, 
CheckerContext &C) const; + void checkPreStmt(const CastExpr *CE, CheckerContext &C) const; + void checkPostStmt(const CastExpr *CE, CheckerContext &C) const; + void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const; + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; +}; +} // end namespace + +REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, const MemRegion *, AllocKind) + +void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + // TODO: intentional leak. Some information is garbage collected too early, + // see http://reviews.llvm.org/D14203 for further information. + /*ProgramStateRef State = C.getState(); + RegionStateTy RegionStates = State->get<RegionState>(); + for (RegionStateTy::iterator I = RegionStates.begin(), E = RegionStates.end(); + I != E; ++I) { + if (!SR.isLiveRegion(I->first)) + State = State->remove<RegionState>(I->first); + } + C.addTransition(State);*/ +} + +AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE, + const FunctionDecl *FD) const { + // This checker try not to assume anything about placement and overloaded + // new to avoid false positives. + if (isa<CXXMethodDecl>(FD)) + return AllocKind::Unknown; + if (FD->getNumParams() != 1 || FD->isVariadic()) + return AllocKind::Unknown; + if (NE->isArray()) + return AllocKind::Array; + + return AllocKind::SingleObject; +} + +const MemRegion * +PointerArithChecker::getPointedRegion(const MemRegion *Region, + CheckerContext &C) const { + assert(Region); + ProgramStateRef State = C.getState(); + SVal S = State->getSVal(Region); + return S.getAsRegion(); +} + +/// Checks whether a region is the part of an array. +/// In case there is a derived to base cast above the array element, the +/// Polymorphic output value is set to true. AKind output value is set to the +/// allocation kind of the inspected region. 
+const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region, + bool &Polymorphic, + AllocKind &AKind, + CheckerContext &C) const { + assert(Region); + while (const auto *BaseRegion = dyn_cast<CXXBaseObjectRegion>(Region)) { + Region = BaseRegion->getSuperRegion(); + Polymorphic = true; + } + if (const auto *ElemRegion = dyn_cast<ElementRegion>(Region)) { + Region = ElemRegion->getSuperRegion(); + } + + ProgramStateRef State = C.getState(); + if (const AllocKind *Kind = State->get<RegionState>(Region)) { + AKind = *Kind; + if (*Kind == AllocKind::Array) + return Region; + else + return nullptr; + } + // When the region is symbolic and we do not have any information about it, + // assume that this is an array to avoid false positives. + if (isa<SymbolicRegion>(Region)) + return Region; + + // No AllocKind stored and not symbolic, assume that it points to a single + // object. + return nullptr; +} + +void PointerArithChecker::reportPointerArithMisuse(const Expr *E, + CheckerContext &C, + bool PointedNeeded) const { + SourceRange SR = E->getSourceRange(); + if (SR.isInvalid()) + return; + + ProgramStateRef State = C.getState(); + const MemRegion *Region = C.getSVal(E).getAsRegion(); + if (!Region) + return; + if (PointedNeeded) + Region = getPointedRegion(Region, C); + if (!Region) + return; + + bool IsPolymorphic = false; + AllocKind Kind = AllocKind::Unknown; + if (const MemRegion *ArrayRegion = + getArrayRegion(Region, IsPolymorphic, Kind, C)) { + if (!IsPolymorphic) + return; + if (ExplodedNode *N = C.generateNonFatalErrorNode()) { + if (!BT_polyArray) + BT_polyArray.reset(new BuiltinBug( + this, "Dangerous pointer arithmetic", + "Pointer arithmetic on a pointer to base class is dangerous " + "because derived and base class may have different size.")); + auto R = std::make_unique<PathSensitiveBugReport>( + *BT_polyArray, BT_polyArray->getDescription(), N); + R->addRange(E->getSourceRange()); + R->markInteresting(ArrayRegion); + 
C.emitReport(std::move(R)); + } + return; + } + + if (Kind == AllocKind::Reinterpreted) + return; + + // We might not have enough information about symbolic regions. + if (Kind != AllocKind::SingleObject && + Region->getKind() == MemRegion::Kind::SymbolicRegionKind) + return; + + if (ExplodedNode *N = C.generateNonFatalErrorNode()) { + if (!BT_pointerArith) + BT_pointerArith.reset(new BuiltinBug(this, "Dangerous pointer arithmetic", + "Pointer arithmetic on non-array " + "variables relies on memory layout, " + "which is dangerous.")); + auto R = std::make_unique<PathSensitiveBugReport>( + *BT_pointerArith, BT_pointerArith->getDescription(), N); + R->addRange(SR); + R->markInteresting(Region); + C.emitReport(std::move(R)); + } +} + +void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const { + if (!AllocFunctions.empty()) + return; + AllocFunctions.insert(&C.Idents.get("alloca")); + AllocFunctions.insert(&C.Idents.get("malloc")); + AllocFunctions.insert(&C.Idents.get("realloc")); + AllocFunctions.insert(&C.Idents.get("calloc")); + AllocFunctions.insert(&C.Idents.get("valloc")); +} + +void PointerArithChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD) + return; + IdentifierInfo *FunI = FD->getIdentifier(); + initAllocIdentifiers(C.getASTContext()); + if (AllocFunctions.count(FunI) == 0) + return; + + SVal SV = C.getSVal(CE); + const MemRegion *Region = SV.getAsRegion(); + if (!Region) + return; + // Assume that C allocation functions allocate arrays to avoid false + // positives. + // TODO: Add heuristics to distinguish alloc calls that allocates single + // objecs. 
+ State = State->set<RegionState>(Region, AllocKind::Array); + C.addTransition(State); +} + +void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE, + CheckerContext &C) const { + const FunctionDecl *FD = NE->getOperatorNew(); + if (!FD) + return; + + AllocKind Kind = getKindOfNewOp(NE, FD); + + ProgramStateRef State = C.getState(); + SVal AllocedVal = C.getSVal(NE); + const MemRegion *Region = AllocedVal.getAsRegion(); + if (!Region) + return; + State = State->set<RegionState>(Region, Kind); + C.addTransition(State); +} + +void PointerArithChecker::checkPostStmt(const CastExpr *CE, + CheckerContext &C) const { + if (CE->getCastKind() != CastKind::CK_BitCast) + return; + + const Expr *CastedExpr = CE->getSubExpr(); + ProgramStateRef State = C.getState(); + SVal CastedVal = C.getSVal(CastedExpr); + + const MemRegion *Region = CastedVal.getAsRegion(); + if (!Region) + return; + + // Suppress reinterpret casted hits. + State = State->set<RegionState>(Region, AllocKind::Reinterpreted); + C.addTransition(State); +} + +void PointerArithChecker::checkPreStmt(const CastExpr *CE, + CheckerContext &C) const { + if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay) + return; + + const Expr *CastedExpr = CE->getSubExpr(); + ProgramStateRef State = C.getState(); + SVal CastedVal = C.getSVal(CastedExpr); + + const MemRegion *Region = CastedVal.getAsRegion(); + if (!Region) + return; + + if (const AllocKind *Kind = State->get<RegionState>(Region)) { + if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted) + return; + } + State = State->set<RegionState>(Region, AllocKind::Array); + C.addTransition(State); +} + +void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp, + CheckerContext &C) const { + if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType()) + return; + reportPointerArithMisuse(UOp->getSubExpr(), C, true); +} + +void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr, + CheckerContext &C) const { + SVal 
Idx = C.getSVal(SubsExpr->getIdx()); + + // Indexing with 0 is OK. + if (Idx.isZeroConstant()) + return; + + // Indexing vector-type expressions is also OK. + if (SubsExpr->getBase()->getType()->isVectorType()) + return; + reportPointerArithMisuse(SubsExpr->getBase(), C); +} + +void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp, + CheckerContext &C) const { + BinaryOperatorKind OpKind = BOp->getOpcode(); + if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign) + return; + + const Expr *Lhs = BOp->getLHS(); + const Expr *Rhs = BOp->getRHS(); + ProgramStateRef State = C.getState(); + + if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) { + SVal RHSVal = C.getSVal(Rhs); + if (State->isNull(RHSVal).isConstrainedTrue()) + return; + reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp()); + } + // The int += ptr; case is not valid C++. + if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) { + SVal LHSVal = C.getSVal(Lhs); + if (State->isNull(LHSVal).isConstrainedTrue()) + return; + reportPointerArithMisuse(Rhs, C); + } +} + +void ento::registerPointerArithChecker(CheckerManager &mgr) { + mgr.registerChecker<PointerArithChecker>(); +} + +bool ento::shouldRegisterPointerArithChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp new file mode 100644 index 000000000000..307e59b8eebc --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp @@ -0,0 +1,100 @@ +//== PointerIterationChecker.cpp ------------------------------- -*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines PointerIterationChecker which checks for non-determinism +// caused due to iteration of unordered containers of pointer elements. +// +//===----------------------------------------------------------------------===// + +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; +using namespace ast_matchers; + +namespace { + +// ID of a node at which the diagnostic would be emitted. +constexpr llvm::StringLiteral WarnAtNode = "iter"; + +class PointerIterationChecker : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const; +}; + +static void emitDiagnostics(const BoundNodes &Match, const Decl *D, + BugReporter &BR, AnalysisManager &AM, + const PointerIterationChecker *Checker) { + auto *ADC = AM.getAnalysisDeclContext(D); + + const auto *MarkedStmt = Match.getNodeAs<Stmt>(WarnAtNode); + assert(MarkedStmt); + + auto Range = MarkedStmt->getSourceRange(); + auto Location = PathDiagnosticLocation::createBegin(MarkedStmt, + BR.getSourceManager(), + ADC); + std::string Diagnostics; + llvm::raw_string_ostream OS(Diagnostics); + OS << "Iteration of pointer-like elements " + << "can result in non-deterministic ordering"; + + BR.EmitBasicReport(ADC->getDecl(), Checker, + "Iteration of pointer-like elements", "Non-determinism", + OS.str(), Location, Range); +} + +// Assumption: Iteration of ordered containers of pointers is deterministic. + +// TODO: Currently, we only check for std::unordered_set. Other unordered +// containers like std::unordered_map also need to be handled. 
+ +// TODO: Currently, we do not check what the for loop does with the iterated +// pointer values. Not all iterations may cause non-determinism. For example, +// counting or summing up the elements should not be non-deterministic. + +auto matchUnorderedIterWithPointers() -> decltype(decl()) { + + auto UnorderedContainerM = declRefExpr(to(varDecl(hasType( + recordDecl(hasName("std::unordered_set") + ))))); + + auto PointerTypeM = varDecl(hasType(hasCanonicalType(pointerType()))); + + auto PointerIterM = stmt(cxxForRangeStmt( + hasLoopVariable(PointerTypeM), + hasRangeInit(UnorderedContainerM) + )).bind(WarnAtNode); + + return decl(forEachDescendant(PointerIterM)); +} + +void PointerIterationChecker::checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const { + auto MatcherM = matchUnorderedIterWithPointers(); + + auto Matches = match(MatcherM, *D, AM.getASTContext()); + for (const auto &Match : Matches) + emitDiagnostics(Match, D, BR, AM, this); +} + +} // end of anonymous namespace + +void ento::registerPointerIterationChecker(CheckerManager &Mgr) { + Mgr.registerChecker<PointerIterationChecker>(); +} + +bool ento::shouldRegisterPointerIterationChecker(const LangOptions &LO) { + return LO.CPlusPlus; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerSortingChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerSortingChecker.cpp new file mode 100644 index 000000000000..586d9d3af2a6 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/PointerSortingChecker.cpp @@ -0,0 +1,113 @@ +//== PointerSortingChecker.cpp --------------------------------- -*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines PointerSortingChecker which checks for non-determinism +// caused due to sorting containers with pointer-like elements. +// +//===----------------------------------------------------------------------===// + +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; +using namespace ast_matchers; + +namespace { + +// ID of a node at which the diagnostic would be emitted. +constexpr llvm::StringLiteral WarnAtNode = "sort"; + +class PointerSortingChecker : public Checker<check::ASTCodeBody> { +public: + void checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const; +}; + +static void emitDiagnostics(const BoundNodes &Match, const Decl *D, + BugReporter &BR, AnalysisManager &AM, + const PointerSortingChecker *Checker) { + auto *ADC = AM.getAnalysisDeclContext(D); + + const auto *MarkedStmt = Match.getNodeAs<CallExpr>(WarnAtNode); + assert(MarkedStmt); + + auto Range = MarkedStmt->getSourceRange(); + auto Location = PathDiagnosticLocation::createBegin(MarkedStmt, + BR.getSourceManager(), + ADC); + std::string Diagnostics; + llvm::raw_string_ostream OS(Diagnostics); + OS << "Sorting pointer-like elements " + << "can result in non-deterministic ordering"; + + BR.EmitBasicReport(ADC->getDecl(), Checker, + "Sorting of pointer-like elements", "Non-determinism", + OS.str(), Location, Range); +} + +auto callsName(const char *FunctionName) -> decltype(callee(functionDecl())) { + return callee(functionDecl(hasName(FunctionName))); +} + +// FIXME: Currently we simply check if std::sort is used with pointer-like +// elements. 
This approach can have a big false positive rate. Using std::sort, +// std::unique and then erase is common technique for deduplicating a container +// (which in some cases might even be quicker than using, let's say std::set). +// In case a container contains arbitrary memory addresses (e.g. multiple +// things give different stuff but might give the same thing multiple times) +// which we don't want to do things with more than once, we might use +// sort-unique-erase and the sort call will emit a report. +auto matchSortWithPointers() -> decltype(decl()) { + // Match any of these function calls. + auto SortFuncM = anyOf( + callsName("std::is_sorted"), + callsName("std::nth_element"), + callsName("std::partial_sort"), + callsName("std::partition"), + callsName("std::sort"), + callsName("std::stable_partition"), + callsName("std::stable_sort") + ); + + // Match only if the container has pointer-type elements. + auto IteratesPointerEltsM = hasArgument(0, + hasType(cxxRecordDecl(has( + fieldDecl(hasType(hasCanonicalType( + pointsTo(hasCanonicalType(pointerType())) + ))) + )))); + + auto PointerSortM = stmt(callExpr(allOf(SortFuncM, IteratesPointerEltsM)) + ).bind(WarnAtNode); + + return decl(forEachDescendant(PointerSortM)); +} + +void PointerSortingChecker::checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const { + auto MatcherM = matchSortWithPointers(); + + auto Matches = match(MatcherM, *D, AM.getASTContext()); + for (const auto &Match : Matches) + emitDiagnostics(Match, D, BR, AM, this); +} + +} // end of anonymous namespace + +void ento::registerPointerSortingChecker(CheckerManager &Mgr) { + Mgr.registerChecker<PointerSortingChecker>(); +} + +bool ento::shouldRegisterPointerSortingChecker(const LangOptions &LO) { + return LO.CPlusPlus; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp new file mode 100644 index 000000000000..88d0eb2ae748 --- /dev/null +++ 
b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp @@ -0,0 +1,79 @@ +//=== PointerSubChecker.cpp - Pointer subtraction checker ------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files defines PointerSubChecker, a builtin checker that checks for +// pointer subtractions on two pointers pointing to different memory chunks. +// This check corresponds to CWE-469. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class PointerSubChecker + : public Checker< check::PreStmt<BinaryOperator> > { + mutable std::unique_ptr<BuiltinBug> BT; + +public: + void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; +}; +} + +void PointerSubChecker::checkPreStmt(const BinaryOperator *B, + CheckerContext &C) const { + // When doing pointer subtraction, if the two pointers do not point to the + // same memory chunk, emit a warning. + if (B->getOpcode() != BO_Sub) + return; + + SVal LV = C.getSVal(B->getLHS()); + SVal RV = C.getSVal(B->getRHS()); + + const MemRegion *LR = LV.getAsRegion(); + const MemRegion *RR = RV.getAsRegion(); + + if (!(LR && RR)) + return; + + const MemRegion *BaseLR = LR->getBaseRegion(); + const MemRegion *BaseRR = RR->getBaseRegion(); + + if (BaseLR == BaseRR) + return; + + // Allow arithmetic on different symbolic regions. 
+ if (isa<SymbolicRegion>(BaseLR) || isa<SymbolicRegion>(BaseRR)) + return; + + if (ExplodedNode *N = C.generateNonFatalErrorNode()) { + if (!BT) + BT.reset( + new BuiltinBug(this, "Pointer subtraction", + "Subtraction of two pointers that do not point to " + "the same memory chunk may cause incorrect result.")); + auto R = + std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N); + R->addRange(B->getSourceRange()); + C.emitReport(std::move(R)); + } +} + +void ento::registerPointerSubChecker(CheckerManager &mgr) { + mgr.registerChecker<PointerSubChecker>(); +} + +bool ento::shouldRegisterPointerSubChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp new file mode 100644 index 000000000000..8649b8b96dd0 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp @@ -0,0 +1,488 @@ +//===--- PthreadLockChecker.cpp - Check for locking problems ---*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines PthreadLockChecker, a simple lock -> unlock checker. +// Also handles XNU locks, which behave similarly enough to share code. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" + +using namespace clang; +using namespace ento; + +namespace { + +struct LockState { + enum Kind { + Destroyed, + Locked, + Unlocked, + UntouchedAndPossiblyDestroyed, + UnlockedAndPossiblyDestroyed + } K; + +private: + LockState(Kind K) : K(K) {} + +public: + static LockState getLocked() { return LockState(Locked); } + static LockState getUnlocked() { return LockState(Unlocked); } + static LockState getDestroyed() { return LockState(Destroyed); } + static LockState getUntouchedAndPossiblyDestroyed() { + return LockState(UntouchedAndPossiblyDestroyed); + } + static LockState getUnlockedAndPossiblyDestroyed() { + return LockState(UnlockedAndPossiblyDestroyed); + } + + bool operator==(const LockState &X) const { + return K == X.K; + } + + bool isLocked() const { return K == Locked; } + bool isUnlocked() const { return K == Unlocked; } + bool isDestroyed() const { return K == Destroyed; } + bool isUntouchedAndPossiblyDestroyed() const { + return K == UntouchedAndPossiblyDestroyed; + } + bool isUnlockedAndPossiblyDestroyed() const { + return K == UnlockedAndPossiblyDestroyed; + } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(K); + } +}; + +class PthreadLockChecker + : public Checker<check::PostStmt<CallExpr>, check::DeadSymbols> { + mutable std::unique_ptr<BugType> BT_doublelock; + mutable std::unique_ptr<BugType> BT_doubleunlock; + mutable std::unique_ptr<BugType> BT_destroylock; + mutable std::unique_ptr<BugType> BT_initlock; + mutable std::unique_ptr<BugType> BT_lor; + enum 
LockingSemantics { + NotApplicable = 0, + PthreadSemantics, + XNUSemantics + }; +public: + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; + void printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const override; + + void AcquireLock(CheckerContext &C, const CallExpr *CE, SVal lock, + bool isTryLock, enum LockingSemantics semantics) const; + + void ReleaseLock(CheckerContext &C, const CallExpr *CE, SVal lock) const; + void DestroyLock(CheckerContext &C, const CallExpr *CE, SVal Lock, + enum LockingSemantics semantics) const; + void InitLock(CheckerContext &C, const CallExpr *CE, SVal Lock) const; + void reportUseDestroyedBug(CheckerContext &C, const CallExpr *CE) const; + ProgramStateRef resolvePossiblyDestroyedMutex(ProgramStateRef state, + const MemRegion *lockR, + const SymbolRef *sym) const; +}; +} // end anonymous namespace + +// A stack of locks for tracking lock-unlock order. +REGISTER_LIST_WITH_PROGRAMSTATE(LockSet, const MemRegion *) + +// An entry for tracking lock states. +REGISTER_MAP_WITH_PROGRAMSTATE(LockMap, const MemRegion *, LockState) + +// Return values for unresolved calls to pthread_mutex_destroy(). 
+REGISTER_MAP_WITH_PROGRAMSTATE(DestroyRetVal, const MemRegion *, SymbolRef) + +void PthreadLockChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + StringRef FName = C.getCalleeName(CE); + if (FName.empty()) + return; + + if (CE->getNumArgs() != 1 && CE->getNumArgs() != 2) + return; + + if (FName == "pthread_mutex_lock" || + FName == "pthread_rwlock_rdlock" || + FName == "pthread_rwlock_wrlock") + AcquireLock(C, CE, C.getSVal(CE->getArg(0)), false, PthreadSemantics); + else if (FName == "lck_mtx_lock" || + FName == "lck_rw_lock_exclusive" || + FName == "lck_rw_lock_shared") + AcquireLock(C, CE, C.getSVal(CE->getArg(0)), false, XNUSemantics); + else if (FName == "pthread_mutex_trylock" || + FName == "pthread_rwlock_tryrdlock" || + FName == "pthread_rwlock_trywrlock") + AcquireLock(C, CE, C.getSVal(CE->getArg(0)), + true, PthreadSemantics); + else if (FName == "lck_mtx_try_lock" || + FName == "lck_rw_try_lock_exclusive" || + FName == "lck_rw_try_lock_shared") + AcquireLock(C, CE, C.getSVal(CE->getArg(0)), true, XNUSemantics); + else if (FName == "pthread_mutex_unlock" || + FName == "pthread_rwlock_unlock" || + FName == "lck_mtx_unlock" || + FName == "lck_rw_done") + ReleaseLock(C, CE, C.getSVal(CE->getArg(0))); + else if (FName == "pthread_mutex_destroy") + DestroyLock(C, CE, C.getSVal(CE->getArg(0)), PthreadSemantics); + else if (FName == "lck_mtx_destroy") + DestroyLock(C, CE, C.getSVal(CE->getArg(0)), XNUSemantics); + else if (FName == "pthread_mutex_init") + InitLock(C, CE, C.getSVal(CE->getArg(0))); +} + +// When a lock is destroyed, in some semantics(like PthreadSemantics) we are not +// sure if the destroy call has succeeded or failed, and the lock enters one of +// the 'possibly destroyed' state. There is a short time frame for the +// programmer to check the return value to see if the lock was successfully +// destroyed. 
Before we model the next operation over that lock, we call this +// function to see if the return value was checked by now and set the lock state +// - either to destroyed state or back to its previous state. + +// In PthreadSemantics, pthread_mutex_destroy() returns zero if the lock is +// successfully destroyed and it returns a non-zero value otherwise. +ProgramStateRef PthreadLockChecker::resolvePossiblyDestroyedMutex( + ProgramStateRef state, const MemRegion *lockR, const SymbolRef *sym) const { + const LockState *lstate = state->get<LockMap>(lockR); + // Existence in DestroyRetVal ensures existence in LockMap. + // Existence in Destroyed also ensures that the lock state for lockR is either + // UntouchedAndPossiblyDestroyed or UnlockedAndPossiblyDestroyed. + assert(lstate->isUntouchedAndPossiblyDestroyed() || + lstate->isUnlockedAndPossiblyDestroyed()); + + ConstraintManager &CMgr = state->getConstraintManager(); + ConditionTruthVal retZero = CMgr.isNull(state, *sym); + if (retZero.isConstrainedFalse()) { + if (lstate->isUntouchedAndPossiblyDestroyed()) + state = state->remove<LockMap>(lockR); + else if (lstate->isUnlockedAndPossiblyDestroyed()) + state = state->set<LockMap>(lockR, LockState::getUnlocked()); + } else + state = state->set<LockMap>(lockR, LockState::getDestroyed()); + + // Removing the map entry (lockR, sym) from DestroyRetVal as the lock state is + // now resolved. 
+ state = state->remove<DestroyRetVal>(lockR); + return state; +} + +void PthreadLockChecker::printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const { + LockMapTy LM = State->get<LockMap>(); + if (!LM.isEmpty()) { + Out << Sep << "Mutex states:" << NL; + for (auto I : LM) { + I.first->dumpToStream(Out); + if (I.second.isLocked()) + Out << ": locked"; + else if (I.second.isUnlocked()) + Out << ": unlocked"; + else if (I.second.isDestroyed()) + Out << ": destroyed"; + else if (I.second.isUntouchedAndPossiblyDestroyed()) + Out << ": not tracked, possibly destroyed"; + else if (I.second.isUnlockedAndPossiblyDestroyed()) + Out << ": unlocked, possibly destroyed"; + Out << NL; + } + } + + LockSetTy LS = State->get<LockSet>(); + if (!LS.isEmpty()) { + Out << Sep << "Mutex lock order:" << NL; + for (auto I: LS) { + I->dumpToStream(Out); + Out << NL; + } + } + + // TODO: Dump destroyed mutex symbols? +} + +void PthreadLockChecker::AcquireLock(CheckerContext &C, const CallExpr *CE, + SVal lock, bool isTryLock, + enum LockingSemantics semantics) const { + + const MemRegion *lockR = lock.getAsRegion(); + if (!lockR) + return; + + ProgramStateRef state = C.getState(); + const SymbolRef *sym = state->get<DestroyRetVal>(lockR); + if (sym) + state = resolvePossiblyDestroyedMutex(state, lockR, sym); + + SVal X = C.getSVal(CE); + if (X.isUnknownOrUndef()) + return; + + DefinedSVal retVal = X.castAs<DefinedSVal>(); + + if (const LockState *LState = state->get<LockMap>(lockR)) { + if (LState->isLocked()) { + if (!BT_doublelock) + BT_doublelock.reset(new BugType(this, "Double locking", + "Lock checker")); + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + auto report = std::make_unique<PathSensitiveBugReport>( + *BT_doublelock, "This lock has already been acquired", N); + report->addRange(CE->getArg(0)->getSourceRange()); + C.emitReport(std::move(report)); + return; + } else if (LState->isDestroyed()) { + reportUseDestroyedBug(C, CE); 
+ return; + } + } + + ProgramStateRef lockSucc = state; + if (isTryLock) { + // Bifurcate the state, and allow a mode where the lock acquisition fails. + ProgramStateRef lockFail; + switch (semantics) { + case PthreadSemantics: + std::tie(lockFail, lockSucc) = state->assume(retVal); + break; + case XNUSemantics: + std::tie(lockSucc, lockFail) = state->assume(retVal); + break; + default: + llvm_unreachable("Unknown tryLock locking semantics"); + } + assert(lockFail && lockSucc); + C.addTransition(lockFail); + + } else if (semantics == PthreadSemantics) { + // Assume that the return value was 0. + lockSucc = state->assume(retVal, false); + assert(lockSucc); + + } else { + // XNU locking semantics return void on non-try locks + assert((semantics == XNUSemantics) && "Unknown locking semantics"); + lockSucc = state; + } + + // Record that the lock was acquired. + lockSucc = lockSucc->add<LockSet>(lockR); + lockSucc = lockSucc->set<LockMap>(lockR, LockState::getLocked()); + C.addTransition(lockSucc); +} + +void PthreadLockChecker::ReleaseLock(CheckerContext &C, const CallExpr *CE, + SVal lock) const { + + const MemRegion *lockR = lock.getAsRegion(); + if (!lockR) + return; + + ProgramStateRef state = C.getState(); + const SymbolRef *sym = state->get<DestroyRetVal>(lockR); + if (sym) + state = resolvePossiblyDestroyedMutex(state, lockR, sym); + + if (const LockState *LState = state->get<LockMap>(lockR)) { + if (LState->isUnlocked()) { + if (!BT_doubleunlock) + BT_doubleunlock.reset(new BugType(this, "Double unlocking", + "Lock checker")); + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + auto Report = std::make_unique<PathSensitiveBugReport>( + *BT_doubleunlock, "This lock has already been unlocked", N); + Report->addRange(CE->getArg(0)->getSourceRange()); + C.emitReport(std::move(Report)); + return; + } else if (LState->isDestroyed()) { + reportUseDestroyedBug(C, CE); + return; + } + } + + LockSetTy LS = state->get<LockSet>(); + + // FIXME: Better analysis 
requires IPA for wrappers. + + if (!LS.isEmpty()) { + const MemRegion *firstLockR = LS.getHead(); + if (firstLockR != lockR) { + if (!BT_lor) + BT_lor.reset(new BugType(this, "Lock order reversal", "Lock checker")); + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + auto report = std::make_unique<PathSensitiveBugReport>( + *BT_lor, "This was not the most recently acquired lock. Possible " + "lock order reversal", N); + report->addRange(CE->getArg(0)->getSourceRange()); + C.emitReport(std::move(report)); + return; + } + // Record that the lock was released. + state = state->set<LockSet>(LS.getTail()); + } + + state = state->set<LockMap>(lockR, LockState::getUnlocked()); + C.addTransition(state); +} + +void PthreadLockChecker::DestroyLock(CheckerContext &C, const CallExpr *CE, + SVal Lock, + enum LockingSemantics semantics) const { + + const MemRegion *LockR = Lock.getAsRegion(); + if (!LockR) + return; + + ProgramStateRef State = C.getState(); + + const SymbolRef *sym = State->get<DestroyRetVal>(LockR); + if (sym) + State = resolvePossiblyDestroyedMutex(State, LockR, sym); + + const LockState *LState = State->get<LockMap>(LockR); + // Checking the return value of the destroy method only in the case of + // PthreadSemantics + if (semantics == PthreadSemantics) { + if (!LState || LState->isUnlocked()) { + SymbolRef sym = C.getSVal(CE).getAsSymbol(); + if (!sym) { + State = State->remove<LockMap>(LockR); + C.addTransition(State); + return; + } + State = State->set<DestroyRetVal>(LockR, sym); + if (LState && LState->isUnlocked()) + State = State->set<LockMap>( + LockR, LockState::getUnlockedAndPossiblyDestroyed()); + else + State = State->set<LockMap>( + LockR, LockState::getUntouchedAndPossiblyDestroyed()); + C.addTransition(State); + return; + } + } else { + if (!LState || LState->isUnlocked()) { + State = State->set<LockMap>(LockR, LockState::getDestroyed()); + C.addTransition(State); + return; + } + } + StringRef Message; + + if (LState->isLocked()) { 
+ Message = "This lock is still locked"; + } else { + Message = "This lock has already been destroyed"; + } + + if (!BT_destroylock) + BT_destroylock.reset(new BugType(this, "Destroy invalid lock", + "Lock checker")); + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + auto Report = + std::make_unique<PathSensitiveBugReport>(*BT_destroylock, Message, N); + Report->addRange(CE->getArg(0)->getSourceRange()); + C.emitReport(std::move(Report)); +} + +void PthreadLockChecker::InitLock(CheckerContext &C, const CallExpr *CE, + SVal Lock) const { + + const MemRegion *LockR = Lock.getAsRegion(); + if (!LockR) + return; + + ProgramStateRef State = C.getState(); + + const SymbolRef *sym = State->get<DestroyRetVal>(LockR); + if (sym) + State = resolvePossiblyDestroyedMutex(State, LockR, sym); + + const struct LockState *LState = State->get<LockMap>(LockR); + if (!LState || LState->isDestroyed()) { + State = State->set<LockMap>(LockR, LockState::getUnlocked()); + C.addTransition(State); + return; + } + + StringRef Message; + + if (LState->isLocked()) { + Message = "This lock is still being held"; + } else { + Message = "This lock has already been initialized"; + } + + if (!BT_initlock) + BT_initlock.reset(new BugType(this, "Init invalid lock", + "Lock checker")); + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + auto Report = + std::make_unique<PathSensitiveBugReport>(*BT_initlock, Message, N); + Report->addRange(CE->getArg(0)->getSourceRange()); + C.emitReport(std::move(Report)); +} + +void PthreadLockChecker::reportUseDestroyedBug(CheckerContext &C, + const CallExpr *CE) const { + if (!BT_destroylock) + BT_destroylock.reset(new BugType(this, "Use destroyed lock", + "Lock checker")); + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + auto Report = std::make_unique<PathSensitiveBugReport>( + *BT_destroylock, "This lock has already been destroyed", N); + Report->addRange(CE->getArg(0)->getSourceRange()); + 
C.emitReport(std::move(Report)); +} + +void PthreadLockChecker::checkDeadSymbols(SymbolReaper &SymReaper, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // TODO: Clean LockMap when a mutex region dies. + + DestroyRetValTy TrackedSymbols = State->get<DestroyRetVal>(); + for (DestroyRetValTy::iterator I = TrackedSymbols.begin(), + E = TrackedSymbols.end(); + I != E; ++I) { + const SymbolRef Sym = I->second; + const MemRegion *lockR = I->first; + bool IsSymDead = SymReaper.isDead(Sym); + // Remove the dead symbol from the return value symbols map. + if (IsSymDead) + State = resolvePossiblyDestroyedMutex(State, lockR, &Sym); + } + C.addTransition(State); +} + +void ento::registerPthreadLockChecker(CheckerManager &mgr) { + mgr.registerChecker<PthreadLockChecker>(); +} + +bool ento::shouldRegisterPthreadLockChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp new file mode 100644 index 000000000000..6f8cb1432bb1 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp @@ -0,0 +1,1520 @@ +//==-- RetainCountChecker.cpp - Checks for leaks and other issues -*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the methods for RetainCountChecker, which implements +// a reference count checker for Core Foundation and Cocoa on (Mac OS X). 
+// +//===----------------------------------------------------------------------===// + +#include "RetainCountChecker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" + +using namespace clang; +using namespace ento; +using namespace retaincountchecker; +using llvm::StrInStrNoCase; + +REGISTER_MAP_WITH_PROGRAMSTATE(RefBindings, SymbolRef, RefVal) + +namespace clang { +namespace ento { +namespace retaincountchecker { + +const RefVal *getRefBinding(ProgramStateRef State, SymbolRef Sym) { + return State->get<RefBindings>(Sym); +} + +} // end namespace retaincountchecker +} // end namespace ento +} // end namespace clang + +static ProgramStateRef setRefBinding(ProgramStateRef State, SymbolRef Sym, + RefVal Val) { + assert(Sym != nullptr); + return State->set<RefBindings>(Sym, Val); +} + +static ProgramStateRef removeRefBinding(ProgramStateRef State, SymbolRef Sym) { + return State->remove<RefBindings>(Sym); +} + +void RefVal::print(raw_ostream &Out) const { + if (!T.isNull()) + Out << "Tracked " << T.getAsString() << " | "; + + switch (getKind()) { + default: llvm_unreachable("Invalid RefVal kind"); + case Owned: { + Out << "Owned"; + unsigned cnt = getCount(); + if (cnt) Out << " (+ " << cnt << ")"; + break; + } + + case NotOwned: { + Out << "NotOwned"; + unsigned cnt = getCount(); + if (cnt) Out << " (+ " << cnt << ")"; + break; + } + + case ReturnedOwned: { + Out << "ReturnedOwned"; + unsigned cnt = getCount(); + if (cnt) Out << " (+ " << cnt << ")"; + break; + } + + case ReturnedNotOwned: { + Out << "ReturnedNotOwned"; + unsigned cnt = getCount(); + if (cnt) Out << " (+ " << cnt << ")"; + break; + } + + case Released: + Out << "Released"; + break; + + case ErrorDeallocNotOwned: + Out << "-dealloc (not-owned)"; + break; + + case ErrorLeak: + Out << "Leaked"; + break; + + case ErrorLeakReturned: + Out << "Leaked (Bad naming)"; + break; + + case ErrorUseAfterRelease: + Out << "Use-After-Release [ERROR]"; + break; + + case ErrorReleaseNotOwned: + Out 
<< "Release of Not-Owned [ERROR]"; + break; + + case RefVal::ErrorOverAutorelease: + Out << "Over-autoreleased"; + break; + + case RefVal::ErrorReturnedNotOwned: + Out << "Non-owned object returned instead of owned"; + break; + } + + switch (getIvarAccessHistory()) { + case IvarAccessHistory::None: + break; + case IvarAccessHistory::AccessedDirectly: + Out << " [direct ivar access]"; + break; + case IvarAccessHistory::ReleasedAfterDirectAccess: + Out << " [released after direct ivar access]"; + } + + if (ACnt) { + Out << " [autorelease -" << ACnt << ']'; + } +} + +namespace { +class StopTrackingCallback final : public SymbolVisitor { + ProgramStateRef state; +public: + StopTrackingCallback(ProgramStateRef st) : state(std::move(st)) {} + ProgramStateRef getState() const { return state; } + + bool VisitSymbol(SymbolRef sym) override { + state = removeRefBinding(state, sym); + return true; + } +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Handle statements that may have an effect on refcounts. +//===----------------------------------------------------------------------===// + +void RetainCountChecker::checkPostStmt(const BlockExpr *BE, + CheckerContext &C) const { + + // Scan the BlockDecRefExprs for any object the retain count checker + // may be tracking. + if (!BE->getBlockDecl()->hasCaptures()) + return; + + ProgramStateRef state = C.getState(); + auto *R = cast<BlockDataRegion>(C.getSVal(BE).getAsRegion()); + + BlockDataRegion::referenced_vars_iterator I = R->referenced_vars_begin(), + E = R->referenced_vars_end(); + + if (I == E) + return; + + // FIXME: For now we invalidate the tracking of all symbols passed to blocks + // via captured variables, even though captured variables result in a copy + // and in implicit increment/decrement of a retain count. 
+ SmallVector<const MemRegion*, 10> Regions; + const LocationContext *LC = C.getLocationContext(); + MemRegionManager &MemMgr = C.getSValBuilder().getRegionManager(); + + for ( ; I != E; ++I) { + const VarRegion *VR = I.getCapturedRegion(); + if (VR->getSuperRegion() == R) { + VR = MemMgr.getVarRegion(VR->getDecl(), LC); + } + Regions.push_back(VR); + } + + state = state->scanReachableSymbols<StopTrackingCallback>(Regions).getState(); + C.addTransition(state); +} + +void RetainCountChecker::checkPostStmt(const CastExpr *CE, + CheckerContext &C) const { + const ObjCBridgedCastExpr *BE = dyn_cast<ObjCBridgedCastExpr>(CE); + if (!BE) + return; + + QualType QT = CE->getType(); + ObjKind K; + if (QT->isObjCObjectPointerType()) { + K = ObjKind::ObjC; + } else { + K = ObjKind::CF; + } + + ArgEffect AE = ArgEffect(IncRef, K); + + switch (BE->getBridgeKind()) { + case OBC_Bridge: + // Do nothing. + return; + case OBC_BridgeRetained: + AE = AE.withKind(IncRef); + break; + case OBC_BridgeTransfer: + AE = AE.withKind(DecRefBridgedTransferred); + break; + } + + ProgramStateRef state = C.getState(); + SymbolRef Sym = C.getSVal(CE).getAsLocSymbol(); + if (!Sym) + return; + const RefVal* T = getRefBinding(state, Sym); + if (!T) + return; + + RefVal::Kind hasErr = (RefVal::Kind) 0; + state = updateSymbol(state, Sym, *T, AE, hasErr, C); + + if (hasErr) { + // FIXME: If we get an error during a bridge cast, should we report it? 
+ return; + } + + C.addTransition(state); +} + +void RetainCountChecker::processObjCLiterals(CheckerContext &C, + const Expr *Ex) const { + ProgramStateRef state = C.getState(); + const ExplodedNode *pred = C.getPredecessor(); + for (const Stmt *Child : Ex->children()) { + SVal V = pred->getSVal(Child); + if (SymbolRef sym = V.getAsSymbol()) + if (const RefVal* T = getRefBinding(state, sym)) { + RefVal::Kind hasErr = (RefVal::Kind) 0; + state = updateSymbol(state, sym, *T, + ArgEffect(MayEscape, ObjKind::ObjC), hasErr, C); + if (hasErr) { + processNonLeakError(state, Child->getSourceRange(), hasErr, sym, C); + return; + } + } + } + + // Return the object as autoreleased. + // RetEffect RE = RetEffect::MakeNotOwned(ObjKind::ObjC); + if (SymbolRef sym = + state->getSVal(Ex, pred->getLocationContext()).getAsSymbol()) { + QualType ResultTy = Ex->getType(); + state = setRefBinding(state, sym, + RefVal::makeNotOwned(ObjKind::ObjC, ResultTy)); + } + + C.addTransition(state); +} + +void RetainCountChecker::checkPostStmt(const ObjCArrayLiteral *AL, + CheckerContext &C) const { + // Apply the 'MayEscape' to all values. + processObjCLiterals(C, AL); +} + +void RetainCountChecker::checkPostStmt(const ObjCDictionaryLiteral *DL, + CheckerContext &C) const { + // Apply the 'MayEscape' to all keys and values. 
+ processObjCLiterals(C, DL); +} + +void RetainCountChecker::checkPostStmt(const ObjCBoxedExpr *Ex, + CheckerContext &C) const { + const ExplodedNode *Pred = C.getPredecessor(); + ProgramStateRef State = Pred->getState(); + + if (SymbolRef Sym = Pred->getSVal(Ex).getAsSymbol()) { + QualType ResultTy = Ex->getType(); + State = setRefBinding(State, Sym, + RefVal::makeNotOwned(ObjKind::ObjC, ResultTy)); + } + + C.addTransition(State); +} + +void RetainCountChecker::checkPostStmt(const ObjCIvarRefExpr *IRE, + CheckerContext &C) const { + Optional<Loc> IVarLoc = C.getSVal(IRE).getAs<Loc>(); + if (!IVarLoc) + return; + + ProgramStateRef State = C.getState(); + SymbolRef Sym = State->getSVal(*IVarLoc).getAsSymbol(); + if (!Sym || !dyn_cast_or_null<ObjCIvarRegion>(Sym->getOriginRegion())) + return; + + // Accessing an ivar directly is unusual. If we've done that, be more + // forgiving about what the surrounding code is allowed to do. + + QualType Ty = Sym->getType(); + ObjKind Kind; + if (Ty->isObjCRetainableType()) + Kind = ObjKind::ObjC; + else if (coreFoundation::isCFObjectRef(Ty)) + Kind = ObjKind::CF; + else + return; + + // If the value is already known to be nil, don't bother tracking it. + ConstraintManager &CMgr = State->getConstraintManager(); + if (CMgr.isNull(State, Sym).isConstrainedTrue()) + return; + + if (const RefVal *RV = getRefBinding(State, Sym)) { + // If we've seen this symbol before, or we're only seeing it now because + // of something the analyzer has synthesized, don't do anything. + if (RV->getIvarAccessHistory() != RefVal::IvarAccessHistory::None || + isSynthesizedAccessor(C.getStackFrame())) { + return; + } + + // Note that this value has been loaded from an ivar. + C.addTransition(setRefBinding(State, Sym, RV->withIvarAccess())); + return; + } + + RefVal PlusZero = RefVal::makeNotOwned(Kind, Ty); + + // In a synthesized accessor, the effective retain count is +0. 
+ if (isSynthesizedAccessor(C.getStackFrame())) { + C.addTransition(setRefBinding(State, Sym, PlusZero)); + return; + } + + State = setRefBinding(State, Sym, PlusZero.withIvarAccess()); + C.addTransition(State); +} + +static bool isReceiverUnconsumedSelf(const CallEvent &Call) { + if (const auto *MC = dyn_cast<ObjCMethodCall>(&Call)) { + + // Check if the message is not consumed, we know it will not be used in + // an assignment, ex: "self = [super init]". + return MC->getMethodFamily() == OMF_init && MC->isReceiverSelfOrSuper() && + !Call.getLocationContext() + ->getAnalysisDeclContext() + ->getParentMap() + .isConsumedExpr(Call.getOriginExpr()); + } + return false; +} + +const static RetainSummary *getSummary(RetainSummaryManager &Summaries, + const CallEvent &Call, + QualType ReceiverType) { + const Expr *CE = Call.getOriginExpr(); + AnyCall C = + CE ? *AnyCall::forExpr(CE) + : AnyCall(cast<CXXDestructorDecl>(Call.getDecl())); + return Summaries.getSummary(C, Call.hasNonZeroCallbackArg(), + isReceiverUnconsumedSelf(Call), ReceiverType); +} + +void RetainCountChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + RetainSummaryManager &Summaries = getSummaryManager(C); + + // Leave null if no receiver. + QualType ReceiverType; + if (const auto *MC = dyn_cast<ObjCMethodCall>(&Call)) { + if (MC->isInstanceMessage()) { + SVal ReceiverV = MC->getReceiverSVal(); + if (SymbolRef Sym = ReceiverV.getAsLocSymbol()) + if (const RefVal *T = getRefBinding(C.getState(), Sym)) + ReceiverType = T->getType(); + } + } + + const RetainSummary *Summ = getSummary(Summaries, Call, ReceiverType); + + if (C.wasInlined) { + processSummaryOfInlined(*Summ, Call, C); + return; + } + checkSummary(*Summ, Call, C); +} + +/// GetReturnType - Used to get the return type of a message expression or +/// function call with the intention of affixing that type to a tracked symbol. 
+/// While the return type can be queried directly from RetEx, when +/// invoking class methods we augment to the return type to be that of +/// a pointer to the class (as opposed it just being id). +// FIXME: We may be able to do this with related result types instead. +// This function is probably overestimating. +static QualType GetReturnType(const Expr *RetE, ASTContext &Ctx) { + QualType RetTy = RetE->getType(); + // If RetE is not a message expression just return its type. + // If RetE is a message expression, return its types if it is something + /// more specific than id. + if (const ObjCMessageExpr *ME = dyn_cast<ObjCMessageExpr>(RetE)) + if (const ObjCObjectPointerType *PT = RetTy->getAs<ObjCObjectPointerType>()) + if (PT->isObjCQualifiedIdType() || PT->isObjCIdType() || + PT->isObjCClassType()) { + // At this point we know the return type of the message expression is + // id, id<...>, or Class. If we have an ObjCInterfaceDecl, we know this + // is a call to a class method whose type we can resolve. In such + // cases, promote the return type to XXX* (where XXX is the class). + const ObjCInterfaceDecl *D = ME->getReceiverInterface(); + return !D ? RetTy : + Ctx.getObjCObjectPointerType(Ctx.getObjCInterfaceType(D)); + } + + return RetTy; +} + +static Optional<RefVal> refValFromRetEffect(RetEffect RE, + QualType ResultTy) { + if (RE.isOwned()) { + return RefVal::makeOwned(RE.getObjKind(), ResultTy); + } else if (RE.notOwned()) { + return RefVal::makeNotOwned(RE.getObjKind(), ResultTy); + } + + return None; +} + +static bool isPointerToObject(QualType QT) { + QualType PT = QT->getPointeeType(); + if (!PT.isNull()) + if (PT->getAsCXXRecordDecl()) + return true; + return false; +} + +/// Whether the tracked value should be escaped on a given call. +/// OSObjects are escaped when passed to void * / etc. 
+/// Returns true when a tracked OSObject passed as argument \p ArgIdx of \p CE
+/// should be treated as escaping: \p TrackedValue has kind ObjKind::OS, the
+/// callee declares a parameter at that index, and that parameter's type is
+/// rejected by isPointerToObject().
+static bool shouldEscapeOSArgumentOnCall(const CallEvent &CE, unsigned ArgIdx,
+                                         const RefVal *TrackedValue) {
+  if (TrackedValue->getObjKind() != ObjKind::OS)
+    return false;
+  // Arguments that have no matching declared parameter are left alone.
+  if (ArgIdx >= CE.parameters().size())
+    return false;
+  return !isPointerToObject(CE.parameters()[ArgIdx]->getType());
+}
+
+// We don't always get the exact modeling of the function with regards to the
+// retain count checker even when the function is inlined. For example, we need
+// to stop tracking the symbols which were marked with StopTrackingHard.
+//
+// Concretely, this removes the ref binding of:
+//  - every argument symbol whose summary effect is StopTrackingHard, or that
+//    is a tracked value for which shouldEscapeOSArgumentOnCall() holds;
+//  - an Objective-C message receiver whose effect is StopTrackingHard;
+//  - a return-value symbol when the return effect is RetEffect::NoRetHard;
+// and then adds a transition to the resulting state.
+void RetainCountChecker::processSummaryOfInlined(const RetainSummary &Summ,
+                                                 const CallEvent &CallOrMsg,
+                                                 CheckerContext &C) const {
+  ProgramStateRef state = C.getState();
+
+  // Evaluate the effect of the arguments.
+  for (unsigned idx = 0, e = CallOrMsg.getNumArgs(); idx != e; ++idx) {
+    SVal V = CallOrMsg.getArgSVal(idx);
+
+    if (SymbolRef Sym = V.getAsLocSymbol()) {
+      bool ShouldRemoveBinding = Summ.getArg(idx).getKind() == StopTrackingHard;
+      if (const RefVal *T = getRefBinding(state, Sym))
+        if (shouldEscapeOSArgumentOnCall(CallOrMsg, idx, T))
+          ShouldRemoveBinding = true;
+
+      if (ShouldRemoveBinding)
+        state = removeRefBinding(state, Sym);
+    }
+  }
+
+  // Evaluate the effect on the message receiver.
+  if (const auto *MsgInvocation = dyn_cast<ObjCMethodCall>(&CallOrMsg)) {
+    if (SymbolRef Sym = MsgInvocation->getReceiverSVal().getAsLocSymbol()) {
+      if (Summ.getReceiverEffect().getKind() == StopTrackingHard) {
+        state = removeRefBinding(state, Sym);
+      }
+    }
+  }
+
+  // Consult the summary for the return value.
+  RetEffect RE = Summ.getRetEffect();
+
+  if (SymbolRef Sym = CallOrMsg.getReturnValue().getAsSymbol()) {
+    if (RE.getKind() == RetEffect::NoRetHard)
+      state = removeRefBinding(state, Sym);
+  }
+
+  C.addTransition(state);
+}
+
+/// Returns true if the super-region of \p MR is a TypedValueRegion whose value
+/// type is reported as a known smart pointer by RetainSummaryManager.
+/// \p MR must be a SubRegion; this is enforced by the cast<> below.
+static bool isSmartPtrField(const MemRegion *MR) {
+  const auto *TR = dyn_cast<TypedValueRegion>(
+    cast<SubRegion>(MR)->getSuperRegion());
+  return TR && RetainSummaryManager::isKnownSmartPointer(TR->getValueType());
+}
+
+
+/// A value escapes in these possible cases:
+///
+/// - binding to something that is not a memory region.
+/// - binding to a memregion that does not have stack storage
+/// - binding to a variable that has a destructor attached using CleanupAttr
+///
+/// We do not currently model what happens when a symbol is
+/// assigned to a struct field, unless it is a known smart pointer
+/// implementation, about which we know that it is inlined.
+/// FIXME: This could definitely be improved upon.
+static bool shouldEscapeRegion(const MemRegion *R) {
+  // Fields of known smart pointers never escape.
+  if (isSmartPtrField(R))
+    return false;
+
+  const auto *VR = dyn_cast<VarRegion>(R);
+
+  // Anything that is not a stack-allocated variable escapes.
+  if (!R->hasStackStorage() || !VR)
+    return true;
+
+  const VarDecl *VD = VR->getDecl();
+  if (!VD->hasAttr<CleanupAttr>())
+    return false; // CleanupAttr attaches destructors, which cause escaping.
+  return true;
+}
+
+/// Applies the out-parameter argument effects of \p Summ for the call \p CE on
+/// top of \p State.
+///
+/// \returns a single state when no argument effect is conditional on the
+/// return value, or when a conditional effect is present but the call's
+/// result type is not scalar (in which case \p State is returned unchanged).
+/// Otherwise returns two states, in this order: one built on the assumption
+/// that the return value is non-zero, and one built on the assumption that it
+/// is zero.
+static SmallVector<ProgramStateRef, 2>
+updateOutParameters(ProgramStateRef State, const RetainSummary &Summ,
+                    const CallEvent &CE) {
+
+  SVal L = CE.getReturnValue();
+
+  // Splitting is required to support out parameters,
+  // as out parameters might be created only on the "success" branch.
+  // We want to avoid eagerly splitting unless out parameters are actually
+  // needed.
+  bool SplitNecessary = false;
+  for (auto &P : Summ.getArgEffects())
+    if (P.second.getKind() == RetainedOutParameterOnNonZero ||
+        P.second.getKind() == RetainedOutParameterOnZero)
+      SplitNecessary = true;
+
+  ProgramStateRef AssumeNonZeroReturn = State;
+  ProgramStateRef AssumeZeroReturn = State;
+
+  if (SplitNecessary) {
+    if (!CE.getResultType()->isScalarType()) {
+      // Structures cannot be assumed. This probably deserves
+      // a compiler warning for invalid annotations.
+      return {State};
+    }
+    // NOTE(review): the results of assume() are used below without a null
+    // check; presumably both branches are always feasible for the return
+    // value seen here -- confirm.
+    if (auto DL = L.getAs<DefinedOrUnknownSVal>()) {
+      AssumeNonZeroReturn = AssumeNonZeroReturn->assume(*DL, true);
+      AssumeZeroReturn = AssumeZeroReturn->assume(*DL, false);
+    }
+  }
+
+  for (unsigned idx = 0, e = CE.getNumArgs(); idx != e; ++idx) {
+    SVal ArgVal = CE.getArgSVal(idx);
+    ArgEffect AE = Summ.getArg(idx);
+
+    // Only arguments that are typed regions can act as out parameters.
+    auto *ArgRegion = dyn_cast_or_null<TypedValueRegion>(ArgVal.getAsRegion());
+    if (!ArgRegion)
+      continue;
+
+    // The value is read from the incoming State, not from either of the
+    // split states.
+    QualType PointeeTy = ArgRegion->getValueType();
+    SVal PointeeVal = State->getSVal(ArgRegion);
+    SymbolRef Pointee = PointeeVal.getAsLocSymbol();
+    if (!Pointee)
+      continue;
+
+    if (shouldEscapeRegion(ArgRegion))
+      continue;
+
+    // Helpers that bind the pointee symbol in a given state. Note that the
+    // owned variant always uses ObjKind::OS, while the not-owned variant uses
+    // the object kind of the argument effect.
+    auto makeNotOwnedParameter = [&](ProgramStateRef St) {
+      return setRefBinding(St, Pointee,
+                           RefVal::makeNotOwned(AE.getObjKind(), PointeeTy));
+    };
+    auto makeOwnedParameter = [&](ProgramStateRef St) {
+      return setRefBinding(St, Pointee,
+                           RefVal::makeOwned(ObjKind::OS, PointeeTy));
+    };
+
+    switch (AE.getKind()) {
+    case UnretainedOutParameter:
+      AssumeNonZeroReturn = makeNotOwnedParameter(AssumeNonZeroReturn);
+      AssumeZeroReturn = makeNotOwnedParameter(AssumeZeroReturn);
+      break;
+    case RetainedOutParameter:
+      AssumeNonZeroReturn = makeOwnedParameter(AssumeNonZeroReturn);
+      AssumeZeroReturn = makeOwnedParameter(AssumeZeroReturn);
+      break;
+    case RetainedOutParameterOnNonZero:
+      AssumeNonZeroReturn = makeOwnedParameter(AssumeNonZeroReturn);
+      break;
+    case RetainedOutParameterOnZero:
+      AssumeZeroReturn = makeOwnedParameter(AssumeZeroReturn);
+      break;
+    default:
+      break;
+    }
+  }
+
+  if (SplitNecessary) {
+    return {AssumeNonZeroReturn, AssumeZeroReturn};
+  } else {
+    assert(AssumeZeroReturn == AssumeNonZeroReturn);
+    return {AssumeZeroReturn};
+  }
+}
+
+/// Applies the effects described by \p Summ to the arguments, the message
+/// receiver / `this` object and the return value of \p CallOrMsg.
+///
+/// If updating an argument or the receiver yields an error kind, the error is
+/// handed to processNonLeakError() and no further transition is added here.
+/// Otherwise the return value is bound according to the return effect, the
+/// out parameters are processed by updateOutParameters(), and one transition
+/// is added per resulting state (tagged with DeallocSentTag when a Dealloc
+/// effect was applied without error).
+void RetainCountChecker::checkSummary(const RetainSummary &Summ,
+                                      const CallEvent &CallOrMsg,
+                                      CheckerContext &C) const {
+  ProgramStateRef state = C.getState();
+
+  // Evaluate the effect of the arguments.
+  RefVal::Kind hasErr = (RefVal::Kind) 0;
+  SourceRange ErrorRange;
+  SymbolRef ErrorSym = nullptr;
+
+  // Helper tag for providing diagnostics: indicate whether dealloc was sent
+  // at this location.
+  bool DeallocSent = false;
+
+  for (unsigned idx = 0, e = CallOrMsg.getNumArgs(); idx != e; ++idx) {
+    SVal V = CallOrMsg.getArgSVal(idx);
+
+    ArgEffect Effect = Summ.getArg(idx);
+    if (SymbolRef Sym = V.getAsLocSymbol()) {
+      if (const RefVal *T = getRefBinding(state, Sym)) {
+
+        // Escaping OSObject arguments override the summary's effect.
+        if (shouldEscapeOSArgumentOnCall(CallOrMsg, idx, T))
+          Effect = ArgEffect(StopTrackingHard, ObjKind::OS);
+
+        state = updateSymbol(state, Sym, *T, Effect, hasErr, C);
+        if (hasErr) {
+          // Only the first error is recorded; remaining arguments are skipped.
+          ErrorRange = CallOrMsg.getArgSourceRange(idx);
+          ErrorSym = Sym;
+          break;
+        } else if (Effect.getKind() == Dealloc) {
+          DeallocSent = true;
+        }
+      }
+    }
+  }
+
+  // Evaluate the effect on the message receiver / `this` argument.
+  bool ReceiverIsTracked = false;
+  if (!hasErr) {
+    if (const auto *MsgInvocation = dyn_cast<ObjCMethodCall>(&CallOrMsg)) {
+      if (SymbolRef Sym = MsgInvocation->getReceiverSVal().getAsLocSymbol()) {
+        if (const RefVal *T = getRefBinding(state, Sym)) {
+          ReceiverIsTracked = true;
+          state = updateSymbol(state, Sym, *T,
+                               Summ.getReceiverEffect(), hasErr, C);
+          if (hasErr) {
+            ErrorRange = MsgInvocation->getOriginExpr()->getReceiverRange();
+            ErrorSym = Sym;
+          } else if (Summ.getReceiverEffect().getKind() == Dealloc) {
+            DeallocSent = true;
+          }
+        }
+      }
+    } else if (const auto *MCall = dyn_cast<CXXMemberCall>(&CallOrMsg)) {
+      if (SymbolRef Sym = MCall->getCXXThisVal().getAsLocSymbol()) {
+        if (const RefVal *T = getRefBinding(state, Sym)) {
+          state = updateSymbol(state, Sym, *T, Summ.getThisEffect(),
+                               hasErr, C);
+          if (hasErr) {
+            ErrorRange = MCall->getOriginExpr()->getSourceRange();
+            ErrorSym = Sym;
+          }
+        }
+      }
+    }
+  }
+
+  // Process any errors.
+  if (hasErr) {
+    processNonLeakError(state, ErrorRange, hasErr, ErrorSym, C);
+    return;
+  }
+
+  // Consult the summary for the return value.
+  RetEffect RE = Summ.getRetEffect();
+
+  // Resolve the receiver-dependent effect into a concrete one.
+  if (RE.getKind() == RetEffect::OwnedWhenTrackedReceiver) {
+    if (ReceiverIsTracked)
+      RE = getSummaryManager(C).getObjAllocRetEffect();
+    else
+      RE = RetEffect::MakeNoRet();
+  }
+
+  if (SymbolRef Sym = CallOrMsg.getReturnValue().getAsSymbol()) {
+    QualType ResultTy = CallOrMsg.getResultType();
+    if (RE.notOwned()) {
+      const Expr *Ex = CallOrMsg.getOriginExpr();
+      assert(Ex);
+      ResultTy = GetReturnType(Ex, C.getASTContext());
+    }
+    if (Optional<RefVal> updatedRefVal = refValFromRetEffect(RE, ResultTy))
+      state = setRefBinding(state, Sym, *updatedRefVal);
+  }
+
+  SmallVector<ProgramStateRef, 2> Out =
+      updateOutParameters(state, Summ, CallOrMsg);
+
+  for (ProgramStateRef St : Out) {
+    if (DeallocSent) {
+      C.addTransition(St, C.getPredecessor(), &DeallocSentTag);
+    } else {
+      C.addTransition(St);
+    }
+  }
+}
+
+/// Applies the argument effect \p AE to the reference-count value \p V that is
+/// currently bound to \p sym and returns the resulting program state.
+///
+/// \param hasErr set to the resulting error kind when the effect turns \p V
+/// into an error state (use after release, dealloc of a not-owned object,
+/// release of a not-owned object); left untouched otherwise.
+///
+/// Depending on the effect, the binding for \p sym is updated, removed, or
+/// left as is.
+ProgramStateRef RetainCountChecker::updateSymbol(ProgramStateRef state,
+                                                 SymbolRef sym, RefVal V,
+                                                 ArgEffect AE,
+                                                 RefVal::Kind &hasErr,
+                                                 CheckerContext &C) const {
+  // With ObjCAutoRefCount enabled, retain/release effects on ObjC objects are
+  // downgraded: IncRef/DecRef become DoNothing and DecRefAndStopTrackingHard
+  // becomes StopTracking.
+  bool IgnoreRetainMsg = (bool)C.getASTContext().getLangOpts().ObjCAutoRefCount;
+  if (AE.getObjKind() == ObjKind::ObjC && IgnoreRetainMsg) {
+    switch (AE.getKind()) {
+    default:
+      break;
+    case IncRef:
+      AE = AE.withKind(DoNothing);
+      break;
+    case DecRef:
+      AE = AE.withKind(DoNothing);
+      break;
+    case DecRefAndStopTrackingHard:
+      AE = AE.withKind(StopTracking);
+      break;
+    }
+  }
+
+  // Handle all use-after-releases.
+  if (V.getKind() == RefVal::Released) {
+    V = V ^ RefVal::ErrorUseAfterRelease;
+    hasErr = V.getKind();
+    return setRefBinding(state, sym, V);
+  }
+
+  switch (AE.getKind()) {
+    case UnretainedOutParameter:
+    case RetainedOutParameter:
+    case RetainedOutParameterOnZero:
+    case RetainedOutParameterOnNonZero:
+      llvm_unreachable("Applies to pointer-to-pointer parameters, which should "
+                       "not have ref state.");
+
+    case Dealloc: // NB. we only need to add a note in a non-error case.
+      switch (V.getKind()) {
+        default:
+          llvm_unreachable("Invalid RefVal state for an explicit dealloc.");
+        case RefVal::Owned:
+          // The object immediately transitions to the released state.
+          V = V ^ RefVal::Released;
+          V.clearCounts();
+          return setRefBinding(state, sym, V);
+        case RefVal::NotOwned:
+          V = V ^ RefVal::ErrorDeallocNotOwned;
+          hasErr = V.getKind();
+          break;
+      }
+      break;
+
+    case MayEscape:
+      // An owned value becomes not-owned; any other kind is left unchanged
+      // via the fallthrough into DoNothing.
+      if (V.getKind() == RefVal::Owned) {
+        V = V ^ RefVal::NotOwned;
+        break;
+      }
+
+      LLVM_FALLTHROUGH;
+
+    case DoNothing:
+      return state;
+
+    case Autorelease:
+      // Update the autorelease counts.
+      V = V.autorelease();
+      break;
+
+    case StopTracking:
+    case StopTrackingHard:
+      return removeRefBinding(state, sym);
+
+    case IncRef:
+      switch (V.getKind()) {
+        default:
+          llvm_unreachable("Invalid RefVal state for a retain.");
+        case RefVal::Owned:
+        case RefVal::NotOwned:
+          V = V + 1;
+          break;
+      }
+      break;
+
+    case DecRef:
+    case DecRefBridgedTransferred:
+    case DecRefAndStopTrackingHard:
+      switch (V.getKind()) {
+        default:
+          // case 'RefVal::Released' handled above.
+          llvm_unreachable("Invalid RefVal state for a release.");
+
+        case RefVal::Owned:
+          assert(V.getCount() > 0);
+          if (V.getCount() == 1) {
+            // Dropping the last counted reference: a bridged transfer or a
+            // directly accessed ivar leaves the value not-owned, anything
+            // else marks it released.
+            if (AE.getKind() == DecRefBridgedTransferred ||
+                V.getIvarAccessHistory() ==
+                  RefVal::IvarAccessHistory::AccessedDirectly)
+              V = V ^ RefVal::NotOwned;
+            else
+              V = V ^ RefVal::Released;
+          } else if (AE.getKind() == DecRefAndStopTrackingHard) {
+            return removeRefBinding(state, sym);
+          }
+
+          V = V - 1;
+          break;
+
+        case RefVal::NotOwned:
+          if (V.getCount() > 0) {
+            if (AE.getKind() == DecRefAndStopTrackingHard)
+              return removeRefBinding(state, sym);
+            V = V - 1;
+          } else if (V.getIvarAccessHistory() ==
+                       RefVal::IvarAccessHistory::AccessedDirectly) {
+            // Assume that the instance variable was holding on the object at
+            // +1, and we just didn't know.
+            if (AE.getKind() == DecRefAndStopTrackingHard)
+              return removeRefBinding(state, sym);
+            V = V.releaseViaIvar() ^ RefVal::Released;
+          } else {
+            V = V ^ RefVal::ErrorReleaseNotOwned;
+            hasErr = V.getKind();
+          }
+          break;
+      }
+      break;
+  }
+  return setRefBinding(state, sym, V);
+}
+
+/// Maps a non-leak error kind to the bug type used to report it.
+/// ErrorDeallocNotOwned maps to freeNotOwned when the type of \p Sym points to
+/// a C++ record, and to deallocNotOwned otherwise. Any kind not listed below
+/// is unreachable.
+const RefCountBug &
+RetainCountChecker::errorKindToBugKind(RefVal::Kind ErrorKind,
+                                       SymbolRef Sym) const {
+  switch (ErrorKind) {
+    case RefVal::ErrorUseAfterRelease:
+      return useAfterRelease;
+    case RefVal::ErrorReleaseNotOwned:
+      return releaseNotOwned;
+    case RefVal::ErrorDeallocNotOwned:
+      if (Sym->getType()->getPointeeCXXRecordDecl())
+        return freeNotOwned;
+      return deallocNotOwned;
+    default:
+      llvm_unreachable("Unhandled error.");
+  }
+}
+
+/// Emits a RefCountReport for the non-leak error \p ErrorKind on \p Sym,
+/// attaching \p ErrorRange to the report. Nothing is emitted when the symbol's
+/// binding has an ivar access history, or when no error node could be
+/// generated from \p St.
+void RetainCountChecker::processNonLeakError(ProgramStateRef St,
+                                             SourceRange ErrorRange,
+                                             RefVal::Kind ErrorKind,
+                                             SymbolRef Sym,
+                                             CheckerContext &C) const {
+  // HACK: Ignore retain-count issues on values accessed through ivars,
+  // because of cases like this:
+  //   [_contentView retain];
+  //   [_contentView removeFromSuperview];
+  //   [self addSubview:_contentView]; // invalidates 'self'
+  //   [_contentView release];
+  if (const RefVal *RV = getRefBinding(St, Sym))
+    if (RV->getIvarAccessHistory() != RefVal::IvarAccessHistory::None)
+      return;
+
+  ExplodedNode *N = C.generateErrorNode(St);
+  if (!N)
+    return;
+
+  auto report = std::make_unique<RefCountReport>(
+      errorKindToBugKind(ErrorKind, Sym),
+      C.getASTContext().getLangOpts(), N, Sym);
+  report->addRange(ErrorRange);
+  C.emitReport(std::move(report));
+}
+
+//===----------------------------------------------------------------------===//
+// Handle the return values of retain-count-related functions.
+//===----------------------------------------------------------------------===//
+
+/// Evaluates calls for which the summary manager provides a behavior summary.
+///
+/// \returns false when the callee is not a FunctionDecl, the origin expression
+/// is not a CallExpr, or canEval() yields no summary; true otherwise, after
+/// adding the corresponding transition(s).
+///
+/// For the Identity, IdentityOrZero and IdentityThis summaries the call
+/// expression is bound to the value of the first argument (or of the implicit
+/// object argument for IdentityThis). IdentityOrZero additionally adds a
+/// transition, tagged with CastFailTag, in which the call expression is bound
+/// to null.
+bool RetainCountChecker::evalCall(const CallEvent &Call,
+                                  CheckerContext &C) const {
+  ProgramStateRef state = C.getState();
+  const auto *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
+  if (!FD)
+    return false;
+
+  const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
+  if (!CE)
+    return false;
+
+  RetainSummaryManager &SmrMgr = getSummaryManager(C);
+  QualType ResultTy = Call.getResultType();
+
+  // See if the function has 'rc_ownership_trusted_implementation'
+  // annotate attribute. If it does, we will not inline it.
+  bool hasTrustedImplementationAnnotation = false;
+
+  const LocationContext *LCtx = C.getLocationContext();
+
+  using BehaviorSummary = RetainSummaryManager::BehaviorSummary;
+  Optional<BehaviorSummary> BSmr =
+    SmrMgr.canEval(CE, FD, hasTrustedImplementationAnnotation);
+
+  // See if it's one of the specific functions we know how to eval.
+  if (!BSmr)
+    return false;
+
+  // Bind the return value.
+  if (BSmr == BehaviorSummary::Identity ||
+      BSmr == BehaviorSummary::IdentityOrZero ||
+      BSmr == BehaviorSummary::IdentityThis) {
+
+    const Expr *BindReturnTo =
+        (BSmr == BehaviorSummary::IdentityThis)
+            ? cast<CXXMemberCallExpr>(CE)->getImplicitObjectArgument()
+            : CE->getArg(0);
+    SVal RetVal = state->getSVal(BindReturnTo, LCtx);
+
+    // If the receiver is unknown or the function has
+    // 'rc_ownership_trusted_implementation' annotate attribute, conjure a
+    // return value.
+    // FIXME: this branch is very strange.
+    if (RetVal.isUnknown() ||
+        (hasTrustedImplementationAnnotation && !ResultTy.isNull())) {
+      SValBuilder &SVB = C.getSValBuilder();
+      RetVal =
+          SVB.conjureSymbolVal(nullptr, CE, LCtx, ResultTy, C.blockCount());
+    }
+
+    // Bind the value.
+    state = state->BindExpr(CE, LCtx, RetVal, /*Invalidate=*/false);
+
+    if (BSmr == BehaviorSummary::IdentityOrZero) {
+      // Add a branch where the output is zero.
+      // This branch starts from the state at function entry, not from the
+      // state that already has the call expression bound above.
+      ProgramStateRef NullOutputState = C.getState();
+
+      // Assume that output is zero on the other branch.
+      NullOutputState = NullOutputState->BindExpr(
+          CE, LCtx, C.getSValBuilder().makeNull(), /*Invalidate=*/false);
+      C.addTransition(NullOutputState, &CastFailTag);
+
+      // And on the original branch assume that both input and
+      // output are non-zero.
+      if (auto L = RetVal.getAs<DefinedOrUnknownSVal>())
+        state = state->assume(*L, /*assumption=*/true);
+
+    }
+  }
+
+  C.addTransition(state);
+  return true;
+}
+
+/// Updates the reference count of the value returned by \p S and checks it
+/// against the return effect of the enclosing declaration.
+///
+/// \returns the predecessor node unchanged when nothing is tracked (not in the
+/// top frame, no return statement or value, no tracked symbol, or a RefVal
+/// kind other than Owned / NotOwned); nullptr when the new state cached out
+/// or handleAutoreleaseCounts() produced no state; otherwise the node returned
+/// by checkReturnWithRetEffect().
+ExplodedNode * RetainCountChecker::processReturn(const ReturnStmt *S,
+                                                 CheckerContext &C) const {
+  ExplodedNode *Pred = C.getPredecessor();
+
+  // Only adjust the reference count if this is the top-level call frame,
+  // and not the result of inlining.  In the future, we should do
+  // better checking even for inlined calls, and see if they match
+  // with their expected semantics (e.g., the method should return a retained
+  // object, etc.).
+  if (!C.inTopFrame())
+    return Pred;
+
+  if (!S)
+    return Pred;
+
+  const Expr *RetE = S->getRetValue();
+  if (!RetE)
+    return Pred;
+
+  ProgramStateRef state = C.getState();
+  // We need to dig down to the symbolic base here because various
+  // custom allocators do sometimes return the symbol with an offset.
+  SymbolRef Sym = state->getSValAsScalarOrLoc(RetE, C.getLocationContext())
+                      .getAsLocSymbol(/*IncludeBaseRegions=*/true);
+  if (!Sym)
+    return Pred;
+
+  // Get the reference count binding (if any).
+  const RefVal *T = getRefBinding(state, Sym);
+  if (!T)
+    return Pred;
+
+  // Change the reference count.
+  RefVal X = *T;
+
+  switch (X.getKind()) {
+    case RefVal::Owned: {
+      unsigned cnt = X.getCount();
+      assert(cnt > 0);
+      X.setCount(cnt - 1);
+      X = X ^ RefVal::ReturnedOwned;
+      break;
+    }
+
+    case RefVal::NotOwned: {
+      // A not-owned value with a positive count still hands one reference to
+      // the caller; with a zero count it is returned not-owned.
+      unsigned cnt = X.getCount();
+      if (cnt) {
+        X.setCount(cnt - 1);
+        X = X ^ RefVal::ReturnedOwned;
+      } else {
+        X = X ^ RefVal::ReturnedNotOwned;
+      }
+      break;
+    }
+
+    default:
+      return Pred;
+  }
+
+  // Update the binding.
+  state = setRefBinding(state, Sym, X);
+  Pred = C.addTransition(state);
+
+  // At this point we have updated the state properly.
+  // Everything after this is merely checking to see if the return value has
+  // been over- or under-retained.
+
+  // Did we cache out?
+  if (!Pred)
+    return nullptr;
+
+  // Update the autorelease counts.
+  static CheckerProgramPointTag AutoreleaseTag(this, "Autorelease");
+  state = handleAutoreleaseCounts(state, Pred, &AutoreleaseTag, C, Sym, X, S);
+
+  // Have we generated a sink node?
+  if (!state)
+    return nullptr;
+
+  // Get the updated binding.
+  T = getRefBinding(state, Sym);
+  assert(T);
+  X = *T;
+
+  // Consult the summary of the enclosing method.
+  RetainSummaryManager &Summaries = getSummaryManager(C);
+  const Decl *CD = &Pred->getCodeDecl();
+  RetEffect RE = RetEffect::MakeNoRet();
+
+  // FIXME: What is the convention for blocks? Is there one?
+  if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(CD)) {
+    const RetainSummary *Summ = Summaries.getSummary(AnyCall(MD));
+    RE = Summ->getRetEffect();
+  } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(CD)) {
+    // C++ methods keep the default "no return effect".
+    if (!isa<CXXMethodDecl>(FD)) {
+      const RetainSummary *Summ = Summaries.getSummary(AnyCall(FD));
+      RE = Summ->getRetEffect();
+    }
+  }
+
+  return checkReturnWithRetEffect(S, C, Pred, RE, X, Sym, state);
+}
+
+/// Compares the state \p X of the returned symbol \p Sym with the return
+/// effect \p RE of the enclosing declaration and reports mismatches.
+///
+/// Reports a leak-at-return when an owned value with a zero count is returned
+/// from a declaration whose return effect is neither NoRet nor owned, and a
+/// "returned not owned for owned" error when a not-owned value is returned
+/// where an owned one is expected.
+///
+/// \returns the node created for the report (possibly null) when a mismatch is
+/// detected, and \p Pred otherwise.
+ExplodedNode * RetainCountChecker::checkReturnWithRetEffect(const ReturnStmt *S,
+                                                  CheckerContext &C,
+                                                  ExplodedNode *Pred,
+                                                  RetEffect RE, RefVal X,
+                                                  SymbolRef Sym,
+                                                  ProgramStateRef state) const {
+  // HACK: Ignore retain-count issues on values accessed through ivars,
+  // because of cases like this:
+  //   [_contentView retain];
+  //   [_contentView removeFromSuperview];
+  //   [self addSubview:_contentView]; // invalidates 'self'
+  //   [_contentView release];
+  if (X.getIvarAccessHistory() != RefVal::IvarAccessHistory::None)
+    return Pred;
+
+  // Any leaks or other errors?
+  if (X.isReturnedOwned() && X.getCount() == 0) {
+    if (RE.getKind() != RetEffect::NoRet) {
+      if (!RE.isOwned()) {
+
+        // The returning type is a CF, we expect the enclosing method should
+        // return ownership.
+        X = X ^ RefVal::ErrorLeakReturned;
+
+        // Generate an error node.
+        state = setRefBinding(state, Sym, X);
+
+        static CheckerProgramPointTag ReturnOwnLeakTag(this, "ReturnsOwnLeak");
+        ExplodedNode *N = C.addTransition(state, Pred, &ReturnOwnLeakTag);
+        if (N) {
+          const LangOptions &LOpts = C.getASTContext().getLangOpts();
+          auto R =
+              std::make_unique<RefLeakReport>(leakAtReturn, LOpts, N, Sym, C);
+          C.emitReport(std::move(R));
+        }
+        return N;
+      }
+    }
+  } else if (X.isReturnedNotOwned()) {
+    if (RE.isOwned()) {
+      // NOTE(review): the early return at the top of this function already
+      // handles every ivar access history other than None, so the
+      // AccessedDirectly branch below looks unreachable from here -- confirm.
+      if (X.getIvarAccessHistory() ==
+            RefVal::IvarAccessHistory::AccessedDirectly) {
+        // Assume the method was trying to transfer a +1 reference from a
+        // strong ivar to the caller.
+        state = setRefBinding(state, Sym,
+                              X.releaseViaIvar() ^ RefVal::ReturnedOwned);
+      } else {
+        // Trying to return a not owned object to a caller expecting an
+        // owned object.
+        state = setRefBinding(state, Sym, X ^ RefVal::ErrorReturnedNotOwned);
+
+        static CheckerProgramPointTag
+            ReturnNotOwnedTag(this, "ReturnNotOwnedForOwned");
+
+        ExplodedNode *N = C.addTransition(state, Pred, &ReturnNotOwnedTag);
+        if (N) {
+          auto R = std::make_unique<RefCountReport>(
+              returnNotOwnedForOwned, C.getASTContext().getLangOpts(), N, Sym);
+          C.emitReport(std::move(R));
+        }
+        return N;
+      }
+    }
+  }
+  return Pred;
+}
+
+//===----------------------------------------------------------------------===//
+// Check various ways a symbol can be invalidated.
+//===----------------------------------------------------------------------===//
+
+/// When \p val is bound to a location whose region escapes (see
+/// shouldEscapeRegion()), stops tracking every symbol reachable from \p val
+/// and adds a transition to the resulting state. Otherwise does nothing.
+void RetainCountChecker::checkBind(SVal loc, SVal val, const Stmt *S,
+                                   CheckerContext &C) const {
+  ProgramStateRef state = C.getState();
+  const MemRegion *MR = loc.getAsRegion();
+
+  // Find all symbols referenced by 'val' that we are tracking
+  // and stop tracking them.
+  if (MR && shouldEscapeRegion(MR)) {
+    state = state->scanReachableSymbols<StopTrackingCallback>(val).getState();
+    C.addTransition(state);
+  }
+}
+
+/// Drops the ref binding of every tracked symbol that the constraint manager
+/// reports as constrained to null in \p state. \p Cond and \p Assumption are
+/// not inspected.
+ProgramStateRef RetainCountChecker::evalAssume(ProgramStateRef state,
+                                               SVal Cond,
+                                               bool Assumption) const {
+  // FIXME: We may add to the interface of evalAssume the list of symbols
+  //  whose assumptions have changed.  For now we just iterate through the
+  //  bindings and check if any of the tracked symbols are NULL.  This isn't
+  //  too bad since the number of symbols we will track in practice are
+  //  probably small and evalAssume is only called at branches and a few
+  //  other places.
+  RefBindingsTy B = state->get<RefBindings>();
+
+  if (B.isEmpty())
+    return state;
+
+  bool changed = false;
+  RefBindingsTy::Factory &RefBFactory = state->get_context<RefBindings>();
+  ConstraintManager &CMgr = state->getConstraintManager();
+
+  for (auto &I : B) {
+    // Check if the symbol is null stop tracking the symbol.
+    ConditionTruthVal AllocFailed = CMgr.isNull(state, I.first);
+    if (AllocFailed.isConstrainedTrue()) {
+      changed = true;
+      B = RefBFactory.remove(B, I.first);
+    }
+  }
+
+  // Only build a new state when at least one binding was removed.
+  if (changed)
+    state = state->set<RefBindings>(B);
+
+  return state;
+}
+
+/// Removes the ref binding of every invalidated symbol, except for symbols
+/// that back one of the \p ExplicitRegions (after stripping casts). Returns
+/// \p state unchanged when \p invalidated is null.
+ProgramStateRef RetainCountChecker::checkRegionChanges(
+    ProgramStateRef state, const InvalidatedSymbols *invalidated,
+    ArrayRef<const MemRegion *> ExplicitRegions,
+    ArrayRef<const MemRegion *> Regions, const LocationContext *LCtx,
+    const CallEvent *Call) const {
+  if (!invalidated)
+    return state;
+
+  llvm::SmallPtrSet<SymbolRef, 8> WhitelistedSymbols;
+
+  for (const MemRegion *I : ExplicitRegions)
+    if (const SymbolicRegion *SR = I->StripCasts()->getAs<SymbolicRegion>())
+      WhitelistedSymbols.insert(SR->getSymbol());
+
+  for (SymbolRef sym : *invalidated) {
+    if (WhitelistedSymbols.count(sym))
+      continue;
+    // Remove any existing reference-count binding.
+    state = removeRefBinding(state, sym);
+  }
+  return state;
+}
+
+/// Resolves the pending autorelease count of \p V, the value bound to \p Sym.
+///
+/// \returns \p state unchanged when there are no pending autoreleases, or when
+/// an over-autorelease is ignored because of ivar access history; a state with
+/// an updated binding when the autoreleases are covered by the retain count;
+/// and nullptr after generating a sink node (and, if the node was created, an
+/// over-autorelease report) when there are more autoreleases than retains.
+ProgramStateRef
+RetainCountChecker::handleAutoreleaseCounts(ProgramStateRef state,
+                                            ExplodedNode *Pred,
+                                            const ProgramPointTag *Tag,
+                                            CheckerContext &Ctx,
+                                            SymbolRef Sym,
+                                            RefVal V,
+                                            const ReturnStmt *S) const {
+  unsigned ACnt = V.getAutoreleaseCount();
+
+  // No autorelease counts?  Nothing to be done.
+  if (!ACnt)
+    return state;
+
+  unsigned Cnt = V.getCount();
+
+  // FIXME: Handle sending 'autorelease' to already released object.
+
+  // A returned-owned value contributes one extra reference to the balance.
+  if (V.getKind() == RefVal::ReturnedOwned)
+    ++Cnt;
+
+  // If we would over-release here, but we know the value came from an ivar,
+  // assume it was a strong ivar that's just been relinquished.
+  if (ACnt > Cnt &&
+      V.getIvarAccessHistory() == RefVal::IvarAccessHistory::AccessedDirectly) {
+    V = V.releaseViaIvar();
+    --ACnt;
+  }
+
+  if (ACnt <= Cnt) {
+    if (ACnt == Cnt) {
+      // Autoreleases exactly balance the retains: the value ends up not owned.
+      V.clearCounts();
+      if (V.getKind() == RefVal::ReturnedOwned) {
+        V = V ^ RefVal::ReturnedNotOwned;
+      } else {
+        V = V ^ RefVal::NotOwned;
+      }
+    } else {
+      V.setCount(V.getCount() - ACnt);
+      V.setAutoreleaseCount(0);
+    }
+    return setRefBinding(state, Sym, V);
+  }
+
+  // HACK: Ignore retain-count issues on values accessed through ivars,
+  // because of cases like this:
+  //   [_contentView retain];
+  //   [_contentView removeFromSuperview];
+  //   [self addSubview:_contentView]; // invalidates 'self'
+  //   [_contentView release];
+  if (V.getIvarAccessHistory() != RefVal::IvarAccessHistory::None)
+    return state;
+
+  // Woah!  More autorelease counts than retain counts left.
+  // Emit hard error.
+  V = V ^ RefVal::ErrorOverAutorelease;
+  state = setRefBinding(state, Sym, V);
+
+  ExplodedNode *N = Ctx.generateSink(state, Pred, Tag);
+  if (N) {
+    SmallString<128> sbuf;
+    llvm::raw_svector_ostream os(sbuf);
+    os << "Object was autoreleased ";
+    if (V.getAutoreleaseCount() > 1)
+      os << V.getAutoreleaseCount() << " times but the object ";
+    else
+      os << "but ";
+    os << "has a +" << V.getCount() << " retain count";
+
+    const LangOptions &LOpts = Ctx.getASTContext().getLangOpts();
+    auto R = std::make_unique<RefCountReport>(overAutorelease, LOpts, N, Sym,
+                                              os.str());
+    Ctx.emitReport(std::move(R));
+  }
+
+  return nullptr;
+}
+
+/// Decides whether the dying symbol \p sid with state \p V is leaked.
+///
+/// A leak is recorded for an owned value, or for a not-owned / returned-owned
+/// value with a positive count, unless the value has ivar access history.
+/// Leaked symbols are appended to \p Leaked and rebound as ErrorLeak; all
+/// other symbols have their binding removed.
+ProgramStateRef
+RetainCountChecker::handleSymbolDeath(ProgramStateRef state,
+                                      SymbolRef sid, RefVal V,
+                                    SmallVectorImpl<SymbolRef> &Leaked) const {
+  bool hasLeak;
+
+  // HACK: Ignore retain-count issues on values accessed through ivars,
+  // because of cases like this:
+  //   [_contentView retain];
+  //   [_contentView removeFromSuperview];
+  //   [self addSubview:_contentView]; // invalidates 'self'
+  //   [_contentView release];
+  if (V.getIvarAccessHistory() != RefVal::IvarAccessHistory::None)
+    hasLeak = false;
+  else if (V.isOwned())
+    hasLeak = true;
+  else if (V.isNotOwned() || V.isReturnedOwned())
+    hasLeak = (V.getCount() > 0);
+  else
+    hasLeak = false;
+
+  if (!hasLeak)
+    return removeRefBinding(state, sid);
+
+  Leaked.push_back(sid);
+  return setRefBinding(state, sid, V ^ RefVal::ErrorLeak);
+}
+
+/// Adds a transition to \p state and, if a node was created, emits one
+/// RefLeakReport per symbol in \p Leaked at that node. The bug type is
+/// leakWithinFunction when \p Pred is non-null and leakAtReturn otherwise.
+///
+/// \returns the newly created node, or null if none was created.
+ExplodedNode *
+RetainCountChecker::processLeaks(ProgramStateRef state,
+                                 SmallVectorImpl<SymbolRef> &Leaked,
+                                 CheckerContext &Ctx,
+                                 ExplodedNode *Pred) const {
+  // Generate an intermediate node representing the leak point.
+  ExplodedNode *N = Ctx.addTransition(state, Pred);
+  const LangOptions &LOpts = Ctx.getASTContext().getLangOpts();
+
+  if (N) {
+    for (SymbolRef L : Leaked) {
+      const RefCountBug &BT = Pred ? leakWithinFunction : leakAtReturn;
+      Ctx.emitReport(std::make_unique<RefLeakReport>(BT, LOpts, N, L, Ctx));
+    }
+  }
+
+  return N;
+}
+
+/// Seeds ref bindings for the parameters of the top-level function being
+/// analyzed, based on the callee-side argument effects of its summary.
+///
+/// Does nothing outside the top frame, when no AnyCall can be formed for the
+/// declaration, or when the declaration is a trusted reference-count
+/// implementation. Parameters whose effect has kind DecRef start out owned;
+/// all other eligible parameters start out not owned.
+void RetainCountChecker::checkBeginFunction(CheckerContext &Ctx) const {
+  if (!Ctx.inTopFrame())
+    return;
+
+  RetainSummaryManager &SmrMgr = getSummaryManager(Ctx);
+  const LocationContext *LCtx = Ctx.getLocationContext();
+  const Decl *D = LCtx->getDecl();
+  Optional<AnyCall> C = AnyCall::forDecl(D);
+
+  if (!C || SmrMgr.isTrustedReferenceCountImplementation(D))
+    return;
+
+  ProgramStateRef state = Ctx.getState();
+  const RetainSummary *FunctionSummary = SmrMgr.getSummary(*C);
+  ArgEffects CalleeSideArgEffects = FunctionSummary->getArgEffects();
+
+  for (unsigned idx = 0, e = C->param_size(); idx != e; ++idx) {
+    const ParmVarDecl *Param = C->parameters()[idx];
+    // NOTE(review): Sym is passed to setRefBinding() below without a null
+    // check; presumably parameters with an argument effect always have a
+    // symbolic value here -- confirm.
+    SymbolRef Sym = state->getSVal(state->getRegion(Param, LCtx)).getAsSymbol();
+
+    QualType Ty = Param->getType();
+    const ArgEffect *AE = CalleeSideArgEffects.lookup(idx);
+    if (AE) {
+      ObjKind K = AE->getObjKind();
+      // ObjC and CF parameters are only seeded when TrackNSCFStartParam is set.
+      if (K == ObjKind::Generalized || K == ObjKind::OS ||
+          (TrackNSCFStartParam && (K == ObjKind::ObjC || K == ObjKind::CF))) {
+        RefVal NewVal = AE->getKind() == DecRef ? RefVal::makeOwned(K, Ty)
+                                                : RefVal::makeNotOwned(K, Ty);
+        state = setRefBinding(state, Sym, NewVal);
+      }
+    }
+  }
+
+  Ctx.addTransition(state);
+}
+
+/// Processes the return value via processReturn(), resolves the autorelease
+/// counts of all tracked symbols and, when the current location context has
+/// no parent, reports every remaining leak.
+void RetainCountChecker::checkEndFunction(const ReturnStmt *RS,
+                                          CheckerContext &Ctx) const {
+  ExplodedNode *Pred = processReturn(RS, Ctx);
+
+  // Created state cached out.
+  if (!Pred) {
+    return;
+  }
+
+  ProgramStateRef state = Pred->getState();
+  RefBindingsTy B = state->get<RefBindings>();
+
+  // Don't process anything within synthesized bodies.
+  const LocationContext *LCtx = Pred->getLocationContext();
+  if (LCtx->getAnalysisDeclContext()->isBodyAutosynthesized()) {
+    assert(!LCtx->inTopFrame());
+    return;
+  }
+
+  for (auto &I : B) {
+    state = handleAutoreleaseCounts(state, Pred, /*Tag=*/nullptr, Ctx,
+                                    I.first, I.second);
+    // A null state means a sink node was generated; stop here.
+    if (!state)
+      return;
+  }
+
+  // If the current LocationContext has a parent, don't check for leaks.
+  // We will do that later.
+  // FIXME: we should instead check for imbalances of the retain/releases,
+  // and suggest annotations.
+  if (LCtx->getParent())
+    return;
+
+  // Re-read the bindings: handleAutoreleaseCounts() may have changed them.
+  B = state->get<RefBindings>();
+  SmallVector<SymbolRef, 10> Leaked;
+
+  for (auto &I : B)
+    state = handleSymbolDeath(state, I.first, I.second, Leaked);
+
+  processLeaks(state, Leaked, Ctx, Pred);
+}
+
+/// For every tracked symbol that \p SymReaper reports as dead, resolves its
+/// autorelease counts and decides whether it is leaked. Leaks are reported
+/// through processLeaks(); afterwards the bindings of the leaked symbols are
+/// removed in a follow-up transition.
+void RetainCountChecker::checkDeadSymbols(SymbolReaper &SymReaper,
+                                          CheckerContext &C) const {
+  ExplodedNode *Pred = C.getPredecessor();
+
+  ProgramStateRef state = C.getState();
+  SmallVector<SymbolRef, 10> Leaked;
+
+  // Update counts from autorelease pools
+  for (const auto &I: state->get<RefBindings>()) {
+    SymbolRef Sym = I.first;
+    if (SymReaper.isDead(Sym)) {
+      static CheckerProgramPointTag Tag(this, "DeadSymbolAutorelease");
+      const RefVal &V = I.second;
+      state = handleAutoreleaseCounts(state, Pred, &Tag, C, Sym, V);
+      if (!state)
+        return;
+
+      // Fetch the new reference count from the state, and use it to handle
+      // this symbol.
+      state = handleSymbolDeath(state, Sym, *getRefBinding(state, Sym), Leaked);
+    }
+  }
+
+  if (Leaked.empty()) {
+    C.addTransition(state);
+    return;
+  }
+
+  Pred = processLeaks(state, Leaked, C, Pred);
+
+  // Did we cache out?
+  if (!Pred)
+    return;
+
+  // Now generate a new node that nukes the old bindings.
+  // The only bindings left at this point are the leaked symbols.
+  RefBindingsTy::Factory &F = state->get_context<RefBindings>();
+  RefBindingsTy B = state->get<RefBindings>();
+
+  for (SymbolRef L : Leaked)
+    B = F.remove(B, L);
+
+  state = state->set<RefBindings>(B);
+  C.addTransition(state, Pred);
+}
+
+/// Prints every ref binding of \p State to \p Out as "<symbol> : <RefVal>",
+/// one per line (terminated by \p NL), preceded by \p Sep and \p NL. Prints
+/// nothing when there are no bindings.
+void RetainCountChecker::printState(raw_ostream &Out, ProgramStateRef State,
+                                    const char *NL, const char *Sep) const {
+
+  RefBindingsTy B = State->get<RefBindings>();
+
+  if (B.isEmpty())
+    return;
+
+  Out << Sep << NL;
+
+  for (auto &I : B) {
+    Out << I.first << " : ";
+    I.second.print(Out);
+    Out << NL;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Checker registration.
+//===----------------------------------------------------------------------===//
+
+/// Registers the RetainCountChecker instance with \p Mgr.
+void ento::registerRetainCountBase(CheckerManager &Mgr) {
+  Mgr.registerChecker<RetainCountChecker>();
+}
+
+/// The base checker is registered regardless of language options.
+bool ento::shouldRegisterRetainCountBase(const LangOptions &LO) {
+  return true;
+}
+
+// FIXME: remove this, hack for backwards compatibility:
+// it should be possible to enable the NS/CF retain count checker as
+// osx.cocoa.RetainCount, and it should be possible to disable
+// osx.OSObjectRetainCount using osx.cocoa.RetainCount:CheckOSObject=false.
+static bool getOption(AnalyzerOptions &Options, + StringRef Postfix, + StringRef Value) { + auto I = Options.Config.find( + (StringRef("osx.cocoa.RetainCount:") + Postfix).str()); + if (I != Options.Config.end()) + return I->getValue() == Value; + return false; +} + +void ento::registerRetainCountChecker(CheckerManager &Mgr) { + auto *Chk = Mgr.getChecker<RetainCountChecker>(); + Chk->TrackObjCAndCFObjects = true; + Chk->TrackNSCFStartParam = getOption(Mgr.getAnalyzerOptions(), + "TrackNSCFStartParam", + "true"); +} + +bool ento::shouldRegisterRetainCountChecker(const LangOptions &LO) { + return true; +} + +void ento::registerOSObjectRetainCountChecker(CheckerManager &Mgr) { + auto *Chk = Mgr.getChecker<RetainCountChecker>(); + if (!getOption(Mgr.getAnalyzerOptions(), + "CheckOSObject", + "false")) + Chk->TrackOSObjects = true; +} + +bool ento::shouldRegisterOSObjectRetainCountChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h new file mode 100644 index 000000000000..dd79bbef321c --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.h @@ -0,0 +1,398 @@ +//==--- RetainCountChecker.h - Checks for leaks and other issues -*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the methods for RetainCountChecker, which implements +// a reference count checker for Core Foundation and Cocoa on (Mac OS X). 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_RETAINCOUNTCHECKER_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_RETAINCOUNTCHECKER_H + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "RetainCountDiagnostics.h" +#include "clang/AST/Attr.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/ParentMap.h" +#include "clang/Analysis/DomainSpecific/CocoaConventions.h" +#include "clang/Analysis/PathDiagnostic.h" +#include "clang/Analysis/RetainSummaryManager.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Analysis/SelectorExtras.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableList.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include <cstdarg> +#include <utility> + +namespace clang { +namespace ento { +namespace retaincountchecker { + +/// Metadata on reference. +class RefVal { +public: + enum Kind { + Owned = 0, // Owning reference. + NotOwned, // Reference is not owned by still valid (not freed). + Released, // Object has been released. + ReturnedOwned, // Returned object passes ownership to caller. + ReturnedNotOwned, // Return object does not pass ownership to caller. + ERROR_START, + ErrorDeallocNotOwned, // -dealloc called on non-owned object. 
+ ErrorUseAfterRelease, // Object used after released. + ErrorReleaseNotOwned, // Release of an object that was not owned. + ERROR_LEAK_START, + ErrorLeak, // A memory leak due to excessive reference counts. + ErrorLeakReturned, // A memory leak due to the returning method not having + // the correct naming conventions. + ErrorOverAutorelease, + ErrorReturnedNotOwned + }; + + /// Tracks how an object referenced by an ivar has been used. + /// + /// This accounts for us not knowing if an arbitrary ivar is supposed to be + /// stored at +0 or +1. + enum class IvarAccessHistory { + None, + AccessedDirectly, + ReleasedAfterDirectAccess + }; + +private: + /// The number of outstanding retains. + unsigned Cnt; + /// The number of outstanding autoreleases. + unsigned ACnt; + /// The (static) type of the object at the time we started tracking it. + QualType T; + + /// The current state of the object. + /// + /// See the RefVal::Kind enum for possible values. + unsigned RawKind : 5; + + /// The kind of object being tracked (CF or ObjC or OSObject), if known. + /// + /// See the ObjKind enum for possible values. + unsigned RawObjectKind : 3; + + /// True if the current state and/or retain count may turn out to not be the + /// best possible approximation of the reference counting state. + /// + /// If true, the checker may decide to throw away ("override") this state + /// in favor of something else when it sees the object being used in new ways. + /// + /// This setting should not be propagated to state derived from this state. + /// Once we start deriving new states, it would be inconsistent to override + /// them. 
+ unsigned RawIvarAccessHistory : 2; + + RefVal(Kind k, ObjKind o, unsigned cnt, unsigned acnt, QualType t, + IvarAccessHistory IvarAccess) + : Cnt(cnt), ACnt(acnt), T(t), RawKind(static_cast<unsigned>(k)), + RawObjectKind(static_cast<unsigned>(o)), + RawIvarAccessHistory(static_cast<unsigned>(IvarAccess)) { + assert(getKind() == k && "not enough bits for the kind"); + assert(getObjKind() == o && "not enough bits for the object kind"); + assert(getIvarAccessHistory() == IvarAccess && "not enough bits"); + } + +public: + Kind getKind() const { return static_cast<Kind>(RawKind); } + + ObjKind getObjKind() const { + return static_cast<ObjKind>(RawObjectKind); + } + + unsigned getCount() const { return Cnt; } + unsigned getAutoreleaseCount() const { return ACnt; } + unsigned getCombinedCounts() const { return Cnt + ACnt; } + void clearCounts() { + Cnt = 0; + ACnt = 0; + } + void setCount(unsigned i) { + Cnt = i; + } + void setAutoreleaseCount(unsigned i) { + ACnt = i; + } + + QualType getType() const { return T; } + + /// Returns what the analyzer knows about direct accesses to a particular + /// instance variable. + /// + /// If the object with this refcount wasn't originally from an Objective-C + /// ivar region, this should always return IvarAccessHistory::None. + IvarAccessHistory getIvarAccessHistory() const { + return static_cast<IvarAccessHistory>(RawIvarAccessHistory); + } + + bool isOwned() const { + return getKind() == Owned; + } + + bool isNotOwned() const { + return getKind() == NotOwned; + } + + bool isReturnedOwned() const { + return getKind() == ReturnedOwned; + } + + bool isReturnedNotOwned() const { + return getKind() == ReturnedNotOwned; + } + + /// Create a state for an object whose lifetime is the responsibility of the + /// current function, at least partially. + /// + /// Most commonly, this is an owned object with a retain count of +1. 
+ static RefVal makeOwned(ObjKind o, QualType t) { + return RefVal(Owned, o, /*Count=*/1, 0, t, IvarAccessHistory::None); + } + + /// Create a state for an object whose lifetime is not the responsibility of + /// the current function. + /// + /// Most commonly, this is an unowned object with a retain count of +0. + static RefVal makeNotOwned(ObjKind o, QualType t) { + return RefVal(NotOwned, o, /*Count=*/0, 0, t, IvarAccessHistory::None); + } + + RefVal operator-(size_t i) const { + return RefVal(getKind(), getObjKind(), getCount() - i, + getAutoreleaseCount(), getType(), getIvarAccessHistory()); + } + + RefVal operator+(size_t i) const { + return RefVal(getKind(), getObjKind(), getCount() + i, + getAutoreleaseCount(), getType(), getIvarAccessHistory()); + } + + RefVal operator^(Kind k) const { + return RefVal(k, getObjKind(), getCount(), getAutoreleaseCount(), + getType(), getIvarAccessHistory()); + } + + RefVal autorelease() const { + return RefVal(getKind(), getObjKind(), getCount(), getAutoreleaseCount()+1, + getType(), getIvarAccessHistory()); + } + + RefVal withIvarAccess() const { + assert(getIvarAccessHistory() == IvarAccessHistory::None); + return RefVal(getKind(), getObjKind(), getCount(), getAutoreleaseCount(), + getType(), IvarAccessHistory::AccessedDirectly); + } + + RefVal releaseViaIvar() const { + assert(getIvarAccessHistory() == IvarAccessHistory::AccessedDirectly); + return RefVal(getKind(), getObjKind(), getCount(), getAutoreleaseCount(), + getType(), IvarAccessHistory::ReleasedAfterDirectAccess); + } + + // Comparison, profiling, and pretty-printing. 
+ bool hasSameState(const RefVal &X) const { + return getKind() == X.getKind() && Cnt == X.Cnt && ACnt == X.ACnt && + getIvarAccessHistory() == X.getIvarAccessHistory(); + } + + bool operator==(const RefVal& X) const { + return T == X.T && hasSameState(X) && getObjKind() == X.getObjKind(); + } + + void Profile(llvm::FoldingSetNodeID& ID) const { + ID.Add(T); + ID.AddInteger(RawKind); + ID.AddInteger(Cnt); + ID.AddInteger(ACnt); + ID.AddInteger(RawObjectKind); + ID.AddInteger(RawIvarAccessHistory); + } + + void print(raw_ostream &Out) const; +}; + +class RetainCountChecker + : public Checker< check::Bind, + check::DeadSymbols, + check::BeginFunction, + check::EndFunction, + check::PostStmt<BlockExpr>, + check::PostStmt<CastExpr>, + check::PostStmt<ObjCArrayLiteral>, + check::PostStmt<ObjCDictionaryLiteral>, + check::PostStmt<ObjCBoxedExpr>, + check::PostStmt<ObjCIvarRefExpr>, + check::PostCall, + check::RegionChanges, + eval::Assume, + eval::Call > { + + RefCountBug useAfterRelease{this, RefCountBug::UseAfterRelease}; + RefCountBug releaseNotOwned{this, RefCountBug::ReleaseNotOwned}; + RefCountBug deallocNotOwned{this, RefCountBug::DeallocNotOwned}; + RefCountBug freeNotOwned{this, RefCountBug::FreeNotOwned}; + RefCountBug overAutorelease{this, RefCountBug::OverAutorelease}; + RefCountBug returnNotOwnedForOwned{this, RefCountBug::ReturnNotOwnedForOwned}; + RefCountBug leakWithinFunction{this, RefCountBug::LeakWithinFunction}; + RefCountBug leakAtReturn{this, RefCountBug::LeakAtReturn}; + + CheckerProgramPointTag DeallocSentTag{this, "DeallocSent"}; + CheckerProgramPointTag CastFailTag{this, "DynamicCastFail"}; + + mutable std::unique_ptr<RetainSummaryManager> Summaries; +public: + + /// Track Objective-C and CoreFoundation objects. + bool TrackObjCAndCFObjects = false; + + /// Track sublcasses of OSObject. + bool TrackOSObjects = false; + + /// Track initial parameters (for the entry point) for NS/CF objects. 
+ bool TrackNSCFStartParam = false; + + RetainCountChecker() {}; + + RetainSummaryManager &getSummaryManager(ASTContext &Ctx) const { + if (!Summaries) + Summaries.reset( + new RetainSummaryManager(Ctx, TrackObjCAndCFObjects, TrackOSObjects)); + return *Summaries; + } + + RetainSummaryManager &getSummaryManager(CheckerContext &C) const { + return getSummaryManager(C.getASTContext()); + } + + void printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const override; + + void checkBind(SVal loc, SVal val, const Stmt *S, CheckerContext &C) const; + void checkPostStmt(const BlockExpr *BE, CheckerContext &C) const; + void checkPostStmt(const CastExpr *CE, CheckerContext &C) const; + + void checkPostStmt(const ObjCArrayLiteral *AL, CheckerContext &C) const; + void checkPostStmt(const ObjCDictionaryLiteral *DL, CheckerContext &C) const; + void checkPostStmt(const ObjCBoxedExpr *BE, CheckerContext &C) const; + + void checkPostStmt(const ObjCIvarRefExpr *IRE, CheckerContext &C) const; + + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + + void checkSummary(const RetainSummary &Summ, const CallEvent &Call, + CheckerContext &C) const; + + void processSummaryOfInlined(const RetainSummary &Summ, + const CallEvent &Call, + CheckerContext &C) const; + + bool evalCall(const CallEvent &Call, CheckerContext &C) const; + + ProgramStateRef evalAssume(ProgramStateRef state, SVal Cond, + bool Assumption) const; + + ProgramStateRef + checkRegionChanges(ProgramStateRef state, + const InvalidatedSymbols *invalidated, + ArrayRef<const MemRegion *> ExplicitRegions, + ArrayRef<const MemRegion *> Regions, + const LocationContext* LCtx, + const CallEvent *Call) const; + + ExplodedNode* checkReturnWithRetEffect(const ReturnStmt *S, CheckerContext &C, + ExplodedNode *Pred, RetEffect RE, RefVal X, + SymbolRef Sym, ProgramStateRef state) const; + + void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; + void 
checkBeginFunction(CheckerContext &C) const; + void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const; + + ProgramStateRef updateSymbol(ProgramStateRef state, SymbolRef sym, + RefVal V, ArgEffect E, RefVal::Kind &hasErr, + CheckerContext &C) const; + + const RefCountBug &errorKindToBugKind(RefVal::Kind ErrorKind, + SymbolRef Sym) const; + + void processNonLeakError(ProgramStateRef St, SourceRange ErrorRange, + RefVal::Kind ErrorKind, SymbolRef Sym, + CheckerContext &C) const; + + void processObjCLiterals(CheckerContext &C, const Expr *Ex) const; + + ProgramStateRef handleSymbolDeath(ProgramStateRef state, + SymbolRef sid, RefVal V, + SmallVectorImpl<SymbolRef> &Leaked) const; + + ProgramStateRef + handleAutoreleaseCounts(ProgramStateRef state, ExplodedNode *Pred, + const ProgramPointTag *Tag, CheckerContext &Ctx, + SymbolRef Sym, + RefVal V, + const ReturnStmt *S=nullptr) const; + + ExplodedNode *processLeaks(ProgramStateRef state, + SmallVectorImpl<SymbolRef> &Leaked, + CheckerContext &Ctx, + ExplodedNode *Pred = nullptr) const; + + const CheckerProgramPointTag &getDeallocSentTag() const { + return DeallocSentTag; + } + + const CheckerProgramPointTag &getCastFailTag() const { + return CastFailTag; + } + +private: + /// Perform the necessary checks and state adjustments at the end of the + /// function. + /// \p S Return statement, may be null. + ExplodedNode * processReturn(const ReturnStmt *S, CheckerContext &C) const; +}; + +//===----------------------------------------------------------------------===// +// RefBindings - State used to track object reference counts. +//===----------------------------------------------------------------------===// + +const RefVal *getRefBinding(ProgramStateRef State, SymbolRef Sym); + +/// Returns true if this stack frame is for an Objective-C method that is a +/// property getter or setter whose body has been synthesized by the analyzer. 
+inline bool isSynthesizedAccessor(const StackFrameContext *SFC) { + auto Method = dyn_cast_or_null<ObjCMethodDecl>(SFC->getDecl()); + if (!Method || !Method->isPropertyAccessor()) + return false; + + return SFC->getAnalysisDeclContext()->isBodyAutosynthesized(); +} + +} // end namespace retaincountchecker +} // end namespace ento +} // end namespace clang + +#endif diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp new file mode 100644 index 000000000000..9853758f7f2c --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp @@ -0,0 +1,917 @@ +// RetainCountDiagnostics.cpp - Checks for leaks and other issues -*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines diagnostics for RetainCountChecker, which implements +// a reference count checker for Core Foundation and Cocoa on (Mac OS X). 
+// +//===----------------------------------------------------------------------===// + +#include "RetainCountDiagnostics.h" +#include "RetainCountChecker.h" + +using namespace clang; +using namespace ento; +using namespace retaincountchecker; + +StringRef RefCountBug::bugTypeToName(RefCountBug::RefCountBugType BT) { + switch (BT) { + case UseAfterRelease: + return "Use-after-release"; + case ReleaseNotOwned: + return "Bad release"; + case DeallocNotOwned: + return "-dealloc sent to non-exclusively owned object"; + case FreeNotOwned: + return "freeing non-exclusively owned object"; + case OverAutorelease: + return "Object autoreleased too many times"; + case ReturnNotOwnedForOwned: + return "Method should return an owned object"; + case LeakWithinFunction: + return "Leak"; + case LeakAtReturn: + return "Leak of returned object"; + } + llvm_unreachable("Unknown RefCountBugType"); +} + +StringRef RefCountBug::getDescription() const { + switch (BT) { + case UseAfterRelease: + return "Reference-counted object is used after it is released"; + case ReleaseNotOwned: + return "Incorrect decrement of the reference count of an object that is " + "not owned at this point by the caller"; + case DeallocNotOwned: + return "-dealloc sent to object that may be referenced elsewhere"; + case FreeNotOwned: + return "'free' called on an object that may be referenced elsewhere"; + case OverAutorelease: + return "Object autoreleased too many times"; + case ReturnNotOwnedForOwned: + return "Object with a +0 retain count returned to caller where a +1 " + "(owning) retain count is expected"; + case LeakWithinFunction: + case LeakAtReturn: + return ""; + } + llvm_unreachable("Unknown RefCountBugType"); +} + +RefCountBug::RefCountBug(const CheckerBase *Checker, RefCountBugType BT) + : BugType(Checker, bugTypeToName(BT), categories::MemoryRefCount, + /*SuppressOnSink=*/BT == LeakWithinFunction || BT == LeakAtReturn), + BT(BT), Checker(Checker) {} + +static bool 
isNumericLiteralExpression(const Expr *E) { + // FIXME: This set of cases was copied from SemaExprObjC. + return isa<IntegerLiteral>(E) || + isa<CharacterLiteral>(E) || + isa<FloatingLiteral>(E) || + isa<ObjCBoolLiteralExpr>(E) || + isa<CXXBoolLiteralExpr>(E); +} + +/// If type represents a pointer to CXXRecordDecl, +/// and is not a typedef, return the decl name. +/// Otherwise, return the serialization of type. +static std::string getPrettyTypeName(QualType QT) { + QualType PT = QT->getPointeeType(); + if (!PT.isNull() && !QT->getAs<TypedefType>()) + if (const auto *RD = PT->getAsCXXRecordDecl()) + return RD->getName(); + return QT.getAsString(); +} + +/// Write information about the type state change to {@code os}, +/// return whether the note should be generated. +static bool shouldGenerateNote(llvm::raw_string_ostream &os, + const RefVal *PrevT, + const RefVal &CurrV, + bool DeallocSent) { + // Get the previous type state. + RefVal PrevV = *PrevT; + + // Specially handle -dealloc. + if (DeallocSent) { + // Determine if the object's reference count was pushed to zero. + assert(!PrevV.hasSameState(CurrV) && "The state should have changed."); + // We may not have transitioned to 'release' if we hit an error. + // This case is handled elsewhere. + if (CurrV.getKind() == RefVal::Released) { + assert(CurrV.getCombinedCounts() == 0); + os << "Object released by directly sending the '-dealloc' message"; + return true; + } + } + + // Determine if the typestate has changed. + if (!PrevV.hasSameState(CurrV)) + switch (CurrV.getKind()) { + case RefVal::Owned: + case RefVal::NotOwned: + if (PrevV.getCount() == CurrV.getCount()) { + // Did an autorelease message get sent? 
+ if (PrevV.getAutoreleaseCount() == CurrV.getAutoreleaseCount()) + return false; + + assert(PrevV.getAutoreleaseCount() < CurrV.getAutoreleaseCount()); + os << "Object autoreleased"; + return true; + } + + if (PrevV.getCount() > CurrV.getCount()) + os << "Reference count decremented."; + else + os << "Reference count incremented."; + + if (unsigned Count = CurrV.getCount()) + os << " The object now has a +" << Count << " retain count."; + + return true; + + case RefVal::Released: + if (CurrV.getIvarAccessHistory() == + RefVal::IvarAccessHistory::ReleasedAfterDirectAccess && + CurrV.getIvarAccessHistory() != PrevV.getIvarAccessHistory()) { + os << "Strong instance variable relinquished. "; + } + os << "Object released."; + return true; + + case RefVal::ReturnedOwned: + // Autoreleases can be applied after marking a node ReturnedOwned. + if (CurrV.getAutoreleaseCount()) + return false; + + os << "Object returned to caller as an owning reference (single " + "retain count transferred to caller)"; + return true; + + case RefVal::ReturnedNotOwned: + os << "Object returned to caller with a +0 retain count"; + return true; + + default: + return false; + } + return true; +} + +/// Finds argument index of the out paramter in the call {@code S} +/// corresponding to the symbol {@code Sym}. +/// If none found, returns None. 
+static Optional<unsigned> findArgIdxOfSymbol(ProgramStateRef CurrSt, + const LocationContext *LCtx, + SymbolRef &Sym, + Optional<CallEventRef<>> CE) { + if (!CE) + return None; + + for (unsigned Idx = 0; Idx < (*CE)->getNumArgs(); Idx++) + if (const MemRegion *MR = (*CE)->getArgSVal(Idx).getAsRegion()) + if (const auto *TR = dyn_cast<TypedValueRegion>(MR)) + if (CurrSt->getSVal(MR, TR->getValueType()).getAsSymExpr() == Sym) + return Idx; + + return None; +} + +static Optional<std::string> findMetaClassAlloc(const Expr *Callee) { + if (const auto *ME = dyn_cast<MemberExpr>(Callee)) { + if (ME->getMemberDecl()->getNameAsString() != "alloc") + return None; + const Expr *This = ME->getBase()->IgnoreParenImpCasts(); + if (const auto *DRE = dyn_cast<DeclRefExpr>(This)) { + const ValueDecl *VD = DRE->getDecl(); + if (VD->getNameAsString() != "metaClass") + return None; + + if (const auto *RD = dyn_cast<CXXRecordDecl>(VD->getDeclContext())) + return RD->getNameAsString(); + + } + } + return None; +} + +static std::string findAllocatedObjectName(const Stmt *S, QualType QT) { + if (const auto *CE = dyn_cast<CallExpr>(S)) + if (auto Out = findMetaClassAlloc(CE->getCallee())) + return *Out; + return getPrettyTypeName(QT); +} + +static void generateDiagnosticsForCallLike(ProgramStateRef CurrSt, + const LocationContext *LCtx, + const RefVal &CurrV, SymbolRef &Sym, + const Stmt *S, + llvm::raw_string_ostream &os) { + CallEventManager &Mgr = CurrSt->getStateManager().getCallEventManager(); + if (const CallExpr *CE = dyn_cast<CallExpr>(S)) { + // Get the name of the callee (if it is available) + // from the tracked SVal. + SVal X = CurrSt->getSValAsScalarOrLoc(CE->getCallee(), LCtx); + const FunctionDecl *FD = X.getAsFunctionDecl(); + + // If failed, try to get it from AST. 
+ if (!FD) + FD = dyn_cast<FunctionDecl>(CE->getCalleeDecl()); + + if (const auto *MD = dyn_cast<CXXMethodDecl>(CE->getCalleeDecl())) { + os << "Call to method '" << MD->getQualifiedNameAsString() << '\''; + } else if (FD) { + os << "Call to function '" << FD->getQualifiedNameAsString() << '\''; + } else { + os << "function call"; + } + } else if (isa<CXXNewExpr>(S)) { + os << "Operator 'new'"; + } else { + assert(isa<ObjCMessageExpr>(S)); + CallEventRef<ObjCMethodCall> Call = + Mgr.getObjCMethodCall(cast<ObjCMessageExpr>(S), CurrSt, LCtx); + + switch (Call->getMessageKind()) { + case OCM_Message: + os << "Method"; + break; + case OCM_PropertyAccess: + os << "Property"; + break; + case OCM_Subscript: + os << "Subscript"; + break; + } + } + + Optional<CallEventRef<>> CE = Mgr.getCall(S, CurrSt, LCtx); + auto Idx = findArgIdxOfSymbol(CurrSt, LCtx, Sym, CE); + + // If index is not found, we assume that the symbol was returned. + if (!Idx) { + os << " returns "; + } else { + os << " writes "; + } + + if (CurrV.getObjKind() == ObjKind::CF) { + os << "a Core Foundation object of type '" + << Sym->getType().getAsString() << "' with a "; + } else if (CurrV.getObjKind() == ObjKind::OS) { + os << "an OSObject of type '" << findAllocatedObjectName(S, Sym->getType()) + << "' with a "; + } else if (CurrV.getObjKind() == ObjKind::Generalized) { + os << "an object of type '" << Sym->getType().getAsString() + << "' with a "; + } else { + assert(CurrV.getObjKind() == ObjKind::ObjC); + QualType T = Sym->getType(); + if (!isa<ObjCObjectPointerType>(T)) { + os << "an Objective-C object with a "; + } else { + const ObjCObjectPointerType *PT = cast<ObjCObjectPointerType>(T); + os << "an instance of " << PT->getPointeeType().getAsString() + << " with a "; + } + } + + if (CurrV.isOwned()) { + os << "+1 retain count"; + } else { + assert(CurrV.isNotOwned()); + os << "+0 retain count"; + } + + if (Idx) { + os << " into an out parameter '"; + const ParmVarDecl *PVD = 
(*CE)->parameters()[*Idx]; + PVD->getNameForDiagnostic(os, PVD->getASTContext().getPrintingPolicy(), + /*Qualified=*/false); + os << "'"; + + QualType RT = (*CE)->getResultType(); + if (!RT.isNull() && !RT->isVoidType()) { + SVal RV = (*CE)->getReturnValue(); + if (CurrSt->isNull(RV).isConstrainedTrue()) { + os << " (assuming the call returns zero)"; + } else if (CurrSt->isNonNull(RV).isConstrainedTrue()) { + os << " (assuming the call returns non-zero)"; + } + + } + } +} + +namespace clang { +namespace ento { +namespace retaincountchecker { + +class RefCountReportVisitor : public BugReporterVisitor { +protected: + SymbolRef Sym; + +public: + RefCountReportVisitor(SymbolRef sym) : Sym(sym) {} + + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int x = 0; + ID.AddPointer(&x); + ID.AddPointer(Sym); + } + + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + + PathDiagnosticPieceRef getEndPath(BugReporterContext &BRC, + const ExplodedNode *N, + PathSensitiveBugReport &BR) override; +}; + +class RefLeakReportVisitor : public RefCountReportVisitor { +public: + RefLeakReportVisitor(SymbolRef sym) : RefCountReportVisitor(sym) {} + + PathDiagnosticPieceRef getEndPath(BugReporterContext &BRC, + const ExplodedNode *N, + PathSensitiveBugReport &BR) override; +}; + +} // end namespace retaincountchecker +} // end namespace ento +} // end namespace clang + + +/// Find the first node with the parent stack frame. 
+static const ExplodedNode *getCalleeNode(const ExplodedNode *Pred) { + const StackFrameContext *SC = Pred->getStackFrame(); + if (SC->inTopFrame()) + return nullptr; + const StackFrameContext *PC = SC->getParent()->getStackFrame(); + if (!PC) + return nullptr; + + const ExplodedNode *N = Pred; + while (N && N->getStackFrame() != PC) { + N = N->getFirstPred(); + } + return N; +} + + +/// Insert a diagnostic piece at function exit +/// if a function parameter is annotated as "os_consumed", +/// but it does not actually consume the reference. +static std::shared_ptr<PathDiagnosticEventPiece> +annotateConsumedSummaryMismatch(const ExplodedNode *N, + CallExitBegin &CallExitLoc, + const SourceManager &SM, + CallEventManager &CEMgr) { + + const ExplodedNode *CN = getCalleeNode(N); + if (!CN) + return nullptr; + + CallEventRef<> Call = CEMgr.getCaller(N->getStackFrame(), N->getState()); + + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + ArrayRef<const ParmVarDecl *> Parameters = Call->parameters(); + for (unsigned I=0; I < Call->getNumArgs() && I < Parameters.size(); ++I) { + const ParmVarDecl *PVD = Parameters[I]; + + if (!PVD->hasAttr<OSConsumedAttr>()) + continue; + + if (SymbolRef SR = Call->getArgSVal(I).getAsLocSymbol()) { + const RefVal *CountBeforeCall = getRefBinding(CN->getState(), SR); + const RefVal *CountAtExit = getRefBinding(N->getState(), SR); + + if (!CountBeforeCall || !CountAtExit) + continue; + + unsigned CountBefore = CountBeforeCall->getCount(); + unsigned CountAfter = CountAtExit->getCount(); + + bool AsExpected = CountBefore > 0 && CountAfter == CountBefore - 1; + if (!AsExpected) { + os << "Parameter '"; + PVD->getNameForDiagnostic(os, PVD->getASTContext().getPrintingPolicy(), + /*Qualified=*/false); + os << "' is marked as consuming, but the function did not consume " + << "the reference\n"; + } + } + } + + if (os.str().empty()) + return nullptr; + + PathDiagnosticLocation L = PathDiagnosticLocation::create(CallExitLoc, SM); + return 
std::make_shared<PathDiagnosticEventPiece>(L, os.str()); +} + +/// Annotate the parameter at the analysis entry point. +static std::shared_ptr<PathDiagnosticEventPiece> +annotateStartParameter(const ExplodedNode *N, SymbolRef Sym, + const SourceManager &SM) { + auto PP = N->getLocationAs<BlockEdge>(); + if (!PP) + return nullptr; + + const CFGBlock *Src = PP->getSrc(); + const RefVal *CurrT = getRefBinding(N->getState(), Sym); + + if (&Src->getParent()->getEntry() != Src || !CurrT || + getRefBinding(N->getFirstPred()->getState(), Sym)) + return nullptr; + + const auto *VR = cast<VarRegion>(cast<SymbolRegionValue>(Sym)->getRegion()); + const auto *PVD = cast<ParmVarDecl>(VR->getDecl()); + PathDiagnosticLocation L = PathDiagnosticLocation(PVD, SM); + + std::string s; + llvm::raw_string_ostream os(s); + os << "Parameter '" << PVD->getNameAsString() << "' starts at +"; + if (CurrT->getCount() == 1) { + os << "1, as it is marked as consuming"; + } else { + assert(CurrT->getCount() == 0); + os << "0"; + } + return std::make_shared<PathDiagnosticEventPiece>(L, os.str()); +} + +PathDiagnosticPieceRef +RefCountReportVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC, + PathSensitiveBugReport &BR) { + + const auto &BT = static_cast<const RefCountBug&>(BR.getBugType()); + const auto *Checker = + static_cast<const RetainCountChecker *>(BT.getChecker()); + + bool IsFreeUnowned = BT.getBugType() == RefCountBug::FreeNotOwned || + BT.getBugType() == RefCountBug::DeallocNotOwned; + + const SourceManager &SM = BRC.getSourceManager(); + CallEventManager &CEMgr = BRC.getStateManager().getCallEventManager(); + if (auto CE = N->getLocationAs<CallExitBegin>()) + if (auto PD = annotateConsumedSummaryMismatch(N, *CE, SM, CEMgr)) + return PD; + + if (auto PD = annotateStartParameter(N, Sym, SM)) + return PD; + + // FIXME: We will eventually need to handle non-statement-based events + // (__attribute__((cleanup))). 
+ if (!N->getLocation().getAs<StmtPoint>()) + return nullptr; + + // Check if the type state has changed. + const ExplodedNode *PrevNode = N->getFirstPred(); + ProgramStateRef PrevSt = PrevNode->getState(); + ProgramStateRef CurrSt = N->getState(); + const LocationContext *LCtx = N->getLocationContext(); + + const RefVal* CurrT = getRefBinding(CurrSt, Sym); + if (!CurrT) + return nullptr; + + const RefVal &CurrV = *CurrT; + const RefVal *PrevT = getRefBinding(PrevSt, Sym); + + // Create a string buffer to constain all the useful things we want + // to tell the user. + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + + if (PrevT && IsFreeUnowned && CurrV.isNotOwned() && PrevT->isOwned()) { + os << "Object is now not exclusively owned"; + auto Pos = PathDiagnosticLocation::create(N->getLocation(), SM); + return std::make_shared<PathDiagnosticEventPiece>(Pos, os.str()); + } + + // This is the allocation site since the previous node had no bindings + // for this symbol. + if (!PrevT) { + const Stmt *S = N->getLocation().castAs<StmtPoint>().getStmt(); + + if (isa<ObjCIvarRefExpr>(S) && + isSynthesizedAccessor(LCtx->getStackFrame())) { + S = LCtx->getStackFrame()->getCallSite(); + } + + if (isa<ObjCArrayLiteral>(S)) { + os << "NSArray literal is an object with a +0 retain count"; + } else if (isa<ObjCDictionaryLiteral>(S)) { + os << "NSDictionary literal is an object with a +0 retain count"; + } else if (const ObjCBoxedExpr *BL = dyn_cast<ObjCBoxedExpr>(S)) { + if (isNumericLiteralExpression(BL->getSubExpr())) + os << "NSNumber literal is an object with a +0 retain count"; + else { + const ObjCInterfaceDecl *BoxClass = nullptr; + if (const ObjCMethodDecl *Method = BL->getBoxingMethod()) + BoxClass = Method->getClassInterface(); + + // We should always be able to find the boxing class interface, + // but consider this future-proofing. 
+ if (BoxClass) { + os << *BoxClass << " b"; + } else { + os << "B"; + } + + os << "oxed expression produces an object with a +0 retain count"; + } + } else if (isa<ObjCIvarRefExpr>(S)) { + os << "Object loaded from instance variable"; + } else { + generateDiagnosticsForCallLike(CurrSt, LCtx, CurrV, Sym, S, os); + } + + PathDiagnosticLocation Pos(S, SM, N->getLocationContext()); + return std::make_shared<PathDiagnosticEventPiece>(Pos, os.str()); + } + + // Gather up the effects that were performed on the object at this + // program point + bool DeallocSent = false; + + const ProgramPointTag *Tag = N->getLocation().getTag(); + + if (Tag == &Checker->getCastFailTag()) { + os << "Assuming dynamic cast returns null due to type mismatch"; + } + + if (Tag == &Checker->getDeallocSentTag()) { + // We only have summaries attached to nodes after evaluating CallExpr and + // ObjCMessageExprs. + const Stmt *S = N->getLocation().castAs<StmtPoint>().getStmt(); + + if (const CallExpr *CE = dyn_cast<CallExpr>(S)) { + // Iterate through the parameter expressions and see if the symbol + // was ever passed as an argument. + unsigned i = 0; + + for (auto AI=CE->arg_begin(), AE=CE->arg_end(); AI!=AE; ++AI, ++i) { + + // Retrieve the value of the argument. Is it the symbol + // we are interested in? + if (CurrSt->getSValAsScalarOrLoc(*AI, LCtx).getAsLocSymbol() != Sym) + continue; + + // We have an argument. Get the effect! + DeallocSent = true; + } + } else if (const ObjCMessageExpr *ME = dyn_cast<ObjCMessageExpr>(S)) { + if (const Expr *receiver = ME->getInstanceReceiver()) { + if (CurrSt->getSValAsScalarOrLoc(receiver, LCtx) + .getAsLocSymbol() == Sym) { + // The symbol we are tracking is the receiver. + DeallocSent = true; + } + } + } + } + + if (!shouldGenerateNote(os, PrevT, CurrV, DeallocSent)) + return nullptr; + + if (os.str().empty()) + return nullptr; // We have nothing to say! 
+ + const Stmt *S = N->getLocation().castAs<StmtPoint>().getStmt(); + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + auto P = std::make_shared<PathDiagnosticEventPiece>(Pos, os.str()); + + // Add the range by scanning the children of the statement for any bindings + // to Sym. + for (const Stmt *Child : S->children()) + if (const Expr *Exp = dyn_cast_or_null<Expr>(Child)) + if (CurrSt->getSValAsScalarOrLoc(Exp, LCtx).getAsLocSymbol() == Sym) { + P->addRange(Exp->getSourceRange()); + break; + } + + return std::move(P); +} + +static Optional<std::string> describeRegion(const MemRegion *MR) { + if (const auto *VR = dyn_cast_or_null<VarRegion>(MR)) + return std::string(VR->getDecl()->getName()); + // Once we support more storage locations for bindings, + // this would need to be improved. + return None; +} + +namespace { +// Find the first node in the current function context that referred to the +// tracked symbol and the memory location that value was stored to. Note, the +// value is only reported if the allocation occurred in the same function as +// the leak. The function can also return a location context, which should be +// treated as interesting. +struct AllocationInfo { + const ExplodedNode* N; + const MemRegion *R; + const LocationContext *InterestingMethodContext; + AllocationInfo(const ExplodedNode *InN, + const MemRegion *InR, + const LocationContext *InInterestingMethodContext) : + N(InN), R(InR), InterestingMethodContext(InInterestingMethodContext) {} +}; +} // end anonymous namespace + +static AllocationInfo GetAllocationSite(ProgramStateManager &StateMgr, + const ExplodedNode *N, SymbolRef Sym) { + const ExplodedNode *AllocationNode = N; + const ExplodedNode *AllocationNodeInCurrentOrParentContext = N; + const MemRegion *FirstBinding = nullptr; + const LocationContext *LeakContext = N->getLocationContext(); + + // The location context of the init method called on the leaked object, if + // available. 
+ const LocationContext *InitMethodContext = nullptr; + + while (N) { + ProgramStateRef St = N->getState(); + const LocationContext *NContext = N->getLocationContext(); + + if (!getRefBinding(St, Sym)) + break; + + StoreManager::FindUniqueBinding FB(Sym); + StateMgr.iterBindings(St, FB); + + if (FB) { + const MemRegion *R = FB.getRegion(); + // Do not show local variables belonging to a function other than + // where the error is reported. + if (auto MR = dyn_cast<StackSpaceRegion>(R->getMemorySpace())) + if (MR->getStackFrame() == LeakContext->getStackFrame()) + FirstBinding = R; + } + + // AllocationNode is the last node in which the symbol was tracked. + AllocationNode = N; + + // AllocationNodeInCurrentContext, is the last node in the current or + // parent context in which the symbol was tracked. + // + // Note that the allocation site might be in the parent context. For example, + // the case where an allocation happens in a block that captures a reference + // to it and that reference is overwritten/dropped by another call to + // the block. + if (NContext == LeakContext || NContext->isParentOf(LeakContext)) + AllocationNodeInCurrentOrParentContext = N; + + // Find the last init that was called on the given symbol and store the + // init method's location context. + if (!InitMethodContext) + if (auto CEP = N->getLocation().getAs<CallEnter>()) { + const Stmt *CE = CEP->getCallExpr(); + if (const auto *ME = dyn_cast_or_null<ObjCMessageExpr>(CE)) { + const Stmt *RecExpr = ME->getInstanceReceiver(); + if (RecExpr) { + SVal RecV = St->getSVal(RecExpr, NContext); + if (ME->getMethodFamily() == OMF_init && RecV.getAsSymbol() == Sym) + InitMethodContext = CEP->getCalleeContext(); + } + } + } + + N = N->getFirstPred(); + } + + // If we are reporting a leak of the object that was allocated with alloc, + // mark its init method as interesting. 
+ const LocationContext *InterestingMethodContext = nullptr; + if (InitMethodContext) { + const ProgramPoint AllocPP = AllocationNode->getLocation(); + if (Optional<StmtPoint> SP = AllocPP.getAs<StmtPoint>()) + if (const ObjCMessageExpr *ME = SP->getStmtAs<ObjCMessageExpr>()) + if (ME->getMethodFamily() == OMF_alloc) + InterestingMethodContext = InitMethodContext; + } + + // If allocation happened in a function different from the leak node context, + // do not report the binding. + assert(N && "Could not find allocation node"); + + if (AllocationNodeInCurrentOrParentContext && + AllocationNodeInCurrentOrParentContext->getLocationContext() != + LeakContext) + FirstBinding = nullptr; + + return AllocationInfo(AllocationNodeInCurrentOrParentContext, FirstBinding, + InterestingMethodContext); +} + +PathDiagnosticPieceRef +RefCountReportVisitor::getEndPath(BugReporterContext &BRC, + const ExplodedNode *EndN, + PathSensitiveBugReport &BR) { + BR.markInteresting(Sym); + return BugReporterVisitor::getDefaultEndPath(BRC, EndN, BR); +} + +PathDiagnosticPieceRef +RefLeakReportVisitor::getEndPath(BugReporterContext &BRC, + const ExplodedNode *EndN, + PathSensitiveBugReport &BR) { + + // Tell the BugReporterContext to report cases when the tracked symbol is + // assigned to different variables, etc. + BR.markInteresting(Sym); + + // We are reporting a leak. Walk up the graph to get to the first node where + // the symbol appeared, and also get the first VarDecl that tracked object + // is stored to. 
+ AllocationInfo AllocI = GetAllocationSite(BRC.getStateManager(), EndN, Sym); + + const MemRegion* FirstBinding = AllocI.R; + BR.markInteresting(AllocI.InterestingMethodContext); + + PathDiagnosticLocation L = cast<RefLeakReport>(BR).getEndOfPath(); + + std::string sbuf; + llvm::raw_string_ostream os(sbuf); + + os << "Object leaked: "; + + Optional<std::string> RegionDescription = describeRegion(FirstBinding); + if (RegionDescription) { + os << "object allocated and stored into '" << *RegionDescription << '\''; + } else { + os << "allocated object of type '" << getPrettyTypeName(Sym->getType()) + << "'"; + } + + // Get the retain count. + const RefVal* RV = getRefBinding(EndN->getState(), Sym); + assert(RV); + + if (RV->getKind() == RefVal::ErrorLeakReturned) { + // FIXME: Per comments in rdar://6320065, "create" only applies to CF + // objects. Only "copy", "alloc", "retain" and "new" transfer ownership + // to the caller for NS objects. + const Decl *D = &EndN->getCodeDecl(); + + os << (isa<ObjCMethodDecl>(D) ? " is returned from a method " + : " is returned from a function "); + + if (D->hasAttr<CFReturnsNotRetainedAttr>()) { + os << "that is annotated as CF_RETURNS_NOT_RETAINED"; + } else if (D->hasAttr<NSReturnsNotRetainedAttr>()) { + os << "that is annotated as NS_RETURNS_NOT_RETAINED"; + } else if (D->hasAttr<OSReturnsNotRetainedAttr>()) { + os << "that is annotated as OS_RETURNS_NOT_RETAINED"; + } else { + if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D)) { + if (BRC.getASTContext().getLangOpts().ObjCAutoRefCount) { + os << "managed by Automatic Reference Counting"; + } else { + os << "whose name ('" << MD->getSelector().getAsString() + << "') does not start with " + "'copy', 'mutableCopy', 'alloc' or 'new'." 
+ " This violates the naming convention rules" + " given in the Memory Management Guide for Cocoa"; + } + } else { + const FunctionDecl *FD = cast<FunctionDecl>(D); + ObjKind K = RV->getObjKind(); + if (K == ObjKind::ObjC || K == ObjKind::CF) { + os << "whose name ('" << *FD + << "') does not contain 'Copy' or 'Create'. This violates the " + "naming" + " convention rules given in the Memory Management Guide for " + "Core" + " Foundation"; + } else if (RV->getObjKind() == ObjKind::OS) { + std::string FuncName = FD->getNameAsString(); + os << "whose name ('" << FuncName + << "') starts with '" << StringRef(FuncName).substr(0, 3) << "'"; + } + } + } + } else { + os << " is not referenced later in this execution path and has a retain " + "count of +" << RV->getCount(); + } + + return std::make_shared<PathDiagnosticEventPiece>(L, os.str()); +} + +RefCountReport::RefCountReport(const RefCountBug &D, const LangOptions &LOpts, + ExplodedNode *n, SymbolRef sym, bool isLeak) + : PathSensitiveBugReport(D, D.getDescription(), n), Sym(sym), + isLeak(isLeak) { + if (!isLeak) + addVisitor(std::make_unique<RefCountReportVisitor>(sym)); +} + +RefCountReport::RefCountReport(const RefCountBug &D, const LangOptions &LOpts, + ExplodedNode *n, SymbolRef sym, + StringRef endText) + : PathSensitiveBugReport(D, D.getDescription(), endText, n) { + + addVisitor(std::make_unique<RefCountReportVisitor>(sym)); +} + +void RefLeakReport::deriveParamLocation(CheckerContext &Ctx, SymbolRef sym) { + const SourceManager& SMgr = Ctx.getSourceManager(); + + if (!sym->getOriginRegion()) + return; + + auto *Region = dyn_cast<DeclRegion>(sym->getOriginRegion()); + if (Region) { + const Decl *PDecl = Region->getDecl(); + if (PDecl && isa<ParmVarDecl>(PDecl)) { + PathDiagnosticLocation ParamLocation = + PathDiagnosticLocation::create(PDecl, SMgr); + Location = ParamLocation; + UniqueingLocation = ParamLocation; + UniqueingDecl = Ctx.getLocationContext()->getDecl(); + } + } +} + +void 
RefLeakReport::deriveAllocLocation(CheckerContext &Ctx, + SymbolRef sym) { + // Most bug reports are cached at the location where they occurred. + // With leaks, we want to unique them by the location where they were + // allocated, and only report a single path. To do this, we need to find + // the allocation site of a piece of tracked memory, which we do via a + // call to GetAllocationSite. This will walk the ExplodedGraph backwards. + // Note that this is *not* the trimmed graph; we are guaranteed, however, + // that all ancestor nodes that represent the allocation site have the + // same SourceLocation. + const ExplodedNode *AllocNode = nullptr; + + const SourceManager& SMgr = Ctx.getSourceManager(); + + AllocationInfo AllocI = + GetAllocationSite(Ctx.getStateManager(), getErrorNode(), sym); + + AllocNode = AllocI.N; + AllocBinding = AllocI.R; + markInteresting(AllocI.InterestingMethodContext); + + // Get the SourceLocation for the allocation site. + // FIXME: This will crash the analyzer if an allocation comes from an + // implicit call (ex: a destructor call). + // (Currently there are no such allocations in Cocoa, though.) + AllocStmt = AllocNode->getStmtForDiagnostics(); + + if (!AllocStmt) { + AllocBinding = nullptr; + return; + } + + PathDiagnosticLocation AllocLocation = + PathDiagnosticLocation::createBegin(AllocStmt, SMgr, + AllocNode->getLocationContext()); + Location = AllocLocation; + + // Set uniqieing info, which will be used for unique the bug reports. The + // leaks should be uniqued on the allocation site. 
+ UniqueingLocation = AllocLocation; + UniqueingDecl = AllocNode->getLocationContext()->getDecl(); +} + +void RefLeakReport::createDescription(CheckerContext &Ctx) { + assert(Location.isValid() && UniqueingDecl && UniqueingLocation.isValid()); + Description.clear(); + llvm::raw_string_ostream os(Description); + os << "Potential leak of an object"; + + Optional<std::string> RegionDescription = describeRegion(AllocBinding); + if (RegionDescription) { + os << " stored into '" << *RegionDescription << '\''; + } else { + + // If we can't figure out the name, just supply the type information. + os << " of type '" << getPrettyTypeName(Sym->getType()) << "'"; + } +} + +RefLeakReport::RefLeakReport(const RefCountBug &D, const LangOptions &LOpts, + ExplodedNode *n, SymbolRef sym, + CheckerContext &Ctx) + : RefCountReport(D, LOpts, n, sym, /*isLeak=*/true) { + + deriveAllocLocation(Ctx, sym); + if (!AllocBinding) + deriveParamLocation(Ctx, sym); + + createDescription(Ctx); + + addVisitor(std::make_unique<RefLeakReportVisitor>(sym)); +} diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h new file mode 100644 index 000000000000..e9e277754054 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.h @@ -0,0 +1,107 @@ +//== RetainCountDiagnostics.h - Checks for leaks and other issues -*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines diagnostics for RetainCountChecker, which implements +// a reference count checker for Core Foundation and Cocoa on (Mac OS X). 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_RETAINCOUNTCHECKER_DIAGNOSTICS_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_RETAINCOUNTCHECKER_DIAGNOSTICS_H + +#include "clang/Analysis/PathDiagnostic.h" +#include "clang/Analysis/RetainSummaryManager.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" + +namespace clang { +namespace ento { +namespace retaincountchecker { + +class RefCountBug : public BugType { +public: + enum RefCountBugType { + UseAfterRelease, + ReleaseNotOwned, + DeallocNotOwned, + FreeNotOwned, + OverAutorelease, + ReturnNotOwnedForOwned, + LeakWithinFunction, + LeakAtReturn, + }; + RefCountBug(const CheckerBase *checker, RefCountBugType BT); + StringRef getDescription() const; + + RefCountBugType getBugType() const { + return BT; + } + + const CheckerBase *getChecker() const { + return Checker; + } + +private: + RefCountBugType BT; + const CheckerBase *Checker; + static StringRef bugTypeToName(RefCountBugType BT); +}; + +class RefCountReport : public PathSensitiveBugReport { +protected: + SymbolRef Sym; + bool isLeak = false; + +public: + RefCountReport(const RefCountBug &D, const LangOptions &LOpts, + ExplodedNode *n, SymbolRef sym, + bool isLeak=false); + + RefCountReport(const RefCountBug &D, const LangOptions &LOpts, + ExplodedNode *n, SymbolRef sym, + StringRef endText); + + ArrayRef<SourceRange> getRanges() const override { + if (!isLeak) + return PathSensitiveBugReport::getRanges(); + return {}; + } +}; + +class RefLeakReport : public RefCountReport { + const MemRegion* AllocBinding; + const Stmt *AllocStmt; + PathDiagnosticLocation Location; + + // Finds the function declaration where a leak warning for the parameter + // 'sym' should be raised. 
+ void deriveParamLocation(CheckerContext &Ctx, SymbolRef sym); + // Finds the location where a leak warning for 'sym' should be raised. + void deriveAllocLocation(CheckerContext &Ctx, SymbolRef sym); + // Produces description of a leak warning which is printed on the console. + void createDescription(CheckerContext &Ctx); + +public: + RefLeakReport(const RefCountBug &D, const LangOptions &LOpts, ExplodedNode *n, + SymbolRef sym, CheckerContext &Ctx); + PathDiagnosticLocation getLocation() const override { + assert(Location.isValid()); + return Location; + } + + PathDiagnosticLocation getEndOfPath() const { + return PathSensitiveBugReport::getLocation(); + } +}; + +} // end namespace retaincountchecker +} // end namespace ento +} // end namespace clang + +#endif diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp new file mode 100644 index 000000000000..abd1a074b487 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp @@ -0,0 +1,96 @@ +//== ReturnPointerRangeChecker.cpp ------------------------------*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines ReturnPointerRangeChecker, which is a path-sensitive check +// which looks for an out-of-bound pointer being returned to callers. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" + +using namespace clang; +using namespace ento; + +namespace { +class ReturnPointerRangeChecker : + public Checker< check::PreStmt<ReturnStmt> > { + mutable std::unique_ptr<BuiltinBug> BT; + +public: + void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const; +}; +} + +void ReturnPointerRangeChecker::checkPreStmt(const ReturnStmt *RS, + CheckerContext &C) const { + ProgramStateRef state = C.getState(); + + const Expr *RetE = RS->getRetValue(); + if (!RetE) + return; + + SVal V = C.getSVal(RetE); + const MemRegion *R = V.getAsRegion(); + + const ElementRegion *ER = dyn_cast_or_null<ElementRegion>(R); + if (!ER) + return; + + DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); + // Zero index is always in bound, this also passes ElementRegions created for + // pointer casts. + if (Idx.isZeroConstant()) + return; + // FIXME: All of this out-of-bounds checking should eventually be refactored + // into a common place. + + DefinedOrUnknownSVal NumElements + = C.getStoreManager().getSizeInElements(state, ER->getSuperRegion(), + ER->getValueType()); + + ProgramStateRef StInBound = state->assumeInBound(Idx, NumElements, true); + ProgramStateRef StOutBound = state->assumeInBound(Idx, NumElements, false); + if (StOutBound && !StInBound) { + ExplodedNode *N = C.generateErrorNode(StOutBound); + + if (!N) + return; + + // FIXME: This bug correspond to CWE-466. Eventually we should have bug + // types explicitly reference such exploit categories (when applicable). 
+ if (!BT) + BT.reset(new BuiltinBug( + this, "Return of pointer value outside of expected range", + "Returned pointer value points outside the original object " + "(potential buffer overflow)")); + + // FIXME: It would be nice to eventually make this diagnostic more clear, + // e.g., by referencing the original declaration or by saying *why* this + // reference is outside the range. + + // Generate a report for this bug. + auto report = + std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N); + + report->addRange(RetE->getSourceRange()); + C.emitReport(std::move(report)); + } +} + +void ento::registerReturnPointerRangeChecker(CheckerManager &mgr) { + mgr.registerChecker<ReturnPointerRangeChecker>(); +} + +bool ento::shouldRegisterReturnPointerRangeChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp new file mode 100644 index 000000000000..fbd15d864424 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp @@ -0,0 +1,127 @@ +//== ReturnUndefChecker.cpp -------------------------------------*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines ReturnUndefChecker, which is a path-sensitive +// check which looks for undefined or garbage values being returned to the +// caller. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class ReturnUndefChecker : public Checker< check::PreStmt<ReturnStmt> > { + mutable std::unique_ptr<BuiltinBug> BT_Undef; + mutable std::unique_ptr<BuiltinBug> BT_NullReference; + + void emitUndef(CheckerContext &C, const Expr *RetE) const; + void checkReference(CheckerContext &C, const Expr *RetE, + DefinedOrUnknownSVal RetVal) const; +public: + void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const; +}; +} + +void ReturnUndefChecker::checkPreStmt(const ReturnStmt *RS, + CheckerContext &C) const { + const Expr *RetE = RS->getRetValue(); + if (!RetE) + return; + SVal RetVal = C.getSVal(RetE); + + const StackFrameContext *SFC = C.getStackFrame(); + QualType RT = CallEvent::getDeclaredResultType(SFC->getDecl()); + + if (RetVal.isUndef()) { + // "return;" is modeled to evaluate to an UndefinedVal. Allow UndefinedVal + // to be returned in functions returning void to support this pattern: + // void foo() { + // return; + // } + // void test() { + // return foo(); + // } + if (!RT.isNull() && RT->isVoidType()) + return; + + // Not all blocks have explicitly-specified return types; if the return type + // is not available, but the return value expression has 'void' type, assume + // Sema already checked it. 
+ if (RT.isNull() && isa<BlockDecl>(SFC->getDecl()) && + RetE->getType()->isVoidType()) + return; + + emitUndef(C, RetE); + return; + } + + if (RT.isNull()) + return; + + if (RT->isReferenceType()) { + checkReference(C, RetE, RetVal.castAs<DefinedOrUnknownSVal>()); + return; + } +} + +static void emitBug(CheckerContext &C, BuiltinBug &BT, const Expr *RetE, + const Expr *TrackingE = nullptr) { + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + + auto Report = + std::make_unique<PathSensitiveBugReport>(BT, BT.getDescription(), N); + + Report->addRange(RetE->getSourceRange()); + bugreporter::trackExpressionValue(N, TrackingE ? TrackingE : RetE, *Report); + + C.emitReport(std::move(Report)); +} + +void ReturnUndefChecker::emitUndef(CheckerContext &C, const Expr *RetE) const { + if (!BT_Undef) + BT_Undef.reset( + new BuiltinBug(this, "Garbage return value", + "Undefined or garbage value returned to caller")); + emitBug(C, *BT_Undef, RetE); +} + +void ReturnUndefChecker::checkReference(CheckerContext &C, const Expr *RetE, + DefinedOrUnknownSVal RetVal) const { + ProgramStateRef StNonNull, StNull; + std::tie(StNonNull, StNull) = C.getState()->assume(RetVal); + + if (StNonNull) { + // Going forward, assume the location is non-null. + C.addTransition(StNonNull); + return; + } + + // The return value is known to be null. Emit a bug report. 
+ if (!BT_NullReference) + BT_NullReference.reset(new BuiltinBug(this, "Returning null reference")); + + emitBug(C, *BT_NullReference, RetE, bugreporter::getDerefExpr(RetE)); +} + +void ento::registerReturnUndefChecker(CheckerManager &mgr) { + mgr.registerChecker<ReturnUndefChecker>(); +} + +bool ento::shouldRegisterReturnUndefChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp new file mode 100644 index 000000000000..103208d8b5a5 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp @@ -0,0 +1,170 @@ +//===- ReturnValueChecker - Applies guaranteed return values ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines ReturnValueChecker, which checks for calls with guaranteed +// boolean return value. It ensures the return value of each function call. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" + +using namespace clang; +using namespace ento; + +namespace { +class ReturnValueChecker : public Checker<check::PostCall, check::EndFunction> { +public: + // It sets the predefined invariant ('CDM') if the current call not break it. 
+ void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + + // It reports whether a predefined invariant ('CDM') is broken. + void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const; + +private: + // The pairs are in the following form: {{{class, call}}, return value} + const CallDescriptionMap<bool> CDM = { + // These are known in the LLVM project: 'Error()' + {{{"ARMAsmParser", "Error"}}, true}, + {{{"HexagonAsmParser", "Error"}}, true}, + {{{"LLLexer", "Error"}}, true}, + {{{"LLParser", "Error"}}, true}, + {{{"MCAsmParser", "Error"}}, true}, + {{{"MCAsmParserExtension", "Error"}}, true}, + {{{"TGParser", "Error"}}, true}, + {{{"X86AsmParser", "Error"}}, true}, + // 'TokError()' + {{{"LLParser", "TokError"}}, true}, + {{{"MCAsmParser", "TokError"}}, true}, + {{{"MCAsmParserExtension", "TokError"}}, true}, + {{{"TGParser", "TokError"}}, true}, + // 'error()' + {{{"MIParser", "error"}}, true}, + {{{"WasmAsmParser", "error"}}, true}, + {{{"WebAssemblyAsmParser", "error"}}, true}, + // Other + {{{"AsmParser", "printError"}}, true}}; +}; +} // namespace + +static std::string getName(const CallEvent &Call) { + std::string Name = ""; + if (const auto *MD = dyn_cast<CXXMethodDecl>(Call.getDecl())) + if (const CXXRecordDecl *RD = MD->getParent()) + Name += RD->getNameAsString() + "::"; + + Name += Call.getCalleeIdentifier()->getName(); + return Name; +} + +// The predefinitions ('CDM') could break due to the ever growing code base. +// Check for the expected invariants and see whether they apply. 
+static Optional<bool> isInvariantBreak(bool ExpectedValue, SVal ReturnV, + CheckerContext &C) { + auto ReturnDV = ReturnV.getAs<DefinedOrUnknownSVal>(); + if (!ReturnDV) + return None; + + if (ExpectedValue) + return C.getState()->isNull(*ReturnDV).isConstrainedTrue(); + + return C.getState()->isNull(*ReturnDV).isConstrainedFalse(); +} + +void ReturnValueChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + const bool *RawExpectedValue = CDM.lookup(Call); + if (!RawExpectedValue) + return; + + SVal ReturnV = Call.getReturnValue(); + bool ExpectedValue = *RawExpectedValue; + Optional<bool> IsInvariantBreak = isInvariantBreak(ExpectedValue, ReturnV, C); + if (!IsInvariantBreak) + return; + + // If the invariant is broken it is reported by 'checkEndFunction()'. + if (*IsInvariantBreak) + return; + + std::string Name = getName(Call); + const NoteTag *CallTag = C.getNoteTag( + [Name, ExpectedValue](BugReport &) -> std::string { + SmallString<128> Msg; + llvm::raw_svector_ostream Out(Msg); + + Out << '\'' << Name << "' returns " + << (ExpectedValue ? "true" : "false"); + return Out.str(); + }, + /*IsPrunable=*/true); + + ProgramStateRef State = C.getState(); + State = State->assume(ReturnV.castAs<DefinedOrUnknownSVal>(), ExpectedValue); + C.addTransition(State, CallTag); +} + +void ReturnValueChecker::checkEndFunction(const ReturnStmt *RS, + CheckerContext &C) const { + if (!RS || !RS->getRetValue()) + return; + + // We cannot get the caller in the top-frame. 
+ const StackFrameContext *SFC = C.getStackFrame(); + if (C.getStackFrame()->inTopFrame()) + return; + + ProgramStateRef State = C.getState(); + CallEventManager &CMgr = C.getStateManager().getCallEventManager(); + CallEventRef<> Call = CMgr.getCaller(SFC, State); + if (!Call) + return; + + const bool *RawExpectedValue = CDM.lookup(*Call); + if (!RawExpectedValue) + return; + + SVal ReturnV = State->getSVal(RS->getRetValue(), C.getLocationContext()); + bool ExpectedValue = *RawExpectedValue; + Optional<bool> IsInvariantBreak = isInvariantBreak(ExpectedValue, ReturnV, C); + if (!IsInvariantBreak) + return; + + // If the invariant is appropriate it is reported by 'checkPostCall()'. + if (!*IsInvariantBreak) + return; + + std::string Name = getName(*Call); + const NoteTag *CallTag = C.getNoteTag( + [Name, ExpectedValue](BugReport &BR) -> std::string { + SmallString<128> Msg; + llvm::raw_svector_ostream Out(Msg); + + // The following is swapped because the invariant is broken. + Out << '\'' << Name << "' returns " + << (ExpectedValue ? "false" : "true"); + + return Out.str(); + }, + /*IsPrunable=*/false); + + C.addTransition(State, CallTag); +} + +void ento::registerReturnValueChecker(CheckerManager &Mgr) { + Mgr.registerChecker<ReturnValueChecker>(); +} + +bool ento::shouldRegisterReturnValueChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp new file mode 100644 index 000000000000..5e305aa709b6 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp @@ -0,0 +1,208 @@ +//=- RunLoopAutoreleaseLeakChecker.cpp --------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +//===----------------------------------------------------------------------===// +// +// A checker for detecting leaks resulting from allocating temporary +// autoreleased objects before starting the main run loop. +// +// Checks for two antipatterns: +// 1. ObjCMessageExpr followed by [[NSRunLoop mainRunLoop] run] in the same +// autorelease pool. +// 2. ObjCMessageExpr followed by [[NSRunLoop mainRunLoop] run] in no +// autorelease pool. +// +// Any temporary objects autoreleased in code called in those expressions +// will not be deallocated until the program exits, and are effectively leaks. +// +//===----------------------------------------------------------------------===// +// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" + +using namespace clang; +using namespace ento; +using namespace ast_matchers; + +namespace { + +const char * RunLoopBind = "NSRunLoopM"; +const char * RunLoopRunBind = "RunLoopRunM"; +const char * OtherMsgBind = "OtherMessageSentM"; +const char * AutoreleasePoolBind = "AutoreleasePoolM"; +const char * OtherStmtAutoreleasePoolBind = "OtherAutoreleasePoolM"; + +class RunLoopAutoreleaseLeakChecker : public Checker<check::ASTCodeBody> { + +public: + void checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const; + +}; + +} // end anonymous namespace + +/// \return Whether {@code A} occurs before {@code 
B} in traversal of +/// {@code Parent}. +/// Conceptually a very incomplete/unsound approximation of happens-before +/// relationship (A is likely to be evaluated before B), +/// but useful enough in this case. +static bool seenBefore(const Stmt *Parent, const Stmt *A, const Stmt *B) { + for (const Stmt *C : Parent->children()) { + if (!C) continue; + + if (C == A) + return true; + + if (C == B) + return false; + + return seenBefore(C, A, B); + } + return false; +} + +static void emitDiagnostics(BoundNodes &Match, + const Decl *D, + BugReporter &BR, + AnalysisManager &AM, + const RunLoopAutoreleaseLeakChecker *Checker) { + + assert(D->hasBody()); + const Stmt *DeclBody = D->getBody(); + + AnalysisDeclContext *ADC = AM.getAnalysisDeclContext(D); + + const auto *ME = Match.getNodeAs<ObjCMessageExpr>(OtherMsgBind); + assert(ME); + + const auto *AP = + Match.getNodeAs<ObjCAutoreleasePoolStmt>(AutoreleasePoolBind); + const auto *OAP = + Match.getNodeAs<ObjCAutoreleasePoolStmt>(OtherStmtAutoreleasePoolBind); + bool HasAutoreleasePool = (AP != nullptr); + + const auto *RL = Match.getNodeAs<ObjCMessageExpr>(RunLoopBind); + const auto *RLR = Match.getNodeAs<Stmt>(RunLoopRunBind); + assert(RLR && "Run loop launch not found"); + assert(ME != RLR); + + // Launch of run loop occurs before the message-sent expression is seen. + if (seenBefore(DeclBody, RLR, ME)) + return; + + if (HasAutoreleasePool && (OAP != AP)) + return; + + PathDiagnosticLocation Location = PathDiagnosticLocation::createBegin( + ME, BR.getSourceManager(), ADC); + SourceRange Range = ME->getSourceRange(); + + BR.EmitBasicReport(ADC->getDecl(), Checker, + /*Name=*/"Memory leak inside autorelease pool", + /*BugCategory=*/"Memory", + /*Name=*/ + (Twine("Temporary objects allocated in the") + + " autorelease pool " + + (HasAutoreleasePool ? "" : "of last resort ") + + "followed by the launch of " + + (RL ? 
"main run loop " : "xpc_main ") + + "may never get released; consider moving them to a " + "separate autorelease pool") + .str(), + Location, Range); +} + +static StatementMatcher getRunLoopRunM(StatementMatcher Extra = anything()) { + StatementMatcher MainRunLoopM = + objcMessageExpr(hasSelector("mainRunLoop"), + hasReceiverType(asString("NSRunLoop")), + Extra) + .bind(RunLoopBind); + + StatementMatcher MainRunLoopRunM = objcMessageExpr(hasSelector("run"), + hasReceiver(MainRunLoopM), + Extra).bind(RunLoopRunBind); + + StatementMatcher XPCRunM = + callExpr(callee(functionDecl(hasName("xpc_main")))).bind(RunLoopRunBind); + return anyOf(MainRunLoopRunM, XPCRunM); +} + +static StatementMatcher getOtherMessageSentM(StatementMatcher Extra = anything()) { + return objcMessageExpr(unless(anyOf(equalsBoundNode(RunLoopBind), + equalsBoundNode(RunLoopRunBind))), + Extra) + .bind(OtherMsgBind); +} + +static void +checkTempObjectsInSamePool(const Decl *D, AnalysisManager &AM, BugReporter &BR, + const RunLoopAutoreleaseLeakChecker *Chkr) { + StatementMatcher RunLoopRunM = getRunLoopRunM(); + StatementMatcher OtherMessageSentM = getOtherMessageSentM( + hasAncestor(autoreleasePoolStmt().bind(OtherStmtAutoreleasePoolBind))); + + StatementMatcher RunLoopInAutorelease = + autoreleasePoolStmt( + hasDescendant(RunLoopRunM), + hasDescendant(OtherMessageSentM)).bind(AutoreleasePoolBind); + + DeclarationMatcher GroupM = decl(hasDescendant(RunLoopInAutorelease)); + + auto Matches = match(GroupM, *D, AM.getASTContext()); + for (BoundNodes Match : Matches) + emitDiagnostics(Match, D, BR, AM, Chkr); +} + +static void +checkTempObjectsInNoPool(const Decl *D, AnalysisManager &AM, BugReporter &BR, + const RunLoopAutoreleaseLeakChecker *Chkr) { + + auto NoPoolM = unless(hasAncestor(autoreleasePoolStmt())); + + StatementMatcher RunLoopRunM = getRunLoopRunM(NoPoolM); + StatementMatcher OtherMessageSentM = getOtherMessageSentM(NoPoolM); + + DeclarationMatcher GroupM = functionDecl( + isMain(), + 
hasDescendant(RunLoopRunM), + hasDescendant(OtherMessageSentM) + ); + + auto Matches = match(GroupM, *D, AM.getASTContext()); + + for (BoundNodes Match : Matches) + emitDiagnostics(Match, D, BR, AM, Chkr); + +} + +void RunLoopAutoreleaseLeakChecker::checkASTCodeBody(const Decl *D, + AnalysisManager &AM, + BugReporter &BR) const { + checkTempObjectsInSamePool(D, AM, BR, this); + checkTempObjectsInNoPool(D, AM, BR, this); +} + +void ento::registerRunLoopAutoreleaseLeakChecker(CheckerManager &mgr) { + mgr.registerChecker<RunLoopAutoreleaseLeakChecker>(); +} + +bool ento::shouldRegisterRunLoopAutoreleaseLeakChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp new file mode 100644 index 000000000000..8193bcbef4cd --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp @@ -0,0 +1,276 @@ +//===-- SimpleStreamChecker.cpp -----------------------------------------*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines a checker for proper use of fopen/fclose APIs. +// - If a file has been closed with fclose, it should not be accessed again. +// Accessing a closed file results in undefined behavior. +// - If a file was opened with fopen, it must be closed with fclose before +// the execution ends. Failing to do so results in a resource leak. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include <utility> + +using namespace clang; +using namespace ento; + +namespace { +typedef SmallVector<SymbolRef, 2> SymbolVector; + +struct StreamState { +private: + enum Kind { Opened, Closed } K; + StreamState(Kind InK) : K(InK) { } + +public: + bool isOpened() const { return K == Opened; } + bool isClosed() const { return K == Closed; } + + static StreamState getOpened() { return StreamState(Opened); } + static StreamState getClosed() { return StreamState(Closed); } + + bool operator==(const StreamState &X) const { + return K == X.K; + } + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.AddInteger(K); + } +}; + +class SimpleStreamChecker : public Checker<check::PostCall, + check::PreCall, + check::DeadSymbols, + check::PointerEscape> { + CallDescription OpenFn, CloseFn; + + std::unique_ptr<BugType> DoubleCloseBugType; + std::unique_ptr<BugType> LeakBugType; + + void reportDoubleClose(SymbolRef FileDescSym, + const CallEvent &Call, + CheckerContext &C) const; + + void reportLeaks(ArrayRef<SymbolRef> LeakedStreams, CheckerContext &C, + ExplodedNode *ErrNode) const; + + bool guaranteedNotToCloseFile(const CallEvent &Call) const; + +public: + SimpleStreamChecker(); + + /// Process fopen. + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + /// Process fclose. + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + + void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const; + + /// Stop tracking addresses which escape. 
+ ProgramStateRef checkPointerEscape(ProgramStateRef State, + const InvalidatedSymbols &Escaped, + const CallEvent *Call, + PointerEscapeKind Kind) const; +}; + +} // end anonymous namespace + +/// The state of the checker is a map from tracked stream symbols to their +/// state. Let's store it in the ProgramState. +REGISTER_MAP_WITH_PROGRAMSTATE(StreamMap, SymbolRef, StreamState) + +namespace { +class StopTrackingCallback final : public SymbolVisitor { + ProgramStateRef state; +public: + StopTrackingCallback(ProgramStateRef st) : state(std::move(st)) {} + ProgramStateRef getState() const { return state; } + + bool VisitSymbol(SymbolRef sym) override { + state = state->remove<StreamMap>(sym); + return true; + } +}; +} // end anonymous namespace + +SimpleStreamChecker::SimpleStreamChecker() + : OpenFn("fopen"), CloseFn("fclose", 1) { + // Initialize the bug types. + DoubleCloseBugType.reset( + new BugType(this, "Double fclose", "Unix Stream API Error")); + + // Sinks are higher importance bugs as well as calls to assert() or exit(0). + LeakBugType.reset( + new BugType(this, "Resource Leak", "Unix Stream API Error", + /*SuppressOnSink=*/true)); +} + +void SimpleStreamChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + if (!Call.isGlobalCFunction()) + return; + + if (!Call.isCalled(OpenFn)) + return; + + // Get the symbolic value corresponding to the file handle. + SymbolRef FileDesc = Call.getReturnValue().getAsSymbol(); + if (!FileDesc) + return; + + // Generate the next transition (an edge in the exploded graph). + ProgramStateRef State = C.getState(); + State = State->set<StreamMap>(FileDesc, StreamState::getOpened()); + C.addTransition(State); +} + +void SimpleStreamChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + if (!Call.isGlobalCFunction()) + return; + + if (!Call.isCalled(CloseFn)) + return; + + // Get the symbolic value corresponding to the file handle. 
+ SymbolRef FileDesc = Call.getArgSVal(0).getAsSymbol(); + if (!FileDesc) + return; + + // Check if the stream has already been closed. + ProgramStateRef State = C.getState(); + const StreamState *SS = State->get<StreamMap>(FileDesc); + if (SS && SS->isClosed()) { + reportDoubleClose(FileDesc, Call, C); + return; + } + + // Generate the next transition, in which the stream is closed. + State = State->set<StreamMap>(FileDesc, StreamState::getClosed()); + C.addTransition(State); +} + +static bool isLeaked(SymbolRef Sym, const StreamState &SS, + bool IsSymDead, ProgramStateRef State) { + if (IsSymDead && SS.isOpened()) { + // If a symbol is NULL, assume that fopen failed on this path. + // A symbol should only be considered leaked if it is non-null. + ConstraintManager &CMgr = State->getConstraintManager(); + ConditionTruthVal OpenFailed = CMgr.isNull(State, Sym); + return !OpenFailed.isConstrainedTrue(); + } + return false; +} + +void SimpleStreamChecker::checkDeadSymbols(SymbolReaper &SymReaper, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + SymbolVector LeakedStreams; + StreamMapTy TrackedStreams = State->get<StreamMap>(); + for (StreamMapTy::iterator I = TrackedStreams.begin(), + E = TrackedStreams.end(); I != E; ++I) { + SymbolRef Sym = I->first; + bool IsSymDead = SymReaper.isDead(Sym); + + // Collect leaked symbols. + if (isLeaked(Sym, I->second, IsSymDead, State)) + LeakedStreams.push_back(Sym); + + // Remove the dead symbol from the streams map. + if (IsSymDead) + State = State->remove<StreamMap>(Sym); + } + + ExplodedNode *N = C.generateNonFatalErrorNode(State); + if (!N) + return; + reportLeaks(LeakedStreams, C, N); +} + +void SimpleStreamChecker::reportDoubleClose(SymbolRef FileDescSym, + const CallEvent &Call, + CheckerContext &C) const { + // We reached a bug, stop exploring the path here by generating a sink. + ExplodedNode *ErrNode = C.generateErrorNode(); + // If we've already reached this node on another path, return. 
+ if (!ErrNode) + return; + + // Generate the report. + auto R = std::make_unique<PathSensitiveBugReport>( + *DoubleCloseBugType, "Closing a previously closed file stream", ErrNode); + R->addRange(Call.getSourceRange()); + R->markInteresting(FileDescSym); + C.emitReport(std::move(R)); +} + +void SimpleStreamChecker::reportLeaks(ArrayRef<SymbolRef> LeakedStreams, + CheckerContext &C, + ExplodedNode *ErrNode) const { + // Attach bug reports to the leak node. + // TODO: Identify the leaked file descriptor. + for (SymbolRef LeakedStream : LeakedStreams) { + auto R = std::make_unique<PathSensitiveBugReport>( + *LeakBugType, "Opened file is never closed; potential resource leak", + ErrNode); + R->markInteresting(LeakedStream); + C.emitReport(std::move(R)); + } +} + +bool SimpleStreamChecker::guaranteedNotToCloseFile(const CallEvent &Call) const{ + // If it's not in a system header, assume it might close a file. + if (!Call.isInSystemHeader()) + return false; + + // Handle cases where we know a buffer's /address/ can escape. + if (Call.argumentsMayEscape()) + return false; + + // Note, even though fclose closes the file, we do not list it here + // since the checker is modeling the call. + + return true; +} + +// If the pointer we are tracking escaped, do not track the symbol as +// we cannot reason about it anymore. +ProgramStateRef +SimpleStreamChecker::checkPointerEscape(ProgramStateRef State, + const InvalidatedSymbols &Escaped, + const CallEvent *Call, + PointerEscapeKind Kind) const { + // If we know that the call cannot close a file, there is nothing to do. + if (Kind == PSK_DirectEscapeOnCall && guaranteedNotToCloseFile(*Call)) { + return State; + } + + for (InvalidatedSymbols::const_iterator I = Escaped.begin(), + E = Escaped.end(); + I != E; ++I) { + SymbolRef Sym = *I; + + // The symbol escaped. Optimistically, assume that the corresponding file + // handle will be closed somewhere else. 
+ State = State->remove<StreamMap>(Sym); + } + return State; +} + +void ento::registerSimpleStreamChecker(CheckerManager &mgr) { + mgr.registerChecker<SimpleStreamChecker>(); +} + +// This checker should be enabled regardless of how language options are set. +bool ento::shouldRegisterSimpleStreamChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp new file mode 100644 index 000000000000..fd372aafa50d --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp @@ -0,0 +1,72 @@ +// SmartPtrModeling.cpp - Model behavior of C++ smart pointers - C++ ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a checker that models various aspects of +// C++ smart pointer behavior. 
+// +//===----------------------------------------------------------------------===// + +#include "Move.h" + +#include "clang/AST/ExprCXX.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class SmartPtrModeling : public Checker<eval::Call> { + bool isNullAfterMoveMethod(const CallEvent &Call) const; + +public: + bool evalCall(const CallEvent &Call, CheckerContext &C) const; +}; +} // end of anonymous namespace + +bool SmartPtrModeling::isNullAfterMoveMethod(const CallEvent &Call) const { + // TODO: Update CallDescription to support anonymous calls? + // TODO: Handle other methods, such as .get() or .release(). + // But once we do, we'd need a visitor to explain null dereferences + // that are found via such modeling. + const auto *CD = dyn_cast_or_null<CXXConversionDecl>(Call.getDecl()); + return CD && CD->getConversionType()->isBooleanType(); +} + +bool SmartPtrModeling::evalCall(const CallEvent &Call, + CheckerContext &C) const { + if (!isNullAfterMoveMethod(Call)) + return false; + + ProgramStateRef State = C.getState(); + const MemRegion *ThisR = + cast<CXXInstanceCall>(&Call)->getCXXThisVal().getAsRegion(); + + if (!move::isMovedFrom(State, ThisR)) { + // TODO: Model this case as well. At least, avoid invalidation of globals. + return false; + } + + // TODO: Add a note to bug reports describing this decision. 
+ C.addTransition( + State->BindExpr(Call.getOriginExpr(), C.getLocationContext(), + C.getSValBuilder().makeZeroVal(Call.getResultType()))); + return true; +} + +void ento::registerSmartPtrModeling(CheckerManager &Mgr) { + Mgr.registerChecker<SmartPtrModeling>(); +} + +bool ento::shouldRegisterSmartPtrModeling(const LangOptions &LO) { + return LO.CPlusPlus; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp new file mode 100644 index 000000000000..7285d27495a7 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp @@ -0,0 +1,384 @@ +//=== StackAddrEscapeChecker.cpp ----------------------------------*- C++ -*--// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines stack address leak checker, which checks if an invalid +// stack address is stored into a global or heap location. See CERT DCL30-C. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/ExprCXX.h" +#include "clang/Basic/SourceManager.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" +using namespace clang; +using namespace ento; + +namespace { +class StackAddrEscapeChecker + : public Checker<check::PreCall, check::PreStmt<ReturnStmt>, + check::EndFunction> { + mutable IdentifierInfo *dispatch_semaphore_tII; + mutable std::unique_ptr<BuiltinBug> BT_stackleak; + mutable std::unique_ptr<BuiltinBug> BT_returnstack; + mutable std::unique_ptr<BuiltinBug> BT_capturedstackasync; + mutable std::unique_ptr<BuiltinBug> BT_capturedstackret; + +public: + enum CheckKind { + CK_StackAddrEscapeChecker, + CK_StackAddrAsyncEscapeChecker, + CK_NumCheckKinds + }; + + DefaultBool ChecksEnabled[CK_NumCheckKinds]; + + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const; + void checkEndFunction(const ReturnStmt *RS, CheckerContext &Ctx) const; + +private: + void checkReturnedBlockCaptures(const BlockDataRegion &B, + CheckerContext &C) const; + void checkAsyncExecutedBlockCaptures(const BlockDataRegion &B, + CheckerContext &C) const; + void EmitStackError(CheckerContext &C, const MemRegion *R, + const Expr *RetE) const; + bool isSemaphoreCaptured(const BlockDecl &B) const; + static SourceRange genName(raw_ostream &os, const MemRegion *R, + ASTContext &Ctx); + static SmallVector<const MemRegion *, 4> + getCapturedStackRegions(const 
BlockDataRegion &B, CheckerContext &C); + static bool isArcManagedBlock(const MemRegion *R, CheckerContext &C); + static bool isNotInCurrentFrame(const MemRegion *R, CheckerContext &C); +}; +} // namespace + +SourceRange StackAddrEscapeChecker::genName(raw_ostream &os, const MemRegion *R, + ASTContext &Ctx) { + // Get the base region, stripping away fields and elements. + R = R->getBaseRegion(); + SourceManager &SM = Ctx.getSourceManager(); + SourceRange range; + os << "Address of "; + + // Check if the region is a compound literal. + if (const auto *CR = dyn_cast<CompoundLiteralRegion>(R)) { + const CompoundLiteralExpr *CL = CR->getLiteralExpr(); + os << "stack memory associated with a compound literal " + "declared on line " + << SM.getExpansionLineNumber(CL->getBeginLoc()) << " returned to caller"; + range = CL->getSourceRange(); + } else if (const auto *AR = dyn_cast<AllocaRegion>(R)) { + const Expr *ARE = AR->getExpr(); + SourceLocation L = ARE->getBeginLoc(); + range = ARE->getSourceRange(); + os << "stack memory allocated by call to alloca() on line " + << SM.getExpansionLineNumber(L); + } else if (const auto *BR = dyn_cast<BlockDataRegion>(R)) { + const BlockDecl *BD = BR->getCodeRegion()->getDecl(); + SourceLocation L = BD->getBeginLoc(); + range = BD->getSourceRange(); + os << "stack-allocated block declared on line " + << SM.getExpansionLineNumber(L); + } else if (const auto *VR = dyn_cast<VarRegion>(R)) { + os << "stack memory associated with local variable '" << VR->getString() + << '\''; + range = VR->getDecl()->getSourceRange(); + } else if (const auto *TOR = dyn_cast<CXXTempObjectRegion>(R)) { + QualType Ty = TOR->getValueType().getLocalUnqualifiedType(); + os << "stack memory associated with temporary object of type '"; + Ty.print(os, Ctx.getPrintingPolicy()); + os << "'"; + range = TOR->getExpr()->getSourceRange(); + } else { + llvm_unreachable("Invalid region in ReturnStackAddressChecker."); + } + + return range; +} + +bool 
StackAddrEscapeChecker::isArcManagedBlock(const MemRegion *R, + CheckerContext &C) { + assert(R && "MemRegion should not be null"); + return C.getASTContext().getLangOpts().ObjCAutoRefCount && + isa<BlockDataRegion>(R); +} + +bool StackAddrEscapeChecker::isNotInCurrentFrame(const MemRegion *R, + CheckerContext &C) { + const StackSpaceRegion *S = cast<StackSpaceRegion>(R->getMemorySpace()); + return S->getStackFrame() != C.getStackFrame(); +} + +bool StackAddrEscapeChecker::isSemaphoreCaptured(const BlockDecl &B) const { + if (!dispatch_semaphore_tII) + dispatch_semaphore_tII = &B.getASTContext().Idents.get("dispatch_semaphore_t"); + for (const auto &C : B.captures()) { + const auto *T = C.getVariable()->getType()->getAs<TypedefType>(); + if (T && T->getDecl()->getIdentifier() == dispatch_semaphore_tII) + return true; + } + return false; +} + +SmallVector<const MemRegion *, 4> +StackAddrEscapeChecker::getCapturedStackRegions(const BlockDataRegion &B, + CheckerContext &C) { + SmallVector<const MemRegion *, 4> Regions; + BlockDataRegion::referenced_vars_iterator I = B.referenced_vars_begin(); + BlockDataRegion::referenced_vars_iterator E = B.referenced_vars_end(); + for (; I != E; ++I) { + SVal Val = C.getState()->getSVal(I.getCapturedRegion()); + const MemRegion *Region = Val.getAsRegion(); + if (Region && isa<StackSpaceRegion>(Region->getMemorySpace())) + Regions.push_back(Region); + } + return Regions; +} + +void StackAddrEscapeChecker::EmitStackError(CheckerContext &C, + const MemRegion *R, + const Expr *RetE) const { + ExplodedNode *N = C.generateNonFatalErrorNode(); + if (!N) + return; + if (!BT_returnstack) + BT_returnstack = std::make_unique<BuiltinBug>( + this, "Return of address to stack-allocated memory"); + // Generate a report for this bug. 
+ SmallString<128> buf; + llvm::raw_svector_ostream os(buf); + SourceRange range = genName(os, R, C.getASTContext()); + os << " returned to caller"; + auto report = + std::make_unique<PathSensitiveBugReport>(*BT_returnstack, os.str(), N); + report->addRange(RetE->getSourceRange()); + if (range.isValid()) + report->addRange(range); + C.emitReport(std::move(report)); +} + +void StackAddrEscapeChecker::checkAsyncExecutedBlockCaptures( + const BlockDataRegion &B, CheckerContext &C) const { + // There is a not-too-uncommon idiom + // where a block passed to dispatch_async captures a semaphore + // and then the thread (which called dispatch_async) is blocked on waiting + // for the completion of the execution of the block + // via dispatch_semaphore_wait. To avoid false-positives (for now) + // we ignore all the blocks which have captured + // a variable of the type "dispatch_semaphore_t". + if (isSemaphoreCaptured(*B.getDecl())) + return; + for (const MemRegion *Region : getCapturedStackRegions(B, C)) { + // The block passed to dispatch_async may capture another block + // created on the stack. However, there is no leak in this situation, + // no matter if ARC or no ARC is enabled: + // dispatch_async copies the passed "outer" block (via Block_copy) + // and if the block has captured another "inner" block, + // the "inner" block will be copied as well. 
+ if (isa<BlockDataRegion>(Region)) + continue; + ExplodedNode *N = C.generateNonFatalErrorNode(); + if (!N) + continue; + if (!BT_capturedstackasync) + BT_capturedstackasync = std::make_unique<BuiltinBug>( + this, "Address of stack-allocated memory is captured"); + SmallString<128> Buf; + llvm::raw_svector_ostream Out(Buf); + SourceRange Range = genName(Out, Region, C.getASTContext()); + Out << " is captured by an asynchronously-executed block"; + auto Report = std::make_unique<PathSensitiveBugReport>( + *BT_capturedstackasync, Out.str(), N); + if (Range.isValid()) + Report->addRange(Range); + C.emitReport(std::move(Report)); + } +} + +void StackAddrEscapeChecker::checkReturnedBlockCaptures( + const BlockDataRegion &B, CheckerContext &C) const { + for (const MemRegion *Region : getCapturedStackRegions(B, C)) { + if (isArcManagedBlock(Region, C) || isNotInCurrentFrame(Region, C)) + continue; + ExplodedNode *N = C.generateNonFatalErrorNode(); + if (!N) + continue; + if (!BT_capturedstackret) + BT_capturedstackret = std::make_unique<BuiltinBug>( + this, "Address of stack-allocated memory is captured"); + SmallString<128> Buf; + llvm::raw_svector_ostream Out(Buf); + SourceRange Range = genName(Out, Region, C.getASTContext()); + Out << " is captured by a returned block"; + auto Report = std::make_unique<PathSensitiveBugReport>(*BT_capturedstackret, + Out.str(), N); + if (Range.isValid()) + Report->addRange(Range); + C.emitReport(std::move(Report)); + } +} + +void StackAddrEscapeChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + if (!ChecksEnabled[CK_StackAddrAsyncEscapeChecker]) + return; + if (!Call.isGlobalCFunction("dispatch_after") && + !Call.isGlobalCFunction("dispatch_async")) + return; + for (unsigned Idx = 0, NumArgs = Call.getNumArgs(); Idx < NumArgs; ++Idx) { + if (const BlockDataRegion *B = dyn_cast_or_null<BlockDataRegion>( + Call.getArgSVal(Idx).getAsRegion())) + checkAsyncExecutedBlockCaptures(*B, C); + } +} + +void 
StackAddrEscapeChecker::checkPreStmt(const ReturnStmt *RS, + CheckerContext &C) const { + if (!ChecksEnabled[CK_StackAddrEscapeChecker]) + return; + + const Expr *RetE = RS->getRetValue(); + if (!RetE) + return; + RetE = RetE->IgnoreParens(); + + SVal V = C.getSVal(RetE); + const MemRegion *R = V.getAsRegion(); + if (!R) + return; + + if (const BlockDataRegion *B = dyn_cast<BlockDataRegion>(R)) + checkReturnedBlockCaptures(*B, C); + + if (!isa<StackSpaceRegion>(R->getMemorySpace()) || + isNotInCurrentFrame(R, C) || isArcManagedBlock(R, C)) + return; + + // Returning a record by value is fine. (In this case, the returned + // expression will be a copy-constructor, possibly wrapped in an + // ExprWithCleanups node.) + if (const ExprWithCleanups *Cleanup = dyn_cast<ExprWithCleanups>(RetE)) + RetE = Cleanup->getSubExpr(); + if (isa<CXXConstructExpr>(RetE) && RetE->getType()->isRecordType()) + return; + + // The CK_CopyAndAutoreleaseBlockObject cast causes the block to be copied + // so the stack address is not escaping here. + if (auto *ICE = dyn_cast<ImplicitCastExpr>(RetE)) { + if (isa<BlockDataRegion>(R) && + ICE->getCastKind() == CK_CopyAndAutoreleaseBlockObject) { + return; + } + } + + EmitStackError(C, R, RetE); +} + +void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, + CheckerContext &Ctx) const { + if (!ChecksEnabled[CK_StackAddrEscapeChecker]) + return; + + ProgramStateRef State = Ctx.getState(); + + // Iterate over all bindings to global variables and see if it contains + // a memory region in the stack space. 
+ class CallBack : public StoreManager::BindingsHandler { + private: + CheckerContext &Ctx; + const StackFrameContext *CurSFC; + + public: + SmallVector<std::pair<const MemRegion *, const MemRegion *>, 10> V; + + CallBack(CheckerContext &CC) : Ctx(CC), CurSFC(CC.getStackFrame()) {} + + bool HandleBinding(StoreManager &SMgr, Store S, const MemRegion *Region, + SVal Val) override { + + if (!isa<GlobalsSpaceRegion>(Region->getMemorySpace())) + return true; + const MemRegion *VR = Val.getAsRegion(); + if (VR && isa<StackSpaceRegion>(VR->getMemorySpace()) && + !isArcManagedBlock(VR, Ctx) && !isNotInCurrentFrame(VR, Ctx)) + V.emplace_back(Region, VR); + return true; + } + }; + + CallBack Cb(Ctx); + State->getStateManager().getStoreManager().iterBindings(State->getStore(), + Cb); + + if (Cb.V.empty()) + return; + + // Generate an error node. + ExplodedNode *N = Ctx.generateNonFatalErrorNode(State); + if (!N) + return; + + if (!BT_stackleak) + BT_stackleak = std::make_unique<BuiltinBug>( + this, "Stack address stored into global variable", + "Stack address was saved into a global variable. " + "This is dangerous because the address will become " + "invalid after returning from the function"); + + for (const auto &P : Cb.V) { + // Generate a report for this bug. + SmallString<128> Buf; + llvm::raw_svector_ostream Out(Buf); + SourceRange Range = genName(Out, P.second, Ctx.getASTContext()); + Out << " is still referred to by the "; + if (isa<StaticGlobalSpaceRegion>(P.first->getMemorySpace())) + Out << "static"; + else + Out << "global"; + Out << " variable '"; + const VarRegion *VR = cast<VarRegion>(P.first->getBaseRegion()); + Out << *VR->getDecl() + << "' upon returning to the caller. 
This will be a dangling reference"; + auto Report = + std::make_unique<PathSensitiveBugReport>(*BT_stackleak, Out.str(), N); + if (Range.isValid()) + Report->addRange(Range); + + Ctx.emitReport(std::move(Report)); + } +} + +void ento::registerStackAddrEscapeBase(CheckerManager &mgr) { + mgr.registerChecker<StackAddrEscapeChecker>(); +} + +bool ento::shouldRegisterStackAddrEscapeBase(const LangOptions &LO) { + return true; +} + +#define REGISTER_CHECKER(name) \ + void ento::register##name(CheckerManager &Mgr) { \ + StackAddrEscapeChecker *Chk = \ + Mgr.getChecker<StackAddrEscapeChecker>(); \ + Chk->ChecksEnabled[StackAddrEscapeChecker::CK_##name] = true; \ + } \ + \ + bool ento::shouldRegister##name(const LangOptions &LO) { \ + return true; \ + } + +REGISTER_CHECKER(StackAddrEscapeChecker) +REGISTER_CHECKER(StackAddrAsyncEscapeChecker) diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp new file mode 100644 index 000000000000..2cdee8da375e --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -0,0 +1,1065 @@ +//=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker improves modeling of a few simple library functions. +// It does not generate warnings. +// +// This checker provides a specification format - `FunctionSummaryTy' - and +// contains descriptions of some library functions in this format. Each +// specification contains a list of branches for splitting the program state +// upon call, and range constraints on argument and return-value symbols that +// are satisfied on each branch. 
This spec can be expanded to include more +// items, like external effects of the function. +// +// The main difference between this approach and the body farms technique is +// in more explicit control over how many branches are produced. For example, +// consider standard C function `ispunct(int x)', which returns a non-zero value +// iff `x' is a punctuation character, that is, when `x' is in range +// ['!', '/'] U [':', '@'] U ['[', '\`'] U ['{', '~']. +// `FunctionSummaryTy' provides only two branches for this function. However, +// any attempt to describe this range with if-statements in the body farm +// would result in many more branches. Because each branch needs to be analyzed +// independently, this significantly reduces performance. Additionally, +// once we consider a branch on which `x' is in range, say, ['!', '/'], +// we assume that such a branch is an important separate path through the program, +// which may lead to false positives because considering this particular path +// was not consciously intended, and therefore it might have been unreachable. +// +// This checker uses eval::Call for modeling "pure" functions, for which +// their `FunctionSummaryTy' is a precise model. This avoids unnecessary +// invalidation passes. Conflicts with other checkers are unlikely because +// if the function has no other effects, other checkers would probably never +// want to improve upon the modeling done by this checker. +// +// Non-"pure" functions, for which only partial improvement over the default +// behavior is expected, are modeled via check::PostCall, non-intrusively. 
+// +// The following standard C functions are currently supported: +// +// fgetc getline isdigit isupper +// fread isalnum isgraph isxdigit +// fwrite isalpha islower read +// getc isascii isprint write +// getchar isblank ispunct +// getdelim iscntrl isspace +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace clang::ento; + +namespace { +class StdLibraryFunctionsChecker : public Checker<check::PostCall, eval::Call> { + /// Below is a series of typedefs necessary to define function specs. + /// We avoid nesting types here because each additional qualifier + /// would need to be repeated in every function spec. + struct FunctionSummaryTy; + + /// Specify how much the analyzer engine should entrust modeling this function + /// to us. If it doesn't, it performs additional invalidations. + enum InvalidationKindTy { NoEvalCall, EvalCallAsPure }; + + /// A pair of ValueRangeKindTy and IntRangeVectorTy would describe a range + /// imposed on a particular argument or return value symbol. + /// + /// Given a range, should the argument stay inside or outside this range? + /// The special `ComparesToArgument' value indicates that we should + /// impose a constraint that involves other argument or return value symbols. + enum ValueRangeKindTy { OutOfRange, WithinRange, ComparesToArgument }; + + // The universal integral type to use in value range descriptions. + // Unsigned to make sure overflows are well-defined. + typedef uint64_t RangeIntTy; + + /// Normally, describes a single range constraint, e.g. {{0, 1}, {3, 4}} is + /// a non-negative integer, which is less than 5 and not equal to 2. 
For + /// `ComparesToArgument', holds information about how exactly to compare to + /// the argument. + typedef std::vector<std::pair<RangeIntTy, RangeIntTy>> IntRangeVectorTy; + + /// A reference to an argument or return value by its number. + /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but + /// obviously uint32_t should be enough for all practical purposes. + typedef uint32_t ArgNoTy; + static const ArgNoTy Ret = std::numeric_limits<ArgNoTy>::max(); + + /// Encapsulates a single range on a single symbol within a branch. + class ValueRange { + ArgNoTy ArgNo; // Argument to which we apply the range. + ValueRangeKindTy Kind; // Kind of range definition. + IntRangeVectorTy Args; // Polymorphic arguments. + + public: + ValueRange(ArgNoTy ArgNo, ValueRangeKindTy Kind, + const IntRangeVectorTy &Args) + : ArgNo(ArgNo), Kind(Kind), Args(Args) {} + + ArgNoTy getArgNo() const { return ArgNo; } + ValueRangeKindTy getKind() const { return Kind; } + + BinaryOperator::Opcode getOpcode() const { + assert(Kind == ComparesToArgument); + assert(Args.size() == 1); + BinaryOperator::Opcode Op = + static_cast<BinaryOperator::Opcode>(Args[0].first); + assert(BinaryOperator::isComparisonOp(Op) && + "Only comparison ops are supported for ComparesToArgument"); + return Op; + } + + ArgNoTy getOtherArgNo() const { + assert(Kind == ComparesToArgument); + assert(Args.size() == 1); + return static_cast<ArgNoTy>(Args[0].second); + } + + const IntRangeVectorTy &getRanges() const { + assert(Kind != ComparesToArgument); + return Args; + } + + // We avoid creating a virtual apply() method because + // it makes initializer lists harder to write. 
+ private: + ProgramStateRef + applyAsOutOfRange(ProgramStateRef State, const CallEvent &Call, + const FunctionSummaryTy &Summary) const; + ProgramStateRef + applyAsWithinRange(ProgramStateRef State, const CallEvent &Call, + const FunctionSummaryTy &Summary) const; + ProgramStateRef + applyAsComparesToArgument(ProgramStateRef State, const CallEvent &Call, + const FunctionSummaryTy &Summary) const; + + public: + ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, + const FunctionSummaryTy &Summary) const { + switch (Kind) { + case OutOfRange: + return applyAsOutOfRange(State, Call, Summary); + case WithinRange: + return applyAsWithinRange(State, Call, Summary); + case ComparesToArgument: + return applyAsComparesToArgument(State, Call, Summary); + } + llvm_unreachable("Unknown ValueRange kind!"); + } + }; + + /// The complete list of ranges that defines a single branch. + typedef std::vector<ValueRange> ValueRangeSet; + + /// Includes information about function prototype (which is necessary to + /// ensure we're modeling the right function and casting values properly), + /// approach to invalidation, and a list of branches - essentially, a list + /// of list of ranges - essentially, a list of lists of lists of segments. + struct FunctionSummaryTy { + const std::vector<QualType> ArgTypes; + const QualType RetType; + const InvalidationKindTy InvalidationKind; + const std::vector<ValueRangeSet> Ranges; + + private: + static void assertTypeSuitableForSummary(QualType T) { + assert(!T->isVoidType() && + "We should have had no significant void types in the spec"); + assert(T.isCanonical() && + "We should only have canonical types in the spec"); + // FIXME: lift this assert (but not the ones above!) + assert(T->isIntegralOrEnumerationType() && + "We only support integral ranges in the spec"); + } + + public: + QualType getArgType(ArgNoTy ArgNo) const { + QualType T = (ArgNo == Ret) ? 
RetType : ArgTypes[ArgNo]; + assertTypeSuitableForSummary(T); + return T; + } + + /// Try our best to figure out if the call expression is the call of + /// *the* library function to which this specification applies. + bool matchesCall(const CallExpr *CE) const; + }; + + // The same function (as in, function identifier) may have different + // summaries assigned to it, with different argument and return value types. + // We call these "variants" of the function. This can be useful for handling + // C++ function overloads, and also it can be used when the same function + // may have different definitions on different platforms. + typedef std::vector<FunctionSummaryTy> FunctionVariantsTy; + + // The map of all functions supported by the checker. It is initialized + // lazily, and it doesn't change after initialization. + typedef llvm::StringMap<FunctionVariantsTy> FunctionSummaryMapTy; + mutable FunctionSummaryMapTy FunctionSummaryMap; + + // Auxiliary functions to support ArgNoTy within all structures + // in a unified manner. + static QualType getArgType(const FunctionSummaryTy &Summary, ArgNoTy ArgNo) { + return Summary.getArgType(ArgNo); + } + static QualType getArgType(const CallEvent &Call, ArgNoTy ArgNo) { + return ArgNo == Ret ? Call.getResultType().getCanonicalType() + : Call.getArgExpr(ArgNo)->getType().getCanonicalType(); + } + static QualType getArgType(const CallExpr *CE, ArgNoTy ArgNo) { + return ArgNo == Ret ? CE->getType().getCanonicalType() + : CE->getArg(ArgNo)->getType().getCanonicalType(); + } + static SVal getArgSVal(const CallEvent &Call, ArgNoTy ArgNo) { + return ArgNo == Ret ? 
Call.getReturnValue() : Call.getArgSVal(ArgNo); + } + +public: + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + bool evalCall(const CallEvent &Call, CheckerContext &C) const; + +private: + Optional<FunctionSummaryTy> findFunctionSummary(const FunctionDecl *FD, + const CallExpr *CE, + CheckerContext &C) const; + + void initFunctionSummaries(BasicValueFactory &BVF) const; +}; +} // end of anonymous namespace + +/// Applies this range as an out-of-range constraint: every listed segment is +/// passed to assumeInclusiveRange() with `false' as its last argument. +/// Returns the resulting state, which is null once an assumption fails. +ProgramStateRef StdLibraryFunctionsChecker::ValueRange::applyAsOutOfRange( + ProgramStateRef State, const CallEvent &Call, + const FunctionSummaryTy &Summary) const { + + ProgramStateManager &StateMgr = State->getStateManager(); + BasicValueFactory &Values = + StateMgr.getSValBuilder().getBasicValueFactory(); + ConstraintManager &Constraints = StateMgr.getConstraintManager(); + const QualType ArgT = getArgType(Summary, getArgNo()); + + // Values that are not NonLoc are left unconstrained. + const auto Target = getArgSVal(Call, getArgNo()).getAs<NonLoc>(); + if (!Target) + return State; + + for (const auto &Segment : getRanges()) { + const llvm::APSInt &Lo = Values.getValue(Segment.first, ArgT); + const llvm::APSInt &Hi = Values.getValue(Segment.second, ArgT); + assert(Lo <= Hi); + State = Constraints.assumeInclusiveRange(State, *Target, Lo, Hi, false); + // No state left: further segments cannot be applied. + if (!State) + break; + } + + return State; +} + +ProgramStateRef +StdLibraryFunctionsChecker::ValueRange::applyAsWithinRange( + ProgramStateRef State, const CallEvent &Call, + const FunctionSummaryTy &Summary) const { + + ProgramStateManager &Mgr = State->getStateManager(); + SValBuilder &SVB = Mgr.getSValBuilder(); + BasicValueFactory &BVF = SVB.getBasicValueFactory(); + ConstraintManager &CM = Mgr.getConstraintManager(); + QualType T = getArgType(Summary, getArgNo()); + SVal V = getArgSVal(Call, getArgNo()); + + // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R". + // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary, + // and then cut away all holes in R one by one.
+ if (auto N = V.getAs<NonLoc>()) { + const IntRangeVectorTy &R = getRanges(); + size_t E = R.size(); + // R[0] and R[E - 1] are read below, so an empty range list would be an + // out-of-bounds access; treat it as a malformed spec. + assert(E != 0 && "WithinRange requires at least one range"); + + const llvm::APSInt &MinusInf = BVF.getMinValue(T); + const llvm::APSInt &PlusInf = BVF.getMaxValue(T); + + // Cut off everything below the first segment. + // NOTE(review): when the first segment starts at the minimum of T, the + // decrement presumably wraps around to PlusInf, which is why that value + // is taken to mean "nothing to cut" - confirm against BVF.getValue(). + const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T); + if (Left != PlusInf) { + assert(MinusInf <= Left); + State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false); + if (!State) + return nullptr; + } + + // Likewise, cut off everything above the last segment. + const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T); + if (Right != MinusInf) { + assert(Right <= PlusInf); + State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false); + if (!State) + return nullptr; + } + + // Cut away the hole between each pair of adjacent segments. + for (size_t I = 1; I != E; ++I) { + const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T); + const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T); + assert(Min <= Max); + State = CM.assumeInclusiveRange(State, *N, Min, Max, false); + if (!State) + return nullptr; + } + } + + return State; +} + +ProgramStateRef +StdLibraryFunctionsChecker::ValueRange::applyAsComparesToArgument( + ProgramStateRef State, const CallEvent &Call, + const FunctionSummaryTy &Summary) const { + + ProgramStateManager &Mgr = State->getStateManager(); + SValBuilder &SVB = Mgr.getSValBuilder(); + QualType CondT = SVB.getConditionType(); + QualType T = getArgType(Summary, getArgNo()); + SVal V = getArgSVal(Call, getArgNo()); + + BinaryOperator::Opcode Op = getOpcode(); + ArgNoTy OtherArg = getOtherArgNo(); + SVal OtherV = getArgSVal(Call, OtherArg); + QualType OtherT = getArgType(Call, OtherArg); + // Note: we avoid integral promotion for comparison.
+ OtherV = SVB.evalCast(OtherV, T, OtherT); + if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT) + .getAs<DefinedOrUnknownSVal>()) + State = State->assume(*CompV, true); + return State; +} + +/// Post-call hook: when the callee has a matching summary, adds one transition +/// for every summary branch whose constraints all apply and change the state. +void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + const auto *Callee = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); + if (!Callee) + return; + + const auto *Origin = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); + if (!Origin) + return; + + const Optional<FunctionSummaryTy> Found = + findFunctionSummary(Callee, Origin, C); + if (!Found) + return; + + const FunctionSummaryTy &Summary = *Found; + ProgramStateRef Entry = C.getState(); + + // Every range set is an independent branch that starts from the entry state. + for (const ValueRangeSet &Branch : Summary.Ranges) { + ProgramStateRef BranchState = Entry; + for (const ValueRange &Constraint : Branch) { + BranchState = Constraint.apply(BranchState, Call, Summary); + // Stop at the first constraint that yields no state. + if (!BranchState) + break; + } + + // Nothing to add for a branch without a state or without new information. + if (!BranchState || BranchState == Entry) + continue; + C.addTransition(BranchState); + } +} + +bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call, + CheckerContext &C) const { + const auto *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); + if (!FD) + return false; + + const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); + if (!CE) + return false; + + Optional<FunctionSummaryTy> FoundSummary = findFunctionSummary(FD, CE, C); + if (!FoundSummary) + return false; + + const FunctionSummaryTy &Summary = *FoundSummary; + switch (Summary.InvalidationKind) { + case EvalCallAsPure: { + ProgramStateRef State = C.getState(); + const LocationContext *LC = C.getLocationContext(); + SVal V = C.getSValBuilder().conjureSymbolVal( + CE, LC, CE->getType().getCanonicalType(), C.blockCount()); + State = State->BindExpr(CE, LC, V); + C.addTransition(State); + return true; + } + case NoEvalCall: + // Summary tells us to avoid performing eval::Call. The function is possibly + // evaluated by another checker, or evaluated conservatively.
+ return false; + } + llvm_unreachable("Unknown invalidation kind!"); +} + +/// Returns true when the call expression has the same number of arguments as +/// this summary and every type the summary cares about (non-null return and +/// argument types) equals the canonical type found in the call. +bool StdLibraryFunctionsChecker::FunctionSummaryTy::matchesCall( + const CallExpr *CE) const { + // The arity has to agree exactly. + const size_t NumFormals = ArgTypes.size(); + if (CE->getNumArgs() != NumFormals) + return false; + + // A null return type in the spec is not compared. + if (!RetType.isNull()) { + if (RetType != CE->getType().getCanonicalType()) + return false; + } + + // Compare every significant (non-null) formal type with the actual one. + for (size_t ArgIdx = 0; ArgIdx != NumFormals; ++ArgIdx) { + const QualType Expected = ArgTypes[ArgIdx]; + if (!Expected.isNull()) { + assertTypeSuitableForSummary(Expected); + + const QualType Actual = + StdLibraryFunctionsChecker::getArgType(CE, ArgIdx); + assert(Actual.isCanonical()); + if (Actual != Expected) + return false; + } + } + + return true; +} + +Optional<StdLibraryFunctionsChecker::FunctionSummaryTy> +StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD, + const CallExpr *CE, + CheckerContext &C) const { + // Note: we cannot always obtain FD from CE + // (eg. virtual call, or call by pointer). + assert(CE); + + if (!FD) + return None; + + SValBuilder &SVB = C.getSValBuilder(); + BasicValueFactory &BVF = SVB.getBasicValueFactory(); + initFunctionSummaries(BVF); + + IdentifierInfo *II = FD->getIdentifier(); + if (!II) + return None; + StringRef Name = II->getName(); + if (Name.empty() || !C.isCLibraryFunction(FD, Name)) + return None; + + auto FSMI = FunctionSummaryMap.find(Name); + if (FSMI == FunctionSummaryMap.end()) + return None; + + // Verify that function signature matches the spec in advance. + // Otherwise we might be modeling the wrong function. + // Strict checking is important because we will be conducting + // very integral-type-sensitive operations on arguments and + // return values.
+ const FunctionVariantsTy &SpecVariants = FSMI->second; + for (const FunctionSummaryTy &Spec : SpecVariants) + if (Spec.matchesCall(CE)) + return Spec; + + return None; +} + +void StdLibraryFunctionsChecker::initFunctionSummaries( + BasicValueFactory &BVF) const { + if (!FunctionSummaryMap.empty()) + return; + + ASTContext &ACtx = BVF.getContext(); + + // These types are useful for writing specifications quickly, + // New specifications should probably introduce more types. + // Some types are hard to obtain from the AST, eg. "ssize_t". + // In such cases it should be possible to provide multiple variants + // of function summary for common cases (eg. ssize_t could be int or long + // or long long, so three summary variants would be enough). + // Of course, function variants are also useful for C++ overloads. + QualType Irrelevant; // A placeholder, whenever we do not care about the type. + QualType IntTy = ACtx.IntTy; + QualType LongTy = ACtx.LongTy; + QualType LongLongTy = ACtx.LongLongTy; + QualType SizeTy = ACtx.getSizeType(); + + RangeIntTy IntMax = BVF.getMaxValue(IntTy).getLimitedValue(); + RangeIntTy LongMax = BVF.getMaxValue(LongTy).getLimitedValue(); + RangeIntTy LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue(); + + // We are finally ready to define specifications for all supported functions. + // + // The signature needs to have the correct number of arguments. + // However, we insert `Irrelevant' when the type is insignificant. + // + // Argument ranges should always cover all variants. If return value + // is completely unknown, omit it from the respective range set. + // + // All types in the spec need to be canonical. + // + // Every item in the list of range sets represents a particular + // execution path the analyzer would need to explore once + // the call is modeled - a new program state is constructed + // for every range set, and each range line in the range set + // corresponds to a specific constraint within this state. 
+ // + // Upon comparing to another argument, the other argument is cast + // to the current argument's type. This avoids proper promotion but + // seems useful. For example, read() receives a size_t argument, + // and its return value, which is of type ssize_t, cannot be greater + // than this argument. If we made a promotion, and the size argument + // is equal to, say, 10, then we'd impose a range of [0, 10] on the + // return value, however the correct range is [-1, 10]. + // + // Please update the list of functions in the header after editing! + // + // The format is as follows: + // + //{ "function name", + // { spec: + // { argument types list, ... }, + // return type, purity, { range set list: + // { range list: + // { argument index, within or out of, {{from, to}, ...} }, + // { argument index, compares to argument, {{how, which}} }, + // ... + // } + // } + // } + //} + +#define SUMMARY_WITH_VARIANTS(identifier) {#identifier, { +#define END_SUMMARY_WITH_VARIANTS }}, +#define VARIANT(argument_types, return_type, invalidation_approach) \ + { argument_types, return_type, invalidation_approach, { +#define END_VARIANT } }, +#define SUMMARY(identifier, argument_types, return_type, \ + invalidation_approach) \ + { #identifier, { { argument_types, return_type, invalidation_approach, { +#define END_SUMMARY } } } }, +#define ARGUMENT_TYPES(...) { __VA_ARGS__ } +#define RETURN_TYPE(x) x +#define INVALIDATION_APPROACH(x) x +#define CASE { +#define END_CASE }, +#define ARGUMENT_CONDITION(argument_number, condition_kind) \ + { argument_number, condition_kind, { +#define END_ARGUMENT_CONDITION }}, +#define RETURN_VALUE_CONDITION(condition_kind) \ + { Ret, condition_kind, { +#define END_RETURN_VALUE_CONDITION }}, +#define ARG_NO(x) x##U +#define RANGE(x, y) { x, y }, +#define SINGLE_VALUE(x) RANGE(x, x) +// Note: despite its name, IS_LESS_THAN expands to BO_LE, i.e. the value is +// constrained to be less than or equal to the given argument. +#define IS_LESS_THAN(arg) { BO_LE, arg } + + FunctionSummaryMap = { + // The isascii() family of functions.
+ SUMMARY(isalnum, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE // Boils down to isupper() or islower() or isdigit() + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE('0', '9') + RANGE('A', 'Z') + RANGE('a', 'z') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE // The locale-specific range. + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE(128, 255) + END_ARGUMENT_CONDITION + // No post-condition. We are completely unaware of + // locale-specific return values. + END_CASE + CASE + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE('0', '9') + RANGE('A', 'Z') + RANGE('a', 'z') + RANGE(128, 255) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(isalpha, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE // isupper() or islower(). Note that 'Z' is less than 'a'. + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE('A', 'Z') + RANGE('a', 'z') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE // The locale-specific range. + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE(128, 255) + END_ARGUMENT_CONDITION + END_CASE + CASE // Other. + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE('A', 'Z') + RANGE('a', 'z') + RANGE(128, 255) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(isascii, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE // Is ASCII. 
+ ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE(0, 127) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE(0, 127) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(isblank, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + SINGLE_VALUE('\t') + SINGLE_VALUE(' ') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + SINGLE_VALUE('\t') + SINGLE_VALUE(' ') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(iscntrl, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE // 0..31 or 127. Note that 32 is the space character, which is + // not a control character, so the range must stop at 31. + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE(0, 31) + SINGLE_VALUE(127) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE // Everything else, including the space character (32). + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE(0, 31) + SINGLE_VALUE(127) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(isdigit, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE // Is a digit.
+ ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE('0', '9') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE('0', '9') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(isgraph, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE(33, 126) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE(33, 126) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(islower, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE // Is certainly lowercase. + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE('a', 'z') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE // Is ascii but not lowercase. + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE(0, 127) + END_ARGUMENT_CONDITION + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE('a', 'z') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE // The locale-specific range. + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE(128, 255) + END_ARGUMENT_CONDITION + END_CASE + CASE // Is not an unsigned char. 
+ ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE(0, 255) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(isprint, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE(32, 126) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE(32, 126) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(ispunct, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE('!', '/') + RANGE(':', '@') + RANGE('[', '`') + RANGE('{', '~') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE('!', '/') + RANGE(':', '@') + RANGE('[', '`') + RANGE('{', '~') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(isspace, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE // Space, '\f', '\n', '\r', '\t', '\v'. + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE(9, 13) + SINGLE_VALUE(' ') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE // The locale-specific range. 
+ ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE(128, 255) + END_ARGUMENT_CONDITION + END_CASE + CASE + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE(9, 13) + SINGLE_VALUE(' ') + RANGE(128, 255) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(isupper, ARGUMENT_TYPES(IntTy), RETURN_TYPE (IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE // Is certainly uppercase. + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE('A', 'Z') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE // The locale-specific range. + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE(128, 255) + END_ARGUMENT_CONDITION + END_CASE + CASE // Other. + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE('A', 'Z') RANGE(128, 255) + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(isxdigit, ARGUMENT_TYPES(IntTy), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(EvalCallAsPure)) + CASE + ARGUMENT_CONDITION(ARG_NO(0), WithinRange) + RANGE('0', '9') + RANGE('A', 'F') + RANGE('a', 'f') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(OutOfRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + CASE + ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) + RANGE('0', '9') + RANGE('A', 'F') + RANGE('a', 'f') + END_ARGUMENT_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(0) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + + // The getc() family of functions that returns either a char or an EOF. + SUMMARY(getc, ARGUMENT_TYPES(Irrelevant), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(NoEvalCall)) + CASE // FIXME: EOF is assumed to be defined as -1. 
+ RETURN_VALUE_CONDITION(WithinRange) + RANGE(-1, 255) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(fgetc, ARGUMENT_TYPES(Irrelevant), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(NoEvalCall)) + CASE // FIXME: EOF is assumed to be defined as -1. + RETURN_VALUE_CONDITION(WithinRange) + RANGE(-1, 255) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(getchar, ARGUMENT_TYPES(), RETURN_TYPE(IntTy), + INVALIDATION_APPROACH(NoEvalCall)) + CASE // FIXME: EOF is assumed to be defined as -1. + RETURN_VALUE_CONDITION(WithinRange) + RANGE(-1, 255) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + + // read()-like functions that never return more than buffer size. + // We are not sure how ssize_t is defined on every platform, so we provide + // three variants that should cover common cases. + SUMMARY_WITH_VARIANTS(read) + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, SizeTy), + RETURN_TYPE(IntTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(ComparesToArgument) + IS_LESS_THAN(ARG_NO(2)) + END_RETURN_VALUE_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + RANGE(-1, IntMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, SizeTy), + RETURN_TYPE(LongTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(ComparesToArgument) + IS_LESS_THAN(ARG_NO(2)) + END_RETURN_VALUE_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + RANGE(-1, LongMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, SizeTy), + RETURN_TYPE(LongLongTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(ComparesToArgument) + IS_LESS_THAN(ARG_NO(2)) + END_RETURN_VALUE_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + RANGE(-1, LongLongMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + END_SUMMARY_WITH_VARIANTS + SUMMARY_WITH_VARIANTS(write) + // Again, due to elusive nature of ssize_t, we have duplicate + // 
our summaries to cover different variants. + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, SizeTy), + RETURN_TYPE(IntTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(ComparesToArgument) + IS_LESS_THAN(ARG_NO(2)) + END_RETURN_VALUE_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + RANGE(-1, IntMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, SizeTy), + RETURN_TYPE(LongTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(ComparesToArgument) + IS_LESS_THAN(ARG_NO(2)) + END_RETURN_VALUE_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + RANGE(-1, LongMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, SizeTy), + RETURN_TYPE(LongLongTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(ComparesToArgument) + IS_LESS_THAN(ARG_NO(2)) + END_RETURN_VALUE_CONDITION + RETURN_VALUE_CONDITION(WithinRange) + RANGE(-1, LongLongMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + END_SUMMARY_WITH_VARIANTS + SUMMARY(fread, + ARGUMENT_TYPES(Irrelevant, Irrelevant, SizeTy, Irrelevant), + RETURN_TYPE(SizeTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(ComparesToArgument) + IS_LESS_THAN(ARG_NO(2)) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + SUMMARY(fwrite, + ARGUMENT_TYPES(Irrelevant, Irrelevant, SizeTy, Irrelevant), + RETURN_TYPE(SizeTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(ComparesToArgument) + IS_LESS_THAN(ARG_NO(2)) + END_RETURN_VALUE_CONDITION + END_CASE + END_SUMMARY + + // getline()-like functions either fail or read at least the delimiter. 
+ SUMMARY_WITH_VARIANTS(getline) + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, Irrelevant), + RETURN_TYPE(IntTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(-1) + RANGE(1, IntMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, Irrelevant), + RETURN_TYPE(LongTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(-1) + RANGE(1, LongMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, Irrelevant), + RETURN_TYPE(LongLongTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(-1) + RANGE(1, LongLongMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + END_SUMMARY_WITH_VARIANTS + SUMMARY_WITH_VARIANTS(getdelim) + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, Irrelevant, Irrelevant), + RETURN_TYPE(IntTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(-1) + RANGE(1, IntMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, Irrelevant, Irrelevant), + RETURN_TYPE(LongTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(-1) + RANGE(1, LongMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + VARIANT(ARGUMENT_TYPES(Irrelevant, Irrelevant, Irrelevant, Irrelevant), + RETURN_TYPE(LongLongTy), INVALIDATION_APPROACH(NoEvalCall)) + CASE + RETURN_VALUE_CONDITION(WithinRange) + SINGLE_VALUE(-1) + RANGE(1, LongLongMax) + END_RETURN_VALUE_CONDITION + END_CASE + END_VARIANT + END_SUMMARY_WITH_VARIANTS + }; +} + +void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) { + // If this checker grows large enough to support C++, Objective-C, or other + // standard libraries, we could use multiple register...Checker() functions, + // which would register various checkers 
with the help of the same Checker
+  // class, turning on different function summaries.
+  mgr.registerChecker<StdLibraryFunctionsChecker>();
+}
+
+bool ento::shouldRegisterStdCLibraryFunctionsChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
new file mode 100644
index 000000000000..c254408351c8
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -0,0 +1,419 @@
+//===-- StreamChecker.cpp -----------------------------------------*- C++ -*--//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines checkers that model and check stream handling functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+
+/// Value stored in StreamMap for every tracked stream symbol: the lifecycle
+/// kind of the stream and the statement that was recorded when the kind was
+/// set (the call expression, for every state created in this file).
+struct StreamState {
+  enum Kind { Opened, Closed, OpenFailed, Escaped } K;
+  const Stmt *S;
+
+  StreamState(Kind k, const Stmt *s) : K(k), S(s) {}
+
+  bool isOpened() const { return K == Opened; }
+  bool isClosed() const { return K == Closed; }
+  //bool isOpenFailed() const { return K == OpenFailed; }
+  //bool isEscaped() const { return K == Escaped; }
+
+  bool operator==(const StreamState &X) const {
+    return K == X.K && S == X.S;
+  }
+
+  static StreamState getOpened(const Stmt *s) { return StreamState(Opened, s); }
+  static StreamState getClosed(const Stmt *s) { return StreamState(Closed, s); }
+  static StreamState getOpenFailed(const Stmt *s) {
+    return StreamState(OpenFailed, s);
+  }
+  static StreamState getEscaped(const Stmt *s) {
+    return StreamState(Escaped, s);
+  }
+
+  /// Feeds both fields into the folding-set ID; operator== compares the same
+  /// two fields.
+  void Profile(llvm::FoldingSetNodeID &ID) const {
+    ID.AddInteger(K);
+    ID.AddPointer(S);
+  }
+};
+
+/// Evaluates calls to a fixed list of stdio functions (see evalCall) and
+/// reports streams that are still Opened when their symbol dies.
+class StreamChecker : public Checker<eval::Call,
+                                     check::DeadSymbols > {
+  // Identifier handles for the modeled functions; filled in lazily by
+  // evalCall, hence mutable.
+  mutable IdentifierInfo *II_fopen, *II_tmpfile, *II_fclose, *II_fread,
+                 *II_fwrite,
+                 *II_fseek, *II_ftell, *II_rewind, *II_fgetpos, *II_fsetpos,
+                 *II_clearerr, *II_feof, *II_ferror, *II_fileno;
+  // Bug types, each created the first time the corresponding report is made.
+  mutable std::unique_ptr<BuiltinBug> BT_nullfp, BT_illegalwhence,
+      BT_doubleclose, BT_ResourceLeak;
+
+public:
+  StreamChecker()
+    : II_fopen(nullptr), II_tmpfile(nullptr), II_fclose(nullptr),
+      II_fread(nullptr), II_fwrite(nullptr), II_fseek(nullptr),
+      II_ftell(nullptr), II_rewind(nullptr), II_fgetpos(nullptr),
+      II_fsetpos(nullptr), II_clearerr(nullptr), II_feof(nullptr),
+      II_ferror(nullptr), II_fileno(nullptr) {}
+
+  bool evalCall(const CallEvent &Call, CheckerContext &C) const;
+  void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;
+
+private:
+  // One handler per modeled function; each is invoked only from evalCall.
+  void Fopen(CheckerContext &C, const CallExpr *CE) const;
+  void Tmpfile(CheckerContext &C, const CallExpr *CE) const;
+  void Fclose(CheckerContext &C, const CallExpr *CE) const;
+  void Fread(CheckerContext &C, const CallExpr *CE) const;
+  void Fwrite(CheckerContext &C, const CallExpr *CE) const;
+  void Fseek(CheckerContext &C, const CallExpr *CE) const;
+  void Ftell(CheckerContext &C, const CallExpr *CE) const;
+  void Rewind(CheckerContext &C, const CallExpr *CE) const;
+  void Fgetpos(CheckerContext &C, const CallExpr *CE) const;
+  void Fsetpos(CheckerContext &C, const CallExpr *CE) const;
+  void Clearerr(CheckerContext &C, const CallExpr *CE) const;
+  void Feof(CheckerContext &C, const CallExpr *CE) const;
+  void Ferror(CheckerContext &C, const CallExpr *CE) const;
+  void Fileno(CheckerContext &C, const CallExpr *CE) const;
+
+  /// Shared implementation of Fopen and Tmpfile.
+  void OpenFileAux(CheckerContext &C, const CallExpr *CE) const;
+
+  /// Reports a null stream when \p SV can only be null. Returns the state in
+  /// which \p SV is non-null, or null if \p SV is not a DefinedSVal or the
+  /// null-stream path was taken.
+  ProgramStateRef CheckNullStream(SVal SV, ProgramStateRef state,
+                                 CheckerContext &C) const;
+  /// Reports closing an already closed stream; otherwise returns the state
+  /// with the stream marked Closed (or \p state unchanged if the stream is
+  /// not tracked).
+  ProgramStateRef CheckDoubleClose(const CallExpr *CE, ProgramStateRef state,
+                                 CheckerContext &C) const;
+};
+
+} // end anonymous namespace
+
+REGISTER_MAP_WITH_PROGRAMSTATE(StreamMap, SymbolRef, StreamState)
+
+
+/// Dispatches a call to the handler of the stdio function it names.
+///
+/// Returns true when the call was handed to a handler, false when the callee
+/// is not a plain function, has no CallExpr origin, is not in the modeled
+/// list, or passes fewer arguments than its handler reads.
+bool StreamChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
+  const auto *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
+  if (!FD || FD->getKind() != Decl::Function)
+    return false;
+
+  const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
+  if (!CE)
+    return false;
+
+  ASTContext &Ctx = C.getASTContext();
+  if (!II_fopen)
+    II_fopen = &Ctx.Idents.get("fopen");
+  if (!II_tmpfile)
+    II_tmpfile = &Ctx.Idents.get("tmpfile");
+  if (!II_fclose)
+    II_fclose = &Ctx.Idents.get("fclose");
+  if (!II_fread)
+    II_fread = &Ctx.Idents.get("fread");
+  if (!II_fwrite)
+    II_fwrite = &Ctx.Idents.get("fwrite");
+  if (!II_fseek)
+    II_fseek = &Ctx.Idents.get("fseek");
+  if (!II_ftell)
+    II_ftell = &Ctx.Idents.get("ftell");
+  if (!II_rewind)
+    II_rewind = &Ctx.Idents.get("rewind");
+  if (!II_fgetpos)
+    II_fgetpos = &Ctx.Idents.get("fgetpos");
+  if (!II_fsetpos)
+    II_fsetpos = &Ctx.Idents.get("fsetpos");
+  if (!II_clearerr)
+    II_clearerr = &Ctx.Idents.get("clearerr");
+  if (!II_feof)
+    II_feof = &Ctx.Idents.get("feof");
+  if (!II_ferror)
+    II_ferror = &Ctx.Idents.get("ferror");
+  if (!II_fileno)
+    II_fileno = &Ctx.Idents.get("fileno");
+
+  // Every modeled function with the number of leading call arguments its
+  // handler reads through CallExpr::getArg(): fread/fwrite read argument 3,
+  // fseek reads arguments 0 and 2, the open functions read none, and all
+  // others read argument 0.
+  using Handler = void (StreamChecker::*)(CheckerContext &,
+                                          const CallExpr *) const;
+  const struct {
+    const IdentifierInfo *II;
+    unsigned ArgsRead;
+    Handler Eval;
+  } Modeled[] = {
+      {II_fopen, 0, &StreamChecker::Fopen},
+      {II_tmpfile, 0, &StreamChecker::Tmpfile},
+      {II_fclose, 1, &StreamChecker::Fclose},
+      {II_fread, 4, &StreamChecker::Fread},
+      {II_fwrite, 4, &StreamChecker::Fwrite},
+      {II_fseek, 3, &StreamChecker::Fseek},
+      {II_ftell, 1, &StreamChecker::Ftell},
+      {II_rewind, 1, &StreamChecker::Rewind},
+      {II_fgetpos, 1, &StreamChecker::Fgetpos},
+      {II_fsetpos, 1, &StreamChecker::Fsetpos},
+      {II_clearerr, 1, &StreamChecker::Clearerr},
+      {II_feof, 1, &StreamChecker::Feof},
+      {II_ferror, 1, &StreamChecker::Ferror},
+      {II_fileno, 1, &StreamChecker::Fileno},
+  };
+
+  const IdentifierInfo *Callee = FD->getIdentifier();
+  for (const auto &M : Modeled) {
+    if (Callee != M.II)
+      continue;
+
+    // The match is by name only. A same-named function called with fewer
+    // arguments (e.g. one declared without a prototype) must not reach a
+    // handler, because the handler would index past the end of the call's
+    // argument list. Leave such calls to the default evaluation.
+    if (CE->getNumArgs() < M.ArgsRead)
+      return false;
+
+    (this->*M.Eval)(C, CE);
+    return true;
+  }
+
+  return false;
+}
+
+void StreamChecker::Fopen(CheckerContext &C, const CallExpr *CE) const {
+  OpenFileAux(C, CE);
+}
+
+void StreamChecker::Tmpfile(CheckerContext &C, const CallExpr *CE) const {
+  OpenFileAux(C, CE);
+}
+
+/// Binds a fresh conjured symbol as the call's result and forks the state on
+/// whether that result is null, recording Opened / OpenFailed respectively.
+void StreamChecker::OpenFileAux(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  SValBuilder &svalBuilder = C.getSValBuilder();
+  const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
+  DefinedSVal RetVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
+                                                    C.blockCount())
+      .castAs<DefinedSVal>();
+  state = state->BindExpr(CE, C.getLocationContext(), RetVal);
+
+  ConstraintManager &CM = C.getConstraintManager();
+  // Bifurcate the state into two: one with a valid FILE* pointer, the other
+  // with a NULL.
+  ProgramStateRef stateNotNull, stateNull;
+  std::tie(stateNotNull, stateNull) = CM.assumeDual(state, RetVal);
+
+  if (SymbolRef Sym = RetVal.getAsSymbol()) {
+    // if RetVal is not NULL, set the symbol's state to Opened.
+    stateNotNull =
+      stateNotNull->set<StreamMap>(Sym,StreamState::getOpened(CE));
+    stateNull =
+      stateNull->set<StreamMap>(Sym, StreamState::getOpenFailed(CE));
+
+    C.addTransition(stateNotNull);
+    C.addTransition(stateNull);
+  }
+}
+
+void StreamChecker::Fclose(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = CheckDoubleClose(CE, C.getState(), C);
+  if (state)
+    C.addTransition(state);
+}
+
+// The handlers below, other than Fseek, only run the null-stream check on
+// their stream argument; they add no state transition of their own.
+
+void StreamChecker::Fread(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  if (!CheckNullStream(C.getSVal(CE->getArg(3)), state, C))
+    return;
+}
+
+void StreamChecker::Fwrite(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  if (!CheckNullStream(C.getSVal(CE->getArg(3)), state, C))
+    return;
+}
+
+/// Checks the stream argument, then reports a constant 'whence' argument
+/// outside the range [0, 2].
+void StreamChecker::Fseek(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  if (!(state = CheckNullStream(C.getSVal(CE->getArg(0)), state, C)))
+    return;
+  // Check the legality of the 'whence' argument of 'fseek'.
+  SVal Whence = state->getSVal(CE->getArg(2), C.getLocationContext());
+  Optional<nonloc::ConcreteInt> CI = Whence.getAs<nonloc::ConcreteInt>();
+
+  // Only a concrete integer can be judged here.
+  if (!CI)
+    return;
+
+  int64_t x = CI->getValue().getSExtValue();
+  if (x >= 0 && x <= 2)
+    return;
+
+  if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) {
+    if (!BT_illegalwhence)
+      BT_illegalwhence.reset(
+          new BuiltinBug(this, "Illegal whence argument",
+                         "The whence argument to fseek() should be "
+                         "SEEK_SET, SEEK_END, or SEEK_CUR."));
+    C.emitReport(std::make_unique<PathSensitiveBugReport>(
+        *BT_illegalwhence, BT_illegalwhence->getDescription(), N));
+  }
+}
+
+void StreamChecker::Ftell(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
+    return;
+}
+
+void StreamChecker::Rewind(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
+    return;
+}
+
+void StreamChecker::Fgetpos(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
+    return;
+}
+
+void StreamChecker::Fsetpos(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
+    return;
+}
+
+void StreamChecker::Clearerr(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
+    return;
+}
+
+void StreamChecker::Feof(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
+    return;
+}
+
+void StreamChecker::Ferror(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
+    return;
+}
+
+void StreamChecker::Fileno(CheckerContext &C, const CallExpr *CE) const {
+  ProgramStateRef state = C.getState();
+  if (!CheckNullStream(C.getSVal(CE->getArg(0)), state, C))
+    return;
+}
+
+ProgramStateRef StreamChecker::CheckNullStream(SVal SV, ProgramStateRef state,
+                                    CheckerContext &C) const {
+  Optional<DefinedSVal> DV = SV.getAs<DefinedSVal>();
+  if (!DV)
+    return nullptr;
+
+  ConstraintManager &CM = C.getConstraintManager();
+  ProgramStateRef stateNotNull, stateNull;
+  std::tie(stateNotNull, stateNull) = CM.assumeDual(state, *DV);
+
+  // Report only when the non-null assumption is infeasible.
+  if (!stateNotNull && stateNull) {
+    if (ExplodedNode *N = C.generateErrorNode(stateNull)) {
+      if (!BT_nullfp)
+        BT_nullfp.reset(new BuiltinBug(this, "NULL stream pointer",
+                                       "Stream pointer might be NULL."));
+      C.emitReport(std::make_unique<PathSensitiveBugReport>(
+          *BT_nullfp, BT_nullfp->getDescription(), N));
+    }
+    return nullptr;
+  }
+  return stateNotNull;
+}
+
+ProgramStateRef StreamChecker::CheckDoubleClose(const CallExpr *CE,
+                                               ProgramStateRef state,
+                                               CheckerContext &C) const {
+  SymbolRef Sym = C.getSVal(CE->getArg(0)).getAsSymbol();
+  if (!Sym)
+    return state;
+
+  const StreamState *SS = state->get<StreamMap>(Sym);
+
+  // If the file stream is not tracked, return.
+  if (!SS)
+    return state;
+
+  // Check: Double close a File Descriptor could cause undefined behaviour.
+  // Conforming to man-pages
+  if (SS->isClosed()) {
+    ExplodedNode *N = C.generateErrorNode();
+    if (N) {
+      if (!BT_doubleclose)
+        BT_doubleclose.reset(new BuiltinBug(
+            this, "Double fclose", "Try to close a file Descriptor already"
+                                   " closed. Cause undefined behaviour."));
+      C.emitReport(std::make_unique<PathSensitiveBugReport>(
+          *BT_doubleclose, BT_doubleclose->getDescription(), N));
+    }
+    return nullptr;
+  }
+
+  // Close the File Descriptor.
+  return state->set<StreamMap>(Sym, StreamState::getClosed(CE));
+}
+
+/// Reports a resource leak for a tracked stream whose symbol is dead while
+/// its recorded state is still Opened.
+void StreamChecker::checkDeadSymbols(SymbolReaper &SymReaper,
+                                     CheckerContext &C) const {
+  ProgramStateRef state = C.getState();
+
+  // TODO: Clean up the state.
+  const StreamMapTy &Map = state->get<StreamMap>();
+  for (const auto &I: Map) {
+    SymbolRef Sym = I.first;
+    const StreamState &SS = I.second;
+    if (!SymReaper.isDead(Sym) || !SS.isOpened())
+      continue;
+
+    ExplodedNode *N = C.generateErrorNode();
+    if (!N)
+      return;
+
+    if (!BT_ResourceLeak)
+      BT_ResourceLeak.reset(
+          new BuiltinBug(this, "Resource Leak",
+                         "Opened File never closed. Potential Resource leak."));
+    C.emitReport(std::make_unique<PathSensitiveBugReport>(
+        *BT_ResourceLeak, BT_ResourceLeak->getDescription(), N));
+  }
+}
+
+void ento::registerStreamChecker(CheckerManager &mgr) {
+  mgr.registerChecker<StreamChecker>();
+}
+
+bool ento::shouldRegisterStreamChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
new file mode 100644
index 000000000000..574d4ed1e600
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -0,0 +1,227 @@
+//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines basic, non-domain-specific mechanisms for tracking tainted values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Taint.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+
+using namespace clang;
+using namespace ento;
+using namespace taint;
+
+// Fully tainted symbols.
+REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
+
+// Partially tainted symbols.
+REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
+                                       TaintTagType)
+REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
+
+/// Writes every entry of the fully-tainted symbol map to \p Out as
+/// "symbol : tag", one per \p NL, preceded by a heading when the map is not
+/// empty. \p Sep is accepted but not used by this implementation.
+void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
+                       const char *Sep) {
+  TaintMapTy TM = State->get<TaintMap>();
+
+  if (!TM.isEmpty())
+    Out << "Tainted symbols:" << NL;
+
+  for (const auto &I : TM)
+    Out << I.first << " : " << I.second << NL;
+}
+
+/// Prints the taint map of \p State to llvm::errs().
+///
+/// The name is qualified with taint:: so that this defines the function
+/// declared in the taint namespace. A using-directive does not place an
+/// unqualified definition into that namespace; without the qualifier this
+/// would define a separate function at global scope.
+void taint::dumpTaint(ProgramStateRef State) {
+  printTaint(State, llvm::errs());
+}
+
+ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
+                                const LocationContext *LCtx,
+                                TaintTagType Kind) {
+  return addTaint(State, State->getSVal(S, LCtx), Kind);
+}
+
+ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
+                                TaintTagType Kind) {
+  SymbolRef Sym = V.getAsSymbol();
+  if (Sym)
+    return addTaint(State, Sym, Kind);
+
+  // If the SVal represents a structure, try to mass-taint all values within the
+  // structure. For now it only works efficiently on lazy compound values that
+  // were conjured during a conservative evaluation of a function - either as
+  // return values of functions that return structures or arrays by value, or as
+  // values of structures or arrays passed into the function by reference,
+  // directly or through pointer aliasing. Such lazy compound values are
+  // characterized by having exactly one binding in their captured store within
+  // their parent region, which is a conjured symbol default-bound to the base
+  // region of the parent region.
+  if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
+    if (Optional<SVal> binding =
+            State->getStateManager().getStoreManager()
+                                   .getDefaultBinding(*LCV)) {
+      if (SymbolRef Sym = binding->getAsSymbol())
+        return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
+    }
+  }
+
+  // Falls back to the region overload, which accepts a null region.
+  const MemRegion *R = V.getAsRegion();
+  return addTaint(State, R, Kind);
+}
+
+/// Taints the symbol of \p R when \p R is a symbolic region; for any other
+/// region, including null, returns \p State unchanged.
+ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
+                                TaintTagType Kind) {
+  if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
+    return addTaint(State, SR->getSymbol(), Kind);
+  return State;
+}
+
+ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
+                                TaintTagType Kind) {
+  // If this is a symbol cast, remove the cast before adding the taint. Taint
+  // is cast agnostic.
+  while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
+    Sym = SC->getOperand();
+
+  ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
+  assert(NewState);
+  return NewState;
+}
+
+ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
+                                       SymbolRef ParentSym,
+                                       const SubRegion *SubRegion,
+                                       TaintTagType Kind) {
+  // Ignore partial taint if the entire parent symbol is already tainted.
+  if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
+    if (*T == Kind)
+      return State;
+
+  // Partial taint applies if only a portion of the symbol is tainted.
+  if (SubRegion == SubRegion->getBaseRegion())
+    return addTaint(State, ParentSym, Kind);
+
+  // Extend the set of tainted sub-regions already saved for the parent
+  // symbol, or start from an empty set.
+  const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
+  TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
+  TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
+
+  Regs = F.add(Regs, SubRegion, Kind);
+  ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
+  assert(NewState);
+  return NewState;
+}
+
+bool taint::isTainted(ProgramStateRef State, const Stmt *S,
+                      const LocationContext *LCtx, TaintTagType Kind) {
+  SVal val = State->getSVal(S, LCtx);
+  return isTainted(State, val, Kind);
+}
+
+bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
+  if (const SymExpr *Sym = V.getAsSymExpr())
+    return isTainted(State, Sym, Kind);
+  if (const MemRegion *Reg = V.getAsRegion())
+    return isTainted(State, Reg, Kind);
+  return false;
+}
+
+bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
+                      TaintTagType K) {
+  if (!Reg)
+    return false;
+
+  // Element region (array element) is tainted if either the base or the offset
+  // are tainted.
+  if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
+    return isTainted(State, ER->getSuperRegion(), K) ||
+           isTainted(State, ER->getIndex(), K);
+
+  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
+    return isTainted(State, SR->getSymbol(), K);
+
+  // Any other sub-region takes the answer of its super region.
+  if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
+    return isTainted(State, ER->getSuperRegion(), K);
+
+  return false;
+}
+
+bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
+  if (!Sym)
+    return false;
+
+  // Traverse all the symbols this symbol depends on to see if any are tainted.
+  for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
+                                SE = Sym->symbol_end(); SI != SE; ++SI) {
+    if (!isa<SymbolData>(*SI))
+      continue;
+
+    if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
+      if (*Tag == Kind)
+        return true;
+    }
+
+    if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
+      // If this is a SymbolDerived with a tainted parent, it's also tainted.
+      if (isTainted(State, SD->getParentSymbol(), Kind))
+        return true;
+
+      // If this is a SymbolDerived with the same parent symbol as another
+      // tainted SymbolDerived and a region that's a sub-region of that tainted
+      // symbol, it's also tainted.
+      if (const TaintedSubRegions *Regs =
+              State->get<DerivedSymTaint>(SD->getParentSymbol())) {
+        const TypedValueRegion *R = SD->getRegion();
+        for (const auto &I : *Regs) {
+          // FIXME: The logic to identify tainted regions could be more
+          // complete. For example, this would not currently identify
+          // overlapping fields in a union as tainted. To identify this we can
+          // check for overlapping/nested byte offsets.
+          if (Kind == I.second && R->isSubRegionOf(I.first))
+            return true;
+        }
+      }
+    }
+
+    // If memory region is tainted, data is also tainted.
+    if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
+      if (isTainted(State, SRV->getRegion(), Kind))
+        return true;
+    }
+
+    // If this is a SymbolCast from a tainted value, it's also tainted.
+    if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
+      if (isTainted(State, SC->getOperand(), Kind))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+/// Emits a "Taint originated here" note at the first node on the path whose
+/// state has the tracked value tainted while its first predecessor's state
+/// does not.
+PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
+                                                  BugReporterContext &BRC,
+                                                  PathSensitiveBugReport &BR) {
+
+  // Find the ExplodedNode where the taint was first introduced
+  if (!isTainted(N->getState(), V) ||
+      isTainted(N->getFirstPred()->getState(), V))
+    return nullptr;
+
+  const Stmt *S = N->getStmtForDiagnostics();
+  if (!S)
+    return nullptr;
+
+  const LocationContext *NCtx = N->getLocationContext();
+  PathDiagnosticLocation L =
+      PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
+  if (!L.isValid() || !L.asLocation().isValid())
+    return nullptr;
+
+  return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.h b/clang/lib/StaticAnalyzer/Checkers/Taint.h
new file mode 100644
index 000000000000..8940916c1933
---
/dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/Taint.h
@@ -0,0 +1,102 @@
+//=== Taint.h - Taint tracking and basic propagation rules. --------*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines basic, non-domain-specific mechanisms for tracking tainted values.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H
+#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H
+
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+
+namespace clang {
+namespace ento {
+namespace taint {
+
+/// The type of taint, which helps to differentiate between different types of
+/// taint.
+using TaintTagType = unsigned;
+
+/// The tag used by every function below when the caller does not pass one.
+static constexpr TaintTagType TaintTagGeneric = 0;
+
+/// Create a new state in which the value of the statement is marked as tainted.
+LLVM_NODISCARD ProgramStateRef
+addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx,
+         TaintTagType Kind = TaintTagGeneric);
+
+/// Create a new state in which the value is marked as tainted.
+LLVM_NODISCARD ProgramStateRef
+addTaint(ProgramStateRef State, SVal V,
+         TaintTagType Kind = TaintTagGeneric);
+
+/// Create a new state in which the symbol is marked as tainted.
+LLVM_NODISCARD ProgramStateRef
+addTaint(ProgramStateRef State, SymbolRef Sym,
+         TaintTagType Kind = TaintTagGeneric);
+
+/// Create a new state in which the pointer represented by the region
+/// is marked as tainted.
+LLVM_NODISCARD ProgramStateRef
+addTaint(ProgramStateRef State, const MemRegion *R,
+         TaintTagType Kind = TaintTagGeneric);
+
+/// Create a new state in which a sub-region of a given symbol is tainted.
+/// This might be necessary when referring to regions that cannot have an
+/// individual symbol, e.g. if they are represented by the default binding of
+/// a LazyCompoundVal.
+LLVM_NODISCARD ProgramStateRef
+addPartialTaint(ProgramStateRef State,
+                SymbolRef ParentSym, const SubRegion *SubRegion,
+                TaintTagType Kind = TaintTagGeneric);
+
+/// Check if the statement has a tainted value in the given state.
+bool isTainted(ProgramStateRef State, const Stmt *S,
+               const LocationContext *LCtx,
+               TaintTagType Kind = TaintTagGeneric);
+
+/// Check if the value is tainted in the given state.
+bool isTainted(ProgramStateRef State, SVal V,
+               TaintTagType Kind = TaintTagGeneric);
+
+/// Check if the symbol is tainted in the given state.
+bool isTainted(ProgramStateRef State, SymbolRef Sym,
+               TaintTagType Kind = TaintTagGeneric);
+
+/// Check if the pointer represented by the region is tainted in the given
+/// state.
+bool isTainted(ProgramStateRef State, const MemRegion *Reg,
+               TaintTagType Kind = TaintTagGeneric);
+
+/// Print the taint information of the given state to \p Out, using \p nl as
+/// the line terminator and \p sep as the separator.
+void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl = "\n",
+                const char *sep = "");
+
+/// Debugging aid: dump the taint information of the given state.
+LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State);
+
+/// The bug visitor prints a diagnostic message at the location where a given
+/// variable was tainted.
+class TaintBugVisitor final : public BugReporterVisitor {
+private:
+  // The value whose taint origin this visitor looks for.
+  const SVal V;
+
+public:
+  TaintBugVisitor(const SVal V) : V(V) {}
+  // The tracked value is the visitor's whole identity.
+  void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); }
+
+  PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
+                                   BugReporterContext &BRC,
+                                   PathSensitiveBugReport &BR) override;
+};
+
+} // namespace taint
+} // namespace ento
+} // namespace clang
+
+#endif
+
diff --git a/clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp
new file mode 100644
index 000000000000..f81705304f3a
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp
@@ -0,0 +1,68 @@
+//== TaintTesterChecker.cpp ----------------------------------- -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This checker can be used for testing how taint data is propagated.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Taint.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+using namespace taint;
+
+namespace {
+/// Emits a "tainted" report for every expression whose value is tainted once
+/// the expression has been evaluated.
+class TaintTesterChecker : public Checker< check::PostStmt<Expr> > {
+
+  // Created on first use by initBugType(), hence mutable.
+  mutable std::unique_ptr<BugType> BT;
+  void initBugType() const;
+
+  /// Given a pointer argument, get the symbol of the value it contains
+  /// (points to).
+  // NOTE(review): declared here but neither defined nor called anywhere in
+  // this file.
+  SymbolRef getPointedToSymbol(CheckerContext &C,
+                               const Expr* Arg,
+                               bool IssueWarning = true) const;
+
+public:
+  void checkPostStmt(const Expr *E, CheckerContext &C) const;
+};
+}
+
+inline void TaintTesterChecker::initBugType() const {
+  if (!BT)
+    BT.reset(new BugType(this, "Tainted data", "General"));
+}
+
+void TaintTesterChecker::checkPostStmt(const Expr *E,
+                                       CheckerContext &C) const {
+  ProgramStateRef State = C.getState();
+  if (!State)
+    return;
+
+  if (isTainted(State, E, C.getLocationContext())) {
+    // Non-fatal: analysis of the path continues after the report.
+    if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
+      initBugType();
+      auto report = std::make_unique<PathSensitiveBugReport>(*BT, "tainted", N);
+      report->addRange(E->getSourceRange());
+      C.emitReport(std::move(report));
+    }
+  }
+}
+
+void ento::registerTaintTesterChecker(CheckerManager &mgr) {
+  mgr.registerChecker<TaintTesterChecker>();
+}
+
+bool ento::shouldRegisterTaintTesterChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp
new file mode 100644
index 000000000000..3663b0963692
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp
@@ -0,0 +1,266 @@
+//== TestAfterDivZeroChecker.cpp - Test after division by zero checker --*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines TestAfterDivZeroChecker, a builtin check that performs checks
+// for division by zero where the division occurs before comparison with zero.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "llvm/ADT/FoldingSet.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+
+/// True for the four binary operators whose right-hand side is a divisor.
+bool isDivisionOp(BinaryOperator::Opcode Op) {
+  switch (Op) {
+  case BO_Div:
+  case BO_Rem:
+  case BO_DivAssign:
+  case BO_RemAssign:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Set element recording that a symbol was used as a divisor: the symbol, the
+/// block ID it was seen in, and the stack frame of the division.
+class ZeroState {
+private:
+  SymbolRef ZeroSymbol;
+  unsigned BlockID;
+  const StackFrameContext *SFC;
+
+public:
+  ZeroState(SymbolRef Sym, unsigned Block, const StackFrameContext *Frame)
+      : ZeroSymbol(Sym), BlockID(Block), SFC(Frame) {}
+
+  const StackFrameContext *getStackFrameContext() const { return SFC; }
+
+  bool operator==(const ZeroState &Other) const {
+    return ZeroSymbol == Other.ZeroSymbol && SFC == Other.SFC &&
+           BlockID == Other.BlockID;
+  }
+
+  /// Orders by block ID, then stack frame, then symbol.
+  bool operator<(const ZeroState &Other) const {
+    if (BlockID == Other.BlockID) {
+      if (SFC == Other.SFC)
+        return ZeroSymbol < Other.ZeroSymbol;
+      return SFC < Other.SFC;
+    }
+    return BlockID < Other.BlockID;
+  }
+
+  void Profile(llvm::FoldingSetNodeID &ID) const {
+    ID.AddInteger(BlockID);
+    ID.AddPointer(SFC);
+    ID.AddPointer(ZeroSymbol);
+  }
+};
+
+/// Adds a single note to a report at the division that used the symbol later
+/// compared against zero.
+class DivisionBRVisitor : public BugReporterVisitor {
+private:
+  SymbolRef ZeroSymbol;
+  const StackFrameContext *SFC;
+  // Set once the division has been found; later nodes are then ignored.
+  bool Satisfied;
+
+public:
+  DivisionBRVisitor(SymbolRef ZeroSymbol, const StackFrameContext *SFC)
+      : ZeroSymbol(ZeroSymbol), SFC(SFC), Satisfied(false) {}
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override {
+    ID.Add(ZeroSymbol);
+    ID.Add(SFC);
+  }
+
+  PathDiagnosticPieceRef VisitNode(const ExplodedNode *Succ,
+                                   BugReporterContext &BRC,
+                                   PathSensitiveBugReport &BR) override;
+};
+
+/// Records divisors as they are used and reports when such a divisor is later
+/// tested against zero in a branch condition.
+class TestAfterDivZeroChecker
+    : public Checker<check::PreStmt<BinaryOperator>, check::BranchCondition,
+                     check::EndFunction> {
+  mutable std::unique_ptr<BuiltinBug> DivZeroBug;
+  void reportBug(SVal Val, CheckerContext &C) const;
+
+public:
+  void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
+  void checkBranchCondition(const Stmt *Condition, CheckerContext &C) const;
+  void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
+  void setDivZeroMap(SVal Var, CheckerContext &C) const;
+  bool hasDivZeroMap(SVal Var, const CheckerContext &C) const;
+  bool isZero(SVal S, CheckerContext &C) const;
+};
+} // end anonymous namespace
+
+REGISTER_SET_WITH_PROGRAMSTATE(DivZeroMap, ZeroState)
+
+PathDiagnosticPieceRef
+DivisionBRVisitor::VisitNode(const ExplodedNode *Succ, BugReporterContext &BRC,
+                             PathSensitiveBugReport &BR) {
+  if (Satisfied)
+    return nullptr;
+
+  // Only a node located after a division-like binary operator is relevant.
+  Optional<PostStmt> Post = Succ->getLocationAs<PostStmt>();
+  if (!Post)
+    return nullptr;
+
+  const BinaryOperator *BO = Post->getStmtAs<BinaryOperator>();
+  if (!BO || !isDivisionOp(BO->getOpcode()))
+    return nullptr;
+
+  const Expr *Divisor = BO->getRHS();
+  if (!Divisor)
+    return nullptr;
+
+  // The divisor must be the tracked symbol, in the tracked stack frame.
+  SVal DivisorVal = Succ->getSVal(Divisor);
+  if (ZeroSymbol != DivisorVal.getAsSymbol() || SFC != Succ->getStackFrame())
+    return nullptr;
+
+  Satisfied = true;
+
+  // Construct a new PathDiagnosticPiece.
+  ProgramPoint Point = Succ->getLocation();
+  PathDiagnosticLocation Loc =
+      PathDiagnosticLocation::create(Point, BRC.getSourceManager());
+
+  if (!Loc.isValid() || !Loc.asLocation().isValid())
+    return nullptr;
+
+  return std::make_shared<PathDiagnosticEventPiece>(
+      Loc, "Division with compared value made here");
+}
+
+/// True when \p S is a defined value that cannot be assumed non-zero in the
+/// current state.
+bool TestAfterDivZeroChecker::isZero(SVal S, CheckerContext &C) const {
+  Optional<DefinedSVal> Defined = S.getAs<DefinedSVal>();
+  if (!Defined)
+    return false;
+
+  ConstraintManager &Constraints = C.getConstraintManager();
+  return !Constraints.assume(C.getState(), *Defined, true);
+}
+
+/// Records the symbol of \p Var, with the current block and stack frame, as a
+/// used divisor. Does nothing for a value without a symbol.
+void TestAfterDivZeroChecker::setDivZeroMap(SVal Var, CheckerContext &C) const {
+  SymbolRef Sym = Var.getAsSymbol();
+  if (!Sym)
+    return;
+
+  const ZeroState Entry(Sym, C.getBlockID(), C.getStackFrame());
+  C.addTransition(C.getState()->add<DivZeroMap>(Entry));
+}
+
+/// True when the symbol of \p Var was recorded as a divisor for the current
+/// block and stack frame.
+bool TestAfterDivZeroChecker::hasDivZeroMap(SVal Var,
+                                            const CheckerContext &C) const {
+  SymbolRef Sym = Var.getAsSymbol();
+  if (!Sym)
+    return false;
+
+  const ZeroState Probe(Sym, C.getBlockID(), C.getStackFrame());
+  return C.getState()->contains<DivZeroMap>(Probe);
+}
+
+void TestAfterDivZeroChecker::reportBug(SVal Val, CheckerContext &C) const {
+  ExplodedNode *N = C.generateErrorNode(C.getState());
+  if (!N)
+    return;
+
+  if (!DivZeroBug)
+    DivZeroBug.reset(new BuiltinBug(this, "Division by zero"));
+
+  auto Report = std::make_unique<PathSensitiveBugReport>(
+      *DivZeroBug, "Value being compared against zero has already been used "
+                   "for division",
+      N);
+
+  // The visitor marks the earlier division on the reported path.
+  Report->addVisitor(std::make_unique<DivisionBRVisitor>(Val.getAsSymbol(),
+                                                         C.getStackFrame()));
+  C.emitReport(std::move(Report));
+}
+
+/// Drops every recorded divisor that belongs to the stack frame being left.
+void TestAfterDivZeroChecker::checkEndFunction(const ReturnStmt *,
+                                               CheckerContext &C) const {
+  ProgramStateRef State = C.getState();
+
+  const DivZeroMapTy Recorded = State->get<DivZeroMap>();
+  if (Recorded.isEmpty())
+    return;
+
+  DivZeroMapTy::Factory &F = State->get_context<DivZeroMap>();
+  const StackFrameContext *Frame = C.getStackFrame();
+
+  // Walk the set as it was on entry while building the reduced copy.
+  DivZeroMapTy Kept = Recorded;
+  for (const ZeroState &ZS : Recorded)
+    if (ZS.getStackFrameContext() == Frame)
+      Kept = F.remove(Kept, ZS);
+
+  C.addTransition(State->set<DivZeroMap>(Kept));
+}
+
+void TestAfterDivZeroChecker::checkPreStmt(const BinaryOperator *B,
+                                           CheckerContext &C) const {
+  if (!isDivisionOp(B->getOpcode()))
+    return;
+
+  SVal Divisor = C.getSVal(B->getRHS());
+  if (!isZero(Divisor, C))
+    setDivZeroMap(Divisor, C);
+}
+
+/// Reports when a branch condition tests a previously recorded divisor
+/// against zero: a comparison with the literal 0, a logical negation, or an
+/// implicitly converted value.
+void TestAfterDivZeroChecker::checkBranchCondition(const Stmt *Condition,
+                                                   CheckerContext &C) const {
+  // Reports Val if it is a recorded divisor; tells the caller whether it did.
+  auto reportIfDivided = [this, &C](SVal Val) {
+    if (!hasDivZeroMap(Val, C))
+      return false;
+    reportBug(Val, C);
+    return true;
+  };
+
+  if (const BinaryOperator *B = dyn_cast<BinaryOperator>(Condition)) {
+    if (!B->isComparisonOp())
+      return;
+
+    // Prefer a literal on the right-hand side; otherwise try the left one.
+    // The operand on the other side is the value being tested.
+    const Expr *Tested = B->getLHS();
+    const IntegerLiteral *Literal = dyn_cast<IntegerLiteral>(B->getRHS());
+    if (!Literal) {
+      Literal = dyn_cast<IntegerLiteral>(B->getLHS());
+      Tested = B->getRHS();
+    }
+
+    if (!Literal || Literal->getValue() != 0)
+      return;
+
+    reportIfDivided(C.getSVal(Tested));
+    return;
+  }
+
+  if (const UnaryOperator *U = dyn_cast<UnaryOperator>(Condition)) {
+    if (U->getOpcode() != UO_LNot)
+      return;
+
+    // First look through an implicit cast on the operand, then fall back to
+    // the operand itself.
+    SVal Inner;
+    if (const ImplicitCastExpr *Cast =
+            dyn_cast<ImplicitCastExpr>(U->getSubExpr()))
+      Inner = C.getSVal(Cast->getSubExpr());
+
+    if (!reportIfDivided(Inner))
+      reportIfDivided(C.getSVal(U->getSubExpr()));
+    return;
+  }
+
+  if (const ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(Condition)) {
+    // Same order: the expression under the cast first, then the condition.
+    if (!reportIfDivided(C.getSVal(Cast->getSubExpr())))
+      reportIfDivided(C.getSVal(Condition));
+  }
+}
+
+void ento::registerTestAfterDivZeroChecker(CheckerManager &mgr) {
+  mgr.registerChecker<TestAfterDivZeroChecker>();
+}
+
+bool ento::shouldRegisterTestAfterDivZeroChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/TraversalChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TraversalChecker.cpp
new file mode 100644
index 000000000000..73183aa468f6
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/TraversalChecker.cpp
@@ -0,0 +1,121 @@
+//== TraversalChecker.cpp -------------------------------------- -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// These checkers print various aspects of the ExprEngine's traversal of the CFG
+// as it builds the ExplodedGraph.
+// +//===----------------------------------------------------------------------===// +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/ParentMap.h" +#include "clang/AST/StmtObjC.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +class TraversalDumper : public Checker< check::BranchCondition, + check::BeginFunction, + check::EndFunction > { +public: + void checkBranchCondition(const Stmt *Condition, CheckerContext &C) const; + void checkBeginFunction(CheckerContext &C) const; + void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const; +}; +} + +void TraversalDumper::checkBranchCondition(const Stmt *Condition, + CheckerContext &C) const { + // Special-case Objective-C's for-in loop, which uses the entire loop as its + // condition. We just print the collection expression. + const Stmt *Parent = dyn_cast<ObjCForCollectionStmt>(Condition); + if (!Parent) { + const ParentMap &Parents = C.getLocationContext()->getParentMap(); + Parent = Parents.getParent(Condition); + } + + // It is mildly evil to print directly to llvm::outs() rather than emitting + // warnings, but this ensures things do not get filtered out by the rest of + // the static analyzer machinery. 
+ SourceLocation Loc = Parent->getBeginLoc(); + llvm::outs() << C.getSourceManager().getSpellingLineNumber(Loc) << " " + << Parent->getStmtClassName() << "\n"; +} + +void TraversalDumper::checkBeginFunction(CheckerContext &C) const { + llvm::outs() << "--BEGIN FUNCTION--\n"; +} + +void TraversalDumper::checkEndFunction(const ReturnStmt *RS, + CheckerContext &C) const { + llvm::outs() << "--END FUNCTION--\n"; +} + +void ento::registerTraversalDumper(CheckerManager &mgr) { + mgr.registerChecker<TraversalDumper>(); +} + +bool ento::shouldRegisterTraversalDumper(const LangOptions &LO) { + return true; +} + +//------------------------------------------------------------------------------ + +namespace { +class CallDumper : public Checker< check::PreCall, + check::PostCall > { +public: + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; +}; +} + +void CallDumper::checkPreCall(const CallEvent &Call, CheckerContext &C) const { + unsigned Indentation = 0; + for (const LocationContext *LC = C.getLocationContext()->getParent(); + LC != nullptr; LC = LC->getParent()) + ++Indentation; + + // It is mildly evil to print directly to llvm::outs() rather than emitting + // warnings, but this ensures things do not get filtered out by the rest of + // the static analyzer machinery. + llvm::outs().indent(Indentation); + Call.dump(llvm::outs()); +} + +void CallDumper::checkPostCall(const CallEvent &Call, CheckerContext &C) const { + const Expr *CallE = Call.getOriginExpr(); + if (!CallE) + return; + + unsigned Indentation = 0; + for (const LocationContext *LC = C.getLocationContext()->getParent(); + LC != nullptr; LC = LC->getParent()) + ++Indentation; + + // It is mildly evil to print directly to llvm::outs() rather than emitting + // warnings, but this ensures things do not get filtered out by the rest of + // the static analyzer machinery. 
+ llvm::outs().indent(Indentation); + if (Call.getResultType()->isVoidType()) + llvm::outs() << "Returning void\n"; + else + llvm::outs() << "Returning " << C.getSVal(CallE) << "\n"; +} + +void ento::registerCallDumper(CheckerManager &mgr) { + mgr.registerChecker<CallDumper>(); +} + +bool ento::shouldRegisterCallDumper(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp new file mode 100644 index 000000000000..62a4c2ab0209 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp @@ -0,0 +1,257 @@ +//== TrustNonnullChecker.cpp --------- API nullability modeling -*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker adds nullability-related assumptions: +// +// 1. Methods annotated with _Nonnull +// which come from system headers actually return a non-null pointer. +// +// 2. NSDictionary key is non-null after the keyword subscript operation +// on read if and only if the resulting expression is non-null. +// +// 3. NSMutableDictionary index is non-null after a write operation. 
//
//===----------------------------------------------------------------------===//

#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/Analysis/SelectorExtras.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"

using namespace clang;
using namespace ento;

/// Records implications between symbols.
/// The semantics is:
///    (antecedent != 0) => (consequent != 0)
/// These implications are then read during the evaluation of the assumption,
/// and the appropriate antecedents are applied.
REGISTER_MAP_WITH_PROGRAMSTATE(NonNullImplicationMap, SymbolRef, SymbolRef)

/// The semantics is:
///    (antecedent == 0) => (consequent == 0)
REGISTER_MAP_WITH_PROGRAMSTATE(NullImplicationMap, SymbolRef, SymbolRef)

namespace {

/// Adds nullability assumptions to the program state: non-null results of
/// trusted _Nonnull-annotated calls, non-null keys written into an
/// NSMutableDictionary, and key/value implications for NSDictionary reads.
class TrustNonnullChecker : public Checker<check::PostCall,
                                           check::PostObjCMessage,
                                           check::DeadSymbols,
                                           eval::Assume> {
  // Do not try to iterate over symbols with higher complexity.
  static unsigned constexpr ComplexityThreshold = 10;
  // Selectors of the dictionary read and write methods that are modeled.
  Selector ObjectForKeyedSubscriptSel;
  Selector ObjectForKeySel;
  Selector SetObjectForKeyedSubscriptSel;
  Selector SetObjectForKeySel;

public:
  /// Builds the keyword selectors once, from \p Ctx, so the callbacks only
  /// need to compare selectors.
  TrustNonnullChecker(ASTContext &Ctx)
      : ObjectForKeyedSubscriptSel(
            getKeywordSelector(Ctx, "objectForKeyedSubscript")),
        ObjectForKeySel(getKeywordSelector(Ctx, "objectForKey")),
        SetObjectForKeyedSubscriptSel(
            getKeywordSelector(Ctx, "setObject", "forKeyedSubscript")),
        SetObjectForKeySel(getKeywordSelector(Ctx, "setObject", "forKey")) {}

  /// For every symbol occurring in \p Cond, applies any recorded implication
  /// whose antecedent is that symbol, in both directions.
  /// \returns the updated state. This is null when addImplication finds the
  /// implied assumption infeasible. \p State is returned unchanged when
  /// \p Cond is not a symbol or exceeds ComplexityThreshold.
  ProgramStateRef evalAssume(ProgramStateRef State,
                             SVal Cond,
                             bool Assumption) const {
    const SymbolRef CondS = Cond.getAsSymbol();
    if (!CondS || CondS->computeComplexity() > ComplexityThreshold)
      return State;

    for (auto B=CondS->symbol_begin(), E=CondS->symbol_end(); B != E; ++B) {
      const SymbolRef Antecedent = *B;
      State = addImplication(Antecedent, State, true);
      State = addImplication(Antecedent, State, false);
    }

    return State;
  }

  /// Assumes the return value of \p Call is non-null when the call is in a
  /// system header and isNonNullPtr() trusts its annotation.
  void checkPostCall(const CallEvent &Call, CheckerContext &C) const {
    // Only trust annotations for system headers for non-protocols.
    if (!Call.isInSystemHeader())
      return;

    ProgramStateRef State = C.getState();

    if (isNonNullPtr(Call, C))
      if (auto L = Call.getReturnValue().getAs<Loc>())
        State = State->assume(*L, /*assumption=*/true);

    C.addTransition(State);
  }

  /// Models NSMutableDictionary writes (the key argument is assumed
  /// non-null) and NSDictionary reads (key/result implications are recorded
  /// in the two implication maps).
  void checkPostObjCMessage(const ObjCMethodCall &Msg,
                            CheckerContext &C) const {
    const ObjCInterfaceDecl *ID = Msg.getReceiverInterface();
    if (!ID)
      return;

    ProgramStateRef State = C.getState();

    // Index to setter for NSMutableDictionary is assumed to be non-null,
    // as an exception is thrown otherwise.
    if (interfaceHasSuperclass(ID, "NSMutableDictionary") &&
        (Msg.getSelector() == SetObjectForKeyedSubscriptSel ||
         Msg.getSelector() == SetObjectForKeySel)) {
      if (auto L = Msg.getArgSVal(1).getAs<Loc>())
        State = State->assume(*L, /*assumption=*/true);
    }

    // Record an implication: index is non-null if the output is non-null.
    if (interfaceHasSuperclass(ID, "NSDictionary") &&
        (Msg.getSelector() == ObjectForKeyedSubscriptSel ||
         Msg.getSelector() == ObjectForKeySel)) {
      SymbolRef ArgS = Msg.getArgSVal(0).getAsSymbol();
      SymbolRef RetS = Msg.getReturnValue().getAsSymbol();

      if (ArgS && RetS) {
        // Emulate an implication: the argument is non-null if
        // the return value is non-null.
        State = State->set<NonNullImplicationMap>(RetS, ArgS);

        // Conversely, when the argument is null, the return value
        // is definitely null.
        State = State->set<NullImplicationMap>(ArgS, RetS);
      }
    }

    C.addTransition(State);
  }

  /// Removes implications that mention a dead symbol from both maps.
  void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const {
    ProgramStateRef State = C.getState();

    State = dropDeadFromGDM<NullImplicationMap>(SymReaper, State);
    State = dropDeadFromGDM<NonNullImplicationMap>(SymReaper, State);

    C.addTransition(State);
  }

private:

  /// \returns State with GDM \p MapName where all dead symbols were
  /// removed. An entry is dropped when either its key or its value is dead.
  template <typename MapName>
  ProgramStateRef dropDeadFromGDM(SymbolReaper &SymReaper,
                                  ProgramStateRef State) const {
    for (const std::pair<SymbolRef, SymbolRef> &P : State->get<MapName>())
      if (!SymReaper.isLive(P.first) || !SymReaper.isLive(P.second))
        State = State->remove<MapName>(P.first);
    return State;
  }

  /// \returns Whether we trust the result of the method call to be
  /// a non-null pointer.
  bool isNonNullPtr(const CallEvent &Call, CheckerContext &C) const {
    QualType ExprRetType = Call.getResultType();
    if (!ExprRetType->isAnyPointerType())
      return false;

    if (getNullabilityAnnotation(ExprRetType) == Nullability::Nonnull)
      return true;

    // The logic for ObjC instance method calls is more complicated,
    // as the return value is nil when the receiver is nil.
    if (!isa<ObjCMethodCall>(&Call))
      return false;

    const auto *MCall = cast<ObjCMethodCall>(&Call);
    const ObjCMethodDecl *MD = MCall->getDecl();

    // Distrust protocols.
    if (isa<ObjCProtocolDecl>(MD->getDeclContext()))
      return false;

    QualType DeclRetType = MD->getReturnType();
    if (getNullabilityAnnotation(DeclRetType) != Nullability::Nonnull)
      return false;

    // For class messages it is sufficient for the declaration to be
    // annotated _Nonnull.
    if (!MCall->isInstanceMessage())
      return true;

    // Alternatively, the analyzer could know that the receiver is not null.
    SVal Receiver = MCall->getReceiverSVal();
    ConditionTruthVal TV = C.getState()->isNonNull(Receiver);
    if (TV.isConstrainedTrue())
      return true;

    return false;
  }

  /// \return Whether \p ID has a superclass by the name \p ClassName.
  /// \p ID itself counts: the walk starts at \p ID and follows
  /// getSuperClass() until a name matches or the chain ends.
  bool interfaceHasSuperclass(const ObjCInterfaceDecl *ID,
                              StringRef ClassName) const {
    if (ID->getIdentifier()->getName() == ClassName)
      return true;

    if (const ObjCInterfaceDecl *Super = ID->getSuperClass())
      return interfaceHasSuperclass(Super, ClassName);

    return false;
  }


  /// \return a state with an optional implication added (if exists)
  /// from a map of recorded implications.
  /// If \p Negated is true, looks \p Antecedent up in NonNullImplicationMap
  /// and, when \p Antecedent is constrained to be non-null, assumes the
  /// consequent is non-null too.
  /// Otherwise looks \p Antecedent up in NullImplicationMap and, when
  /// \p Antecedent is constrained to be null, assumes the consequent is null.
  /// An implication that was applied is removed from both maps.
  /// \return null if \p InputState is null or the assumption is infeasible.
  ProgramStateRef addImplication(SymbolRef Antecedent,
                                 ProgramStateRef InputState,
                                 bool Negated) const {
    if (!InputState)
      return nullptr;
    SValBuilder &SVB = InputState->getStateManager().getSValBuilder();
    const SymbolRef *Consequent =
        Negated ? InputState->get<NonNullImplicationMap>(Antecedent)
                : InputState->get<NullImplicationMap>(Antecedent);
    if (!Consequent)
      return InputState;

    SVal AntecedentV = SVB.makeSymbolVal(Antecedent);
    ProgramStateRef State = InputState;

    if ((Negated && InputState->isNonNull(AntecedentV).isConstrainedTrue())
        || (!Negated && InputState->isNull(AntecedentV).isConstrainedTrue())) {
      SVal ConsequentS = SVB.makeSymbolVal(*Consequent);
      State = InputState->assume(ConsequentS.castAs<DefinedSVal>(), Negated);
      if (!State)
        return nullptr;

      // Drop implications from the map.
      if (Negated) {
        State = State->remove<NonNullImplicationMap>(Antecedent);
        State = State->remove<NullImplicationMap>(*Consequent);
      } else {
        State = State->remove<NullImplicationMap>(Antecedent);
        State = State->remove<NonNullImplicationMap>(*Consequent);
      }
    }

    return State;
  }
};

} // end empty namespace

void ento::registerTrustNonnullChecker(CheckerManager &Mgr) {
  Mgr.registerChecker<TrustNonnullChecker>(Mgr.getASTContext());
}

bool ento::shouldRegisterTrustNonnullChecker(const LangOptions &LO) {
  return true;
}
diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp new file mode 100644 index 000000000000..247cba7dc933 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp @@ -0,0 +1,115 @@ +//=== UndefBranchChecker.cpp -----------------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines UndefBranchChecker, which checks for undefined branch +// condition. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include <utility> + +using namespace clang; +using namespace ento; + +namespace { + +class UndefBranchChecker : public Checker<check::BranchCondition> { + mutable std::unique_ptr<BuiltinBug> BT; + + struct FindUndefExpr { + ProgramStateRef St; + const LocationContext *LCtx; + + FindUndefExpr(ProgramStateRef S, const LocationContext *L) + : St(std::move(S)), LCtx(L) {} + + const Expr *FindExpr(const Expr *Ex) { + if (!MatchesCriteria(Ex)) + return nullptr; + + for (const Stmt *SubStmt : Ex->children()) + if (const Expr *ExI = dyn_cast_or_null<Expr>(SubStmt)) + if (const Expr *E2 = FindExpr(ExI)) + return E2; + + return Ex; + } + + bool MatchesCriteria(const Expr *Ex) { + return St->getSVal(Ex, LCtx).isUndef(); + } + }; + +public: + void checkBranchCondition(const Stmt *Condition, CheckerContext &Ctx) const; +}; + +} + +void UndefBranchChecker::checkBranchCondition(const Stmt *Condition, + CheckerContext &Ctx) const { + SVal X = Ctx.getSVal(Condition); + if (X.isUndef()) { + // Generate a sink node, which implicitly marks both outgoing branches as + // infeasible. 
+ ExplodedNode *N = Ctx.generateErrorNode(); + if (N) { + if (!BT) + BT.reset(new BuiltinBug( + this, "Branch condition evaluates to a garbage value")); + + // What's going on here: we want to highlight the subexpression of the + // condition that is the most likely source of the "uninitialized + // branch condition." We do a recursive walk of the condition's + // subexpressions and roughly look for the most nested subexpression + // that binds to Undefined. We then highlight that expression's range. + + // Get the predecessor node and check if is a PostStmt with the Stmt + // being the terminator condition. We want to inspect the state + // of that node instead because it will contain main information about + // the subexpressions. + + // Note: any predecessor will do. They should have identical state, + // since all the BlockEdge did was act as an error sink since the value + // had to already be undefined. + assert (!N->pred_empty()); + const Expr *Ex = cast<Expr>(Condition); + ExplodedNode *PrevN = *N->pred_begin(); + ProgramPoint P = PrevN->getLocation(); + ProgramStateRef St = N->getState(); + + if (Optional<PostStmt> PS = P.getAs<PostStmt>()) + if (PS->getStmt() == Ex) + St = PrevN->getState(); + + FindUndefExpr FindIt(St, Ctx.getLocationContext()); + Ex = FindIt.FindExpr(Ex); + + // Emit the bug report. 
+ auto R = std::make_unique<PathSensitiveBugReport>( + *BT, BT->getDescription(), N); + bugreporter::trackExpressionValue(N, Ex, *R); + R->addRange(Ex->getSourceRange()); + + Ctx.emitReport(std::move(R)); + } + } +} + +void ento::registerUndefBranchChecker(CheckerManager &mgr) { + mgr.registerChecker<UndefBranchChecker>(); +} + +bool ento::shouldRegisterUndefBranchChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp new file mode 100644 index 000000000000..7b581bef3900 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp @@ -0,0 +1,106 @@ +// UndefCapturedBlockVarChecker.cpp - Uninitialized captured vars -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker detects blocks that capture uninitialized values. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/Attr.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +namespace { +class UndefCapturedBlockVarChecker + : public Checker< check::PostStmt<BlockExpr> > { + mutable std::unique_ptr<BugType> BT; + +public: + void checkPostStmt(const BlockExpr *BE, CheckerContext &C) const; +}; +} // end anonymous namespace + +static const DeclRefExpr *FindBlockDeclRefExpr(const Stmt *S, + const VarDecl *VD) { + if (const DeclRefExpr *BR = dyn_cast<DeclRefExpr>(S)) + if (BR->getDecl() == VD) + return BR; + + for (const Stmt *Child : S->children()) + if (Child) + if (const DeclRefExpr *BR = FindBlockDeclRefExpr(Child, VD)) + return BR; + + return nullptr; +} + +void +UndefCapturedBlockVarChecker::checkPostStmt(const BlockExpr *BE, + CheckerContext &C) const { + if (!BE->getBlockDecl()->hasCaptures()) + return; + + ProgramStateRef state = C.getState(); + auto *R = cast<BlockDataRegion>(C.getSVal(BE).getAsRegion()); + + BlockDataRegion::referenced_vars_iterator I = R->referenced_vars_begin(), + E = R->referenced_vars_end(); + + for (; I != E; ++I) { + // This VarRegion is the region associated with the block; we need + // the one associated with the encompassing context. + const VarRegion *VR = I.getCapturedRegion(); + const VarDecl *VD = VR->getDecl(); + + if (VD->hasAttr<BlocksAttr>() || !VD->hasLocalStorage()) + continue; + + // Get the VarRegion associated with VD in the local stack frame. 
+ if (Optional<UndefinedVal> V = + state->getSVal(I.getOriginalRegion()).getAs<UndefinedVal>()) { + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT) + BT.reset( + new BuiltinBug(this, "uninitialized variable captured by block")); + + // Generate a bug report. + SmallString<128> buf; + llvm::raw_svector_ostream os(buf); + + os << "Variable '" << VD->getName() + << "' is uninitialized when captured by block"; + + auto R = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + if (const Expr *Ex = FindBlockDeclRefExpr(BE->getBody(), VD)) + R->addRange(Ex->getSourceRange()); + R->addVisitor(std::make_unique<FindLastStoreBRVisitor>( + *V, VR, /*EnableNullFPSuppression*/ false, + bugreporter::TrackingKind::Thorough)); + R->disablePathPruning(); + // need location of block + C.emitReport(std::move(R)); + } + } + } +} + +void ento::registerUndefCapturedBlockVarChecker(CheckerManager &mgr) { + mgr.registerChecker<UndefCapturedBlockVarChecker>(); +} + +bool ento::shouldRegisterUndefCapturedBlockVarChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp new file mode 100644 index 000000000000..a2f3e0da13fb --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp @@ -0,0 +1,191 @@ +//=== UndefResultChecker.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines UndefResultChecker, a builtin check in ExprEngine that +// performs checks for undefined results of non-assignment binary operators. 
//
//===----------------------------------------------------------------------===//

#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;
using namespace ento;

namespace {
/// Reports binary operators whose result is an undefined value and explains
/// in the message which operand or which shift rule caused it.
class UndefResultChecker
  : public Checker< check::PostStmt<BinaryOperator> > {

  mutable std::unique_ptr<BugType> BT;

public:
  void checkPostStmt(const BinaryOperator *B, CheckerContext &C) const;
};
} // end anonymous namespace

/// \returns true only when \p Ex is an array subscript whose value is an
/// element region and whose index can be assumed out of bounds but cannot be
/// assumed in bounds in the current state.
static bool isArrayIndexOutOfBounds(CheckerContext &C, const Expr *Ex) {
  ProgramStateRef state = C.getState();

  if (!isa<ArraySubscriptExpr>(Ex))
    return false;

  SVal Loc = C.getSVal(Ex);
  if (!Loc.isValid())
    return false;

  // NOTE(review): castAs presumes every valid value here is a memory-region
  // location - confirm for subscripts on non-region bases.
  const MemRegion *MR = Loc.castAs<loc::MemRegionVal>().getRegion();
  const ElementRegion *ER = dyn_cast<ElementRegion>(MR);
  if (!ER)
    return false;

  DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
  DefinedOrUnknownSVal NumElements = C.getStoreManager().getSizeInElements(
      state, ER->getSuperRegion(), ER->getValueType());
  ProgramStateRef StInBound = state->assumeInBound(Idx, NumElements, true);
  ProgramStateRef StOutBound = state->assumeInBound(Idx, NumElements, false);
  // Out of bounds for certain: only the out-of-bound assumption is feasible.
  return StOutBound && !StInBound;
}

/// \returns whether the right operand of \p B is greater than or equal to
/// the bit width of the left operand's type.
static bool isShiftOverflow(const BinaryOperator *B, CheckerContext &C) {
  return C.isGreaterOrEqual(
      B->getRHS(), C.getASTContext().getIntWidth(B->getLHS()->getType()));
}

/// \returns whether the shift amount exceeds the number of leading zero bits
/// of the left operand. Both operand values must be known; this is asserted
/// rather than checked.
static bool isLeftShiftResultUnrepresentable(const BinaryOperator *B,
                                             CheckerContext &C) {
  SValBuilder &SB = C.getSValBuilder();
  ProgramStateRef State = C.getState();
  const llvm::APSInt *LHS = SB.getKnownValue(State, C.getSVal(B->getLHS()));
  const llvm::APSInt *RHS = SB.getKnownValue(State, C.getSVal(B->getRHS()));
  assert(LHS && RHS && "Values unknown, inconsistent state");
  return (unsigned)RHS->getZExtValue() > LHS->countLeadingZeros();
}

/// Emits a report when the value of \p B is undefined. The message names the
/// undefined operand if there is one; otherwise it names the shift rule that
/// was violated, or falls back to a generic message.
void UndefResultChecker::checkPostStmt(const BinaryOperator *B,
                                       CheckerContext &C) const {
  if (C.getSVal(B).isUndef()) {

    // Do not report assignments of uninitialized values inside swap functions.
    // This should allow to swap partially uninitialized structs
    // (radar://14129997)
    if (const FunctionDecl *EnclosingFunctionDecl =
        dyn_cast<FunctionDecl>(C.getStackFrame()->getDecl()))
      if (C.getCalleeName(EnclosingFunctionDecl) == "swap")
        return;

    // Generate an error node.
    ExplodedNode *N = C.generateErrorNode();
    if (!N)
      return;

    if (!BT)
      BT.reset(
          new BuiltinBug(this, "Result of operation is garbage or undefined"));

    SmallString<256> sbuf;
    llvm::raw_svector_ostream OS(sbuf);
    // Expression to highlight and track; stays null when no operand or shift
    // rule can be blamed.
    const Expr *Ex = nullptr;
    bool isLeft = true;

    // The left operand takes precedence when both operands are undefined.
    if (C.getSVal(B->getLHS()).isUndef()) {
      Ex = B->getLHS()->IgnoreParenCasts();
      isLeft = true;
    }
    else if (C.getSVal(B->getRHS()).isUndef()) {
      Ex = B->getRHS()->IgnoreParenCasts();
      isLeft = false;
    }

    if (Ex) {
      OS << "The " << (isLeft ? "left" : "right") << " operand of '"
         << BinaryOperator::getOpcodeStr(B->getOpcode())
         << "' is a garbage value";
      if (isArrayIndexOutOfBounds(C, Ex))
        OS << " due to array index out of bounds";
    } else {
      // Neither operand was undefined, but the result is undefined.
      // The branches below are tried in order; the first match decides the
      // message and the expression to highlight.
      if ((B->getOpcode() == BinaryOperatorKind::BO_Shl ||
           B->getOpcode() == BinaryOperatorKind::BO_Shr) &&
          C.isNegative(B->getRHS())) {
        OS << "The result of the "
           << ((B->getOpcode() == BinaryOperatorKind::BO_Shl) ? "left"
                                                              : "right")
           << " shift is undefined because the right operand is negative";
        Ex = B->getRHS();
      } else if ((B->getOpcode() == BinaryOperatorKind::BO_Shl ||
                  B->getOpcode() == BinaryOperatorKind::BO_Shr) &&
                 isShiftOverflow(B, C)) {

        OS << "The result of the "
           << ((B->getOpcode() == BinaryOperatorKind::BO_Shl) ? "left"
                                                              : "right")
           << " shift is undefined due to shifting by ";
        Ex = B->getRHS();

        // Print the concrete shift amount when it is known.
        SValBuilder &SB = C.getSValBuilder();
        const llvm::APSInt *I =
            SB.getKnownValue(C.getState(), C.getSVal(B->getRHS()));
        if (!I)
          OS << "a value that is";
        else if (I->isUnsigned())
          OS << '\'' << I->getZExtValue() << "\', which is";
        else
          OS << '\'' << I->getSExtValue() << "\', which is";

        OS << " greater or equal to the width of type '"
           << B->getLHS()->getType().getAsString() << "'.";
      } else if (B->getOpcode() == BinaryOperatorKind::BO_Shl &&
                 C.isNegative(B->getLHS())) {
        OS << "The result of the left shift is undefined because the left "
              "operand is negative";
        Ex = B->getLHS();
      } else if (B->getOpcode() == BinaryOperatorKind::BO_Shl &&
                 isLeftShiftResultUnrepresentable(B, C)) {
        // isLeftShiftResultUnrepresentable asserts that both values are
        // known, so LHS and RHS are dereferenced without a null check.
        ProgramStateRef State = C.getState();
        SValBuilder &SB = C.getSValBuilder();
        const llvm::APSInt *LHS =
            SB.getKnownValue(State, C.getSVal(B->getLHS()));
        const llvm::APSInt *RHS =
            SB.getKnownValue(State, C.getSVal(B->getRHS()));
        OS << "The result of the left shift is undefined due to shifting \'"
           << LHS->getSExtValue() << "\' by \'" << RHS->getZExtValue()
           << "\', which is unrepresentable in the unsigned version of "
           << "the return type \'" << B->getLHS()->getType().getAsString()
           << "\'";
        Ex = B->getLHS();
      } else {
        OS << "The result of the '"
           << BinaryOperator::getOpcodeStr(B->getOpcode())
           << "' expression is undefined";
      }
    }
    auto report = std::make_unique<PathSensitiveBugReport>(*BT, OS.str(), N);
    if (Ex) {
      report->addRange(Ex->getSourceRange());
      bugreporter::trackExpressionValue(N, Ex, *report);
    }
    else
      // No operand was singled out, so track the whole operator.
      bugreporter::trackExpressionValue(N, B, *report);

    C.emitReport(std::move(report));
  }
}

void ento::registerUndefResultChecker(CheckerManager &mgr) {
  mgr.registerChecker<UndefResultChecker>();
}

bool ento::shouldRegisterUndefResultChecker(const LangOptions &LO) {
  return true;
}
diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp new file mode 100644 index 000000000000..2f075eaeb03b --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp @@ -0,0 +1,67 @@ +//===--- UndefinedArraySubscriptChecker.h ----------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines UndefinedArraySubscriptChecker, a builtin check in ExprEngine +// that performs checks for undefined array subscripts. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/DeclCXX.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class UndefinedArraySubscriptChecker + : public Checker< check::PreStmt<ArraySubscriptExpr> > { + mutable std::unique_ptr<BugType> BT; + +public: + void checkPreStmt(const ArraySubscriptExpr *A, CheckerContext &C) const; +}; +} // end anonymous namespace + +void +UndefinedArraySubscriptChecker::checkPreStmt(const ArraySubscriptExpr *A, + CheckerContext &C) const { + const Expr *Index = A->getIdx(); + if (!C.getSVal(Index).isUndef()) + return; + + // Sema generates anonymous array variables for copying array struct fields. + // Don't warn if we're in an implicitly-generated constructor. + const Decl *D = C.getLocationContext()->getDecl(); + if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(D)) + if (Ctor->isDefaulted()) + return; + + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + if (!BT) + BT.reset(new BuiltinBug(this, "Array subscript is undefined")); + + // Generate a report for this bug. 
+ auto R = std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N); + R->addRange(A->getIdx()->getSourceRange()); + bugreporter::trackExpressionValue(N, A->getIdx(), *R); + C.emitReport(std::move(R)); +} + +void ento::registerUndefinedArraySubscriptChecker(CheckerManager &mgr) { + mgr.registerChecker<UndefinedArraySubscriptChecker>(); +} + +bool ento::shouldRegisterUndefinedArraySubscriptChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp new file mode 100644 index 000000000000..277a8a143328 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp @@ -0,0 +1,125 @@ +//===--- UndefinedAssignmentChecker.h ---------------------------*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines UndefinedAssignmentChecker, a builtin check in ExprEngine that +// checks for assigning undefined values. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+/// Bind checker that reports stores of undefined values.
+class UndefinedAssignmentChecker
+  : public Checker<check::Bind> {
+  // Created lazily, right before the first report is built.
+  mutable std::unique_ptr<BugType> BT;
+
+public:
+  void checkBind(SVal location, SVal val, const Stmt *S,
+                 CheckerContext &C) const;
+};
+}
+
+/// Reports a bug when the bound value \p val is undefined. The message and the
+/// highlighted/tracked expression are chosen from the kind of statement
+/// \p StoreE that performs the store; if no specific message applies, the
+/// default "Assigned value is garbage or undefined" is used.
+void UndefinedAssignmentChecker::checkBind(SVal location, SVal val,
+                                           const Stmt *StoreE,
+                                           CheckerContext &C) const {
+  if (!val.isUndef())
+    return;
+
+  // Do not report assignments of uninitialized values inside swap functions.
+  // This should allow to swap partially uninitialized structs
+  // (radar://14129997)
+  if (const FunctionDecl *EnclosingFunctionDecl =
+      dyn_cast<FunctionDecl>(C.getStackFrame()->getDecl()))
+    if (C.getCalleeName(EnclosingFunctionDecl) == "swap")
+      return;
+
+  ExplodedNode *N = C.generateErrorNode();
+
+  if (!N)
+    return;
+
+  static const char *const DefaultMsg =
+      "Assigned value is garbage or undefined";
+  if (!BT)
+    BT.reset(new BuiltinBug(this, DefaultMsg));
+
+  // Generate a report for this bug.
+  llvm::SmallString<128> Str;
+  llvm::raw_svector_ostream OS(Str);
+
+  // Expression to highlight and track in the report; stays null if none of
+  // the cases below selects one.
+  const Expr *ex = nullptr;
+
+  // Not a real loop: the body ends with an unconditional 'break', so it runs
+  // at most once. 'while' is used so the cases can leave early with 'break'.
+  while (StoreE) {
+    if (const UnaryOperator *U = dyn_cast<UnaryOperator>(StoreE)) {
+      OS << "The expression is an uninitialized value. "
+            "The computed value will also be garbage";
+
+      ex = U->getSubExpr();
+      break;
+    }
+
+    if (const BinaryOperator *B = dyn_cast<BinaryOperator>(StoreE)) {
+      if (B->isCompoundAssignmentOp()) {
+        if (C.getSVal(B->getLHS()).isUndef()) {
+          OS << "The left expression of the compound assignment is an "
+                "uninitialized value. The computed value will also be garbage";
+          ex = B->getLHS();
+          break;
+        }
+      }
+
+      // Otherwise the right-hand side is the expression to track.
+      ex = B->getRHS();
+      break;
+    }
+
+    // A declaration: track the initializer. No 'break' here, so the
+    // constructor check below still runs.
+    if (const DeclStmt *DS = dyn_cast<DeclStmt>(StoreE)) {
+      const VarDecl *VD = cast<VarDecl>(DS->getSingleDecl());
+      ex = VD->getInit();
+    }
+
+    // Inside an implicit constructor, name the member whose initializer is
+    // the storing statement.
+    if (const auto *CD =
+            dyn_cast<CXXConstructorDecl>(C.getStackFrame()->getDecl())) {
+      if (CD->isImplicit()) {
+        for (auto I : CD->inits()) {
+          if (I->getInit()->IgnoreImpCasts() == StoreE) {
+            OS << "Value assigned to field '" << I->getMember()->getName()
+               << "' in implicit constructor is garbage or undefined";
+            break;
+          }
+        }
+      }
+    }
+
+    break;
+  }
+
+  // No case above produced a message: fall back to the default one.
+  if (OS.str().empty())
+    OS << DefaultMsg;
+
+  auto R = std::make_unique<PathSensitiveBugReport>(*BT, OS.str(), N);
+  if (ex) {
+    R->addRange(ex->getSourceRange());
+    bugreporter::trackExpressionValue(N, ex, *R);
+  }
+  C.emitReport(std::move(R));
+}
+
+/// Registers UndefinedAssignmentChecker with \p mgr.
+void ento::registerUndefinedAssignmentChecker(CheckerManager &mgr) {
+  mgr.registerChecker<UndefinedAssignmentChecker>();
+}
+
+/// The checker is enabled regardless of the language options.
+bool ento::shouldRegisterUndefinedAssignmentChecker(const LangOptions &LO) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObject.h b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObject.h
new file mode 100644
index 000000000000..2fcdd6086309
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObject.h
@@ -0,0 +1,356 @@
+//===----- UninitializedObject.h ---------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines helper classes for UninitializedObjectChecker and
+// documentation about the logic of it.
+//
+// The checker reports uninitialized fields in objects created after a
+// constructor call.
+//
+// This checker has several options:
+//   - "Pedantic" (boolean). If it's not set or is set to false, the checker
+//     won't emit warnings for objects that don't have at least one initialized
+//     field. This may be set with
+//
+//     `-analyzer-config optin.cplusplus.UninitializedObject:Pedantic=true`.
+//
+//   - "NotesAsWarnings" (boolean). If set to true, the checker will emit a
+//     warning for each uninitialized field, as opposed to emitting one warning
+//     per constructor call, and listing the uninitialized fields that belong
+//     to it in notes. Defaults to false.
+//
+//     `-analyzer-config \
+//         optin.cplusplus.UninitializedObject:NotesAsWarnings=true`.
+//
+//   - "CheckPointeeInitialization" (boolean). If set to false, the checker will
+//     not analyze the pointee of pointer/reference fields, and will only check
+//     whether the object itself is initialized. Defaults to false.
+//
+//     `-analyzer-config \
+//         optin.cplusplus.UninitializedObject:CheckPointeeInitialization=true`.
+//
+//     TODO: With some clever heuristics, some pointers should be dereferenced
+//     by default. For example, if the pointee is constructed within the
+//     constructor call, it's reasonable to say that no external object
+//     references it, and we wouldn't generate multiple reports on the same
+//     pointee.
+//
+//   - "IgnoreRecordsWithField" (string). If supplied, the checker will not
+//     analyze structures that have a field with a name or type name that
+//     matches the given pattern. Defaults to "".
+//
+//     `-analyzer-config \
+// optin.cplusplus.UninitializedObject:IgnoreRecordsWithField="[Tt]ag|[Kk]ind"`.
+//
+//   - "IgnoreGuardedFields" (boolean). If set to true, the checker will analyze
+//     _syntactically_ whether the found uninitialized object is used without a
+//     preceding assert call. Defaults to false.
+//
+//     `-analyzer-config \
+//         optin.cplusplus.UninitializedObject:IgnoreGuardedFields=true`.
+//
+// Most of the following methods as well as the checker itself are defined in
+// UninitializedObjectChecker.cpp.
+//
+// Some methods are implemented in UninitializedPointee.cpp, to reduce the
+// complexity of the main checker file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_STATICANALYZER_UNINITIALIZEDOBJECT_H
+#define LLVM_CLANG_STATICANALYZER_UNINITIALIZEDOBJECT_H
+
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+namespace clang {
+namespace ento {
+
+/// Checker options; see the list of options at the top of this file.
+struct UninitObjCheckerOptions {
+  // When false, FindUninitializedFields discards all of its findings if no
+  // field of the object turned out to be initialized.
+  bool IsPedantic = false;
+  // When true, one warning is emitted per uninitialized field instead of a
+  // single warning that lists the fields in notes.
+  bool ShouldConvertNotesToWarnings = false;
+  // The "CheckPointeeInitialization" option described at the top of this file.
+  bool CheckPointeeInitialization = false;
+  // Regular expression; records with a field whose name or type name matches
+  // it are not analyzed. An empty pattern disables the filter.
+  std::string IgnoredRecordsWithFieldPattern;
+  // When true, a field is only recorded as uninitialized if
+  // hasUnguardedAccess() reports an unguarded access to it.
+  bool IgnoreGuardedFields = false;
+};
+
+/// A lightweight polymorphic wrapper around FieldRegion *. We'll use this
+/// interface to store additional information about fields. As described
+/// later, a list of these objects (i.e. "fieldchain") will be constructed and
+/// used for printing note messages should an uninitialized value be found.
+class FieldNode {
+protected:
+  // The wrapped region; may be null for special descendants (see
+  // isSameRegion()).
+  const FieldRegion *FR;
+
+  /// FieldNodes are never meant to be created on the heap, see
+  /// FindUninitializedFields::addFieldToUninits().
+  /* non-virtual */ ~FieldNode() = default;
+
+public:
+  FieldNode(const FieldRegion *FR) : FR(FR) {}
+
+  // We'll delete all of these special member functions to force the users of
+  // this interface to only store references to FieldNode objects in containers.
+  FieldNode() = delete;
+  FieldNode(const FieldNode &) = delete;
+  FieldNode(FieldNode &&) = delete;
+  FieldNode &operator=(const FieldNode &) = delete;
+  FieldNode &operator=(const FieldNode &&) = delete;
+
+  /// Profiles the node by its own address rather than by the wrapped region.
+  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddPointer(this); }
+
+  /// Helper method for uniqueing.
+  bool isSameRegion(const FieldRegion *OtherFR) const {
+    // Special FieldNode descendants may wrap nullpointers (for example if they
+    // describe a special relationship between two elements of the fieldchain)
+    // -- we wouldn't like to unique these objects.
+    if (FR == nullptr)
+      return false;
+
+    return FR == OtherFR;
+  }
+
+  /// Returns the wrapped region, which may be null.
+  const FieldRegion *getRegion() const { return FR; }
+  /// Returns the declaration of the wrapped field. Must only be called on
+  /// nodes that wrap a non-null region.
+  const FieldDecl *getDecl() const {
+    assert(FR);
+    return FR->getDecl();
+  }
+
+  // When a fieldchain is printed, it will have the following format (without
+  // newline, indices are in order of insertion, from 1 to n):
+  //
+  // <note_message_n>'<prefix_n><prefix_n-1>...<prefix_1>
+  //       this-><node_1><separator_1><node_2><separator_2>...<node_n>'
+
+  /// If this is the last element of the fieldchain, this method will print the
+  /// note message associated with it.
+  /// The note message should state something like "uninitialized field" or
+  /// "uninitialized pointee" etc.
+  virtual void printNoteMsg(llvm::raw_ostream &Out) const = 0;
+
+  /// Print any prefixes before the fieldchain. Could contain casts, etc.
+  virtual void printPrefix(llvm::raw_ostream &Out) const = 0;
+
+  /// Print the node. Should contain the name of the field stored in FR.
+  virtual void printNode(llvm::raw_ostream &Out) const = 0;
+
+  /// Print the separator. For example, fields may be separated with '.' or
+  /// "->".
+  virtual void printSeparator(llvm::raw_ostream &Out) const = 0;
+
+  /// Returns true for nodes that describe a base class instead of a field.
+  virtual bool isBase() const { return false; }
+};
+
+/// Returns with Field's name. This is a helper function to get the correct name
+/// even if Field is a captured lambda variable.
+std::string getVariableName(const FieldDecl *Field);
+
+/// Represents a field chain. A field chain is a list of fields where the first
+/// element of the chain is the object under checking (not stored), and every
+/// other element is a field, and the element that precedes it is the object
+/// that contains it.
+///
+/// Note that this class is immutable (essentially a wrapper around an
+/// ImmutableList), new FieldChainInfo objects may be created by member
+/// functions such as add() and replaceHead().
+class FieldChainInfo {
+public:
+  using FieldChain = llvm::ImmutableList<const FieldNode &>;
+
+private:
+  // The factory is only referenced; it is owned elsewhere.
+  FieldChain::Factory &ChainFactory;
+  FieldChain Chain;
+
+  FieldChainInfo(FieldChain::Factory &F, FieldChain NewChain)
+      : FieldChainInfo(F) {
+    Chain = NewChain;
+  }
+
+public:
+  FieldChainInfo() = delete;
+  FieldChainInfo(FieldChain::Factory &F) : ChainFactory(F) {}
+  FieldChainInfo(const FieldChainInfo &Other) = default;
+
+  /// Constructs a new FieldChainInfo object with \p FN appended.
+  template <class FieldNodeT> FieldChainInfo add(const FieldNodeT &FN);
+
+  /// Constructs a new FieldChainInfo object with \p FN as the new head of the
+  /// list.
+  template <class FieldNodeT> FieldChainInfo replaceHead(const FieldNodeT &FN);
+
+  /// Returns true if a node of the chain wraps the region \p FR.
+  bool contains(const FieldRegion *FR) const;
+  bool isEmpty() const { return Chain.isEmpty(); }
+
+  /// Returns the most recently added node.
+  const FieldNode &getHead() const { return Chain.getHead(); }
+  /// Returns the region wrapped by the head of the chain.
+  const FieldRegion *getUninitRegion() const { return getHead().getRegion(); }
+
+  /// Prints the note message for the whole chain to \p Out.
+  void printNoteMsg(llvm::raw_ostream &Out) const;
+};
+
+/// Maps the region of an uninitialized field to its note message.
+using UninitFieldMap = std::map<const FieldRegion *, llvm::SmallString<50>>;
+
+/// Searches for and stores uninitialized fields in a non-union object.
+class FindUninitializedFields {
+  ProgramStateRef State;
+  const TypedValueRegion *const ObjectR;
+
+  const UninitObjCheckerOptions Opts;
+  bool IsAnyFieldInitialized = false;
+
+  FieldChainInfo::FieldChain::Factory ChainFactory;
+
+  /// A map for assigning uninitialized regions to note messages. For example,
+  ///
+  ///   struct A {
+  ///     int x;
+  ///   };
+  ///
+  ///   A a;
+  ///
+  /// After analyzing `a`, the map will contain a pair for `a.x`'s region and
+  /// the note message "uninitialized field 'this->x'".
+  UninitFieldMap UninitFields;
+
+public:
+  /// Constructs the FindUninitializedField object, searches for and stores
+  /// uninitialized fields in R.
+  FindUninitializedFields(ProgramStateRef State,
+                          const TypedValueRegion *const R,
+                          const UninitObjCheckerOptions &Opts);
+
+  /// Returns with the modified state and a map of (uninitialized region,
+  /// note message) pairs.
+  ///
+  /// The map is returned by reference to a member, so the result must not
+  /// outlive this object.
+  std::pair<ProgramStateRef, const UninitFieldMap &> getResults() {
+    return {State, UninitFields};
+  }
+
+  /// Returns whether the analyzed region contains at least one initialized
+  /// field. Note that this includes subfields as well, not just direct ones,
+  /// and will return false if an uninitialized pointee is found with
+  /// CheckPointeeInitialization enabled.
+  bool isAnyFieldInitialized() { return IsAnyFieldInitialized; }
+
+private:
+  // For the purposes of this checker, we'll regard the analyzed region as a
+  // directed tree, where
+  //   * the root is the object under checking
+  //   * every node is an object that is
+  //     - a union
+  //     - a non-union record
+  //     - dereferenceable (see isDereferencableType())
+  //     - an array
+  //     - of a primitive type (see isPrimitiveType())
+  //   * the parent of each node is the object that contains it
+  //   * every leaf is an array, a primitive object, a nullptr or an undefined
+  //   pointer.
+  //
+  // Example:
+  //
+  //   struct A {
+  //      struct B {
+  //        int x, y = 0;
+  //      };
+  //      B b;
+  //      int *iptr = new int;
+  //      B* bptr;
+  //
+  //      A() {}
+  //   };
+  //
+  // The directed tree:
+  //
+  //           ->x
+  //          /
+  //      ->b--->y
+  //     /
+  //    A-->iptr->(int value)
+  //     \
+  //      ->bptr
+  //
+  // From this we'll construct a vector of fieldchains, where each fieldchain
+  // represents an uninitialized field. An uninitialized field may be a
+  // primitive object, a pointer, a pointee or a union without a single
+  // initialized field.
+  // In the above example, for the default constructor call we'll end up with
+  // these fieldchains:
+  //
+  //   this->b.x
+  //   this->iptr (pointee uninit)
+  //   this->bptr (pointer uninit)
+  //
+  // We'll traverse each node of the above graph with the appropriate one of
+  // these methods:
+
+  /// Checks the region of a union object, and returns true if no field is
+  /// initialized within the region.
+  bool isUnionUninit(const TypedValueRegion *R);
+
+  /// Checks a region of a non-union object, and returns true if an
+  /// uninitialized field is found within the region.
+  bool isNonUnionUninit(const TypedValueRegion *R, FieldChainInfo LocalChain);
+
+  /// Checks a region of a pointer or reference object, and returns true if the
+  /// ptr/ref object itself or any field within the pointee's region is
+  /// uninitialized.
+  bool isDereferencableUninit(const FieldRegion *FR, FieldChainInfo LocalChain);
+
+  /// Returns true if the value of a primitive object is uninitialized.
+  bool isPrimitiveUninit(const SVal &V);
+
+  // Note that we don't have a method for arrays -- the elements of an array are
+  // often left uninitialized intentionally even when it is of a C++ record
+  // type, so we'll assume that an array is always initialized.
+  // TODO: Add support for nonloc::LocAsInteger.
+
+  /// Processes LocalChain and attempts to insert it into UninitFields. Returns
+  /// true on success. Also adds the head of the list and \p PointeeR (if
+  /// supplied) to the GDM as already analyzed objects.
+  ///
+  /// Since this class analyzes regions with recursion, we'll only store
+  /// references to temporary FieldNode objects created on the stack. This means
+  /// that after analyzing a leaf of the directed tree described above, the
+  /// elements LocalChain references will be destructed, so we can't store it
+  /// directly.
+  bool addFieldToUninits(FieldChainInfo LocalChain,
+                         const MemRegion *PointeeR = nullptr);
+};
+
+/// Returns true if T is a primitive type.
An object of a primitive type only +/// needs to be analyzed as much as checking whether their value is undefined. +inline bool isPrimitiveType(const QualType &T) { + return T->isBuiltinType() || T->isEnumeralType() || + T->isFunctionType() || T->isAtomicType() || + T->isVectorType() || T->isScalarType(); +} + +inline bool isDereferencableType(const QualType &T) { + return T->isAnyPointerType() || T->isReferenceType(); +} + +// Template method definitions. + +template <class FieldNodeT> +inline FieldChainInfo FieldChainInfo::add(const FieldNodeT &FN) { + assert(!contains(FN.getRegion()) && + "Can't add a field that is already a part of the " + "fieldchain! Is this a cyclic reference?"); + + FieldChainInfo NewChain = *this; + NewChain.Chain = ChainFactory.add(FN, Chain); + return NewChain; +} + +template <class FieldNodeT> +inline FieldChainInfo FieldChainInfo::replaceHead(const FieldNodeT &FN) { + FieldChainInfo NewChain(ChainFactory, Chain.getTail()); + return NewChain.add(FN); +} + +} // end of namespace ento +} // end of namespace clang + +#endif // LLVM_CLANG_STATICANALYZER_UNINITIALIZEDOBJECT_H diff --git a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObjectChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObjectChecker.cpp new file mode 100644 index 000000000000..020df8a1bb8c --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObjectChecker.cpp @@ -0,0 +1,633 @@ +//===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a checker that reports uninitialized fields in objects +// created after a constructor call. 
+//
+// To read about command line options and how the checker works, refer to the
+// top of the file and inline comments in UninitializedObject.h.
+//
+// Some of the logic is implemented in UninitializedPointee.cpp, to reduce the
+// complexity of this file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "UninitializedObject.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Driver/DriverDiagnostic.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h"
+
+using namespace clang;
+using namespace clang::ento;
+using namespace clang::ast_matchers;
+
+/// We'll mark fields (and pointee of fields) that are confirmed to be
+/// uninitialized as already analyzed.
+REGISTER_SET_WITH_PROGRAMSTATE(AnalyzedRegions, const MemRegion *)
+
+namespace {
+
+/// Reports uninitialized fields at the end of constructor calls
+/// (checkEndFunction) and handles dead regions of the AnalyzedRegions set
+/// (checkDeadSymbols).
+class UninitializedObjectChecker
+    : public Checker<check::EndFunction, check::DeadSymbols> {
+  // Bug type shared by every report of this checker.
+  std::unique_ptr<BuiltinBug> BT_uninitField;
+
+public:
+  // The fields of this struct will be initialized when registering the checker.
+  UninitObjCheckerOptions Opts;
+
+  UninitializedObjectChecker()
+      : BT_uninitField(new BuiltinBug(this, "Uninitialized fields")) {}
+
+  void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
+  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
+};
+
+/// A basic field type, that is not a pointer or a reference, its dynamic and
+/// static types are the same.
+class RegularField final : public FieldNode { +public: + RegularField(const FieldRegion *FR) : FieldNode(FR) {} + + virtual void printNoteMsg(llvm::raw_ostream &Out) const override { + Out << "uninitialized field "; + } + + virtual void printPrefix(llvm::raw_ostream &Out) const override {} + + virtual void printNode(llvm::raw_ostream &Out) const override { + Out << getVariableName(getDecl()); + } + + virtual void printSeparator(llvm::raw_ostream &Out) const override { + Out << '.'; + } +}; + +/// Represents that the FieldNode that comes after this is declared in a base +/// of the previous FieldNode. As such, this descendant doesn't wrap a +/// FieldRegion, and is purely a tool to describe a relation between two other +/// FieldRegion wrapping descendants. +class BaseClass final : public FieldNode { + const QualType BaseClassT; + +public: + BaseClass(const QualType &T) : FieldNode(nullptr), BaseClassT(T) { + assert(!T.isNull()); + assert(T->getAsCXXRecordDecl()); + } + + virtual void printNoteMsg(llvm::raw_ostream &Out) const override { + llvm_unreachable("This node can never be the final node in the " + "fieldchain!"); + } + + virtual void printPrefix(llvm::raw_ostream &Out) const override {} + + virtual void printNode(llvm::raw_ostream &Out) const override { + Out << BaseClassT->getAsCXXRecordDecl()->getName() << "::"; + } + + virtual void printSeparator(llvm::raw_ostream &Out) const override {} + + virtual bool isBase() const override { return true; } +}; + +} // end of anonymous namespace + +// Utility function declarations. + +/// Returns the region that was constructed by CtorDecl, or nullptr if that +/// isn't possible. +static const TypedValueRegion * +getConstructedRegion(const CXXConstructorDecl *CtorDecl, + CheckerContext &Context); + +/// Checks whether the object constructed by \p Ctor will be analyzed later +/// (e.g. if the object is a field of another object, in which case we'd check +/// it multiple times). 
+static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
+                                      CheckerContext &Context);
+
+/// Checks whether RD contains a field with a name or type name that matches
+/// \p Pattern.
+static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern);
+
+/// Checks _syntactically_ whether it is possible to access FD from the record
+/// that contains it without a preceding assert (even if that access happens
+/// inside a method). This is mainly used for records that act like unions, like
+/// having multiple bit fields, with only a fraction being properly initialized.
+/// If these fields are properly guarded with asserts, this method returns
+/// false.
+///
+/// Since this check is done syntactically, this method could be inaccurate.
+static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State);
+
+//===----------------------------------------------------------------------===//
+//                  Methods for UninitializedObjectChecker.
+//===----------------------------------------------------------------------===//
+
+/// At the end of a user-provided, non-union constructor, searches the
+/// constructed object for uninitialized fields and reports them: either as one
+/// warning with a note per field, or (with ShouldConvertNotesToWarnings) as
+/// one warning per field.
+void UninitializedObjectChecker::checkEndFunction(
+    const ReturnStmt *RS, CheckerContext &Context) const {
+
+  const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
+      Context.getLocationContext()->getDecl());
+  if (!CtorDecl)
+    return;
+
+  if (!CtorDecl->isUserProvided())
+    return;
+
+  if (CtorDecl->getParent()->isUnion())
+    return;
+
+  // This avoids essentially the same error being reported multiple times.
+  if (willObjectBeAnalyzedLater(CtorDecl, Context))
+    return;
+
+  const TypedValueRegion *R = getConstructedRegion(CtorDecl, Context);
+  if (!R)
+    return;
+
+  FindUninitializedFields F(Context.getState(), R, Opts);
+
+  std::pair<ProgramStateRef, const UninitFieldMap &> UninitInfo =
+      F.getResults();
+
+  ProgramStateRef UpdatedState = UninitInfo.first;
+  const UninitFieldMap &UninitFields = UninitInfo.second;
+
+  // Nothing to report, but the state returned by the search is still
+  // committed.
+  if (UninitFields.empty()) {
+    Context.addTransition(UpdatedState);
+    return;
+  }
+
+  // There are uninitialized fields in the record.
+
+  ExplodedNode *Node = Context.generateNonFatalErrorNode(UpdatedState);
+  if (!Node)
+    return;
+
+  // Reports are uniqued on the call site of the constructor, when there is one.
+  PathDiagnosticLocation LocUsedForUniqueing;
+  const Stmt *CallSite = Context.getStackFrame()->getCallSite();
+  if (CallSite)
+    LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
+        CallSite, Context.getSourceManager(), Node->getLocationContext());
+
+  // For Plist consumers that don't support notes just yet, we'll convert notes
+  // to warnings.
+  if (Opts.ShouldConvertNotesToWarnings) {
+    for (const auto &Pair : UninitFields) {
+
+      auto Report = std::make_unique<PathSensitiveBugReport>(
+          *BT_uninitField, Pair.second, Node, LocUsedForUniqueing,
+          Node->getLocationContext()->getDecl());
+      Context.emitReport(std::move(Report));
+    }
+    return;
+  }
+
+  SmallString<100> WarningBuf;
+  llvm::raw_svector_ostream WarningOS(WarningBuf);
+  WarningOS << UninitFields.size() << " uninitialized field"
+            << (UninitFields.size() == 1 ? "" : "s")
+            << " at the end of the constructor call";
+
+  auto Report = std::make_unique<PathSensitiveBugReport>(
+      *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
+      Node->getLocationContext()->getDecl());
+
+  // One note per uninitialized field, placed at the field's declaration.
+  for (const auto &Pair : UninitFields) {
+    Report->addNote(Pair.second,
+                    PathDiagnosticLocation::create(Pair.first->getDecl(),
+                                                   Context.getSourceManager()));
+  }
+  Context.emitReport(std::move(Report));
+}
+
+/// Removes regions that are no longer live from the AnalyzedRegions set.
+void UninitializedObjectChecker::checkDeadSymbols(SymbolReaper &SR,
+                                                  CheckerContext &C) const {
+  ProgramStateRef State = C.getState();
+  for (const MemRegion *R : State->get<AnalyzedRegions>()) {
+    if (!SR.isLiveRegion(R))
+      State = State->remove<AnalyzedRegions>(R);
+  }
+
+  // Commit the pruned set. Without this transition the removals above are
+  // discarded and AnalyzedRegions never shrinks.
+  C.addTransition(State);
+}
+
+//===----------------------------------------------------------------------===//
+//                   Methods for FindUninitializedFields.
+//===----------------------------------------------------------------------===//
+
+/// Runs the search on \p R right away; the results are available through
+/// getResults() once the constructor returns.
+FindUninitializedFields::FindUninitializedFields(
+    ProgramStateRef State, const TypedValueRegion *const R,
+    const UninitObjCheckerOptions &Opts)
+    : State(State), ObjectR(R), Opts(Opts) {
+
+  isNonUnionUninit(ObjectR, FieldChainInfo(ChainFactory));
+
+  // In non-pedantic mode, if ObjectR doesn't contain a single initialized
+  // field, we'll assume that Object was intentionally left uninitialized.
+  if (!Opts.IsPedantic && !isAnyFieldInitialized())
+    UninitFields.clear();
+}
+
+/// Records the head of \p Chain as uninitialized together with its note
+/// message. Returns false, recording nothing, when the field is declared in a
+/// system header, is filtered out by IgnoreGuardedFields, or when the field or
+/// \p PointeeR is already in AnalyzedRegions.
+bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain,
+                                                const MemRegion *PointeeR) {
+  const FieldRegion *FR = Chain.getUninitRegion();
+
+  assert((PointeeR || !isDereferencableType(FR->getDecl()->getType())) &&
+         "One must also pass the pointee region as a parameter for "
+         "dereferenceable fields!");
+
+  if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
+          FR->getDecl()->getLocation()))
+    return false;
+
+  if (Opts.IgnoreGuardedFields && !hasUnguardedAccess(FR->getDecl(), State))
+    return false;
+
+  if (State->contains<AnalyzedRegions>(FR))
+    return false;
+
+  if (PointeeR) {
+    if (State->contains<AnalyzedRegions>(PointeeR)) {
+      return false;
+    }
+    State = State->add<AnalyzedRegions>(PointeeR);
+  }
+
+  State = State->add<AnalyzedRegions>(FR);
+
+  UninitFieldMap::mapped_type NoteMsgBuf;
+  llvm::raw_svector_ostream OS(NoteMsgBuf);
+  Chain.printNoteMsg(OS);
+
+  return UninitFields.insert({FR, std::move(NoteMsgBuf)}).second;
+}
+
+/// Walks the fields and then the bases of the non-union record region \p R,
+/// dispatching on each field's type. Returns true if an uninitialized field
+/// was recorded.
+bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
+                                               FieldChainInfo LocalChain) {
+  assert(R->getValueType()->isRecordType() &&
+         !R->getValueType()->isUnionType() &&
+         "This method only checks non-union record objects!");
+
+  const RecordDecl *RD = R->getValueType()->getAsRecordDecl()->getDefinition();
+
+  if (!RD) {
+    IsAnyFieldInitialized = true;
+    return true;
+  }
+
+  if (!Opts.IgnoredRecordsWithFieldPattern.empty() &&
+      shouldIgnoreRecord(RD, Opts.IgnoredRecordsWithFieldPattern)) {
+    IsAnyFieldInitialized = true;
+    return false;
+  }
+
+  bool ContainsUninitField = false;
+
+  // Are all of this non-union's fields initialized?
+  for (const FieldDecl *I : RD->fields()) {
+
+    const auto FieldVal =
+        State->getLValue(I, loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
+    const auto *FR = FieldVal.getRegionAs<FieldRegion>();
+    QualType T = I->getType();
+
+    // If LocalChain already contains FR, then we encountered a cyclic
+    // reference. In this case, region FR is already under checking at an
+    // earlier node in the directed tree.
+    if (LocalChain.contains(FR))
+      return false;
+
+    if (T->isStructureOrClassType()) {
+      if (isNonUnionUninit(FR, LocalChain.add(RegularField(FR))))
+        ContainsUninitField = true;
+      continue;
+    }
+
+    if (T->isUnionType()) {
+      if (isUnionUninit(FR)) {
+        if (addFieldToUninits(LocalChain.add(RegularField(FR))))
+          ContainsUninitField = true;
+      } else
+        IsAnyFieldInitialized = true;
+      continue;
+    }
+
+    // Arrays are always regarded as initialized.
+    if (T->isArrayType()) {
+      IsAnyFieldInitialized = true;
+      continue;
+    }
+
+    SVal V = State->getSVal(FieldVal);
+
+    if (isDereferencableType(T) || V.getAs<nonloc::LocAsInteger>()) {
+      if (isDereferencableUninit(FR, LocalChain))
+        ContainsUninitField = true;
+      continue;
+    }
+
+    if (isPrimitiveType(T)) {
+      if (isPrimitiveUninit(V)) {
+        if (addFieldToUninits(LocalChain.add(RegularField(FR))))
+          ContainsUninitField = true;
+      }
+      continue;
+    }
+
+    llvm_unreachable("All cases are handled!");
+  }
+
+  // Checking bases. The checker will regard inherited data members as direct
+  // fields.
+  const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
+  if (!CXXRD)
+    return ContainsUninitField;
+
+  for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
+    const auto *BaseRegion = State->getLValue(BaseSpec, R)
+                                 .castAs<loc::MemRegionVal>()
+                                 .getRegionAs<TypedValueRegion>();
+
+    // If the head of the list is also a BaseClass, we'll overwrite it to avoid
+    // note messages like 'this->A::B::x'.
+    if (!LocalChain.isEmpty() && LocalChain.getHead().isBase()) {
+      if (isNonUnionUninit(BaseRegion, LocalChain.replaceHead(
+                                           BaseClass(BaseSpec.getType()))))
+        ContainsUninitField = true;
+    } else {
+      if (isNonUnionUninit(BaseRegion,
+                           LocalChain.add(BaseClass(BaseSpec.getType()))))
+        ContainsUninitField = true;
+    }
+  }
+
+  return ContainsUninitField;
+}
+
+/// Currently always returns false: unions are never reported as uninitialized.
+bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
+  assert(R->getValueType()->isUnionType() &&
+         "This method only checks union objects!");
+  // TODO: Implement support for union fields.
+  return false;
+}
+
+/// Returns true if \p V is undefined; otherwise notes that an initialized
+/// field was seen and returns false.
+bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) {
+  if (V.isUndef())
+    return true;
+
+  IsAnyFieldInitialized = true;
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+//                       Methods for FieldChainInfo.
+//===----------------------------------------------------------------------===//
+
+/// Returns true if a node of the chain wraps the region \p FR.
+bool FieldChainInfo::contains(const FieldRegion *FR) const {
+  for (const FieldNode &Node : Chain) {
+    if (Node.isSameRegion(FR))
+      return true;
+  }
+  return false;
+}
+
+/// Prints every element except the last to `Out`. Since ImmutableLists store
+/// elements in reverse order, and have no reverse iterators, we use a
+/// recursive function to print the fieldchain correctly. The last element in
+/// the chain is to be printed by `FieldChainInfo::printNoteMsg`.
+static void printTail(llvm::raw_ostream &Out,
+                      const FieldChainInfo::FieldChain L);
+
+// FIXME: This function constructs an incorrect string in the following case:
+//
+//   struct Base { int x; };
+//   struct D1 : Base {}; struct D2 : Base {};
+//
+//   struct MostDerived : D1, D2 {
+//     MostDerived() {}
+//   }
+//
+// A call to MostDerived::MostDerived() will cause two notes that say
+// "uninitialized field 'this->x'", but we can't refer to 'x' directly,
+// we need an explicit namespace resolution whether the uninit field was
+// 'D1::x' or 'D2::x'.
+void FieldChainInfo::printNoteMsg(llvm::raw_ostream &Out) const { + if (Chain.isEmpty()) + return; + + const FieldNode &LastField = getHead(); + + LastField.printNoteMsg(Out); + Out << '\''; + + for (const FieldNode &Node : Chain) + Node.printPrefix(Out); + + Out << "this->"; + printTail(Out, Chain.getTail()); + LastField.printNode(Out); + Out << '\''; +} + +static void printTail(llvm::raw_ostream &Out, + const FieldChainInfo::FieldChain L) { + if (L.isEmpty()) + return; + + printTail(Out, L.getTail()); + + L.getHead().printNode(Out); + L.getHead().printSeparator(Out); +} + +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +static const TypedValueRegion * +getConstructedRegion(const CXXConstructorDecl *CtorDecl, + CheckerContext &Context) { + + Loc ThisLoc = + Context.getSValBuilder().getCXXThis(CtorDecl, Context.getStackFrame()); + + SVal ObjectV = Context.getState()->getSVal(ThisLoc); + + auto *R = ObjectV.getAsRegion()->getAs<TypedValueRegion>(); + if (R && !R->getValueType()->getAsCXXRecordDecl()) + return nullptr; + + return R; +} + +static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor, + CheckerContext &Context) { + + const TypedValueRegion *CurrRegion = getConstructedRegion(Ctor, Context); + if (!CurrRegion) + return false; + + const LocationContext *LC = Context.getLocationContext(); + while ((LC = LC->getParent())) { + + // If \p Ctor was called by another constructor. + const auto *OtherCtor = dyn_cast<CXXConstructorDecl>(LC->getDecl()); + if (!OtherCtor) + continue; + + const TypedValueRegion *OtherRegion = + getConstructedRegion(OtherCtor, Context); + if (!OtherRegion) + continue; + + // If the CurrRegion is a subregion of OtherRegion, it will be analyzed + // during the analysis of OtherRegion. 
+ if (CurrRegion->isSubRegionOf(OtherRegion)) + return true; + } + + return false; +} + +static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern) { + llvm::Regex R(Pattern); + + for (const FieldDecl *FD : RD->fields()) { + if (R.match(FD->getType().getAsString())) + return true; + if (R.match(FD->getName())) + return true; + } + + return false; +} + +static const Stmt *getMethodBody(const CXXMethodDecl *M) { + if (isa<CXXConstructorDecl>(M)) + return nullptr; + + if (!M->isDefined()) + return nullptr; + + return M->getDefinition()->getBody(); +} + +static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State) { + + if (FD->getAccess() == AccessSpecifier::AS_public) + return true; + + const auto *Parent = dyn_cast<CXXRecordDecl>(FD->getParent()); + + if (!Parent) + return true; + + Parent = Parent->getDefinition(); + assert(Parent && "The record's definition must be avaible if an uninitialized" + " field of it was found!"); + + ASTContext &AC = State->getStateManager().getContext(); + + auto FieldAccessM = memberExpr(hasDeclaration(equalsNode(FD))).bind("access"); + + auto AssertLikeM = callExpr(callee(functionDecl( + anyOf(hasName("exit"), hasName("panic"), hasName("error"), + hasName("Assert"), hasName("assert"), hasName("ziperr"), + hasName("assfail"), hasName("db_error"), hasName("__assert"), + hasName("__assert2"), hasName("_wassert"), hasName("__assert_rtn"), + hasName("__assert_fail"), hasName("dtrace_assfail"), + hasName("yy_fatal_error"), hasName("_XCAssertionFailureHandler"), + hasName("_DTAssertionFailureHandler"), + hasName("_TSAssertionFailureHandler"))))); + + auto NoReturnFuncM = callExpr(callee(functionDecl(isNoReturn()))); + + auto GuardM = + stmt(anyOf(ifStmt(), switchStmt(), conditionalOperator(), AssertLikeM, + NoReturnFuncM)) + .bind("guard"); + + for (const CXXMethodDecl *M : Parent->methods()) { + const Stmt *MethodBody = getMethodBody(M); + if (!MethodBody) + continue; + + auto Accesses = 
match(stmt(hasDescendant(FieldAccessM)), *MethodBody, AC); + if (Accesses.empty()) + continue; + const auto *FirstAccess = Accesses[0].getNodeAs<MemberExpr>("access"); + assert(FirstAccess); + + auto Guards = match(stmt(hasDescendant(GuardM)), *MethodBody, AC); + if (Guards.empty()) + return true; + const auto *FirstGuard = Guards[0].getNodeAs<Stmt>("guard"); + assert(FirstGuard); + + if (FirstAccess->getBeginLoc() < FirstGuard->getBeginLoc()) + return true; + } + + return false; +} + +std::string clang::ento::getVariableName(const FieldDecl *Field) { + // If Field is a captured lambda variable, Field->getName() will return with + // an empty string. We can however acquire it's name from the lambda's + // captures. + const auto *CXXParent = dyn_cast<CXXRecordDecl>(Field->getParent()); + + if (CXXParent && CXXParent->isLambda()) { + assert(CXXParent->captures_begin()); + auto It = CXXParent->captures_begin() + Field->getFieldIndex(); + + if (It->capturesVariable()) + return llvm::Twine("/*captured variable*/" + + It->getCapturedVar()->getName()) + .str(); + + if (It->capturesThis()) + return "/*'this' capture*/"; + + llvm_unreachable("No other capture type is expected!"); + } + + return Field->getName(); +} + +void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) { + auto Chk = Mgr.registerChecker<UninitializedObjectChecker>(); + + AnalyzerOptions &AnOpts = Mgr.getAnalyzerOptions(); + UninitObjCheckerOptions &ChOpts = Chk->Opts; + + ChOpts.IsPedantic = AnOpts.getCheckerBooleanOption(Chk, "Pedantic"); + ChOpts.ShouldConvertNotesToWarnings = AnOpts.getCheckerBooleanOption( + Chk, "NotesAsWarnings"); + ChOpts.CheckPointeeInitialization = AnOpts.getCheckerBooleanOption( + Chk, "CheckPointeeInitialization"); + ChOpts.IgnoredRecordsWithFieldPattern = + AnOpts.getCheckerStringOption(Chk, "IgnoreRecordsWithField"); + ChOpts.IgnoreGuardedFields = + AnOpts.getCheckerBooleanOption(Chk, "IgnoreGuardedFields"); + + std::string ErrorMsg; + if 
(!llvm::Regex(ChOpts.IgnoredRecordsWithFieldPattern).isValid(ErrorMsg)) + Mgr.reportInvalidCheckerOptionValue(Chk, "IgnoreRecordsWithField", + "a valid regex, building failed with error message " + "\"" + ErrorMsg + "\""); +} + +bool ento::shouldRegisterUninitializedObjectChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp new file mode 100644 index 000000000000..f0dd0bf813af --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp @@ -0,0 +1,282 @@ +//===----- UninitializedPointee.cpp ------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines functions and methods for handling pointers and references +// to reduce the size and complexity of UninitializedObjectChecker.cpp. +// +// To read about command line options and documentation about how the checker +// works, refer to UninitializedObjectChecker.h. +// +//===----------------------------------------------------------------------===// + +#include "UninitializedObject.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h" + +using namespace clang; +using namespace clang::ento; + +namespace { + +/// Represents a pointer or a reference field. +class LocField final : public FieldNode { + /// We'll store whether the pointee or the pointer itself is uninitialited. 
+ const bool IsDereferenced; + +public: + LocField(const FieldRegion *FR, const bool IsDereferenced = true) + : FieldNode(FR), IsDereferenced(IsDereferenced) {} + + virtual void printNoteMsg(llvm::raw_ostream &Out) const override { + if (IsDereferenced) + Out << "uninitialized pointee "; + else + Out << "uninitialized pointer "; + } + + virtual void printPrefix(llvm::raw_ostream &Out) const override {} + + virtual void printNode(llvm::raw_ostream &Out) const override { + Out << getVariableName(getDecl()); + } + + virtual void printSeparator(llvm::raw_ostream &Out) const override { + if (getDecl()->getType()->isPointerType()) + Out << "->"; + else + Out << '.'; + } +}; + +/// Represents a nonloc::LocAsInteger or void* field, that point to objects, but +/// needs to be casted back to its dynamic type for a correct note message. +class NeedsCastLocField final : public FieldNode { + QualType CastBackType; + +public: + NeedsCastLocField(const FieldRegion *FR, const QualType &T) + : FieldNode(FR), CastBackType(T) {} + + virtual void printNoteMsg(llvm::raw_ostream &Out) const override { + Out << "uninitialized pointee "; + } + + virtual void printPrefix(llvm::raw_ostream &Out) const override { + // If this object is a nonloc::LocAsInteger. + if (getDecl()->getType()->isIntegerType()) + Out << "reinterpret_cast"; + // If this pointer's dynamic type is different then it's static type. + else + Out << "static_cast"; + Out << '<' << CastBackType.getAsString() << ">("; + } + + virtual void printNode(llvm::raw_ostream &Out) const override { + Out << getVariableName(getDecl()) << ')'; + } + + virtual void printSeparator(llvm::raw_ostream &Out) const override { + Out << "->"; + } +}; + +/// Represents a Loc field that points to itself. 
+class CyclicLocField final : public FieldNode { + +public: + CyclicLocField(const FieldRegion *FR) : FieldNode(FR) {} + + virtual void printNoteMsg(llvm::raw_ostream &Out) const override { + Out << "object references itself "; + } + + virtual void printPrefix(llvm::raw_ostream &Out) const override {} + + virtual void printNode(llvm::raw_ostream &Out) const override { + Out << getVariableName(getDecl()); + } + + virtual void printSeparator(llvm::raw_ostream &Out) const override { + llvm_unreachable("CyclicLocField objects must be the last node of the " + "fieldchain!"); + } +}; + +} // end of anonymous namespace + +// Utility function declarations. + +struct DereferenceInfo { + const TypedValueRegion *R; + const bool NeedsCastBack; + const bool IsCyclic; + DereferenceInfo(const TypedValueRegion *R, bool NCB, bool IC) + : R(R), NeedsCastBack(NCB), IsCyclic(IC) {} +}; + +/// Dereferences \p FR and returns with the pointee's region, and whether it +/// needs to be casted back to it's location type. If for whatever reason +/// dereferencing fails, returns with None. +static llvm::Optional<DereferenceInfo> dereference(ProgramStateRef State, + const FieldRegion *FR); + +/// Returns whether \p T can be (transitively) dereferenced to a void pointer +/// type (void*, void**, ...). +static bool isVoidPointer(QualType T); + +//===----------------------------------------------------------------------===// +// Methods for FindUninitializedFields. 
+//===----------------------------------------------------------------------===// + +bool FindUninitializedFields::isDereferencableUninit( + const FieldRegion *FR, FieldChainInfo LocalChain) { + + SVal V = State->getSVal(FR); + + assert((isDereferencableType(FR->getDecl()->getType()) || + V.getAs<nonloc::LocAsInteger>()) && + "This method only checks dereferenceable objects!"); + + if (V.isUnknown() || V.getAs<loc::ConcreteInt>()) { + IsAnyFieldInitialized = true; + return false; + } + + if (V.isUndef()) { + return addFieldToUninits( + LocalChain.add(LocField(FR, /*IsDereferenced*/ false)), FR); + } + + if (!Opts.CheckPointeeInitialization) { + IsAnyFieldInitialized = true; + return false; + } + + // At this point the pointer itself is initialized and points to a valid + // location, we'll now check the pointee. + llvm::Optional<DereferenceInfo> DerefInfo = dereference(State, FR); + if (!DerefInfo) { + IsAnyFieldInitialized = true; + return false; + } + + if (DerefInfo->IsCyclic) + return addFieldToUninits(LocalChain.add(CyclicLocField(FR)), FR); + + const TypedValueRegion *R = DerefInfo->R; + const bool NeedsCastBack = DerefInfo->NeedsCastBack; + + QualType DynT = R->getLocationType(); + QualType PointeeT = DynT->getPointeeType(); + + if (PointeeT->isStructureOrClassType()) { + if (NeedsCastBack) + return isNonUnionUninit(R, LocalChain.add(NeedsCastLocField(FR, DynT))); + return isNonUnionUninit(R, LocalChain.add(LocField(FR))); + } + + if (PointeeT->isUnionType()) { + if (isUnionUninit(R)) { + if (NeedsCastBack) + return addFieldToUninits(LocalChain.add(NeedsCastLocField(FR, DynT)), + R); + return addFieldToUninits(LocalChain.add(LocField(FR)), R); + } else { + IsAnyFieldInitialized = true; + return false; + } + } + + if (PointeeT->isArrayType()) { + IsAnyFieldInitialized = true; + return false; + } + + assert((isPrimitiveType(PointeeT) || isDereferencableType(PointeeT)) && + "At this point FR must either have a primitive dynamic type, or it " + "must be a null, 
undefined, unknown or concrete pointer!"); + + SVal PointeeV = State->getSVal(R); + + if (isPrimitiveUninit(PointeeV)) { + if (NeedsCastBack) + return addFieldToUninits(LocalChain.add(NeedsCastLocField(FR, DynT)), R); + return addFieldToUninits(LocalChain.add(LocField(FR)), R); + } + + IsAnyFieldInitialized = true; + return false; +} + +//===----------------------------------------------------------------------===// +// Utility functions. +//===----------------------------------------------------------------------===// + +static llvm::Optional<DereferenceInfo> dereference(ProgramStateRef State, + const FieldRegion *FR) { + + llvm::SmallSet<const TypedValueRegion *, 5> VisitedRegions; + + SVal V = State->getSVal(FR); + assert(V.getAsRegion() && "V must have an underlying region!"); + + // If the static type of the field is a void pointer, or it is a + // nonloc::LocAsInteger, we need to cast it back to the dynamic type before + // dereferencing. + bool NeedsCastBack = isVoidPointer(FR->getDecl()->getType()) || + V.getAs<nonloc::LocAsInteger>(); + + // The region we'd like to acquire. + const auto *R = V.getAsRegion()->getAs<TypedValueRegion>(); + if (!R) + return None; + + VisitedRegions.insert(R); + + // We acquire the dynamic type of R, + QualType DynT = R->getLocationType(); + + while (const MemRegion *Tmp = State->getSVal(R, DynT).getAsRegion()) { + + R = Tmp->getAs<TypedValueRegion>(); + if (!R) + return None; + + // We found a cyclic pointer, like int *ptr = (int *)&ptr. + if (!VisitedRegions.insert(R).second) + return DereferenceInfo{R, NeedsCastBack, /*IsCyclic*/ true}; + + DynT = R->getLocationType(); + // In order to ensure that this loop terminates, we're also checking the + // dynamic type of R, since type hierarchy is finite. 
+ if (isDereferencableType(DynT->getPointeeType())) + break; + } + + while (isa<CXXBaseObjectRegion>(R)) { + NeedsCastBack = true; + const auto *SuperR = dyn_cast<TypedValueRegion>(R->getSuperRegion()); + if (!SuperR) + break; + + R = SuperR; + } + + return DereferenceInfo{R, NeedsCastBack, /*IsCyclic*/ false}; +} + +static bool isVoidPointer(QualType T) { + while (!T.isNull()) { + if (T->isVoidPointerType()) + return true; + T = T->getPointeeType(); + } + return false; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp new file mode 100644 index 000000000000..f4e225d836f3 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp @@ -0,0 +1,511 @@ +//= UnixAPIChecker.h - Checks preconditions for various Unix APIs --*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines UnixAPIChecker, which is an assortment of checks on calls +// to various, widely used UNIX/Posix functions. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; + +enum class OpenVariant { + /// The standard open() call: + /// int open(const char *path, int oflag, ...); + Open, + + /// The variant taking a directory file descriptor and a relative path: + /// int openat(int fd, const char *path, int oflag, ...); + OpenAt +}; + +namespace { + +class UnixAPIMisuseChecker : public Checker< check::PreStmt<CallExpr> > { + mutable std::unique_ptr<BugType> BT_open, BT_pthreadOnce; + mutable Optional<uint64_t> Val_O_CREAT; + +public: + DefaultBool CheckMisuse, CheckPortability; + + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + + void CheckOpen(CheckerContext &C, const CallExpr *CE) const; + void CheckOpenAt(CheckerContext &C, const CallExpr *CE) const; + void CheckPthreadOnce(CheckerContext &C, const CallExpr *CE) const; + + void CheckOpenVariant(CheckerContext &C, + const CallExpr *CE, OpenVariant Variant) const; + + void ReportOpenBug(CheckerContext &C, + ProgramStateRef State, + const char *Msg, + SourceRange SR) const; + +}; + +class UnixAPIPortabilityChecker : public Checker< check::PreStmt<CallExpr> > { +public: + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + +private: + mutable std::unique_ptr<BugType> BT_mallocZero; + + void CheckCallocZero(CheckerContext &C, const CallExpr *CE) const; + void CheckMallocZero(CheckerContext &C, const CallExpr *CE) const; + void 
CheckReallocZero(CheckerContext &C, const CallExpr *CE) const; + void CheckReallocfZero(CheckerContext &C, const CallExpr *CE) const; + void CheckAllocaZero(CheckerContext &C, const CallExpr *CE) const; + void CheckAllocaWithAlignZero(CheckerContext &C, const CallExpr *CE) const; + void CheckVallocZero(CheckerContext &C, const CallExpr *CE) const; + + bool ReportZeroByteAllocation(CheckerContext &C, + ProgramStateRef falseState, + const Expr *arg, + const char *fn_name) const; + void BasicAllocationCheck(CheckerContext &C, + const CallExpr *CE, + const unsigned numArgs, + const unsigned sizeArg, + const char *fn) const; +}; + +} //end anonymous namespace + +static void LazyInitialize(const CheckerBase *Checker, + std::unique_ptr<BugType> &BT, + const char *name) { + if (BT) + return; + BT.reset(new BugType(Checker, name, categories::UnixAPI)); +} + +//===----------------------------------------------------------------------===// +// "open" (man 2 open) +//===----------------------------------------------------------------------===/ + +void UnixAPIMisuseChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD || FD->getKind() != Decl::Function) + return; + + // Don't treat functions in namespaces with the same name a Unix function + // as a call to the Unix function. 
+ const DeclContext *NamespaceCtx = FD->getEnclosingNamespaceContext(); + if (NamespaceCtx && isa<NamespaceDecl>(NamespaceCtx)) + return; + + StringRef FName = C.getCalleeName(FD); + if (FName.empty()) + return; + + if (FName == "open") + CheckOpen(C, CE); + + else if (FName == "openat") + CheckOpenAt(C, CE); + + else if (FName == "pthread_once") + CheckPthreadOnce(C, CE); +} +void UnixAPIMisuseChecker::ReportOpenBug(CheckerContext &C, + ProgramStateRef State, + const char *Msg, + SourceRange SR) const { + ExplodedNode *N = C.generateErrorNode(State); + if (!N) + return; + + LazyInitialize(this, BT_open, "Improper use of 'open'"); + + auto Report = std::make_unique<PathSensitiveBugReport>(*BT_open, Msg, N); + Report->addRange(SR); + C.emitReport(std::move(Report)); +} + +void UnixAPIMisuseChecker::CheckOpen(CheckerContext &C, + const CallExpr *CE) const { + CheckOpenVariant(C, CE, OpenVariant::Open); +} + +void UnixAPIMisuseChecker::CheckOpenAt(CheckerContext &C, + const CallExpr *CE) const { + CheckOpenVariant(C, CE, OpenVariant::OpenAt); +} + +void UnixAPIMisuseChecker::CheckOpenVariant(CheckerContext &C, + const CallExpr *CE, + OpenVariant Variant) const { + // The index of the argument taking the flags open flags (O_RDONLY, + // O_WRONLY, O_CREAT, etc.), + unsigned int FlagsArgIndex; + const char *VariantName; + switch (Variant) { + case OpenVariant::Open: + FlagsArgIndex = 1; + VariantName = "open"; + break; + case OpenVariant::OpenAt: + FlagsArgIndex = 2; + VariantName = "openat"; + break; + }; + + // All calls should at least provide arguments up to the 'flags' parameter. + unsigned int MinArgCount = FlagsArgIndex + 1; + + // If the flags has O_CREAT set then open/openat() require an additional + // argument specifying the file mode (permission bits) for the created file. + unsigned int CreateModeArgIndex = FlagsArgIndex + 1; + + // The create mode argument should be the last argument. 
+ unsigned int MaxArgCount = CreateModeArgIndex + 1; + + ProgramStateRef state = C.getState(); + + if (CE->getNumArgs() < MinArgCount) { + // The frontend should issue a warning for this case, so this is a sanity + // check. + return; + } else if (CE->getNumArgs() == MaxArgCount) { + const Expr *Arg = CE->getArg(CreateModeArgIndex); + QualType QT = Arg->getType(); + if (!QT->isIntegerType()) { + SmallString<256> SBuf; + llvm::raw_svector_ostream OS(SBuf); + OS << "The " << CreateModeArgIndex + 1 + << llvm::getOrdinalSuffix(CreateModeArgIndex + 1) + << " argument to '" << VariantName << "' is not an integer"; + + ReportOpenBug(C, state, + SBuf.c_str(), + Arg->getSourceRange()); + return; + } + } else if (CE->getNumArgs() > MaxArgCount) { + SmallString<256> SBuf; + llvm::raw_svector_ostream OS(SBuf); + OS << "Call to '" << VariantName << "' with more than " << MaxArgCount + << " arguments"; + + ReportOpenBug(C, state, + SBuf.c_str(), + CE->getArg(MaxArgCount)->getSourceRange()); + return; + } + + // The definition of O_CREAT is platform specific. We need a better way + // of querying this information from the checking environment. + if (!Val_O_CREAT.hasValue()) { + if (C.getASTContext().getTargetInfo().getTriple().getVendor() + == llvm::Triple::Apple) + Val_O_CREAT = 0x0200; + else { + // FIXME: We need a more general way of getting the O_CREAT value. + // We could possibly grovel through the preprocessor state, but + // that would require passing the Preprocessor object to the ExprEngine. + // See also: MallocChecker.cpp / M_ZERO. + return; + } + } + + // Now check if oflags has O_CREAT set. + const Expr *oflagsEx = CE->getArg(FlagsArgIndex); + const SVal V = C.getSVal(oflagsEx); + if (!V.getAs<NonLoc>()) { + // The case where 'V' can be a location can only be due to a bad header, + // so in this case bail out. 
+ return; + } + NonLoc oflags = V.castAs<NonLoc>(); + NonLoc ocreateFlag = C.getSValBuilder() + .makeIntVal(Val_O_CREAT.getValue(), oflagsEx->getType()).castAs<NonLoc>(); + SVal maskedFlagsUC = C.getSValBuilder().evalBinOpNN(state, BO_And, + oflags, ocreateFlag, + oflagsEx->getType()); + if (maskedFlagsUC.isUnknownOrUndef()) + return; + DefinedSVal maskedFlags = maskedFlagsUC.castAs<DefinedSVal>(); + + // Check if maskedFlags is non-zero. + ProgramStateRef trueState, falseState; + std::tie(trueState, falseState) = state->assume(maskedFlags); + + // Only emit an error if the value of 'maskedFlags' is properly + // constrained; + if (!(trueState && !falseState)) + return; + + if (CE->getNumArgs() < MaxArgCount) { + SmallString<256> SBuf; + llvm::raw_svector_ostream OS(SBuf); + OS << "Call to '" << VariantName << "' requires a " + << CreateModeArgIndex + 1 + << llvm::getOrdinalSuffix(CreateModeArgIndex + 1) + << " argument when the 'O_CREAT' flag is set"; + ReportOpenBug(C, trueState, + SBuf.c_str(), + oflagsEx->getSourceRange()); + } +} + +//===----------------------------------------------------------------------===// +// pthread_once +//===----------------------------------------------------------------------===// + +void UnixAPIMisuseChecker::CheckPthreadOnce(CheckerContext &C, + const CallExpr *CE) const { + + // This is similar to 'CheckDispatchOnce' in the MacOSXAPIChecker. + // They can possibly be refactored. + + if (CE->getNumArgs() < 1) + return; + + // Check if the first argument is stack allocated. If so, issue a warning + // because that's likely to be bad news. 
+ ProgramStateRef state = C.getState(); + const MemRegion *R = C.getSVal(CE->getArg(0)).getAsRegion(); + if (!R || !isa<StackSpaceRegion>(R->getMemorySpace())) + return; + + ExplodedNode *N = C.generateErrorNode(state); + if (!N) + return; + + SmallString<256> S; + llvm::raw_svector_ostream os(S); + os << "Call to 'pthread_once' uses"; + if (const VarRegion *VR = dyn_cast<VarRegion>(R)) + os << " the local variable '" << VR->getDecl()->getName() << '\''; + else + os << " stack allocated memory"; + os << " for the \"control\" value. Using such transient memory for " + "the control value is potentially dangerous."; + if (isa<VarRegion>(R) && isa<StackLocalsSpaceRegion>(R->getMemorySpace())) + os << " Perhaps you intended to declare the variable as 'static'?"; + + LazyInitialize(this, BT_pthreadOnce, "Improper use of 'pthread_once'"); + + auto report = + std::make_unique<PathSensitiveBugReport>(*BT_pthreadOnce, os.str(), N); + report->addRange(CE->getArg(0)->getSourceRange()); + C.emitReport(std::move(report)); +} + +//===----------------------------------------------------------------------===// +// "calloc", "malloc", "realloc", "reallocf", "alloca" and "valloc" +// with allocation size 0 +//===----------------------------------------------------------------------===// + +// FIXME: Eventually these should be rolled into the MallocChecker, but right now +// they're more basic and valuable for widespread use. + +// Returns true if we try to do a zero byte allocation, false otherwise. +// Fills in trueState and falseState. +static bool IsZeroByteAllocation(ProgramStateRef state, + const SVal argVal, + ProgramStateRef *trueState, + ProgramStateRef *falseState) { + std::tie(*trueState, *falseState) = + state->assume(argVal.castAs<DefinedSVal>()); + + return (*falseState && !*trueState); +} + +// Generates an error report, indicating that the function whose name is given +// will perform a zero byte allocation. +// Returns false if an error occurred, true otherwise. 
+bool UnixAPIPortabilityChecker::ReportZeroByteAllocation( + CheckerContext &C, + ProgramStateRef falseState, + const Expr *arg, + const char *fn_name) const { + ExplodedNode *N = C.generateErrorNode(falseState); + if (!N) + return false; + + LazyInitialize(this, BT_mallocZero, + "Undefined allocation of 0 bytes (CERT MEM04-C; CWE-131)"); + + SmallString<256> S; + llvm::raw_svector_ostream os(S); + os << "Call to '" << fn_name << "' has an allocation size of 0 bytes"; + auto report = + std::make_unique<PathSensitiveBugReport>(*BT_mallocZero, os.str(), N); + + report->addRange(arg->getSourceRange()); + bugreporter::trackExpressionValue(N, arg, *report); + C.emitReport(std::move(report)); + + return true; +} + +// Does a basic check for 0-sized allocations suitable for most of the below +// functions (modulo "calloc") +void UnixAPIPortabilityChecker::BasicAllocationCheck(CheckerContext &C, + const CallExpr *CE, + const unsigned numArgs, + const unsigned sizeArg, + const char *fn) const { + // Sanity check for the correct number of arguments + if (CE->getNumArgs() != numArgs) + return; + + // Check if the allocation size is 0. + ProgramStateRef state = C.getState(); + ProgramStateRef trueState = nullptr, falseState = nullptr; + const Expr *arg = CE->getArg(sizeArg); + SVal argVal = C.getSVal(arg); + + if (argVal.isUnknownOrUndef()) + return; + + // Is the value perfectly constrained to zero? + if (IsZeroByteAllocation(state, argVal, &trueState, &falseState)) { + (void) ReportZeroByteAllocation(C, falseState, arg, fn); + return; + } + // Assume the value is non-zero going forward. 
+ assert(trueState); + if (trueState != state) + C.addTransition(trueState); +} + +void UnixAPIPortabilityChecker::CheckCallocZero(CheckerContext &C, + const CallExpr *CE) const { + unsigned int nArgs = CE->getNumArgs(); + if (nArgs != 2) + return; + + ProgramStateRef state = C.getState(); + ProgramStateRef trueState = nullptr, falseState = nullptr; + + unsigned int i; + for (i = 0; i < nArgs; i++) { + const Expr *arg = CE->getArg(i); + SVal argVal = C.getSVal(arg); + if (argVal.isUnknownOrUndef()) { + if (i == 0) + continue; + else + return; + } + + if (IsZeroByteAllocation(state, argVal, &trueState, &falseState)) { + if (ReportZeroByteAllocation(C, falseState, arg, "calloc")) + return; + else if (i == 0) + continue; + else + return; + } + } + + // Assume the value is non-zero going forward. + assert(trueState); + if (trueState != state) + C.addTransition(trueState); +} + +void UnixAPIPortabilityChecker::CheckMallocZero(CheckerContext &C, + const CallExpr *CE) const { + BasicAllocationCheck(C, CE, 1, 0, "malloc"); +} + +void UnixAPIPortabilityChecker::CheckReallocZero(CheckerContext &C, + const CallExpr *CE) const { + BasicAllocationCheck(C, CE, 2, 1, "realloc"); +} + +void UnixAPIPortabilityChecker::CheckReallocfZero(CheckerContext &C, + const CallExpr *CE) const { + BasicAllocationCheck(C, CE, 2, 1, "reallocf"); +} + +void UnixAPIPortabilityChecker::CheckAllocaZero(CheckerContext &C, + const CallExpr *CE) const { + BasicAllocationCheck(C, CE, 1, 0, "alloca"); +} + +void UnixAPIPortabilityChecker::CheckAllocaWithAlignZero( + CheckerContext &C, + const CallExpr *CE) const { + BasicAllocationCheck(C, CE, 2, 0, "__builtin_alloca_with_align"); +} + +void UnixAPIPortabilityChecker::CheckVallocZero(CheckerContext &C, + const CallExpr *CE) const { + BasicAllocationCheck(C, CE, 1, 0, "valloc"); +} + +void UnixAPIPortabilityChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + const FunctionDecl *FD = C.getCalleeDecl(CE); + if (!FD || FD->getKind() != 
Decl::Function) + return; + + // Don't treat functions in namespaces with the same name a Unix function + // as a call to the Unix function. + const DeclContext *NamespaceCtx = FD->getEnclosingNamespaceContext(); + if (NamespaceCtx && isa<NamespaceDecl>(NamespaceCtx)) + return; + + StringRef FName = C.getCalleeName(FD); + if (FName.empty()) + return; + + if (FName == "calloc") + CheckCallocZero(C, CE); + + else if (FName == "malloc") + CheckMallocZero(C, CE); + + else if (FName == "realloc") + CheckReallocZero(C, CE); + + else if (FName == "reallocf") + CheckReallocfZero(C, CE); + + else if (FName == "alloca" || FName == "__builtin_alloca") + CheckAllocaZero(C, CE); + + else if (FName == "__builtin_alloca_with_align") + CheckAllocaWithAlignZero(C, CE); + + else if (FName == "valloc") + CheckVallocZero(C, CE); +} + +//===----------------------------------------------------------------------===// +// Registration. +//===----------------------------------------------------------------------===// + +#define REGISTER_CHECKER(CHECKERNAME) \ + void ento::register##CHECKERNAME(CheckerManager &mgr) { \ + mgr.registerChecker<CHECKERNAME>(); \ + } \ + \ + bool ento::shouldRegister##CHECKERNAME(const LangOptions &LO) { \ + return true; \ + } + +REGISTER_CHECKER(UnixAPIMisuseChecker) +REGISTER_CHECKER(UnixAPIPortabilityChecker) diff --git a/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp new file mode 100644 index 000000000000..65dd82675df9 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp @@ -0,0 +1,262 @@ +//==- UnreachableCodeChecker.cpp - Generalized dead code checker -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file implements a generalized unreachable code checker using a +// path-sensitive analysis. We mark any path visited, and then walk the CFG as a +// post-analysis to determine what was never visited. +// +// A similar flow-sensitive only check exists in Analysis/ReachableCode.cpp +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/ParentMap.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/SourceManager.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" +#include "llvm/ADT/SmallSet.h" + +using namespace clang; +using namespace ento; + +namespace { +class UnreachableCodeChecker : public Checker<check::EndAnalysis> { +public: + void checkEndAnalysis(ExplodedGraph &G, BugReporter &B, + ExprEngine &Eng) const; +private: + typedef llvm::SmallSet<unsigned, 32> CFGBlocksSet; + + static inline const Stmt *getUnreachableStmt(const CFGBlock *CB); + static void FindUnreachableEntryPoints(const CFGBlock *CB, + CFGBlocksSet &reachable, + CFGBlocksSet &visited); + static bool isInvalidPath(const CFGBlock *CB, const ParentMap &PM); + static inline bool isEmptyCFGBlock(const CFGBlock *CB); +}; +} + +void UnreachableCodeChecker::checkEndAnalysis(ExplodedGraph &G, + BugReporter &B, + ExprEngine &Eng) const { + CFGBlocksSet reachable, visited; + + if (Eng.hasWorkRemaining()) + return; + + const Decl *D = nullptr; + CFG *C 
= nullptr; + const ParentMap *PM = nullptr; + const LocationContext *LC = nullptr; + // Iterate over ExplodedGraph + for (ExplodedGraph::node_iterator I = G.nodes_begin(), E = G.nodes_end(); + I != E; ++I) { + const ProgramPoint &P = I->getLocation(); + LC = P.getLocationContext(); + if (!LC->inTopFrame()) + continue; + + if (!D) + D = LC->getAnalysisDeclContext()->getDecl(); + + // Save the CFG if we don't have it already + if (!C) + C = LC->getAnalysisDeclContext()->getUnoptimizedCFG(); + if (!PM) + PM = &LC->getParentMap(); + + if (Optional<BlockEntrance> BE = P.getAs<BlockEntrance>()) { + const CFGBlock *CB = BE->getBlock(); + reachable.insert(CB->getBlockID()); + } + } + + // Bail out if we didn't get the CFG or the ParentMap. + if (!D || !C || !PM) + return; + + // Don't do anything for template instantiations. Proving that code + // in a template instantiation is unreachable means proving that it is + // unreachable in all instantiations. + if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) + if (FD->isTemplateInstantiation()) + return; + + // Find CFGBlocks that were not covered by any node + for (CFG::const_iterator I = C->begin(), E = C->end(); I != E; ++I) { + const CFGBlock *CB = *I; + // Check if the block is unreachable + if (reachable.count(CB->getBlockID())) + continue; + + // Check if the block is empty (an artificial block) + if (isEmptyCFGBlock(CB)) + continue; + + // Find the entry points for this block + if (!visited.count(CB->getBlockID())) + FindUnreachableEntryPoints(CB, reachable, visited); + + // This block may have been pruned; check if we still want to report it + if (reachable.count(CB->getBlockID())) + continue; + + // Check for false positives + if (isInvalidPath(CB, *PM)) + continue; + + // It is good practice to always have a "default" label in a "switch", even + // if we should never get there. It can be used to detect errors, for + // instance. 
Unreachable code directly under a "default" label is therefore + // likely to be a false positive. + if (const Stmt *label = CB->getLabel()) + if (label->getStmtClass() == Stmt::DefaultStmtClass) + continue; + + // Special case for __builtin_unreachable. + // FIXME: This should be extended to include other unreachable markers, + // such as llvm_unreachable. + if (!CB->empty()) { + bool foundUnreachable = false; + for (CFGBlock::const_iterator ci = CB->begin(), ce = CB->end(); + ci != ce; ++ci) { + if (Optional<CFGStmt> S = (*ci).getAs<CFGStmt>()) + if (const CallExpr *CE = dyn_cast<CallExpr>(S->getStmt())) { + if (CE->getBuiltinCallee() == Builtin::BI__builtin_unreachable || + CE->isBuiltinAssumeFalse(Eng.getContext())) { + foundUnreachable = true; + break; + } + } + } + if (foundUnreachable) + continue; + } + + // We found a block that wasn't covered - find the statement to report + SourceRange SR; + PathDiagnosticLocation DL; + SourceLocation SL; + if (const Stmt *S = getUnreachableStmt(CB)) { + // In macros, 'do {...} while (0)' is often used. Don't warn about the + // condition 0 when it is unreachable. + if (S->getBeginLoc().isMacroID()) + if (const auto *I = dyn_cast<IntegerLiteral>(S)) + if (I->getValue() == 0ULL) + if (const Stmt *Parent = PM->getParent(S)) + if (isa<DoStmt>(Parent)) + continue; + SR = S->getSourceRange(); + DL = PathDiagnosticLocation::createBegin(S, B.getSourceManager(), LC); + SL = DL.asLocation(); + if (SR.isInvalid() || !SL.isValid()) + continue; + } + else + continue; + + // Check if the SourceLocation is in a system header + const SourceManager &SM = B.getSourceManager(); + if (SM.isInSystemHeader(SL) || SM.isInExternCSystemHeader(SL)) + continue; + + B.EmitBasicReport(D, this, "Unreachable code", "Dead code", + "This statement is never executed", DL, SR); + } +} + +// Recursively finds the entry point(s) for this dead CFGBlock. 
+void UnreachableCodeChecker::FindUnreachableEntryPoints(const CFGBlock *CB, + CFGBlocksSet &reachable, + CFGBlocksSet &visited) { + visited.insert(CB->getBlockID()); + + for (CFGBlock::const_pred_iterator I = CB->pred_begin(), E = CB->pred_end(); + I != E; ++I) { + if (!*I) + continue; + + if (!reachable.count((*I)->getBlockID())) { + // If we find an unreachable predecessor, mark this block as reachable so + // we don't report this block + reachable.insert(CB->getBlockID()); + if (!visited.count((*I)->getBlockID())) + // If we haven't previously visited the unreachable predecessor, recurse + FindUnreachableEntryPoints(*I, reachable, visited); + } + } +} + +// Find the Stmt* in a CFGBlock for reporting a warning +const Stmt *UnreachableCodeChecker::getUnreachableStmt(const CFGBlock *CB) { + for (CFGBlock::const_iterator I = CB->begin(), E = CB->end(); I != E; ++I) { + if (Optional<CFGStmt> S = I->getAs<CFGStmt>()) { + if (!isa<DeclStmt>(S->getStmt())) + return S->getStmt(); + } + } + if (const Stmt *S = CB->getTerminatorStmt()) + return S; + else + return nullptr; +} + +// Determines if the path to this CFGBlock contained an element that infers this +// block is a false positive. We assume that FindUnreachableEntryPoints has +// already marked only the entry points to any dead code, so we need only to +// find the condition that led to this block (the predecessor of this block.) +// There will never be more than one predecessor. +bool UnreachableCodeChecker::isInvalidPath(const CFGBlock *CB, + const ParentMap &PM) { + // We only expect a predecessor size of 0 or 1. If it is >1, then an external + // condition has broken our assumption (for example, a sink being placed by + // another check). In these cases, we choose not to report. 
+ if (CB->pred_size() > 1) + return true; + + // If there are no predecessors, then this block is trivially unreachable + if (CB->pred_size() == 0) + return false; + + const CFGBlock *pred = *CB->pred_begin(); + if (!pred) + return false; + + // Get the predecessor block's terminator condition + const Stmt *cond = pred->getTerminatorCondition(); + + //assert(cond && "CFGBlock's predecessor has a terminator condition"); + // The previous assertion is invalid in some cases (eg do/while). Leaving + // reporting of these situations on at the moment to help triage these cases. + if (!cond) + return false; + + // Run each of the checks on the conditions + return containsMacro(cond) || containsEnum(cond) || + containsStaticLocal(cond) || containsBuiltinOffsetOf(cond) || + containsStmt<UnaryExprOrTypeTraitExpr>(cond); +} + +// Returns true if the given CFGBlock is empty +bool UnreachableCodeChecker::isEmptyCFGBlock(const CFGBlock *CB) { + return CB->getLabel() == nullptr // No labels + && CB->size() == 0 // No statements + && !CB->getTerminatorStmt(); // No terminator +} + +void ento::registerUnreachableCodeChecker(CheckerManager &mgr) { + mgr.registerChecker<UnreachableCodeChecker>(); +} + +bool ento::shouldRegisterUnreachableCodeChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp new file mode 100644 index 000000000000..b92757312dc6 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp @@ -0,0 +1,190 @@ +//=== VLASizeChecker.cpp - Undefined dereference checker --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines VLASizeChecker, a builtin check in ExprEngine that +// performs checks for declaration of VLA of undefined or zero size. +// In addition, VLASizeChecker is responsible for defining the extent +// of the MemRegion that represents a VLA. +// +//===----------------------------------------------------------------------===// + +#include "Taint.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/CharUnits.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace ento; +using namespace taint; + +namespace { +class VLASizeChecker : public Checker< check::PreStmt<DeclStmt> > { + mutable std::unique_ptr<BugType> BT; + enum VLASize_Kind { VLA_Garbage, VLA_Zero, VLA_Tainted, VLA_Negative }; + + void reportBug(VLASize_Kind Kind, const Expr *SizeE, ProgramStateRef State, + CheckerContext &C, + std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const; + +public: + void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; +}; +} // end anonymous namespace + +void VLASizeChecker::reportBug( + VLASize_Kind Kind, const Expr *SizeE, ProgramStateRef State, + CheckerContext &C, std::unique_ptr<BugReporterVisitor> Visitor) const { + // Generate an error node. 
+ ExplodedNode *N = C.generateErrorNode(State); + if (!N) + return; + + if (!BT) + BT.reset(new BuiltinBug( + this, "Dangerous variable-length array (VLA) declaration")); + + SmallString<256> buf; + llvm::raw_svector_ostream os(buf); + os << "Declared variable-length array (VLA) "; + switch (Kind) { + case VLA_Garbage: + os << "uses a garbage value as its size"; + break; + case VLA_Zero: + os << "has zero size"; + break; + case VLA_Tainted: + os << "has tainted size"; + break; + case VLA_Negative: + os << "has negative size"; + break; + } + + auto report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + report->addVisitor(std::move(Visitor)); + report->addRange(SizeE->getSourceRange()); + bugreporter::trackExpressionValue(N, SizeE, *report); + C.emitReport(std::move(report)); +} + +void VLASizeChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { + if (!DS->isSingleDecl()) + return; + + const VarDecl *VD = dyn_cast<VarDecl>(DS->getSingleDecl()); + if (!VD) + return; + + ASTContext &Ctx = C.getASTContext(); + const VariableArrayType *VLA = Ctx.getAsVariableArrayType(VD->getType()); + if (!VLA) + return; + + // FIXME: Handle multi-dimensional VLAs. + const Expr *SE = VLA->getSizeExpr(); + ProgramStateRef state = C.getState(); + SVal sizeV = C.getSVal(SE); + + if (sizeV.isUndef()) { + reportBug(VLA_Garbage, SE, state, C); + return; + } + + // See if the size value is known. It can't be undefined because we would have + // warned about that already. + if (sizeV.isUnknown()) + return; + + // Check if the size is tainted. + if (isTainted(state, sizeV)) { + reportBug(VLA_Tainted, SE, nullptr, C, + std::make_unique<TaintBugVisitor>(sizeV)); + return; + } + + // Check if the size is zero. 
+ DefinedSVal sizeD = sizeV.castAs<DefinedSVal>(); + + ProgramStateRef stateNotZero, stateZero; + std::tie(stateNotZero, stateZero) = state->assume(sizeD); + + if (stateZero && !stateNotZero) { + reportBug(VLA_Zero, SE, stateZero, C); + return; + } + + // From this point on, assume that the size is not zero. + state = stateNotZero; + + // VLASizeChecker is responsible for defining the extent of the array being + // declared. We do this by multiplying the array length by the element size, + // then matching that with the array region's extent symbol. + + // Check if the size is negative. + SValBuilder &svalBuilder = C.getSValBuilder(); + + QualType Ty = SE->getType(); + DefinedOrUnknownSVal Zero = svalBuilder.makeZeroVal(Ty); + + SVal LessThanZeroVal = svalBuilder.evalBinOp(state, BO_LT, sizeD, Zero, Ty); + if (Optional<DefinedSVal> LessThanZeroDVal = + LessThanZeroVal.getAs<DefinedSVal>()) { + ConstraintManager &CM = C.getConstraintManager(); + ProgramStateRef StatePos, StateNeg; + + std::tie(StateNeg, StatePos) = CM.assumeDual(state, *LessThanZeroDVal); + if (StateNeg && !StatePos) { + reportBug(VLA_Negative, SE, state, C); + return; + } + state = StatePos; + } + + // Convert the array length to size_t. + QualType SizeTy = Ctx.getSizeType(); + NonLoc ArrayLength = + svalBuilder.evalCast(sizeD, SizeTy, SE->getType()).castAs<NonLoc>(); + + // Get the element size. + CharUnits EleSize = Ctx.getTypeSizeInChars(VLA->getElementType()); + SVal EleSizeVal = svalBuilder.makeIntVal(EleSize.getQuantity(), SizeTy); + + // Multiply the array length by the element size. + SVal ArraySizeVal = svalBuilder.evalBinOpNN( + state, BO_Mul, ArrayLength, EleSizeVal.castAs<NonLoc>(), SizeTy); + + // Finally, assume that the array's extent matches the given size. 
+ const LocationContext *LC = C.getLocationContext(); + DefinedOrUnknownSVal Extent = + state->getRegion(VD, LC)->getExtent(svalBuilder); + DefinedOrUnknownSVal ArraySize = ArraySizeVal.castAs<DefinedOrUnknownSVal>(); + DefinedOrUnknownSVal sizeIsKnown = + svalBuilder.evalEQ(state, Extent, ArraySize); + state = state->assume(sizeIsKnown, true); + + // Assume should not fail at this point. + assert(state); + + // Remember our assumptions! + C.addTransition(state); +} + +void ento::registerVLASizeChecker(CheckerManager &mgr) { + mgr.registerChecker<VLASizeChecker>(); +} + +bool ento::shouldRegisterVLASizeChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp new file mode 100644 index 000000000000..a3610514a924 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp @@ -0,0 +1,425 @@ +//== ValistChecker.cpp - stdarg.h macro usage checker -----------*- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This defines checkers which detect usage of uninitialized va_list values +// and va_start calls with no matching va_end. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +REGISTER_SET_WITH_PROGRAMSTATE(InitializedVALists, const MemRegion *) + +namespace { +typedef SmallVector<const MemRegion *, 2> RegionVector; + +class ValistChecker : public Checker<check::PreCall, check::PreStmt<VAArgExpr>, + check::DeadSymbols> { + mutable std::unique_ptr<BugType> BT_leakedvalist, BT_uninitaccess; + + struct VAListAccepter { + CallDescription Func; + int VAListPos; + }; + static const SmallVector<VAListAccepter, 15> VAListAccepters; + static const CallDescription VaStart, VaEnd, VaCopy; + +public: + enum CheckKind { + CK_Uninitialized, + CK_Unterminated, + CK_CopyToSelf, + CK_NumCheckKinds + }; + + DefaultBool ChecksEnabled[CK_NumCheckKinds]; + CheckerNameRef CheckNames[CK_NumCheckKinds]; + + void checkPreStmt(const VAArgExpr *VAA, CheckerContext &C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; + +private: + const MemRegion *getVAListAsRegion(SVal SV, const Expr *VAExpr, + bool &IsSymbolic, CheckerContext &C) const; + const ExplodedNode *getStartCallSite(const ExplodedNode *N, + const MemRegion *Reg) const; + + void reportUninitializedAccess(const MemRegion *VAList, StringRef Msg, + CheckerContext &C) const; + void reportLeakedVALists(const RegionVector &LeakedVALists, StringRef Msg1, + StringRef Msg2, CheckerContext &C, ExplodedNode *N, + bool ReportUninit = false) const; + + void checkVAListStartCall(const CallEvent &Call, CheckerContext &C, + bool 
IsCopy) const; + void checkVAListEndCall(const CallEvent &Call, CheckerContext &C) const; + + class ValistBugVisitor : public BugReporterVisitor { + public: + ValistBugVisitor(const MemRegion *Reg, bool IsLeak = false) + : Reg(Reg), IsLeak(IsLeak) {} + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int X = 0; + ID.AddPointer(&X); + ID.AddPointer(Reg); + } + PathDiagnosticPieceRef getEndPath(BugReporterContext &BRC, + const ExplodedNode *EndPathNode, + PathSensitiveBugReport &BR) override { + if (!IsLeak) + return nullptr; + + PathDiagnosticLocation L = BR.getLocation(); + // Do not add the statement itself as a range in case of leak. + return std::make_shared<PathDiagnosticEventPiece>(L, BR.getDescription(), + false); + } + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; + + private: + const MemRegion *Reg; + bool IsLeak; + }; +}; + +const SmallVector<ValistChecker::VAListAccepter, 15> + ValistChecker::VAListAccepters = { + {{"vfprintf", 3}, 2}, + {{"vfscanf", 3}, 2}, + {{"vprintf", 2}, 1}, + {{"vscanf", 2}, 1}, + {{"vsnprintf", 4}, 3}, + {{"vsprintf", 3}, 2}, + {{"vsscanf", 3}, 2}, + {{"vfwprintf", 3}, 2}, + {{"vfwscanf", 3}, 2}, + {{"vwprintf", 2}, 1}, + {{"vwscanf", 2}, 1}, + {{"vswprintf", 4}, 3}, + // vswprintf is the wide version of vsnprintf, + // vsprintf has no wide version + {{"vswscanf", 3}, 2}}; + +const CallDescription + ValistChecker::VaStart("__builtin_va_start", /*Args=*/2, /*Params=*/1), + ValistChecker::VaCopy("__builtin_va_copy", 2), + ValistChecker::VaEnd("__builtin_va_end", 1); +} // end anonymous namespace + +void ValistChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + if (!Call.isGlobalCFunction()) + return; + if (Call.isCalled(VaStart)) + checkVAListStartCall(Call, C, false); + else if (Call.isCalled(VaCopy)) + checkVAListStartCall(Call, C, true); + else if (Call.isCalled(VaEnd)) + checkVAListEndCall(Call, C); + else { + 
for (auto FuncInfo : VAListAccepters) { + if (!Call.isCalled(FuncInfo.Func)) + continue; + bool Symbolic; + const MemRegion *VAList = + getVAListAsRegion(Call.getArgSVal(FuncInfo.VAListPos), + Call.getArgExpr(FuncInfo.VAListPos), Symbolic, C); + if (!VAList) + return; + + if (C.getState()->contains<InitializedVALists>(VAList)) + return; + + // We did not see va_start call, but the source of the region is unknown. + // Be conservative and assume the best. + if (Symbolic) + return; + + SmallString<80> Errmsg("Function '"); + Errmsg += FuncInfo.Func.getFunctionName(); + Errmsg += "' is called with an uninitialized va_list argument"; + reportUninitializedAccess(VAList, Errmsg.c_str(), C); + break; + } + } +} + +const MemRegion *ValistChecker::getVAListAsRegion(SVal SV, const Expr *E, + bool &IsSymbolic, + CheckerContext &C) const { + const MemRegion *Reg = SV.getAsRegion(); + if (!Reg) + return nullptr; + // TODO: In the future this should be abstracted away by the analyzer. + bool VaListModelledAsArray = false; + if (const auto *Cast = dyn_cast<CastExpr>(E)) { + QualType Ty = Cast->getType(); + VaListModelledAsArray = + Ty->isPointerType() && Ty->getPointeeType()->isRecordType(); + } + if (const auto *DeclReg = Reg->getAs<DeclRegion>()) { + if (isa<ParmVarDecl>(DeclReg->getDecl())) + Reg = C.getState()->getSVal(SV.castAs<Loc>()).getAsRegion(); + } + IsSymbolic = Reg && Reg->getAs<SymbolicRegion>(); + // Some VarRegion based VA lists reach here as ElementRegions. + const auto *EReg = dyn_cast_or_null<ElementRegion>(Reg); + return (EReg && VaListModelledAsArray) ? 
EReg->getSuperRegion() : Reg; +} + +void ValistChecker::checkPreStmt(const VAArgExpr *VAA, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const Expr *VASubExpr = VAA->getSubExpr(); + SVal VAListSVal = C.getSVal(VASubExpr); + bool Symbolic; + const MemRegion *VAList = + getVAListAsRegion(VAListSVal, VASubExpr, Symbolic, C); + if (!VAList) + return; + if (Symbolic) + return; + if (!State->contains<InitializedVALists>(VAList)) + reportUninitializedAccess( + VAList, "va_arg() is called on an uninitialized va_list", C); +} + +void ValistChecker::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + InitializedVAListsTy TrackedVALists = State->get<InitializedVALists>(); + RegionVector LeakedVALists; + for (auto Reg : TrackedVALists) { + if (SR.isLiveRegion(Reg)) + continue; + LeakedVALists.push_back(Reg); + State = State->remove<InitializedVALists>(Reg); + } + if (ExplodedNode *N = C.addTransition(State)) + reportLeakedVALists(LeakedVALists, "Initialized va_list", " is leaked", C, + N); +} + +// This function traverses the exploded graph backwards and finds the node where +// the va_list is initialized. That node is used for uniquing the bug paths. +// It is not likely that there are several different va_lists that belongs to +// different stack frames, so that case is not yet handled. +const ExplodedNode * +ValistChecker::getStartCallSite(const ExplodedNode *N, + const MemRegion *Reg) const { + const LocationContext *LeakContext = N->getLocationContext(); + const ExplodedNode *StartCallNode = N; + + bool FoundInitializedState = false; + + while (N) { + ProgramStateRef State = N->getState(); + if (!State->contains<InitializedVALists>(Reg)) { + if (FoundInitializedState) + break; + } else { + FoundInitializedState = true; + } + const LocationContext *NContext = N->getLocationContext(); + if (NContext == LeakContext || NContext->isParentOf(LeakContext)) + StartCallNode = N; + N = N->pred_empty() ? 
nullptr : *(N->pred_begin()); + } + + return StartCallNode; +} + +void ValistChecker::reportUninitializedAccess(const MemRegion *VAList, + StringRef Msg, + CheckerContext &C) const { + if (!ChecksEnabled[CK_Uninitialized]) + return; + if (ExplodedNode *N = C.generateErrorNode()) { + if (!BT_uninitaccess) + BT_uninitaccess.reset(new BugType(CheckNames[CK_Uninitialized], + "Uninitialized va_list", + categories::MemoryError)); + auto R = std::make_unique<PathSensitiveBugReport>(*BT_uninitaccess, Msg, N); + R->markInteresting(VAList); + R->addVisitor(std::make_unique<ValistBugVisitor>(VAList)); + C.emitReport(std::move(R)); + } +} + +void ValistChecker::reportLeakedVALists(const RegionVector &LeakedVALists, + StringRef Msg1, StringRef Msg2, + CheckerContext &C, ExplodedNode *N, + bool ReportUninit) const { + if (!(ChecksEnabled[CK_Unterminated] || + (ChecksEnabled[CK_Uninitialized] && ReportUninit))) + return; + for (auto Reg : LeakedVALists) { + if (!BT_leakedvalist) { + // FIXME: maybe creating a new check name for this type of bug is a better + // solution. + BT_leakedvalist.reset( + new BugType(CheckNames[CK_Unterminated].getName().empty() + ? 
CheckNames[CK_Uninitialized] + : CheckNames[CK_Unterminated], + "Leaked va_list", categories::MemoryError, + /*SuppressOnSink=*/true)); + } + + const ExplodedNode *StartNode = getStartCallSite(N, Reg); + PathDiagnosticLocation LocUsedForUniqueing; + + if (const Stmt *StartCallStmt = StartNode->getStmtForDiagnostics()) + LocUsedForUniqueing = PathDiagnosticLocation::createBegin( + StartCallStmt, C.getSourceManager(), StartNode->getLocationContext()); + + SmallString<100> Buf; + llvm::raw_svector_ostream OS(Buf); + OS << Msg1; + std::string VariableName = Reg->getDescriptiveName(); + if (!VariableName.empty()) + OS << " " << VariableName; + OS << Msg2; + + auto R = std::make_unique<PathSensitiveBugReport>( + *BT_leakedvalist, OS.str(), N, LocUsedForUniqueing, + StartNode->getLocationContext()->getDecl()); + R->markInteresting(Reg); + R->addVisitor(std::make_unique<ValistBugVisitor>(Reg, true)); + C.emitReport(std::move(R)); + } +} + +void ValistChecker::checkVAListStartCall(const CallEvent &Call, + CheckerContext &C, bool IsCopy) const { + bool Symbolic; + const MemRegion *VAList = + getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C); + if (!VAList) + return; + + ProgramStateRef State = C.getState(); + + if (IsCopy) { + const MemRegion *Arg2 = + getVAListAsRegion(Call.getArgSVal(1), Call.getArgExpr(1), Symbolic, C); + if (Arg2) { + if (ChecksEnabled[CK_CopyToSelf] && VAList == Arg2) { + RegionVector LeakedVALists{VAList}; + if (ExplodedNode *N = C.addTransition(State)) + reportLeakedVALists(LeakedVALists, "va_list", + " is copied onto itself", C, N, true); + return; + } else if (!State->contains<InitializedVALists>(Arg2) && !Symbolic) { + if (State->contains<InitializedVALists>(VAList)) { + State = State->remove<InitializedVALists>(VAList); + RegionVector LeakedVALists{VAList}; + if (ExplodedNode *N = C.addTransition(State)) + reportLeakedVALists(LeakedVALists, "Initialized va_list", + " is overwritten by an uninitialized one", C, N, + true); + } 
else { + reportUninitializedAccess(Arg2, "Uninitialized va_list is copied", C); + } + return; + } + } + } + if (State->contains<InitializedVALists>(VAList)) { + RegionVector LeakedVALists{VAList}; + if (ExplodedNode *N = C.addTransition(State)) + reportLeakedVALists(LeakedVALists, "Initialized va_list", + " is initialized again", C, N); + return; + } + + State = State->add<InitializedVALists>(VAList); + C.addTransition(State); +} + +void ValistChecker::checkVAListEndCall(const CallEvent &Call, + CheckerContext &C) const { + bool Symbolic; + const MemRegion *VAList = + getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C); + if (!VAList) + return; + + // We did not see va_start call, but the source of the region is unknown. + // Be conservative and assume the best. + if (Symbolic) + return; + + if (!C.getState()->contains<InitializedVALists>(VAList)) { + reportUninitializedAccess( + VAList, "va_end() is called on an uninitialized va_list", C); + return; + } + ProgramStateRef State = C.getState(); + State = State->remove<InitializedVALists>(VAList); + C.addTransition(State); +} + +PathDiagnosticPieceRef ValistChecker::ValistBugVisitor::VisitNode( + const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) { + ProgramStateRef State = N->getState(); + ProgramStateRef StatePrev = N->getFirstPred()->getState(); + + const Stmt *S = N->getStmtForDiagnostics(); + if (!S) + return nullptr; + + StringRef Msg; + if (State->contains<InitializedVALists>(Reg) && + !StatePrev->contains<InitializedVALists>(Reg)) + Msg = "Initialized va_list"; + else if (!State->contains<InitializedVALists>(Reg) && + StatePrev->contains<InitializedVALists>(Reg)) + Msg = "Ended va_list"; + + if (Msg.empty()) + return nullptr; + + PathDiagnosticLocation Pos(S, BRC.getSourceManager(), + N->getLocationContext()); + return std::make_shared<PathDiagnosticEventPiece>(Pos, Msg, true); +} + +void ento::registerValistBase(CheckerManager &mgr) { + 
mgr.registerChecker<ValistChecker>(); +} + +bool ento::shouldRegisterValistBase(const LangOptions &LO) { + return true; +} + +#define REGISTER_CHECKER(name) \ + void ento::register##name##Checker(CheckerManager &mgr) { \ + ValistChecker *checker = mgr.getChecker<ValistChecker>(); \ + checker->ChecksEnabled[ValistChecker::CK_##name] = true; \ + checker->CheckNames[ValistChecker::CK_##name] = \ + mgr.getCurrentCheckerName(); \ + } \ + \ + bool ento::shouldRegister##name##Checker(const LangOptions &LO) { \ + return true; \ + } + +REGISTER_CHECKER(Uninitialized) +REGISTER_CHECKER(Unterminated) +REGISTER_CHECKER(CopyToSelf) diff --git a/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp new file mode 100644 index 000000000000..6724eead5072 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp @@ -0,0 +1,221 @@ +//===- VforkChecker.cpp -------- Vfork usage checks --------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines vfork checker which checks for dangerous uses of vfork. +// Vforked process shares memory (including stack) with parent so it's +// range of actions is significantly limited: can't write variables, +// can't call functions not in whitelist, etc. For more details, see +// http://man7.org/linux/man-pages/man2/vfork.2.html +// +// This checker checks for prohibited constructs in vforked process. 
+//  The state transition diagram:
+//  PARENT ---(vfork() == 0)--> CHILD
+//                                   |
+//                                   --(*p = ...)--> bug
+//                                   |
+//                                   --foo()--> bug
+//                                   |
+//                                   --return--> bug
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/AST/ParentMap.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+
+// Checker subscribed to pre-call, post-call, bind and pre-return events; see
+// the file header for the constructs it treats as bugs in a vforked child.
+class VforkChecker : public Checker<check::PreCall, check::PostCall,
+                                    check::Bind, check::PreStmt<ReturnStmt>> {
+  // Bug type, created on first report.
+  mutable std::unique_ptr<BuiltinBug> BT;
+  // Identifiers of the functions a child may call; filled on first use by
+  // isCallWhitelisted.
+  mutable llvm::SmallSet<const IdentifierInfo *, 10> VforkWhitelist;
+  // Identifier of "vfork"; looked up on first use by isVforkCall.
+  mutable const IdentifierInfo *II_vfork;
+
+  static bool isChildProcess(const ProgramStateRef State);
+
+  bool isVforkCall(const Decl *D, CheckerContext &C) const;
+  bool isCallWhitelisted(const IdentifierInfo *II, CheckerContext &C) const;
+
+  void reportBug(const char *What, CheckerContext &C,
+                 const char *Details = nullptr) const;
+
+public:
+  VforkChecker() : II_vfork(nullptr) {}
+
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
+  void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const;
+  void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const;
+};
+
+} // end anonymous namespace
+
+// This trait holds region of variable that is assigned with vfork's
+// return value (this is the only region child is allowed to write).
+// VFORK_RESULT_INVALID means that we are in parent process.
+// VFORK_RESULT_NONE means that vfork's return value hasn't been assigned.
+// Other values point to valid regions.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(VforkResultRegion, const void *)
+#define VFORK_RESULT_INVALID 0
+#define VFORK_RESULT_NONE ((void *)(uintptr_t)1)
+
+// True when the state's VforkResultRegion trait differs from
+// VFORK_RESULT_INVALID, i.e. the path is in the vforked child.
+bool VforkChecker::isChildProcess(const ProgramStateRef State) {
+  return State->get<VforkResultRegion>() != VFORK_RESULT_INVALID;
+}
+
+// True when D is a C library function whose identifier is "vfork". A null D
+// or a non-function declaration yields false.
+bool VforkChecker::isVforkCall(const Decl *D, CheckerContext &C) const {
+  auto FD = dyn_cast_or_null<FunctionDecl>(D);
+  if (!FD || !C.isCLibraryFunction(FD))
+    return false;
+
+  // Look the identifier up once and cache it.
+  if (!II_vfork) {
+    ASTContext &AC = C.getASTContext();
+    II_vfork = &AC.Idents.get("vfork");
+  }
+
+  return FD->getIdentifier() == II_vfork;
+}
+
+// Returns true iff ok to call function after successful vfork.
+bool VforkChecker::isCallWhitelisted(const IdentifierInfo *II,
+                                 CheckerContext &C) const {
+  // Populate the whitelist on first use.
+  if (VforkWhitelist.empty()) {
+    // According to manpage.
+    const char *ids[] = {
+      "_exit",
+      "_Exit",
+      "execl",
+      "execlp",
+      "execle",
+      "execv",
+      "execvp",
+      "execvpe",
+      nullptr
+    };
+
+    ASTContext &AC = C.getASTContext();
+    // The array is terminated by its nullptr entry.
+    for (const char **id = ids; *id; ++id)
+      VforkWhitelist.insert(&AC.Idents.get(*id));
+  }
+
+  return VforkWhitelist.count(II);
+}
+
+// Emits a report reading "<What> is prohibited after a successful vfork",
+// followed by "; <Details>" when Details is non-null. Nothing is emitted when
+// no error node can be generated.
+void VforkChecker::reportBug(const char *What, CheckerContext &C,
+                             const char *Details) const {
+  if (ExplodedNode *N = C.generateErrorNode(C.getState())) {
+    if (!BT)
+      BT.reset(new BuiltinBug(this,
+                              "Dangerous construct in a vforked process"));
+
+    SmallString<256> buf;
+    llvm::raw_svector_ostream os(buf);
+
+    os << What << " is prohibited after a successful vfork";
+
+    if (Details)
+      os << "; " << Details;
+
+    auto Report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N);
+    // TODO: mark vfork call in BugReportVisitor
+    C.emitReport(std::move(Report));
+  }
+}
+
+// Detect calls to vfork and split execution appropriately.
+void VforkChecker::checkPostCall(const CallEvent &Call,
+                                 CheckerContext &C) const {
+  // We can't call vfork in child so don't bother
+  // (corresponding warning has already been emitted in checkPreCall).
+  ProgramStateRef State = C.getState();
+  if (isChildProcess(State))
+    return;
+
+  if (!isVforkCall(Call.getDecl(), C))
+    return;
+
+  // Get return value of vfork.
+  SVal VforkRetVal = Call.getReturnValue();
+  Optional<DefinedOrUnknownSVal> DVal =
+    VforkRetVal.getAs<DefinedOrUnknownSVal>();
+  if (!DVal)
+    return;
+
+  // Get assigned variable.
+  const ParentMap &PM = C.getLocationContext()->getParentMap();
+  const Stmt *P = PM.getParentIgnoreParenCasts(Call.getOriginExpr());
+  const VarDecl *LhsDecl;
+  std::tie(LhsDecl, std::ignore) = parseAssignment(P);
+
+  // Get assigned memory region.
+  MemRegionManager &M = C.getStoreManager().getRegionManager();
+  const MemRegion *LhsDeclReg =
+    LhsDecl
+      ? 
M.getVarRegion(LhsDecl, C.getLocationContext()) + : (const MemRegion *)VFORK_RESULT_NONE; + + // Parent branch gets nonzero return value (according to manpage). + ProgramStateRef ParentState, ChildState; + std::tie(ParentState, ChildState) = C.getState()->assume(*DVal); + C.addTransition(ParentState); + ChildState = ChildState->set<VforkResultRegion>(LhsDeclReg); + C.addTransition(ChildState); +} + +// Prohibit calls to non-whitelist functions in child process. +void VforkChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + if (isChildProcess(State) + && !isCallWhitelisted(Call.getCalleeIdentifier(), C)) + reportBug("This function call", C); +} + +// Prohibit writes in child process (except for vfork's lhs). +void VforkChecker::checkBind(SVal L, SVal V, const Stmt *S, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + if (!isChildProcess(State)) + return; + + const MemRegion *VforkLhs = + static_cast<const MemRegion *>(State->get<VforkResultRegion>()); + const MemRegion *MR = L.getAsRegion(); + + // Child is allowed to modify only vfork's lhs. + if (!MR || MR == VforkLhs) + return; + + reportBug("This assignment", C); +} + +// Prohibit return from function in child process. 
+void VforkChecker::checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const { + ProgramStateRef State = C.getState(); + if (isChildProcess(State)) + reportBug("Return", C, "call _exit() instead"); +} + +void ento::registerVforkChecker(CheckerManager &mgr) { + mgr.registerChecker<VforkChecker>(); +} + +bool ento::shouldRegisterVforkChecker(const LangOptions &LO) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp new file mode 100644 index 000000000000..12cee5f8d4f7 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp @@ -0,0 +1,236 @@ +//=======- VirtualCallChecker.cpp --------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a checker that checks virtual method calls during +// construction or destruction of C++ objects. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/AST/DeclCXX.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" + +using namespace clang; +using namespace ento; + +namespace { +enum class ObjectState : bool { CtorCalled, DtorCalled }; +} // end namespace + // FIXME: Ascending over StackFrameContext maybe another method. + +namespace llvm { +template <> struct FoldingSetTrait<ObjectState> { + static inline void Profile(ObjectState X, FoldingSetNodeID &ID) { + ID.AddInteger(static_cast<int>(X)); + } +}; +} // end namespace llvm + +namespace { +class VirtualCallChecker + : public Checker<check::BeginFunction, check::EndFunction, check::PreCall> { +public: + // These are going to be null if the respective check is disabled. + mutable std::unique_ptr<BugType> BT_Pure, BT_Impure; + bool ShowFixIts = false; + + void checkBeginFunction(CheckerContext &C) const; + void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + +private: + void registerCtorDtorCallInState(bool IsBeginFunction, + CheckerContext &C) const; +}; +} // end namespace + +// GDM (generic data map) to the memregion of this for the ctor and dtor. +REGISTER_MAP_WITH_PROGRAMSTATE(CtorDtorMap, const MemRegion *, ObjectState) + +// The function to check if a callexpr is a virtual method call. 
+static bool isVirtualCall(const CallExpr *CE) { + bool CallIsNonVirtual = false; + + if (const MemberExpr *CME = dyn_cast<MemberExpr>(CE->getCallee())) { + // The member access is fully qualified (i.e., X::F). + // Treat this as a non-virtual call and do not warn. + if (CME->getQualifier()) + CallIsNonVirtual = true; + + if (const Expr *Base = CME->getBase()) { + // The most derived class is marked final. + if (Base->getBestDynamicClassType()->hasAttr<FinalAttr>()) + CallIsNonVirtual = true; + } + } + + const CXXMethodDecl *MD = + dyn_cast_or_null<CXXMethodDecl>(CE->getDirectCallee()); + if (MD && MD->isVirtual() && !CallIsNonVirtual && !MD->hasAttr<FinalAttr>() && + !MD->getParent()->hasAttr<FinalAttr>()) + return true; + return false; +} + +// The BeginFunction callback when enter a constructor or a destructor. +void VirtualCallChecker::checkBeginFunction(CheckerContext &C) const { + registerCtorDtorCallInState(true, C); +} + +// The EndFunction callback when leave a constructor or a destructor. +void VirtualCallChecker::checkEndFunction(const ReturnStmt *RS, + CheckerContext &C) const { + registerCtorDtorCallInState(false, C); +} + +void VirtualCallChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + const auto MC = dyn_cast<CXXMemberCall>(&Call); + if (!MC) + return; + + const CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(Call.getDecl()); + if (!MD) + return; + + ProgramStateRef State = C.getState(); + // Member calls are always represented by a call-expression. + const auto *CE = cast<CallExpr>(Call.getOriginExpr()); + if (!isVirtualCall(CE)) + return; + + const MemRegion *Reg = MC->getCXXThisVal().getAsRegion(); + const ObjectState *ObState = State->get<CtorDtorMap>(Reg); + if (!ObState) + return; + + bool IsPure = MD->isPure(); + + // At this point we're sure that we're calling a virtual method + // during construction or destruction, so we'll emit a report. 
+ SmallString<128> Msg; + llvm::raw_svector_ostream OS(Msg); + OS << "Call to "; + if (IsPure) + OS << "pure "; + OS << "virtual method '" << MD->getParent()->getNameAsString() + << "::" << MD->getNameAsString() << "' during "; + if (*ObState == ObjectState::CtorCalled) + OS << "construction "; + else + OS << "destruction "; + if (IsPure) + OS << "has undefined behavior"; + else + OS << "bypasses virtual dispatch"; + + ExplodedNode *N = + IsPure ? C.generateErrorNode() : C.generateNonFatalErrorNode(); + if (!N) + return; + + const std::unique_ptr<BugType> &BT = IsPure ? BT_Pure : BT_Impure; + if (!BT) { + // The respective check is disabled. + return; + } + + auto Report = std::make_unique<PathSensitiveBugReport>(*BT, OS.str(), N); + + if (ShowFixIts && !IsPure) { + // FIXME: These hints are valid only when the virtual call is made + // directly from the constructor/destructor. Otherwise the dispatch + // will work just fine from other callees, and the fix may break + // the otherwise correct program. + FixItHint Fixit = FixItHint::CreateInsertion( + CE->getBeginLoc(), MD->getParent()->getNameAsString() + "::"); + Report->addFixItHint(Fixit); + } + + C.emitReport(std::move(Report)); +} + +void VirtualCallChecker::registerCtorDtorCallInState(bool IsBeginFunction, + CheckerContext &C) const { + const auto *LCtx = C.getLocationContext(); + const auto *MD = dyn_cast_or_null<CXXMethodDecl>(LCtx->getDecl()); + if (!MD) + return; + + ProgramStateRef State = C.getState(); + auto &SVB = C.getSValBuilder(); + + // Enter a constructor, set the corresponding memregion be true. 
+ if (isa<CXXConstructorDecl>(MD)) { + auto ThiSVal = + State->getSVal(SVB.getCXXThis(MD, LCtx->getStackFrame())); + const MemRegion *Reg = ThiSVal.getAsRegion(); + if (IsBeginFunction) + State = State->set<CtorDtorMap>(Reg, ObjectState::CtorCalled); + else + State = State->remove<CtorDtorMap>(Reg); + + C.addTransition(State); + return; + } + + // Enter a Destructor, set the corresponding memregion be true. + if (isa<CXXDestructorDecl>(MD)) { + auto ThiSVal = + State->getSVal(SVB.getCXXThis(MD, LCtx->getStackFrame())); + const MemRegion *Reg = ThiSVal.getAsRegion(); + if (IsBeginFunction) + State = State->set<CtorDtorMap>(Reg, ObjectState::DtorCalled); + else + State = State->remove<CtorDtorMap>(Reg); + + C.addTransition(State); + return; + } +} + +void ento::registerVirtualCallModeling(CheckerManager &Mgr) { + Mgr.registerChecker<VirtualCallChecker>(); +} + +void ento::registerPureVirtualCallChecker(CheckerManager &Mgr) { + auto *Chk = Mgr.getChecker<VirtualCallChecker>(); + Chk->BT_Pure = std::make_unique<BugType>(Mgr.getCurrentCheckerName(), + "Pure virtual method call", + categories::CXXObjectLifecycle); +} + +void ento::registerVirtualCallChecker(CheckerManager &Mgr) { + auto *Chk = Mgr.getChecker<VirtualCallChecker>(); + if (!Mgr.getAnalyzerOptions().getCheckerBooleanOption( + Mgr.getCurrentCheckerName(), "PureOnly")) { + Chk->BT_Impure = std::make_unique<BugType>( + Mgr.getCurrentCheckerName(), "Unexpected loss of virtual dispatch", + categories::CXXObjectLifecycle); + Chk->ShowFixIts = Mgr.getAnalyzerOptions().getCheckerBooleanOption( + Mgr.getCurrentCheckerName(), "ShowFixIts"); + } +} + +bool ento::shouldRegisterVirtualCallModeling(const LangOptions &LO) { + return LO.CPlusPlus; +} + +bool ento::shouldRegisterPureVirtualCallChecker(const LangOptions &LO) { + return LO.CPlusPlus; +} + +bool ento::shouldRegisterVirtualCallChecker(const LangOptions &LO) { + return LO.CPlusPlus; +} diff --git a/clang/lib/StaticAnalyzer/Checkers/Yaml.h 
b/clang/lib/StaticAnalyzer/Checkers/Yaml.h new file mode 100755 index 000000000000..968c50e33f6d --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/Yaml.h @@ -0,0 +1,59 @@ +//== Yaml.h ---------------------------------------------------- -*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines convenience functions for handling YAML configuration files +// for checkers/packages. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKER_YAML_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKER_YAML_H + +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "llvm/Support/YAMLTraits.h" + +namespace clang { +namespace ento { + +/// Read the given file from the filesystem and parse it as a yaml file. The +/// template parameter must have a yaml MappingTraits. +/// Emit diagnostic error in case of any failure. 
+template <class T, class Checker> +llvm::Optional<T> getConfiguration(CheckerManager &Mgr, Checker *Chk, + StringRef Option, StringRef ConfigFile) { + if (ConfigFile.trim().empty()) + return None; + + llvm::vfs::FileSystem *FS = llvm::vfs::getRealFileSystem().get(); + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buffer = + FS->getBufferForFile(ConfigFile.str()); + + if (std::error_code ec = Buffer.getError()) { + Mgr.reportInvalidCheckerOptionValue(Chk, Option, + "a valid filename instead of '" + + std::string(ConfigFile) + "'"); + return None; + } + + llvm::yaml::Input Input(Buffer.get()->getBuffer()); + T Config; + Input >> Config; + + if (std::error_code ec = Input.error()) { + Mgr.reportInvalidCheckerOptionValue(Chk, Option, + "a valid yaml file: " + ec.message()); + return None; + } + + return Config; +} + +} // namespace ento +} // namespace clang + +#endif // LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MOVE_H |