diff options
Diffstat (limited to 'clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp')
| -rw-r--r-- | clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp | 185 |
1 files changed, 148 insertions, 37 deletions
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index e2209e3debfd..f0a114801dda 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -14,11 +14,11 @@ // //===----------------------------------------------------------------------===// -#include "Taint.h" #include "Yaml.h" #include "clang/AST/Attr.h" #include "clang/Basic/Builtins.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Checkers/Taint.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" @@ -32,10 +32,14 @@ #include <memory> #include <utility> +#define DEBUG_TYPE "taint-checker" + using namespace clang; using namespace ento; using namespace taint; +using llvm::ImmutableSet; + namespace { class GenericTaintChecker; @@ -88,10 +92,8 @@ bool isStdin(SVal Val, const ASTContext &ACtx) { return false; // Get it's symbol and find the declaration region it's pointing to. - const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); - if (!Sm) - return false; - const auto *DeclReg = dyn_cast<DeclRegion>(Sm->getRegion()); + const auto *DeclReg = + dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion()); if (!DeclReg) return false; @@ -150,7 +152,7 @@ Optional<SVal> getTaintedPointeeOrPointer(const CheckerContext &C, SVal Arg) { bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State, CheckerContext &C) { - return getTaintedPointeeOrPointer(C, C.getSVal(E)).hasValue(); + return getTaintedPointeeOrPointer(C, C.getSVal(E)).has_value(); } /// ArgSet is used to describe arguments relevant for taint detection or @@ -172,15 +174,6 @@ public: bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; } - ArgVecTy ArgsUpTo(ArgIdxTy LastArgIdx) const { - ArgVecTy Args; - for (ArgIdxTy I = ReturnValueIndex; I <= LastArgIdx; ++I) { - if (contains(I)) - Args.push_back(I); - } - return Args; - } - private: ArgVecTy DiscreteArgs; Optional<ArgIdxTy> VariadicIndex; @@ -336,11 +329,6 @@ private: class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { public: - static void *getTag() { - static int Tag; - return &Tag; - } - void checkPreCall(const CallEvent &Call, CheckerContext &C) const; void checkPostCall(const CallEvent &Call, CheckerContext &C) const; @@ -432,7 +420,9 @@ template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> { /// to the call post-visit. The values are signed integers, which are either /// ReturnValueIndex, or indexes of the pointer/reference argument, which /// points to data, which should be tainted on return. -REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, ArgIdxTy) +REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *, + ImmutableSet<ArgIdxTy>) +REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy) void GenericTaintRuleParser::validateArgVector(const std::string &Option, const ArgVecTy &Args) const { @@ -544,8 +534,27 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { {{"getchar"}, TR::Source({{ReturnValueIndex}})}, {{"getchar_unlocked"}, TR::Source({{ReturnValueIndex}})}, {{"gets"}, TR::Source({{0}, ReturnValueIndex})}, + {{"gets_s"}, TR::Source({{0}, ReturnValueIndex})}, {{"scanf"}, TR::Source({{}, 1})}, + {{"scanf_s"}, TR::Source({{}, {1}})}, {{"wgetch"}, TR::Source({{}, ReturnValueIndex})}, + // Sometimes the line between taint sources and propagators is blurry. + // _IO_getc is choosen to be a source, but could also be a propagator. + // This way it is simpler, as modeling it as a propagator would require + // to model the possible sources of _IO_FILE * values, which the _IO_getc + // function takes as parameters. + {{"_IO_getc"}, TR::Source({{ReturnValueIndex}})}, + {{"getcwd"}, TR::Source({{0, ReturnValueIndex}})}, + {{"getwd"}, TR::Source({{0, ReturnValueIndex}})}, + {{"readlink"}, TR::Source({{1, ReturnValueIndex}})}, + {{"readlinkat"}, TR::Source({{2, ReturnValueIndex}})}, + {{"get_current_dir_name"}, TR::Source({{ReturnValueIndex}})}, + {{"gethostname"}, TR::Source({{0}})}, + {{"getnameinfo"}, TR::Source({{2, 4}})}, + {{"getseuserbyname"}, TR::Source({{1, 2}})}, + {{"getgroups"}, TR::Source({{1, ReturnValueIndex}})}, + {{"getlogin"}, TR::Source({{ReturnValueIndex}})}, + {{"getlogin_r"}, TR::Source({{0}})}, // Props {{"atoi"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, @@ -553,9 +562,11 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { {{"atoll"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{"fgetc"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{"fgetln"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{"fgets"}, TR::Prop({{2}}, {{0}, ReturnValueIndex})}, + {{"fgets"}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, {{"fscanf"}, TR::Prop({{0}}, {{}, 2})}, + {{"fscanf_s"}, TR::Prop({{0}}, {{}, {2}})}, {{"sscanf"}, TR::Prop({{0}}, {{}, 2})}, + {{"getc"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{"getc_unlocked"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{"getdelim"}, TR::Prop({{3}}, {{0}})}, @@ -567,6 +578,78 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { {{"strrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{"tolower"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{"toupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"fread"}, TR::Prop({{3}}, {{0, ReturnValueIndex}})}, + {{"recv"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{"recvfrom"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{"ttyname"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"ttyname_r"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{"basename"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"dirname"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"fnmatch"}, TR::Prop({{1}}, {{ReturnValueIndex}})}, + {{"memchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"memrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"rawmemchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{"mbtowc"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{"wctomb"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{"wcwidth"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{"memcmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{"memcpy"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{"memmove"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + // If memmem was called with a tainted needle and the search was + // successful, that would mean that the value pointed by the return value + // has the same content as the needle. If we choose to go by the policy of + // content equivalence implies taintedness equivalence, that would mean + // haystack should be considered a propagation source argument. + {{"memmem"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + // The comment for memmem above also applies to strstr. + {{"strstr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strcasestr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{"strchrnul"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{"index"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"rindex"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + // FIXME: In case of arrays, only the first element of the array gets + // tainted. + {{"qsort"}, TR::Prop({{0}}, {{0}})}, + {{"qsort_r"}, TR::Prop({{0}}, {{0}})}, + + {{"strcmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{"strcasecmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{"strncmp"}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, + {{"strncasecmp"}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, + {{"strspn"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{"strcspn"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{"strpbrk"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strndup"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strndupa"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strlen"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strnlen"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strtol"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{"strtoll"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{"strtoul"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{"strtoull"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{"isalnum"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isalpha"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isascii"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isblank"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"iscntrl"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isdigit"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isgraph"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"islower"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isprint"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"ispunct"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isspace"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isxdigit"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}}, TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}}, @@ -626,7 +709,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { if (TR::UntrustedEnv(C)) { // void setproctitle_init(int argc, char *argv[], char *envp[]) GlobalCRules.push_back( - {{{"setproctitle_init"}}, TR::Sink({{2}}, MsgCustomSink)}); + {{{"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)}); GlobalCRules.push_back({{"getenv"}, TR::Source({{ReturnValueIndex}})}); } @@ -649,7 +732,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { } GenericTaintRuleParser::RulesContTy Rules{ - ConfigParser.parseConfiguration(Option, std::move(Config.getValue()))}; + ConfigParser.parseConfiguration(Option, std::move(*Config))}; DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()), std::make_move_iterator(Rules.end())); @@ -683,15 +766,26 @@ void GenericTaintChecker::checkPostCall(const CallEvent &Call, // Set the marked values as tainted. The return value only accessible from // checkPostStmt. ProgramStateRef State = C.getState(); + const StackFrameContext *CurrentFrame = C.getStackFrame(); // Depending on what was tainted at pre-visit, we determined a set of // arguments which should be tainted after the function returns. These are // stored in the state as TaintArgsOnPostVisit set. - TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); - if (TaintArgs.isEmpty()) + TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>(); + + const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame); + if (!TaintArgs) return; + assert(!TaintArgs->isEmpty()); + + LLVM_DEBUG(for (ArgIdxTy I + : *TaintArgs) { + llvm::dbgs() << "PostCall<"; + Call.dump(llvm::dbgs()); + llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n'; + }); - for (ArgIdxTy ArgNum : TaintArgs) { + for (ArgIdxTy ArgNum : *TaintArgs) { // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { State = addTaint(State, Call.getReturnValue()); @@ -705,7 +799,7 @@ void GenericTaintChecker::checkPostCall(const CallEvent &Call, } // Clear up the taint info from the state. - State = State->remove<TaintArgsOnPostVisit>(); + State = State->remove<TaintArgsOnPostVisit>(CurrentFrame); C.addTransition(State); } @@ -730,7 +824,7 @@ void GenericTaintRule::process(const GenericTaintChecker &Checker, /// Check for taint sinks. ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C)) - Checker.generateReportIfTainted(E, SinkMsg.getValueOr(MsgCustomSink), C); + Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C); }); /// Check for taint filters. @@ -756,7 +850,7 @@ void GenericTaintRule::process(const GenericTaintChecker &Checker, return; const auto WouldEscape = [](SVal V, QualType Ty) -> bool { - if (!V.getAs<Loc>()) + if (!isa<Loc>(V)) return false; const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified(); @@ -767,18 +861,32 @@ void GenericTaintRule::process(const GenericTaintChecker &Checker, }; /// Propagate taint where it is necessary. + auto &F = State->getStateManager().get_context<ArgIdxFactory>(); + ImmutableSet<ArgIdxTy> Result = F.getEmptySet(); ForEachCallArg( - [this, &State, WouldEscape](ArgIdxTy I, const Expr *E, SVal V) { - if (PropDstArgs.contains(I)) - State = State->add<TaintArgsOnPostVisit>(I); + [&](ArgIdxTy I, const Expr *E, SVal V) { + if (PropDstArgs.contains(I)) { + LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs()); + llvm::dbgs() + << "> prepares tainting arg index: " << I << '\n';); + Result = F.add(Result, I); + } // TODO: We should traverse all reachable memory regions via the // escaping parameter. Instead of doing that we simply mark only the // referred memory region as tainted. - if (WouldEscape(V, E->getType())) - State = State->add<TaintArgsOnPostVisit>(I); + if (WouldEscape(V, E->getType())) { + LLVM_DEBUG(if (!Result.contains(I)) { + llvm::dbgs() << "PreCall<"; + Call.dump(llvm::dbgs()); + llvm::dbgs() << "> prepares tainting arg index: " << I << '\n'; + }); + Result = F.add(Result, I); + } }); + if (!Result.isEmpty()) + State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result); C.addTransition(State); } @@ -869,11 +977,14 @@ void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call, if (SafeProtocol) return; - C.addTransition(C.getState()->add<TaintArgsOnPostVisit>(ReturnValueIndex)); + ProgramStateRef State = C.getState(); + auto &F = State->getStateManager().get_context<ArgIdxFactory>(); + ImmutableSet<ArgIdxTy> Result = F.add(F.getEmptySet(), ReturnValueIndex); + State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result); + C.addTransition(State); } /// Checker registration - void ento::registerGenericTaintChecker(CheckerManager &Mgr) { Mgr.registerChecker<GenericTaintChecker>(); } |
