aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp')
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp185
1 files changed, 148 insertions, 37 deletions
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index e2209e3debfd..f0a114801dda 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -14,11 +14,11 @@
//
//===----------------------------------------------------------------------===//
-#include "Taint.h"
#include "Yaml.h"
#include "clang/AST/Attr.h"
#include "clang/Basic/Builtins.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
@@ -32,10 +32,14 @@
#include <memory>
#include <utility>
+#define DEBUG_TYPE "taint-checker"
+
using namespace clang;
using namespace ento;
using namespace taint;
+using llvm::ImmutableSet;
+
namespace {
class GenericTaintChecker;
@@ -88,10 +92,8 @@ bool isStdin(SVal Val, const ASTContext &ACtx) {
return false;
// Get it's symbol and find the declaration region it's pointing to.
- const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
- if (!Sm)
- return false;
- const auto *DeclReg = dyn_cast<DeclRegion>(Sm->getRegion());
+ const auto *DeclReg =
+ dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
if (!DeclReg)
return false;
@@ -150,7 +152,7 @@ Optional<SVal> getTaintedPointeeOrPointer(const CheckerContext &C, SVal Arg) {
bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State,
CheckerContext &C) {
- return getTaintedPointeeOrPointer(C, C.getSVal(E)).hasValue();
+ return getTaintedPointeeOrPointer(C, C.getSVal(E)).has_value();
}
/// ArgSet is used to describe arguments relevant for taint detection or
@@ -172,15 +174,6 @@ public:
bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; }
- ArgVecTy ArgsUpTo(ArgIdxTy LastArgIdx) const {
- ArgVecTy Args;
- for (ArgIdxTy I = ReturnValueIndex; I <= LastArgIdx; ++I) {
- if (contains(I))
- Args.push_back(I);
- }
- return Args;
- }
-
private:
ArgVecTy DiscreteArgs;
Optional<ArgIdxTy> VariadicIndex;
@@ -336,11 +329,6 @@ private:
class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
public:
- static void *getTag() {
- static int Tag;
- return &Tag;
- }
-
void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
@@ -432,7 +420,9 @@ template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
/// to the call post-visit. The values are signed integers, which are either
/// ReturnValueIndex, or indexes of the pointer/reference argument, which
/// points to data, which should be tainted on return.
-REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, ArgIdxTy)
+REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *,
+ ImmutableSet<ArgIdxTy>)
+REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy)
void GenericTaintRuleParser::validateArgVector(const std::string &Option,
const ArgVecTy &Args) const {
@@ -544,8 +534,27 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
{{"getchar"}, TR::Source({{ReturnValueIndex}})},
{{"getchar_unlocked"}, TR::Source({{ReturnValueIndex}})},
{{"gets"}, TR::Source({{0}, ReturnValueIndex})},
+ {{"gets_s"}, TR::Source({{0}, ReturnValueIndex})},
{{"scanf"}, TR::Source({{}, 1})},
+ {{"scanf_s"}, TR::Source({{}, {1}})},
{{"wgetch"}, TR::Source({{}, ReturnValueIndex})},
+ // Sometimes the line between taint sources and propagators is blurry.
+ // _IO_getc is chosen to be a source, but could also be a propagator.
+ // This way it is simpler, as modeling it as a propagator would require
+ // modeling the possible sources of _IO_FILE * values, which the _IO_getc
+ // function takes as parameters.
+ {{"_IO_getc"}, TR::Source({{ReturnValueIndex}})},
+ {{"getcwd"}, TR::Source({{0, ReturnValueIndex}})},
+ {{"getwd"}, TR::Source({{0, ReturnValueIndex}})},
+ {{"readlink"}, TR::Source({{1, ReturnValueIndex}})},
+ {{"readlinkat"}, TR::Source({{2, ReturnValueIndex}})},
+ {{"get_current_dir_name"}, TR::Source({{ReturnValueIndex}})},
+ {{"gethostname"}, TR::Source({{0}})},
+ {{"getnameinfo"}, TR::Source({{2, 4}})},
+ {{"getseuserbyname"}, TR::Source({{1, 2}})},
+ {{"getgroups"}, TR::Source({{1, ReturnValueIndex}})},
+ {{"getlogin"}, TR::Source({{ReturnValueIndex}})},
+ {{"getlogin_r"}, TR::Source({{0}})},
// Props
{{"atoi"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
@@ -553,9 +562,11 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
{{"atoll"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{"fgetc"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{"fgetln"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
- {{"fgets"}, TR::Prop({{2}}, {{0}, ReturnValueIndex})},
+ {{"fgets"}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
{{"fscanf"}, TR::Prop({{0}}, {{}, 2})},
+ {{"fscanf_s"}, TR::Prop({{0}}, {{}, {2}})},
{{"sscanf"}, TR::Prop({{0}}, {{}, 2})},
+
{{"getc"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{"getc_unlocked"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{"getdelim"}, TR::Prop({{3}}, {{0}})},
@@ -567,6 +578,78 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
{{"strrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{"tolower"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{"toupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"fread"}, TR::Prop({{3}}, {{0, ReturnValueIndex}})},
+ {{"recv"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+ {{"recvfrom"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+
+ {{"ttyname"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"ttyname_r"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+
+ {{"basename"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"dirname"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"fnmatch"}, TR::Prop({{1}}, {{ReturnValueIndex}})},
+ {{"memchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"memrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"rawmemchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+
+ {{"mbtowc"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+ {{"wctomb"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+ {{"wcwidth"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+
+ {{"memcmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+ {{"memcpy"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+ {{"memmove"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+ // If memmem was called with a tainted needle and the search was
+ // successful, that would mean that the value pointed by the return value
+ // has the same content as the needle. If we choose to go by the policy of
+ // content equivalence implies taintedness equivalence, that would mean
+ // haystack should be considered a propagation source argument.
+ {{"memmem"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+
+ // The comment for memmem above also applies to strstr.
+ {{"strstr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"strcasestr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+
+ {{"strchrnul"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+
+ {{"index"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"rindex"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+
+ // FIXME: In case of arrays, only the first element of the array gets
+ // tainted.
+ {{"qsort"}, TR::Prop({{0}}, {{0}})},
+ {{"qsort_r"}, TR::Prop({{0}}, {{0}})},
+
+ {{"strcmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+ {{"strcasecmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+ {{"strncmp"}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
+ {{"strncasecmp"}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
+ {{"strspn"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+ {{"strcspn"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+ {{"strpbrk"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"strndup"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"strndupa"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"strlen"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"strnlen"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"strtol"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+ {{"strtoll"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+ {{"strtoul"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+ {{"strtoull"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+
+ {{"isalnum"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"isalpha"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"isascii"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"isblank"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"iscntrl"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"isdigit"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"isgraph"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"islower"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"isprint"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"ispunct"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"isspace"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"isupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{"isxdigit"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+
{{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}},
TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
{{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}},
@@ -626,7 +709,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
if (TR::UntrustedEnv(C)) {
// void setproctitle_init(int argc, char *argv[], char *envp[])
GlobalCRules.push_back(
- {{{"setproctitle_init"}}, TR::Sink({{2}}, MsgCustomSink)});
+ {{{"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)});
GlobalCRules.push_back({{"getenv"}, TR::Source({{ReturnValueIndex}})});
}
@@ -649,7 +732,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
}
GenericTaintRuleParser::RulesContTy Rules{
- ConfigParser.parseConfiguration(Option, std::move(Config.getValue()))};
+ ConfigParser.parseConfiguration(Option, std::move(*Config))};
DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
std::make_move_iterator(Rules.end()));
@@ -683,15 +766,26 @@ void GenericTaintChecker::checkPostCall(const CallEvent &Call,
// Set the marked values as tainted. The return value only accessible from
// checkPostStmt.
ProgramStateRef State = C.getState();
+ const StackFrameContext *CurrentFrame = C.getStackFrame();
// Depending on what was tainted at pre-visit, we determined a set of
// arguments which should be tainted after the function returns. These are
// stored in the state as TaintArgsOnPostVisit set.
- TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
- if (TaintArgs.isEmpty())
+ TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
+
+ const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
+ if (!TaintArgs)
return;
+ assert(!TaintArgs->isEmpty());
+
+ LLVM_DEBUG(for (ArgIdxTy I
+ : *TaintArgs) {
+ llvm::dbgs() << "PostCall<";
+ Call.dump(llvm::dbgs());
+ llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n';
+ });
- for (ArgIdxTy ArgNum : TaintArgs) {
+ for (ArgIdxTy ArgNum : *TaintArgs) {
// Special handling for the tainted return value.
if (ArgNum == ReturnValueIndex) {
State = addTaint(State, Call.getReturnValue());
@@ -705,7 +799,7 @@ void GenericTaintChecker::checkPostCall(const CallEvent &Call,
}
// Clear up the taint info from the state.
- State = State->remove<TaintArgsOnPostVisit>();
+ State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
C.addTransition(State);
}
@@ -730,7 +824,7 @@ void GenericTaintRule::process(const GenericTaintChecker &Checker,
/// Check for taint sinks.
ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) {
if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C))
- Checker.generateReportIfTainted(E, SinkMsg.getValueOr(MsgCustomSink), C);
+ Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C);
});
/// Check for taint filters.
@@ -756,7 +850,7 @@ void GenericTaintRule::process(const GenericTaintChecker &Checker,
return;
const auto WouldEscape = [](SVal V, QualType Ty) -> bool {
- if (!V.getAs<Loc>())
+ if (!isa<Loc>(V))
return false;
const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
@@ -767,18 +861,32 @@ void GenericTaintRule::process(const GenericTaintChecker &Checker,
};
/// Propagate taint where it is necessary.
+ auto &F = State->getStateManager().get_context<ArgIdxFactory>();
+ ImmutableSet<ArgIdxTy> Result = F.getEmptySet();
ForEachCallArg(
- [this, &State, WouldEscape](ArgIdxTy I, const Expr *E, SVal V) {
- if (PropDstArgs.contains(I))
- State = State->add<TaintArgsOnPostVisit>(I);
+ [&](ArgIdxTy I, const Expr *E, SVal V) {
+ if (PropDstArgs.contains(I)) {
+ LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs());
+ llvm::dbgs()
+ << "> prepares tainting arg index: " << I << '\n';);
+ Result = F.add(Result, I);
+ }
// TODO: We should traverse all reachable memory regions via the
// escaping parameter. Instead of doing that we simply mark only the
// referred memory region as tainted.
- if (WouldEscape(V, E->getType()))
- State = State->add<TaintArgsOnPostVisit>(I);
+ if (WouldEscape(V, E->getType())) {
+ LLVM_DEBUG(if (!Result.contains(I)) {
+ llvm::dbgs() << "PreCall<";
+ Call.dump(llvm::dbgs());
+ llvm::dbgs() << "> prepares tainting arg index: " << I << '\n';
+ });
+ Result = F.add(Result, I);
+ }
});
+ if (!Result.isEmpty())
+ State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
C.addTransition(State);
}
@@ -869,11 +977,14 @@ void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call,
if (SafeProtocol)
return;
- C.addTransition(C.getState()->add<TaintArgsOnPostVisit>(ReturnValueIndex));
+ ProgramStateRef State = C.getState();
+ auto &F = State->getStateManager().get_context<ArgIdxFactory>();
+ ImmutableSet<ArgIdxTy> Result = F.add(F.getEmptySet(), ReturnValueIndex);
+ State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
+ C.addTransition(State);
}
/// Checker registration
-
void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
Mgr.registerChecker<GenericTaintChecker>();
}