summaryrefslogtreecommitdiff
path: root/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp')
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp855
1 files changed, 855 insertions, 0 deletions
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
new file mode 100644
index 000000000000..d442b26b3959
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -0,0 +1,855 @@
+//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This checker defines the attack surface for generic taint propagation.
+//
+// The taint information produced by it might be useful to other checkers. For
+// example, checkers should report errors which involve tainted data more
+// aggressively, even if the involved symbols are under constrained.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Taint.h"
+#include "Yaml.h"
+#include "clang/AST/Attr.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <limits>
+#include <utility>
+
+using namespace clang;
+using namespace ento;
+using namespace taint;
+
+namespace {
+class GenericTaintChecker
+ : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
+public:
+ static void *getTag() {
+ static int Tag;
+ return &Tag;
+ }
+
+ void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
+
+ void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
+
+ void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
+ const char *Sep) const override;
+
+ using ArgVector = SmallVector<unsigned, 2>;
+ using SignedArgVector = SmallVector<int, 2>;
+
+ enum class VariadicType { None, Src, Dst };
+
+ /// Used to parse the configuration file.
+ struct TaintConfiguration {
+ using NameArgsPair = std::pair<std::string, ArgVector>;
+
+ struct Propagation {
+ std::string Name;
+ ArgVector SrcArgs;
+ SignedArgVector DstArgs;
+ VariadicType VarType;
+ unsigned VarIndex;
+ };
+
+ std::vector<Propagation> Propagations;
+ std::vector<NameArgsPair> Filters;
+ std::vector<NameArgsPair> Sinks;
+
+ TaintConfiguration() = default;
+ TaintConfiguration(const TaintConfiguration &) = default;
+ TaintConfiguration(TaintConfiguration &&) = default;
+ TaintConfiguration &operator=(const TaintConfiguration &) = default;
+ TaintConfiguration &operator=(TaintConfiguration &&) = default;
+ };
+
+ /// Convert SignedArgVector to ArgVector.
+ ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
+ SignedArgVector Args);
+
+ /// Parse the config.
+ void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
+ TaintConfiguration &&Config);
+
+ static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
+ /// Denotes the return vale.
+ static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
+ 1};
+
+private:
+ mutable std::unique_ptr<BugType> BT;
+ void initBugType() const {
+ if (!BT)
+ BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
+ }
+
+ /// Catch taint related bugs. Check if tainted data is passed to a
+ /// system call etc.
+ bool checkPre(const CallExpr *CE, CheckerContext &C) const;
+
+ /// Add taint sources on a pre-visit.
+ void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
+
+ /// Propagate taint generated at pre-visit.
+ bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
+
+ /// Check if the region the expression evaluates to is the standard input,
+ /// and thus, is tainted.
+ static bool isStdin(const Expr *E, CheckerContext &C);
+
+ /// Given a pointer argument, return the value it points to.
+ static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
+
+ /// Check for CWE-134: Uncontrolled Format String.
+ static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
+ "Untrusted data is used as a format string "
+ "(CWE-134: Uncontrolled Format String)";
+ bool checkUncontrolledFormatString(const CallExpr *CE,
+ CheckerContext &C) const;
+
+ /// Check for:
+ /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
+ /// CWE-78, "Failure to Sanitize Data into an OS Command"
+ static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
+ "Untrusted data is passed to a system call "
+ "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
+ bool checkSystemCall(const CallExpr *CE, StringRef Name,
+ CheckerContext &C) const;
+
+ /// Check if tainted data is used as a buffer size ins strn.. functions,
+ /// and allocators.
+ static constexpr llvm::StringLiteral MsgTaintedBufferSize =
+ "Untrusted data is used to specify the buffer size "
+ "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
+ "for character data and the null terminator)";
+ bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
+ CheckerContext &C) const;
+
+ /// Check if tainted data is used as a custom sink's parameter.
+ static constexpr llvm::StringLiteral MsgCustomSink =
+ "Untrusted data is passed to a user-defined sink";
+ bool checkCustomSinks(const CallExpr *CE, StringRef Name,
+ CheckerContext &C) const;
+
+ /// Generate a report if the expression is tainted or points to tainted data.
+ bool generateReportIfTainted(const Expr *E, StringRef Msg,
+ CheckerContext &C) const;
+
+ struct TaintPropagationRule;
+ using NameRuleMap = llvm::StringMap<TaintPropagationRule>;
+ using NameArgMap = llvm::StringMap<ArgVector>;
+
+ /// A struct used to specify taint propagation rules for a function.
+ ///
+ /// If any of the possible taint source arguments is tainted, all of the
+ /// destination arguments should also be tainted. Use InvalidArgIndex in the
+ /// src list to specify that all of the arguments can introduce taint. Use
+ /// InvalidArgIndex in the dst arguments to signify that all the non-const
+ /// pointer and reference arguments might be tainted on return. If
+ /// ReturnValueIndex is added to the dst list, the return value will be
+ /// tainted.
+ struct TaintPropagationRule {
+ using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
+ CheckerContext &C);
+
+ /// List of arguments which can be taint sources and should be checked.
+ ArgVector SrcArgs;
+ /// List of arguments which should be tainted on function return.
+ ArgVector DstArgs;
+ /// Index for the first variadic parameter if exist.
+ unsigned VariadicIndex;
+ /// Show when a function has variadic parameters. If it has, it marks all
+ /// of them as source or destination.
+ VariadicType VarType;
+ /// Special function for tainted source determination. If defined, it can
+ /// override the default behavior.
+ PropagationFuncType PropagationFunc;
+
+ TaintPropagationRule()
+ : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
+ PropagationFunc(nullptr) {}
+
+ TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
+ VariadicType Var = VariadicType::None,
+ unsigned VarIndex = InvalidArgIndex,
+ PropagationFuncType Func = nullptr)
+ : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
+ VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
+
+ /// Get the propagation rule for a given function.
+ static TaintPropagationRule
+ getTaintPropagationRule(const NameRuleMap &CustomPropagations,
+ const FunctionDecl *FDecl, StringRef Name,
+ CheckerContext &C);
+
+ void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
+ void addDstArg(unsigned A) { DstArgs.push_back(A); }
+
+ bool isNull() const {
+ return SrcArgs.empty() && DstArgs.empty() &&
+ VariadicType::None == VarType;
+ }
+
+ bool isDestinationArgument(unsigned ArgNum) const {
+ return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
+ }
+
+ static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
+ CheckerContext &C) {
+ if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
+ return true;
+
+ if (!E->getType().getTypePtr()->isPointerType())
+ return false;
+
+ Optional<SVal> V = getPointedToSVal(C, E);
+ return (V && isTainted(State, *V));
+ }
+
+ /// Pre-process a function which propagates taint according to the
+ /// taint rule.
+ ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
+
+ // Functions for custom taintedness propagation.
+ static bool postSocket(bool IsTainted, const CallExpr *CE,
+ CheckerContext &C);
+ };
+
+ /// Defines a map between the propagation function's name and
+ /// TaintPropagationRule.
+ NameRuleMap CustomPropagations;
+
+ /// Defines a map between the filter function's name and filtering args.
+ NameArgMap CustomFilters;
+
+ /// Defines a map between the sink function's name and sinking args.
+ NameArgMap CustomSinks;
+};
+
+const unsigned GenericTaintChecker::ReturnValueIndex;
+const unsigned GenericTaintChecker::InvalidArgIndex;
+
+// FIXME: these lines can be removed in C++17
+constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
+constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
+constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
+constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
+} // end of anonymous namespace
+
+using TaintConfig = GenericTaintChecker::TaintConfiguration;
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
+LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair)
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<TaintConfig> {
+ static void mapping(IO &IO, TaintConfig &Config) {
+ IO.mapOptional("Propagations", Config.Propagations);
+ IO.mapOptional("Filters", Config.Filters);
+ IO.mapOptional("Sinks", Config.Sinks);
+ }
+};
+
+template <> struct MappingTraits<TaintConfig::Propagation> {
+ static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
+ IO.mapRequired("Name", Propagation.Name);
+ IO.mapOptional("SrcArgs", Propagation.SrcArgs);
+ IO.mapOptional("DstArgs", Propagation.DstArgs);
+ IO.mapOptional("VariadicType", Propagation.VarType,
+ GenericTaintChecker::VariadicType::None);
+ IO.mapOptional("VariadicIndex", Propagation.VarIndex,
+ GenericTaintChecker::InvalidArgIndex);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
+ static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
+ IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
+ IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
+ IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
+ }
+};
+
+template <> struct MappingTraits<TaintConfig::NameArgsPair> {
+ static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) {
+ IO.mapRequired("Name", NameArg.first);
+ IO.mapRequired("Args", NameArg.second);
+ }
+};
+} // namespace yaml
+} // namespace llvm
+
+/// A set which is used to pass information from call pre-visit instruction
+/// to the call post-visit. The values are unsigned integers, which are either
+/// ReturnValueIndex, or indexes of the pointer/reference argument, which
+/// points to data, which should be tainted on return.
+REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
+
+GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
+ CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
+ ArgVector Result;
+ for (int Arg : Args) {
+ if (Arg == -1)
+ Result.push_back(ReturnValueIndex);
+ else if (Arg < -1) {
+ Result.push_back(InvalidArgIndex);
+ Mgr.reportInvalidCheckerOptionValue(
+ this, Option,
+ "an argument number for propagation rules greater or equal to -1");
+ } else
+ Result.push_back(static_cast<unsigned>(Arg));
+ }
+ return Result;
+}
+
+void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
+ const std::string &Option,
+ TaintConfiguration &&Config) {
+ for (auto &P : Config.Propagations) {
+ GenericTaintChecker::CustomPropagations.try_emplace(
+ P.Name, std::move(P.SrcArgs),
+ convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex);
+ }
+
+ for (auto &F : Config.Filters) {
+ GenericTaintChecker::CustomFilters.try_emplace(F.first,
+ std::move(F.second));
+ }
+
+ for (auto &S : Config.Sinks) {
+ GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second));
+ }
+}
+
+GenericTaintChecker::TaintPropagationRule
+GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
+ const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl,
+ StringRef Name, CheckerContext &C) {
+ // TODO: Currently, we might lose precision here: we always mark a return
+ // value as tainted even if it's just a pointer, pointing to tainted data.
+
+ // Check for exact name match for functions without builtin substitutes.
+ TaintPropagationRule Rule =
+ llvm::StringSwitch<TaintPropagationRule>(Name)
+ // Source functions
+ // TODO: Add support for vfscanf & family.
+ .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
+ .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
+ .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
+ .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
+ .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
+ .Case("getchar_unlocked",
+ TaintPropagationRule({}, {ReturnValueIndex}))
+ .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
+ .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
+ .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
+ .Case("socket",
+ TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
+ InvalidArgIndex,
+ &TaintPropagationRule::postSocket))
+ .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
+ // Propagating functions
+ .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
+ .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
+ .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Case("getdelim", TaintPropagationRule({3}, {0}))
+ .Case("getline", TaintPropagationRule({2}, {0}))
+ .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Case("pread",
+ TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
+ .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
+ .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
+ .Default(TaintPropagationRule());
+
+ if (!Rule.isNull())
+ return Rule;
+
+ // Check if it's one of the memory setting/copying functions.
+ // This check is specialized but faster then calling isCLibraryFunction.
+ unsigned BId = 0;
+ if ((BId = FDecl->getMemoryFunctionKind()))
+ switch (BId) {
+ case Builtin::BImemcpy:
+ case Builtin::BImemmove:
+ case Builtin::BIstrncpy:
+ case Builtin::BIstrncat:
+ return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
+ case Builtin::BIstrlcpy:
+ case Builtin::BIstrlcat:
+ return TaintPropagationRule({1, 2}, {0});
+ case Builtin::BIstrndup:
+ return TaintPropagationRule({0, 1}, {ReturnValueIndex});
+
+ default:
+ break;
+ };
+
+ // Process all other functions which could be defined as builtins.
+ if (Rule.isNull()) {
+ if (C.isCLibraryFunction(FDecl, "snprintf"))
+ return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
+ 3);
+ else if (C.isCLibraryFunction(FDecl, "sprintf"))
+ return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
+ 2);
+ else if (C.isCLibraryFunction(FDecl, "strcpy") ||
+ C.isCLibraryFunction(FDecl, "stpcpy") ||
+ C.isCLibraryFunction(FDecl, "strcat"))
+ return TaintPropagationRule({1}, {0, ReturnValueIndex});
+ else if (C.isCLibraryFunction(FDecl, "bcopy"))
+ return TaintPropagationRule({0, 2}, {1});
+ else if (C.isCLibraryFunction(FDecl, "strdup") ||
+ C.isCLibraryFunction(FDecl, "strdupa"))
+ return TaintPropagationRule({0}, {ReturnValueIndex});
+ else if (C.isCLibraryFunction(FDecl, "wcsdup"))
+ return TaintPropagationRule({0}, {ReturnValueIndex});
+ }
+
+ // Skipping the following functions, since they might be used for cleansing
+ // or smart memory copy:
+ // - memccpy - copying until hitting a special character.
+
+ auto It = CustomPropagations.find(Name);
+ if (It != CustomPropagations.end())
+ return It->getValue();
+
+ return TaintPropagationRule();
+}
+
+void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
+ CheckerContext &C) const {
+ // Check for taintedness related errors first: system call, uncontrolled
+ // format string, tainted buffer size.
+ if (checkPre(CE, C))
+ return;
+
+ // Marks the function's arguments and/or return value tainted if it present in
+ // the list.
+ addSourcesPre(CE, C);
+}
+
+void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
+ CheckerContext &C) const {
+ // Set the marked values as tainted. The return value only accessible from
+ // checkPostStmt.
+ propagateFromPre(CE, C);
+}
+
+void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
+ const char *NL, const char *Sep) const {
+ printTaint(State, Out, NL, Sep);
+}
+
+void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
+ CheckerContext &C) const {
+ ProgramStateRef State = nullptr;
+ const FunctionDecl *FDecl = C.getCalleeDecl(CE);
+ if (!FDecl || FDecl->getKind() != Decl::Function)
+ return;
+
+ StringRef Name = C.getCalleeName(FDecl);
+ if (Name.empty())
+ return;
+
+ // First, try generating a propagation rule for this function.
+ TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
+ this->CustomPropagations, FDecl, Name, C);
+ if (!Rule.isNull()) {
+ State = Rule.process(CE, C);
+ if (!State)
+ return;
+ C.addTransition(State);
+ return;
+ }
+
+ if (!State)
+ return;
+ C.addTransition(State);
+}
+
+bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
+ CheckerContext &C) const {
+ ProgramStateRef State = C.getState();
+
+ // Depending on what was tainted at pre-visit, we determined a set of
+ // arguments which should be tainted after the function returns. These are
+ // stored in the state as TaintArgsOnPostVisit set.
+ TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
+ if (TaintArgs.isEmpty())
+ return false;
+
+ for (unsigned ArgNum : TaintArgs) {
+ // Special handling for the tainted return value.
+ if (ArgNum == ReturnValueIndex) {
+ State = addTaint(State, CE, C.getLocationContext());
+ continue;
+ }
+
+ // The arguments are pointer arguments. The data they are pointing at is
+ // tainted after the call.
+ if (CE->getNumArgs() < (ArgNum + 1))
+ return false;
+ const Expr *Arg = CE->getArg(ArgNum);
+ Optional<SVal> V = getPointedToSVal(C, Arg);
+ if (V)
+ State = addTaint(State, *V);
+ }
+
+ // Clear up the taint info from the state.
+ State = State->remove<TaintArgsOnPostVisit>();
+
+ if (State != C.getState()) {
+ C.addTransition(State);
+ return true;
+ }
+ return false;
+}
+
+bool GenericTaintChecker::checkPre(const CallExpr *CE,
+ CheckerContext &C) const {
+
+ if (checkUncontrolledFormatString(CE, C))
+ return true;
+
+ const FunctionDecl *FDecl = C.getCalleeDecl(CE);
+ if (!FDecl || FDecl->getKind() != Decl::Function)
+ return false;
+
+ StringRef Name = C.getCalleeName(FDecl);
+ if (Name.empty())
+ return false;
+
+ if (checkSystemCall(CE, Name, C))
+ return true;
+
+ if (checkTaintedBufferSize(CE, FDecl, C))
+ return true;
+
+ if (checkCustomSinks(CE, Name, C))
+ return true;
+
+ return false;
+}
+
+Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
+ const Expr *Arg) {
+ ProgramStateRef State = C.getState();
+ SVal AddrVal = C.getSVal(Arg->IgnoreParens());
+ if (AddrVal.isUnknownOrUndef())
+ return None;
+
+ Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
+ if (!AddrLoc)
+ return None;
+
+ QualType ArgTy = Arg->getType().getCanonicalType();
+ if (!ArgTy->isPointerType())
+ return None;
+
+ QualType ValTy = ArgTy->getPointeeType();
+
+ // Do not dereference void pointers. Treat them as byte pointers instead.
+ // FIXME: we might want to consider more than just the first byte.
+ if (ValTy->isVoidType())
+ ValTy = C.getASTContext().CharTy;
+
+ return State->getSVal(*AddrLoc, ValTy);
+}
+
+ProgramStateRef
+GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
+ CheckerContext &C) const {
+ ProgramStateRef State = C.getState();
+
+ // Check for taint in arguments.
+ bool IsTainted = true;
+ for (unsigned ArgNum : SrcArgs) {
+ if (ArgNum >= CE->getNumArgs())
+ continue;
+
+ if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
+ break;
+ }
+
+ // Check for taint in variadic arguments.
+ if (!IsTainted && VariadicType::Src == VarType) {
+ // Check if any of the arguments is tainted
+ for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
+ if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
+ break;
+ }
+ }
+
+ if (PropagationFunc)
+ IsTainted = PropagationFunc(IsTainted, CE, C);
+
+ if (!IsTainted)
+ return State;
+
+ // Mark the arguments which should be tainted after the function returns.
+ for (unsigned ArgNum : DstArgs) {
+ // Should mark the return value?
+ if (ArgNum == ReturnValueIndex) {
+ State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
+ continue;
+ }
+
+ if (ArgNum >= CE->getNumArgs())
+ continue;
+
+ // Mark the given argument.
+ State = State->add<TaintArgsOnPostVisit>(ArgNum);
+ }
+
+ // Mark all variadic arguments tainted if present.
+ if (VariadicType::Dst == VarType) {
+ // For all pointer and references that were passed in:
+ // If they are not pointing to const data, mark data as tainted.
+ // TODO: So far we are just going one level down; ideally we'd need to
+ // recurse here.
+ for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
+ const Expr *Arg = CE->getArg(i);
+ // Process pointer argument.
+ const Type *ArgTy = Arg->getType().getTypePtr();
+ QualType PType = ArgTy->getPointeeType();
+ if ((!PType.isNull() && !PType.isConstQualified()) ||
+ (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
+ State = State->add<TaintArgsOnPostVisit>(i);
+ }
+ }
+
+ return State;
+}
+
+// If argument 0(protocol domain) is network, the return value should get taint.
+bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
+ const CallExpr *CE,
+ CheckerContext &C) {
+ SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
+ StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
+ // White list the internal communication protocols.
+ if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
+ DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
+ return false;
+
+ return true;
+}
+
+bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
+ ProgramStateRef State = C.getState();
+ SVal Val = C.getSVal(E);
+
+ // stdin is a pointer, so it would be a region.
+ const MemRegion *MemReg = Val.getAsRegion();
+
+ // The region should be symbolic, we do not know it's value.
+ const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
+ if (!SymReg)
+ return false;
+
+ // Get it's symbol and find the declaration region it's pointing to.
+ const SymbolRegionValue *Sm =
+ dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
+ if (!Sm)
+ return false;
+ const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
+ if (!DeclReg)
+ return false;
+
+ // This region corresponds to a declaration, find out if it's a global/extern
+ // variable named stdin with the proper type.
+ if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
+ D = D->getCanonicalDecl();
+ if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
+ const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
+ if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
+ C.getASTContext().getFILEType().getCanonicalType())
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool getPrintfFormatArgumentNum(const CallExpr *CE,
+ const CheckerContext &C,
+ unsigned &ArgNum) {
+ // Find if the function contains a format string argument.
+ // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
+ // vsnprintf, syslog, custom annotated functions.
+ const FunctionDecl *FDecl = C.getCalleeDecl(CE);
+ if (!FDecl)
+ return false;
+ for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
+ ArgNum = Format->getFormatIdx() - 1;
+ if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
+ return true;
+ }
+
+ // Or if a function is named setproctitle (this is a heuristic).
+ if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
+ ArgNum = 0;
+ return true;
+ }
+
+ return false;
+}
+
+bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
+ CheckerContext &C) const {
+ assert(E);
+
+ // Check for taint.
+ ProgramStateRef State = C.getState();
+ Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
+ SVal TaintedSVal;
+ if (PointedToSVal && isTainted(State, *PointedToSVal))
+ TaintedSVal = *PointedToSVal;
+ else if (isTainted(State, E, C.getLocationContext()))
+ TaintedSVal = C.getSVal(E);
+ else
+ return false;
+
+ // Generate diagnostic.
+ if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
+ initBugType();
+ auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
+ report->addRange(E->getSourceRange());
+ report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
+ C.emitReport(std::move(report));
+ return true;
+ }
+ return false;
+}
+
+bool GenericTaintChecker::checkUncontrolledFormatString(
+ const CallExpr *CE, CheckerContext &C) const {
+ // Check if the function contains a format string argument.
+ unsigned ArgNum = 0;
+ if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
+ return false;
+
+ // If either the format string content or the pointer itself are tainted,
+ // warn.
+ return generateReportIfTainted(CE->getArg(ArgNum),
+ MsgUncontrolledFormatString, C);
+}
+
+bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
+ CheckerContext &C) const {
+ // TODO: It might make sense to run this check on demand. In some cases,
+ // we should check if the environment has been cleansed here. We also might
+ // need to know if the user was reset before these calls(seteuid).
+ unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
+ .Case("system", 0)
+ .Case("popen", 0)
+ .Case("execl", 0)
+ .Case("execle", 0)
+ .Case("execlp", 0)
+ .Case("execv", 0)
+ .Case("execvp", 0)
+ .Case("execvP", 0)
+ .Case("execve", 0)
+ .Case("dlopen", 0)
+ .Default(InvalidArgIndex);
+
+ if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
+ return false;
+
+ return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
+}
+
+// TODO: Should this check be a part of the CString checker?
+// If yes, should taint be a global setting?
+bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
+ const FunctionDecl *FDecl,
+ CheckerContext &C) const {
+ // If the function has a buffer size argument, set ArgNum.
+ unsigned ArgNum = InvalidArgIndex;
+ unsigned BId = 0;
+ if ((BId = FDecl->getMemoryFunctionKind()))
+ switch (BId) {
+ case Builtin::BImemcpy:
+ case Builtin::BImemmove:
+ case Builtin::BIstrncpy:
+ ArgNum = 2;
+ break;
+ case Builtin::BIstrndup:
+ ArgNum = 1;
+ break;
+ default:
+ break;
+ };
+
+ if (ArgNum == InvalidArgIndex) {
+ if (C.isCLibraryFunction(FDecl, "malloc") ||
+ C.isCLibraryFunction(FDecl, "calloc") ||
+ C.isCLibraryFunction(FDecl, "alloca"))
+ ArgNum = 0;
+ else if (C.isCLibraryFunction(FDecl, "memccpy"))
+ ArgNum = 3;
+ else if (C.isCLibraryFunction(FDecl, "realloc"))
+ ArgNum = 1;
+ else if (C.isCLibraryFunction(FDecl, "bcopy"))
+ ArgNum = 2;
+ }
+
+ return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
+ generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
+}
+
+bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name,
+ CheckerContext &C) const {
+ auto It = CustomSinks.find(Name);
+ if (It == CustomSinks.end())
+ return false;
+
+ const GenericTaintChecker::ArgVector &Args = It->getValue();
+ for (unsigned ArgNum : Args) {
+ if (ArgNum >= CE->getNumArgs())
+ continue;
+
+ if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
+ return true;
+ }
+
+ return false;
+}
+
+void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
+ auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
+ std::string Option{"Config"};
+ StringRef ConfigFile =
+ Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
+ llvm::Optional<TaintConfig> Config =
+ getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
+ if (Config)
+ Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
+}
+
+bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
+ return true;
+}