diff options
Diffstat (limited to 'clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp')
-rw-r--r-- | clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp | 855 |
1 files changed, 855 insertions, 0 deletions
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp new file mode 100644 index 000000000000..d442b26b3959 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -0,0 +1,855 @@ +//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This checker defines the attack surface for generic taint propagation. +// +// The taint information produced by it might be useful to other checkers. For +// example, checkers should report errors which involve tainted data more +// aggressively, even if the involved symbols are under constrained. +// +//===----------------------------------------------------------------------===// + +#include "Taint.h" +#include "Yaml.h" +#include "clang/AST/Attr.h" +#include "clang/Basic/Builtins.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/YAMLTraits.h" +#include <limits> +#include <utility> + +using namespace clang; +using namespace ento; +using namespace taint; + +namespace { +class GenericTaintChecker + : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> { +public: + static void *getTag() { + static int Tag; + return &Tag; + } + + void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; + + void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; + + void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, + const char *Sep) const override; + + using ArgVector = SmallVector<unsigned, 2>; + using SignedArgVector = SmallVector<int, 2>; + + enum class VariadicType { None, Src, Dst }; + + /// Used to parse the configuration file. + struct TaintConfiguration { + using NameArgsPair = std::pair<std::string, ArgVector>; + + struct Propagation { + std::string Name; + ArgVector SrcArgs; + SignedArgVector DstArgs; + VariadicType VarType; + unsigned VarIndex; + }; + + std::vector<Propagation> Propagations; + std::vector<NameArgsPair> Filters; + std::vector<NameArgsPair> Sinks; + + TaintConfiguration() = default; + TaintConfiguration(const TaintConfiguration &) = default; + TaintConfiguration(TaintConfiguration &&) = default; + TaintConfiguration &operator=(const TaintConfiguration &) = default; + TaintConfiguration &operator=(TaintConfiguration &&) = default; + }; + + /// Convert SignedArgVector to ArgVector. + ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, + SignedArgVector Args); + + /// Parse the config. + void parseConfiguration(CheckerManager &Mgr, const std::string &Option, + TaintConfiguration &&Config); + + static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; + /// Denotes the return vale. + static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - + 1}; + +private: + mutable std::unique_ptr<BugType> BT; + void initBugType() const { + if (!BT) + BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); + } + + /// Catch taint related bugs. Check if tainted data is passed to a + /// system call etc. + bool checkPre(const CallExpr *CE, CheckerContext &C) const; + + /// Add taint sources on a pre-visit. + void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; + + /// Propagate taint generated at pre-visit. + bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; + + /// Check if the region the expression evaluates to is the standard input, + /// and thus, is tainted. + static bool isStdin(const Expr *E, CheckerContext &C); + + /// Given a pointer argument, return the value it points to. + static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); + + /// Check for CWE-134: Uncontrolled Format String. + static constexpr llvm::StringLiteral MsgUncontrolledFormatString = + "Untrusted data is used as a format string " + "(CWE-134: Uncontrolled Format String)"; + bool checkUncontrolledFormatString(const CallExpr *CE, + CheckerContext &C) const; + + /// Check for: + /// CERT/STR02-C. "Sanitize data passed to complex subsystems" + /// CWE-78, "Failure to Sanitize Data into an OS Command" + static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = + "Untrusted data is passed to a system call " + "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; + bool checkSystemCall(const CallExpr *CE, StringRef Name, + CheckerContext &C) const; + + /// Check if tainted data is used as a buffer size ins strn.. functions, + /// and allocators. + static constexpr llvm::StringLiteral MsgTaintedBufferSize = + "Untrusted data is used to specify the buffer size " + "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " + "for character data and the null terminator)"; + bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, + CheckerContext &C) const; + + /// Check if tainted data is used as a custom sink's parameter. + static constexpr llvm::StringLiteral MsgCustomSink = + "Untrusted data is passed to a user-defined sink"; + bool checkCustomSinks(const CallExpr *CE, StringRef Name, + CheckerContext &C) const; + + /// Generate a report if the expression is tainted or points to tainted data. + bool generateReportIfTainted(const Expr *E, StringRef Msg, + CheckerContext &C) const; + + struct TaintPropagationRule; + using NameRuleMap = llvm::StringMap<TaintPropagationRule>; + using NameArgMap = llvm::StringMap<ArgVector>; + + /// A struct used to specify taint propagation rules for a function. + /// + /// If any of the possible taint source arguments is tainted, all of the + /// destination arguments should also be tainted. Use InvalidArgIndex in the + /// src list to specify that all of the arguments can introduce taint. Use + /// InvalidArgIndex in the dst arguments to signify that all the non-const + /// pointer and reference arguments might be tainted on return. If + /// ReturnValueIndex is added to the dst list, the return value will be + /// tainted. + struct TaintPropagationRule { + using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *, + CheckerContext &C); + + /// List of arguments which can be taint sources and should be checked. + ArgVector SrcArgs; + /// List of arguments which should be tainted on function return. + ArgVector DstArgs; + /// Index for the first variadic parameter if exist. + unsigned VariadicIndex; + /// Show when a function has variadic parameters. If it has, it marks all + /// of them as source or destination. + VariadicType VarType; + /// Special function for tainted source determination. If defined, it can + /// override the default behavior. + PropagationFuncType PropagationFunc; + + TaintPropagationRule() + : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), + PropagationFunc(nullptr) {} + + TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, + VariadicType Var = VariadicType::None, + unsigned VarIndex = InvalidArgIndex, + PropagationFuncType Func = nullptr) + : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), + VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} + + /// Get the propagation rule for a given function. + static TaintPropagationRule + getTaintPropagationRule(const NameRuleMap &CustomPropagations, + const FunctionDecl *FDecl, StringRef Name, + CheckerContext &C); + + void addSrcArg(unsigned A) { SrcArgs.push_back(A); } + void addDstArg(unsigned A) { DstArgs.push_back(A); } + + bool isNull() const { + return SrcArgs.empty() && DstArgs.empty() && + VariadicType::None == VarType; + } + + bool isDestinationArgument(unsigned ArgNum) const { + return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); + } + + static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, + CheckerContext &C) { + if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) + return true; + + if (!E->getType().getTypePtr()->isPointerType()) + return false; + + Optional<SVal> V = getPointedToSVal(C, E); + return (V && isTainted(State, *V)); + } + + /// Pre-process a function which propagates taint according to the + /// taint rule. + ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; + + // Functions for custom taintedness propagation. + static bool postSocket(bool IsTainted, const CallExpr *CE, + CheckerContext &C); + }; + + /// Defines a map between the propagation function's name and + /// TaintPropagationRule. + NameRuleMap CustomPropagations; + + /// Defines a map between the filter function's name and filtering args. + NameArgMap CustomFilters; + + /// Defines a map between the sink function's name and sinking args. + NameArgMap CustomSinks; +}; + +const unsigned GenericTaintChecker::ReturnValueIndex; +const unsigned GenericTaintChecker::InvalidArgIndex; + +// FIXME: these lines can be removed in C++17 +constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; +constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; +constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; +constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; +} // end of anonymous namespace + +using TaintConfig = GenericTaintChecker::TaintConfiguration; + +LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) +LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair) + +namespace llvm { +namespace yaml { +template <> struct MappingTraits<TaintConfig> { + static void mapping(IO &IO, TaintConfig &Config) { + IO.mapOptional("Propagations", Config.Propagations); + IO.mapOptional("Filters", Config.Filters); + IO.mapOptional("Sinks", Config.Sinks); + } +}; + +template <> struct MappingTraits<TaintConfig::Propagation> { + static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { + IO.mapRequired("Name", Propagation.Name); + IO.mapOptional("SrcArgs", Propagation.SrcArgs); + IO.mapOptional("DstArgs", Propagation.DstArgs); + IO.mapOptional("VariadicType", Propagation.VarType, + GenericTaintChecker::VariadicType::None); + IO.mapOptional("VariadicIndex", Propagation.VarIndex, + GenericTaintChecker::InvalidArgIndex); + } +}; + +template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { + static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { + IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); + IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); + IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); + } +}; + +template <> struct MappingTraits<TaintConfig::NameArgsPair> { + static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) { + IO.mapRequired("Name", NameArg.first); + IO.mapRequired("Args", NameArg.second); + } +}; +} // namespace yaml +} // namespace llvm + +/// A set which is used to pass information from call pre-visit instruction +/// to the call post-visit. The values are unsigned integers, which are either +/// ReturnValueIndex, or indexes of the pointer/reference argument, which +/// points to data, which should be tainted on return. +REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) + +GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector( + CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) { + ArgVector Result; + for (int Arg : Args) { + if (Arg == -1) + Result.push_back(ReturnValueIndex); + else if (Arg < -1) { + Result.push_back(InvalidArgIndex); + Mgr.reportInvalidCheckerOptionValue( + this, Option, + "an argument number for propagation rules greater or equal to -1"); + } else + Result.push_back(static_cast<unsigned>(Arg)); + } + return Result; +} + +void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, + const std::string &Option, + TaintConfiguration &&Config) { + for (auto &P : Config.Propagations) { + GenericTaintChecker::CustomPropagations.try_emplace( + P.Name, std::move(P.SrcArgs), + convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex); + } + + for (auto &F : Config.Filters) { + GenericTaintChecker::CustomFilters.try_emplace(F.first, + std::move(F.second)); + } + + for (auto &S : Config.Sinks) { + GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second)); + } +} + +GenericTaintChecker::TaintPropagationRule +GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( + const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl, + StringRef Name, CheckerContext &C) { + // TODO: Currently, we might lose precision here: we always mark a return + // value as tainted even if it's just a pointer, pointing to tainted data. + + // Check for exact name match for functions without builtin substitutes. + TaintPropagationRule Rule = + llvm::StringSwitch<TaintPropagationRule>(Name) + // Source functions + // TODO: Add support for vfscanf & family. + .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("getch", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("getchar_unlocked", + TaintPropagationRule({}, {ReturnValueIndex})) + .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) + .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1)) + .Case("socket", + TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None, + InvalidArgIndex, + &TaintPropagationRule::postSocket)) + .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex})) + // Propagating functions + .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex})) + .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2)) + .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("getdelim", TaintPropagationRule({3}, {0})) + .Case("getline", TaintPropagationRule({2}, {0})) + .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("pread", + TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex})) + .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex})) + .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex})) + .Default(TaintPropagationRule()); + + if (!Rule.isNull()) + return Rule; + + // Check if it's one of the memory setting/copying functions. + // This check is specialized but faster then calling isCLibraryFunction. + unsigned BId = 0; + if ((BId = FDecl->getMemoryFunctionKind())) + switch (BId) { + case Builtin::BImemcpy: + case Builtin::BImemmove: + case Builtin::BIstrncpy: + case Builtin::BIstrncat: + return TaintPropagationRule({1, 2}, {0, ReturnValueIndex}); + case Builtin::BIstrlcpy: + case Builtin::BIstrlcat: + return TaintPropagationRule({1, 2}, {0}); + case Builtin::BIstrndup: + return TaintPropagationRule({0, 1}, {ReturnValueIndex}); + + default: + break; + }; + + // Process all other functions which could be defined as builtins. + if (Rule.isNull()) { + if (C.isCLibraryFunction(FDecl, "snprintf")) + return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src, + 3); + else if (C.isCLibraryFunction(FDecl, "sprintf")) + return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src, + 2); + else if (C.isCLibraryFunction(FDecl, "strcpy") || + C.isCLibraryFunction(FDecl, "stpcpy") || + C.isCLibraryFunction(FDecl, "strcat")) + return TaintPropagationRule({1}, {0, ReturnValueIndex}); + else if (C.isCLibraryFunction(FDecl, "bcopy")) + return TaintPropagationRule({0, 2}, {1}); + else if (C.isCLibraryFunction(FDecl, "strdup") || + C.isCLibraryFunction(FDecl, "strdupa")) + return TaintPropagationRule({0}, {ReturnValueIndex}); + else if (C.isCLibraryFunction(FDecl, "wcsdup")) + return TaintPropagationRule({0}, {ReturnValueIndex}); + } + + // Skipping the following functions, since they might be used for cleansing + // or smart memory copy: + // - memccpy - copying until hitting a special character. + + auto It = CustomPropagations.find(Name); + if (It != CustomPropagations.end()) + return It->getValue(); + + return TaintPropagationRule(); +} + +void GenericTaintChecker::checkPreStmt(const CallExpr *CE, + CheckerContext &C) const { + // Check for taintedness related errors first: system call, uncontrolled + // format string, tainted buffer size. + if (checkPre(CE, C)) + return; + + // Marks the function's arguments and/or return value tainted if it present in + // the list. + addSourcesPre(CE, C); +} + +void GenericTaintChecker::checkPostStmt(const CallExpr *CE, + CheckerContext &C) const { + // Set the marked values as tainted. The return value only accessible from + // checkPostStmt. + propagateFromPre(CE, C); +} + +void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, + const char *NL, const char *Sep) const { + printTaint(State, Out, NL, Sep); +} + +void GenericTaintChecker::addSourcesPre(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = nullptr; + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + if (!FDecl || FDecl->getKind() != Decl::Function) + return; + + StringRef Name = C.getCalleeName(FDecl); + if (Name.empty()) + return; + + // First, try generating a propagation rule for this function. + TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( + this->CustomPropagations, FDecl, Name, C); + if (!Rule.isNull()) { + State = Rule.process(CE, C); + if (!State) + return; + C.addTransition(State); + return; + } + + if (!State) + return; + C.addTransition(State); +} + +bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // Depending on what was tainted at pre-visit, we determined a set of + // arguments which should be tainted after the function returns. These are + // stored in the state as TaintArgsOnPostVisit set. + TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); + if (TaintArgs.isEmpty()) + return false; + + for (unsigned ArgNum : TaintArgs) { + // Special handling for the tainted return value. + if (ArgNum == ReturnValueIndex) { + State = addTaint(State, CE, C.getLocationContext()); + continue; + } + + // The arguments are pointer arguments. The data they are pointing at is + // tainted after the call. + if (CE->getNumArgs() < (ArgNum + 1)) + return false; + const Expr *Arg = CE->getArg(ArgNum); + Optional<SVal> V = getPointedToSVal(C, Arg); + if (V) + State = addTaint(State, *V); + } + + // Clear up the taint info from the state. + State = State->remove<TaintArgsOnPostVisit>(); + + if (State != C.getState()) { + C.addTransition(State); + return true; + } + return false; +} + +bool GenericTaintChecker::checkPre(const CallExpr *CE, + CheckerContext &C) const { + + if (checkUncontrolledFormatString(CE, C)) + return true; + + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + if (!FDecl || FDecl->getKind() != Decl::Function) + return false; + + StringRef Name = C.getCalleeName(FDecl); + if (Name.empty()) + return false; + + if (checkSystemCall(CE, Name, C)) + return true; + + if (checkTaintedBufferSize(CE, FDecl, C)) + return true; + + if (checkCustomSinks(CE, Name, C)) + return true; + + return false; +} + +Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, + const Expr *Arg) { + ProgramStateRef State = C.getState(); + SVal AddrVal = C.getSVal(Arg->IgnoreParens()); + if (AddrVal.isUnknownOrUndef()) + return None; + + Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); + if (!AddrLoc) + return None; + + QualType ArgTy = Arg->getType().getCanonicalType(); + if (!ArgTy->isPointerType()) + return None; + + QualType ValTy = ArgTy->getPointeeType(); + + // Do not dereference void pointers. Treat them as byte pointers instead. + // FIXME: we might want to consider more than just the first byte. + if (ValTy->isVoidType()) + ValTy = C.getASTContext().CharTy; + + return State->getSVal(*AddrLoc, ValTy); +} + +ProgramStateRef +GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // Check for taint in arguments. + bool IsTainted = true; + for (unsigned ArgNum : SrcArgs) { + if (ArgNum >= CE->getNumArgs()) + continue; + + if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) + break; + } + + // Check for taint in variadic arguments. + if (!IsTainted && VariadicType::Src == VarType) { + // Check if any of the arguments is tainted + for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { + if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) + break; + } + } + + if (PropagationFunc) + IsTainted = PropagationFunc(IsTainted, CE, C); + + if (!IsTainted) + return State; + + // Mark the arguments which should be tainted after the function returns. + for (unsigned ArgNum : DstArgs) { + // Should mark the return value? + if (ArgNum == ReturnValueIndex) { + State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); + continue; + } + + if (ArgNum >= CE->getNumArgs()) + continue; + + // Mark the given argument. + State = State->add<TaintArgsOnPostVisit>(ArgNum); + } + + // Mark all variadic arguments tainted if present. + if (VariadicType::Dst == VarType) { + // For all pointer and references that were passed in: + // If they are not pointing to const data, mark data as tainted. + // TODO: So far we are just going one level down; ideally we'd need to + // recurse here. + for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { + const Expr *Arg = CE->getArg(i); + // Process pointer argument. + const Type *ArgTy = Arg->getType().getTypePtr(); + QualType PType = ArgTy->getPointeeType(); + if ((!PType.isNull() && !PType.isConstQualified()) || + (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) + State = State->add<TaintArgsOnPostVisit>(i); + } + } + + return State; +} + +// If argument 0(protocol domain) is network, the return value should get taint. +bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/, + const CallExpr *CE, + CheckerContext &C) { + SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); + StringRef DomName = C.getMacroNameOrSpelling(DomLoc); + // White list the internal communication protocols. + if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || + DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) + return false; + + return true; +} + +bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { + ProgramStateRef State = C.getState(); + SVal Val = C.getSVal(E); + + // stdin is a pointer, so it would be a region. + const MemRegion *MemReg = Val.getAsRegion(); + + // The region should be symbolic, we do not know it's value. + const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); + if (!SymReg) + return false; + + // Get it's symbol and find the declaration region it's pointing to. + const SymbolRegionValue *Sm = + dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); + if (!Sm) + return false; + const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); + if (!DeclReg) + return false; + + // This region corresponds to a declaration, find out if it's a global/extern + // variable named stdin with the proper type. + if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { + D = D->getCanonicalDecl(); + if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { + const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); + if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == + C.getASTContext().getFILEType().getCanonicalType()) + return true; + } + } + return false; +} + +static bool getPrintfFormatArgumentNum(const CallExpr *CE, + const CheckerContext &C, + unsigned &ArgNum) { + // Find if the function contains a format string argument. + // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, + // vsnprintf, syslog, custom annotated functions. + const FunctionDecl *FDecl = C.getCalleeDecl(CE); + if (!FDecl) + return false; + for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { + ArgNum = Format->getFormatIdx() - 1; + if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) + return true; + } + + // Or if a function is named setproctitle (this is a heuristic). + if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { + ArgNum = 0; + return true; + } + + return false; +} + +bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, + CheckerContext &C) const { + assert(E); + + // Check for taint. + ProgramStateRef State = C.getState(); + Optional<SVal> PointedToSVal = getPointedToSVal(C, E); + SVal TaintedSVal; + if (PointedToSVal && isTainted(State, *PointedToSVal)) + TaintedSVal = *PointedToSVal; + else if (isTainted(State, E, C.getLocationContext())) + TaintedSVal = C.getSVal(E); + else + return false; + + // Generate diagnostic. + if (ExplodedNode *N = C.generateNonFatalErrorNode()) { + initBugType(); + auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + report->addRange(E->getSourceRange()); + report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); + C.emitReport(std::move(report)); + return true; + } + return false; +} + +bool GenericTaintChecker::checkUncontrolledFormatString( + const CallExpr *CE, CheckerContext &C) const { + // Check if the function contains a format string argument. + unsigned ArgNum = 0; + if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) + return false; + + // If either the format string content or the pointer itself are tainted, + // warn. + return generateReportIfTainted(CE->getArg(ArgNum), + MsgUncontrolledFormatString, C); +} + +bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, + CheckerContext &C) const { + // TODO: It might make sense to run this check on demand. In some cases, + // we should check if the environment has been cleansed here. We also might + // need to know if the user was reset before these calls(seteuid). + unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) + .Case("system", 0) + .Case("popen", 0) + .Case("execl", 0) + .Case("execle", 0) + .Case("execlp", 0) + .Case("execv", 0) + .Case("execvp", 0) + .Case("execvP", 0) + .Case("execve", 0) + .Case("dlopen", 0) + .Default(InvalidArgIndex); + + if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1)) + return false; + + return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); +} + +// TODO: Should this check be a part of the CString checker? +// If yes, should taint be a global setting? +bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, + const FunctionDecl *FDecl, + CheckerContext &C) const { + // If the function has a buffer size argument, set ArgNum. + unsigned ArgNum = InvalidArgIndex; + unsigned BId = 0; + if ((BId = FDecl->getMemoryFunctionKind())) + switch (BId) { + case Builtin::BImemcpy: + case Builtin::BImemmove: + case Builtin::BIstrncpy: + ArgNum = 2; + break; + case Builtin::BIstrndup: + ArgNum = 1; + break; + default: + break; + }; + + if (ArgNum == InvalidArgIndex) { + if (C.isCLibraryFunction(FDecl, "malloc") || + C.isCLibraryFunction(FDecl, "calloc") || + C.isCLibraryFunction(FDecl, "alloca")) + ArgNum = 0; + else if (C.isCLibraryFunction(FDecl, "memccpy")) + ArgNum = 3; + else if (C.isCLibraryFunction(FDecl, "realloc")) + ArgNum = 1; + else if (C.isCLibraryFunction(FDecl, "bcopy")) + ArgNum = 2; + } + + return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && + generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); +} + +bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name, + CheckerContext &C) const { + auto It = CustomSinks.find(Name); + if (It == CustomSinks.end()) + return false; + + const GenericTaintChecker::ArgVector &Args = It->getValue(); + for (unsigned ArgNum : Args) { + if (ArgNum >= CE->getNumArgs()) + continue; + + if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C)) + return true; + } + + return false; +} + +void ento::registerGenericTaintChecker(CheckerManager &Mgr) { + auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); + std::string Option{"Config"}; + StringRef ConfigFile = + Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); + llvm::Optional<TaintConfig> Config = + getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); + if (Config) + Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); +} + +bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) { + return true; +} |