From 550ae89a710bf458d47e5b1d183f5e7039c2b384 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 30 May 2017 17:37:44 +0000 Subject: Vendor import of clang trunk r304222: https://llvm.org/svn/llvm-project/cfe/trunk@304222 --- lib/StaticAnalyzer/Core/ProgramState.cpp | 100 ++++++++++++++++++++++++------- 1 file changed, 80 insertions(+), 20 deletions(-) (limited to 'lib/StaticAnalyzer/Core/ProgramState.cpp') diff --git a/lib/StaticAnalyzer/Core/ProgramState.cpp b/lib/StaticAnalyzer/Core/ProgramState.cpp index 31556c792fc5b..3215c3ccd21e9 100644 --- a/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -644,15 +644,33 @@ ProgramStateRef ProgramState::addTaint(const Stmt *S, if (const Expr *E = dyn_cast_or_null(S)) S = E->IgnoreParens(); - SymbolRef Sym = getSVal(S, LCtx).getAsSymbol(); + return addTaint(getSVal(S, LCtx), Kind); +} + +ProgramStateRef ProgramState::addTaint(SVal V, + TaintTagType Kind) const { + SymbolRef Sym = V.getAsSymbol(); if (Sym) return addTaint(Sym, Kind); - const MemRegion *R = getSVal(S, LCtx).getAsRegion(); - addTaint(R, Kind); + // If the SVal represents a structure, try to mass-taint all values within the + // structure. For now it only works efficiently on lazy compound values that + // were conjured during a conservative evaluation of a function - either as + // return values of functions that return structures or arrays by value, or as + // values of structures or arrays passed into the function by reference, + // directly or through pointer aliasing. Such lazy compound values are + // characterized by having exactly one binding in their captured store within + // their parent region, which is a conjured symbol default-bound to the base + // region of the parent region. + if (auto LCV = V.getAs()) { + if (Optional binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) { + if (SymbolRef Sym = binding->getAsSymbol()) + return addPartialTaint(Sym, LCV->getRegion(), Kind); + } + } - // Cannot add taint, so just return the state. - return this; + const MemRegion *R = V.getAsRegion(); + return addTaint(R, Kind); } ProgramStateRef ProgramState::addTaint(const MemRegion *R, @@ -674,6 +692,27 @@ ProgramStateRef ProgramState::addTaint(SymbolRef Sym, return NewState; } +ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym, + const SubRegion *SubRegion, + TaintTagType Kind) const { + // Ignore partial taint if the entire parent symbol is already tainted. + if (contains(ParentSym) && *get(ParentSym) == Kind) + return this; + + // Partial taint applies if only a portion of the symbol is tainted. + if (SubRegion == SubRegion->getBaseRegion()) + return addTaint(ParentSym, Kind); + + const TaintedSubRegions *SavedRegs = get(ParentSym); + TaintedSubRegions Regs = + SavedRegs ? *SavedRegs : stateMgr->TSRFactory.getEmptyMap(); + + Regs = stateMgr->TSRFactory.add(Regs, SubRegion, Kind); + ProgramStateRef NewState = set(ParentSym, Regs); + assert(NewState); + return NewState; +} + bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx, TaintTagType Kind) const { if (const Expr *E = dyn_cast_or_null(S)) @@ -714,31 +753,52 @@ bool ProgramState::isTainted(SymbolRef Sym, TaintTagType Kind) const { return false; // Traverse all the symbols this symbol depends on to see if any are tainted. - bool Tainted = false; for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end(); SI != SE; ++SI) { if (!isa(*SI)) continue; - const TaintTagType *Tag = get(*SI); - Tainted = (Tag && *Tag == Kind); + if (const TaintTagType *Tag = get(*SI)) { + if (*Tag == Kind) + return true; + } - // If this is a SymbolDerived with a tainted parent, it's also tainted. - if (const SymbolDerived *SD = dyn_cast(*SI)) - Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind); + if (const SymbolDerived *SD = dyn_cast(*SI)) { + // If this is a SymbolDerived with a tainted parent, it's also tainted. + if (isTainted(SD->getParentSymbol(), Kind)) + return true; + + // If this is a SymbolDerived with the same parent symbol as another + // tainted SymbolDerived and a region that's a sub-region of that tainted + // symbol, it's also tainted. + if (const TaintedSubRegions *Regs = + get(SD->getParentSymbol())) { + const TypedValueRegion *R = SD->getRegion(); + for (auto I : *Regs) { + // FIXME: The logic to identify tainted regions could be more + // complete. For example, this would not currently identify + // overlapping fields in a union as tainted. To identify this we can + // check for overlapping/nested byte offsets. + if (Kind == I.second && + (R == I.first || R->isSubRegionOf(I.first))) + return true; + } + } + } // If memory region is tainted, data is also tainted. - if (const SymbolRegionValue *SRV = dyn_cast(*SI)) - Tainted = Tainted || isTainted(SRV->getRegion(), Kind); - - // If If this is a SymbolCast from a tainted value, it's also tainted. - if (const SymbolCast *SC = dyn_cast(*SI)) - Tainted = Tainted || isTainted(SC->getOperand(), Kind); + if (const SymbolRegionValue *SRV = dyn_cast(*SI)) { + if (isTainted(SRV->getRegion(), Kind)) + return true; + } - if (Tainted) - return true; + // If this is a SymbolCast from a tainted value, it's also tainted. + if (const SymbolCast *SC = dyn_cast(*SI)) { + if (isTainted(SC->getOperand(), Kind)) + return true; + } } - return Tainted; + return false; } -- cgit v1.2.3