aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp')
-rw-r--r--contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp3463
1 files changed, 3463 insertions, 0 deletions
diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
new file mode 100644
index 000000000000..fab8e35962d7
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -0,0 +1,3463 @@
+//== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines RangeConstraintManager, a class that tracks simple
+// equality and inequality constraints on symbolic values of ProgramState.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/JsonSupport.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/ImmutableSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <iterator>
+#include <optional>
+
+using namespace clang;
+using namespace ento;
+
+// This class can be extended with other tables which will help to reason
+// about ranges more precisely.
+class OperatorRelationsTable {
+ static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE &&
+ BO_GE < BO_EQ && BO_EQ < BO_NE,
+ "This class relies on operators order. Rework it otherwise.");
+
+public:
+ enum TriStateKind {
+ False = 0,
+ True,
+ Unknown,
+ };
+
+private:
+ // CmpOpTable holds states which represent the corresponding range for
+ // branching an exploded graph. We can reason about the branch if there is
+ // a previously known fact of the existence of a comparison expression with
+ // operands used in the current expression.
+ // E.g. assuming (x < y) is true that means (x != y) is surely true.
+ // if (x previous_operation y) // < | != | >
+ // if (x operation y) // != | > | <
+ // tristate // True | Unknown | False
+ //
+ // CmpOpTable represents next:
+ // __|< |> |<=|>=|==|!=|UnknownX2|
+ // < |1 |0 |* |0 |0 |* |1 |
+ // > |0 |1 |0 |* |0 |* |1 |
+ // <=|1 |0 |1 |* |1 |* |0 |
+ // >=|0 |1 |* |1 |1 |* |0 |
+ // ==|0 |0 |* |* |1 |0 |1 |
+ // !=|1 |1 |* |* |0 |1 |0 |
+ //
+ // Columns stands for a previous operator.
+ // Rows stands for a current operator.
+ // Each row has exactly two `Unknown` cases.
+ // UnknownX2 means that both `Unknown` previous operators are met in code,
+ // and there is a special column for that, for example:
+ // if (x >= y)
+ // if (x != y)
+ // if (x <= y)
+ // False only
+ static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1;
+ const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = {
+ // < > <= >= == != UnknownX2
+ {True, False, Unknown, False, False, Unknown, True}, // <
+ {False, True, False, Unknown, False, Unknown, True}, // >
+ {True, False, True, Unknown, True, Unknown, False}, // <=
+ {False, True, Unknown, True, True, Unknown, False}, // >=
+ {False, False, Unknown, Unknown, True, False, True}, // ==
+ {True, True, Unknown, Unknown, False, True, False}, // !=
+ };
+
+ static size_t getIndexFromOp(BinaryOperatorKind OP) {
+ return static_cast<size_t>(OP - BO_LT);
+ }
+
+public:
+ constexpr size_t getCmpOpCount() const { return CmpOpCount; }
+
+ static BinaryOperatorKind getOpFromIndex(size_t Index) {
+ return static_cast<BinaryOperatorKind>(Index + BO_LT);
+ }
+
+ TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP,
+ BinaryOperatorKind QueriedOP) const {
+ return CmpOpTable[getIndexFromOp(CurrentOP)][getIndexFromOp(QueriedOP)];
+ }
+
+ TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const {
+ return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount];
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// RangeSet implementation
+//===----------------------------------------------------------------------===//
+
+RangeSet::ContainerType RangeSet::Factory::EmptySet{};
+
+RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) {
+ ContainerType Result;
+ Result.reserve(LHS.size() + RHS.size());
+ std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
+ std::back_inserter(Result));
+ return makePersistent(std::move(Result));
+}
+
+RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) {
+ ContainerType Result;
+ Result.reserve(Original.size() + 1);
+
+ const_iterator Lower = llvm::lower_bound(Original, Element);
+ Result.insert(Result.end(), Original.begin(), Lower);
+ Result.push_back(Element);
+ Result.insert(Result.end(), Lower, Original.end());
+
+ return makePersistent(std::move(Result));
+}
+
+RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) {
+ return add(Original, Range(Point));
+}
+
+RangeSet RangeSet::Factory::unite(RangeSet LHS, RangeSet RHS) {
+ ContainerType Result = unite(*LHS.Impl, *RHS.Impl);
+ return makePersistent(std::move(Result));
+}
+
+RangeSet RangeSet::Factory::unite(RangeSet Original, Range R) {
+ ContainerType Result;
+ Result.push_back(R);
+ Result = unite(*Original.Impl, Result);
+ return makePersistent(std::move(Result));
+}
+
+RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt Point) {
+ return unite(Original, Range(ValueFactory.getValue(Point)));
+}
+
+RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt From,
+ llvm::APSInt To) {
+ return unite(Original,
+ Range(ValueFactory.getValue(From), ValueFactory.getValue(To)));
+}
+
+template <typename T>
+void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd) {
+ std::swap(First, Second);
+ std::swap(FirstEnd, SecondEnd);
+}
+
+RangeSet::ContainerType RangeSet::Factory::unite(const ContainerType &LHS,
+ const ContainerType &RHS) {
+ if (LHS.empty())
+ return RHS;
+ if (RHS.empty())
+ return LHS;
+
+ using llvm::APSInt;
+ using iterator = ContainerType::const_iterator;
+
+ iterator First = LHS.begin();
+ iterator FirstEnd = LHS.end();
+ iterator Second = RHS.begin();
+ iterator SecondEnd = RHS.end();
+ APSIntType Ty = APSIntType(First->From());
+ const APSInt Min = Ty.getMinValue();
+
+ // Handle a corner case first when both range sets start from MIN.
+ // This helps to avoid complicated conditions below. Specifically, this
+ // particular check for `MIN` is not needed in the loop below every time
+ // when we do `Second->From() - One` operation.
+ if (Min == First->From() && Min == Second->From()) {
+ if (First->To() > Second->To()) {
+ // [ First ]--->
+ // [ Second ]----->
+ // MIN^
+ // The Second range is entirely inside the First one.
+
+ // Check if Second is the last in its RangeSet.
+ if (++Second == SecondEnd)
+ // [ First ]--[ First + 1 ]--->
+ // [ Second ]--------------------->
+ // MIN^
+ // The Union is equal to First's RangeSet.
+ return LHS;
+ } else {
+ // case 1: [ First ]----->
+ // case 2: [ First ]--->
+ // [ Second ]--->
+ // MIN^
+ // The First range is entirely inside or equal to the Second one.
+
+ // Check if First is the last in its RangeSet.
+ if (++First == FirstEnd)
+ // [ First ]----------------------->
+ // [ Second ]--[ Second + 1 ]---->
+ // MIN^
+ // The Union is equal to Second's RangeSet.
+ return RHS;
+ }
+ }
+
+ const APSInt One = Ty.getValue(1);
+ ContainerType Result;
+
+ // This is called when there are no ranges left in one of the ranges.
+ // Append the rest of the ranges from another range set to the Result
+ // and return with that.
+ const auto AppendTheRest = [&Result](iterator I, iterator E) {
+ Result.append(I, E);
+ return Result;
+ };
+
+ while (true) {
+ // We want to keep the following invariant at all times:
+ // ---[ First ------>
+ // -----[ Second --->
+ if (First->From() > Second->From())
+ swapIterators(First, FirstEnd, Second, SecondEnd);
+
+ // The Union definitely starts with First->From().
+ // ----------[ First ------>
+ // ------------[ Second --->
+ // ----------[ Union ------>
+ // UnionStart^
+ const llvm::APSInt &UnionStart = First->From();
+
+ // Loop where the invariant holds.
+ while (true) {
+ // Skip all enclosed ranges.
+ // ---[ First ]--->
+ // -----[ Second ]--[ Second + 1 ]--[ Second + N ]----->
+ while (First->To() >= Second->To()) {
+ // Check if Second is the last in its RangeSet.
+ if (++Second == SecondEnd) {
+ // Append the Union.
+ // ---[ Union ]--->
+ // -----[ Second ]----->
+ // --------[ First ]--->
+ // UnionEnd^
+ Result.emplace_back(UnionStart, First->To());
+ // ---[ Union ]----------------->
+ // --------------[ First + 1]--->
+ // Append all remaining ranges from the First's RangeSet.
+ return AppendTheRest(++First, FirstEnd);
+ }
+ }
+
+ // Check if First and Second are disjoint. It means that we find
+ // the end of the Union. Exit the loop and append the Union.
+ // ---[ First ]=------------->
+ // ------------=[ Second ]--->
+ // ----MinusOne^
+ if (First->To() < Second->From() - One)
+ break;
+
+ // First is entirely inside the Union. Go next.
+ // ---[ Union ----------->
+ // ---- [ First ]-------->
+ // -------[ Second ]----->
+ // Check if First is the last in its RangeSet.
+ if (++First == FirstEnd) {
+ // Append the Union.
+ // ---[ Union ]--->
+ // -----[ First ]------->
+ // --------[ Second ]--->
+ // UnionEnd^
+ Result.emplace_back(UnionStart, Second->To());
+ // ---[ Union ]------------------>
+ // --------------[ Second + 1]--->
+ // Append all remaining ranges from the Second's RangeSet.
+ return AppendTheRest(++Second, SecondEnd);
+ }
+
+ // We know that we are at one of the two cases:
+ // case 1: --[ First ]--------->
+ // case 2: ----[ First ]------->
+ // --------[ Second ]---------->
+ // In both cases First starts after Second->From().
+ // Make sure that the loop invariant holds.
+ swapIterators(First, FirstEnd, Second, SecondEnd);
+ }
+
+ // Here First and Second are disjoint.
+ // Append the Union.
+ // ---[ Union ]--------------->
+ // -----------------[ Second ]--->
+ // ------[ First ]--------------->
+ // UnionEnd^
+ Result.emplace_back(UnionStart, First->To());
+
+ // Check if First is the last in its RangeSet.
+ if (++First == FirstEnd)
+ // ---[ Union ]--------------->
+ // --------------[ Second ]--->
+ // Append all remaining ranges from the Second's RangeSet.
+ return AppendTheRest(Second, SecondEnd);
+ }
+
+ llvm_unreachable("Normally, we should not reach here");
+}
+
+RangeSet RangeSet::Factory::getRangeSet(Range From) {
+ ContainerType Result;
+ Result.push_back(From);
+ return makePersistent(std::move(Result));
+}
+
+RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) {
+ llvm::FoldingSetNodeID ID;
+ void *InsertPos;
+
+ From.Profile(ID);
+ ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos);
+
+ if (!Result) {
+ // It is cheaper to fully construct the resulting range on stack
+ // and move it to the freshly allocated buffer if we don't have
+ // a set like this already.
+ Result = construct(std::move(From));
+ Cache.InsertNode(Result, InsertPos);
+ }
+
+ return Result;
+}
+
+RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) {
+ void *Buffer = Arena.Allocate();
+ return new (Buffer) ContainerType(std::move(From));
+}
+
+const llvm::APSInt &RangeSet::getMinValue() const {
+ assert(!isEmpty());
+ return begin()->From();
+}
+
+const llvm::APSInt &RangeSet::getMaxValue() const {
+ assert(!isEmpty());
+ return std::prev(end())->To();
+}
+
+bool clang::ento::RangeSet::isUnsigned() const {
+ assert(!isEmpty());
+ return begin()->From().isUnsigned();
+}
+
+uint32_t clang::ento::RangeSet::getBitWidth() const {
+ assert(!isEmpty());
+ return begin()->From().getBitWidth();
+}
+
+APSIntType clang::ento::RangeSet::getAPSIntType() const {
+ assert(!isEmpty());
+ return APSIntType(begin()->From());
+}
+
+bool RangeSet::containsImpl(llvm::APSInt &Point) const {
+ if (isEmpty() || !pin(Point))
+ return false;
+
+ Range Dummy(Point);
+ const_iterator It = llvm::upper_bound(*this, Dummy);
+ if (It == begin())
+ return false;
+
+ return std::prev(It)->Includes(Point);
+}
+
+bool RangeSet::pin(llvm::APSInt &Point) const {
+ APSIntType Type(getMinValue());
+ if (Type.testInRange(Point, true) != APSIntType::RTR_Within)
+ return false;
+
+ Type.apply(Point);
+ return true;
+}
+
+bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
+ // This function has nine cases, the cartesian product of range-testing
+ // both the upper and lower bounds against the symbol's type.
+ // Each case requires a different pinning operation.
+ // The function returns false if the described range is entirely outside
+ // the range of values for the associated symbol.
+ APSIntType Type(getMinValue());
+ APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower, true);
+ APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper, true);
+
+ switch (LowerTest) {
+ case APSIntType::RTR_Below:
+ switch (UpperTest) {
+ case APSIntType::RTR_Below:
+ // The entire range is outside the symbol's set of possible values.
+ // If this is a conventionally-ordered range, the state is infeasible.
+ if (Lower <= Upper)
+ return false;
+
+ // However, if the range wraps around, it spans all possible values.
+ Lower = Type.getMinValue();
+ Upper = Type.getMaxValue();
+ break;
+ case APSIntType::RTR_Within:
+ // The range starts below what's possible but ends within it. Pin.
+ Lower = Type.getMinValue();
+ Type.apply(Upper);
+ break;
+ case APSIntType::RTR_Above:
+ // The range spans all possible values for the symbol. Pin.
+ Lower = Type.getMinValue();
+ Upper = Type.getMaxValue();
+ break;
+ }
+ break;
+ case APSIntType::RTR_Within:
+ switch (UpperTest) {
+ case APSIntType::RTR_Below:
+ // The range wraps around, but all lower values are not possible.
+ Type.apply(Lower);
+ Upper = Type.getMaxValue();
+ break;
+ case APSIntType::RTR_Within:
+ // The range may or may not wrap around, but both limits are valid.
+ Type.apply(Lower);
+ Type.apply(Upper);
+ break;
+ case APSIntType::RTR_Above:
+ // The range starts within what's possible but ends above it. Pin.
+ Type.apply(Lower);
+ Upper = Type.getMaxValue();
+ break;
+ }
+ break;
+ case APSIntType::RTR_Above:
+ switch (UpperTest) {
+ case APSIntType::RTR_Below:
+ // The range wraps but is outside the symbol's set of possible values.
+ return false;
+ case APSIntType::RTR_Within:
+ // The range starts above what's possible but ends within it (wrap).
+ Lower = Type.getMinValue();
+ Type.apply(Upper);
+ break;
+ case APSIntType::RTR_Above:
+ // The entire range is outside the symbol's set of possible values.
+ // If this is a conventionally-ordered range, the state is infeasible.
+ if (Lower <= Upper)
+ return false;
+
+ // However, if the range wraps around, it spans all possible values.
+ Lower = Type.getMinValue();
+ Upper = Type.getMaxValue();
+ break;
+ }
+ break;
+ }
+
+ return true;
+}
+
+RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower,
+ llvm::APSInt Upper) {
+ if (What.isEmpty() || !What.pin(Lower, Upper))
+ return getEmptySet();
+
+ ContainerType DummyContainer;
+
+ if (Lower <= Upper) {
+ // [Lower, Upper] is a regular range.
+ //
+ // Shortcut: check that there is even a possibility of the intersection
+ // by checking the two following situations:
+ //
+ // <---[ What ]---[------]------>
+ // Lower Upper
+ // -or-
+ // <----[------]----[ What ]---->
+ // Lower Upper
+ if (What.getMaxValue() < Lower || Upper < What.getMinValue())
+ return getEmptySet();
+
+ DummyContainer.push_back(
+ Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper)));
+ } else {
+ // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX]
+ //
+ // Shortcut: check that there is even a possibility of the intersection
+ // by checking the following situation:
+ //
+ // <------]---[ What ]---[------>
+ // Upper Lower
+ if (What.getMaxValue() < Lower && Upper < What.getMinValue())
+ return getEmptySet();
+
+ DummyContainer.push_back(
+ Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper)));
+ DummyContainer.push_back(
+ Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower)));
+ }
+
+ return intersect(*What.Impl, DummyContainer);
+}
+
+RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS,
+ const RangeSet::ContainerType &RHS) {
+ ContainerType Result;
+ Result.reserve(std::max(LHS.size(), RHS.size()));
+
+ const_iterator First = LHS.begin(), Second = RHS.begin(),
+ FirstEnd = LHS.end(), SecondEnd = RHS.end();
+
+ // If we ran out of ranges in one set, but not in the other,
+ // it means that those elements are definitely not in the
+ // intersection.
+ while (First != FirstEnd && Second != SecondEnd) {
+ // We want to keep the following invariant at all times:
+ //
+ // ----[ First ---------------------->
+ // --------[ Second ----------------->
+ if (Second->From() < First->From())
+ swapIterators(First, FirstEnd, Second, SecondEnd);
+
+ // Loop where the invariant holds:
+ do {
+ // Check for the following situation:
+ //
+ // ----[ First ]--------------------->
+ // ---------------[ Second ]--------->
+ //
+ // which means that...
+ if (Second->From() > First->To()) {
+ // ...First is not in the intersection.
+ //
+ // We should move on to the next range after First and break out of the
+ // loop because the invariant might not be true.
+ ++First;
+ break;
+ }
+
+ // We have a guaranteed intersection at this point!
+ // And this is the current situation:
+ //
+ // ----[ First ]----------------->
+ // -------[ Second ------------------>
+ //
+ // Additionally, it definitely starts with Second->From().
+ const llvm::APSInt &IntersectionStart = Second->From();
+
+ // It is important to know which of the two ranges' ends
+ // is greater. That "longer" range might have some other
+ // intersections, while the "shorter" range might not.
+ if (Second->To() > First->To()) {
+ // Here we make a decision to keep First as the "longer"
+ // range.
+ swapIterators(First, FirstEnd, Second, SecondEnd);
+ }
+
+ // At this point, we have the following situation:
+ //
+ // ---- First ]-------------------->
+ // ---- Second ]--[ Second+1 ---------->
+ //
+ // We don't know the relationship between First->From and
+ // Second->From and we don't know whether Second+1 intersects
+ // with First.
+ //
+ // However, we know that [IntersectionStart, Second->To] is
+ // a part of the intersection...
+ Result.push_back(Range(IntersectionStart, Second->To()));
+ ++Second;
+ // ...and that the invariant will hold for a valid Second+1
+ // because First->From <= Second->To < (Second+1)->From.
+ } while (Second != SecondEnd);
+ }
+
+ if (Result.empty())
+ return getEmptySet();
+
+ return makePersistent(std::move(Result));
+}
+
+RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) {
+ // Shortcut: let's see if the intersection is even possible.
+ if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() ||
+ RHS.getMaxValue() < LHS.getMinValue())
+ return getEmptySet();
+
+ return intersect(*LHS.Impl, *RHS.Impl);
+}
+
+RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) {
+ if (LHS.containsImpl(Point))
+ return getRangeSet(ValueFactory.getValue(Point));
+
+ return getEmptySet();
+}
+
+RangeSet RangeSet::Factory::negate(RangeSet What) {
+ if (What.isEmpty())
+ return getEmptySet();
+
+ const llvm::APSInt SampleValue = What.getMinValue();
+ const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue);
+ const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue);
+
+ ContainerType Result;
+ Result.reserve(What.size() + (SampleValue == MIN));
+
+ // Handle a special case for MIN value.
+ const_iterator It = What.begin();
+ const_iterator End = What.end();
+
+ const llvm::APSInt &From = It->From();
+ const llvm::APSInt &To = It->To();
+
+ if (From == MIN) {
+ // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX].
+ if (To == MAX) {
+ return What;
+ }
+
+ const_iterator Last = std::prev(End);
+
+ // Try to find and unite the following ranges:
+ // [MIN, MIN] & [MIN + 1, N] => [MIN, N].
+ if (Last->To() == MAX) {
+ // It means that in the original range we have ranges
+ // [MIN, A], ... , [B, MAX]
+ // And the result should be [MIN, -B], ..., [-A, MAX]
+ Result.emplace_back(MIN, ValueFactory.getValue(-Last->From()));
+ // We already negated Last, so we can skip it.
+ End = Last;
+ } else {
+ // Add a separate range for the lowest value.
+ Result.emplace_back(MIN, MIN);
+ }
+
+ // Skip adding the second range in case when [From, To] are [MIN, MIN].
+ if (To != MIN) {
+ Result.emplace_back(ValueFactory.getValue(-To), MAX);
+ }
+
+ // Skip the first range in the loop.
+ ++It;
+ }
+
+ // Negate all other ranges.
+ for (; It != End; ++It) {
+ // Negate int values.
+ const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To());
+ const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From());
+
+ // Add a negated range.
+ Result.emplace_back(NewFrom, NewTo);
+ }
+
+ llvm::sort(Result);
+ return makePersistent(std::move(Result));
+}
+
+// Convert range set to the given integral type using truncation and promotion.
+// This works similar to APSIntType::apply function but for the range set.
+RangeSet RangeSet::Factory::castTo(RangeSet What, APSIntType Ty) {
+ // Set is empty or NOOP (aka cast to the same type).
+ if (What.isEmpty() || What.getAPSIntType() == Ty)
+ return What;
+
+ const bool IsConversion = What.isUnsigned() != Ty.isUnsigned();
+ const bool IsTruncation = What.getBitWidth() > Ty.getBitWidth();
+ const bool IsPromotion = What.getBitWidth() < Ty.getBitWidth();
+
+ if (IsTruncation)
+ return makePersistent(truncateTo(What, Ty));
+
+ // Here we handle 2 cases:
+ // - IsConversion && !IsPromotion.
+ // In this case we handle changing a sign with same bitwidth: char -> uchar,
+ // uint -> int. Here we convert negatives to positives and positives which
+ // is out of range to negatives. We use convertTo function for that.
+ // - IsConversion && IsPromotion && !What.isUnsigned().
+ // In this case we handle changing a sign from signeds to unsigneds with
+ // higher bitwidth: char -> uint, int-> uint64. The point is that we also
+ // need convert negatives to positives and use convertTo function as well.
+ // For example, we don't need such a convertion when converting unsigned to
+ // signed with higher bitwidth, because all the values of unsigned is valid
+ // for the such signed.
+ if (IsConversion && (!IsPromotion || !What.isUnsigned()))
+ return makePersistent(convertTo(What, Ty));
+
+ assert(IsPromotion && "Only promotion operation from unsigneds left.");
+ return makePersistent(promoteTo(What, Ty));
+}
+
+RangeSet RangeSet::Factory::castTo(RangeSet What, QualType T) {
+ assert(T->isIntegralOrEnumerationType() && "T shall be an integral type.");
+ return castTo(What, ValueFactory.getAPSIntType(T));
+}
+
+RangeSet::ContainerType RangeSet::Factory::truncateTo(RangeSet What,
+ APSIntType Ty) {
+ using llvm::APInt;
+ using llvm::APSInt;
+ ContainerType Result;
+ ContainerType Dummy;
+ // CastRangeSize is an amount of all possible values of cast type.
+ // Example: `char` has 256 values; `short` has 65536 values.
+ // But in fact we use `amount of values` - 1, because
+ // we can't keep `amount of values of UINT64` inside uint64_t.
+ // E.g. 256 is an amount of all possible values of `char` and we can't keep
+ // it inside `char`.
+ // And it's OK, it's enough to do correct calculations.
+ uint64_t CastRangeSize = APInt::getMaxValue(Ty.getBitWidth()).getZExtValue();
+ for (const Range &R : What) {
+ // Get bounds of the given range.
+ APSInt FromInt = R.From();
+ APSInt ToInt = R.To();
+ // CurrentRangeSize is an amount of all possible values of the current
+ // range minus one.
+ uint64_t CurrentRangeSize = (ToInt - FromInt).getZExtValue();
+ // This is an optimization for a specific case when this Range covers
+ // the whole range of the target type.
+ Dummy.clear();
+ if (CurrentRangeSize >= CastRangeSize) {
+ Dummy.emplace_back(ValueFactory.getMinValue(Ty),
+ ValueFactory.getMaxValue(Ty));
+ Result = std::move(Dummy);
+ break;
+ }
+ // Cast the bounds.
+ Ty.apply(FromInt);
+ Ty.apply(ToInt);
+ const APSInt &PersistentFrom = ValueFactory.getValue(FromInt);
+ const APSInt &PersistentTo = ValueFactory.getValue(ToInt);
+ if (FromInt > ToInt) {
+ Dummy.emplace_back(ValueFactory.getMinValue(Ty), PersistentTo);
+ Dummy.emplace_back(PersistentFrom, ValueFactory.getMaxValue(Ty));
+ } else
+ Dummy.emplace_back(PersistentFrom, PersistentTo);
+ // Every range retrieved after truncation potentialy has garbage values.
+ // So, we have to unite every next range with the previouses.
+ Result = unite(Result, Dummy);
+ }
+
+ return Result;
+}
+
+// Divide the convertion into two phases (presented as loops here).
+// First phase(loop) works when casted values go in ascending order.
+// E.g. char{1,3,5,127} -> uint{1,3,5,127}
+// Interrupt the first phase and go to second one when casted values start
+// go in descending order. That means that we crossed over the middle of
+// the type value set (aka 0 for signeds and MAX/2+1 for unsigneds).
+// For instance:
+// 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1}
+// Here we put {1,3,5} to one array and {-128, -1} to another
+// 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,3}
+// Here we put {128,129,255} to one array and {0,1,3} to another.
+// After that we unite both arrays.
+// NOTE: We don't just concatenate the arrays, because they may have
+// adjacent ranges, e.g.:
+// 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) ->
+// unite -> uchar(0, 255)
+// 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) ->
+// unite -> uchar(-2, 1)
+RangeSet::ContainerType RangeSet::Factory::convertTo(RangeSet What,
+ APSIntType Ty) {
+ using llvm::APInt;
+ using llvm::APSInt;
+ using Bounds = std::pair<const APSInt &, const APSInt &>;
+ ContainerType AscendArray;
+ ContainerType DescendArray;
+ auto CastRange = [Ty, &VF = ValueFactory](const Range &R) -> Bounds {
+ // Get bounds of the given range.
+ APSInt FromInt = R.From();
+ APSInt ToInt = R.To();
+ // Cast the bounds.
+ Ty.apply(FromInt);
+ Ty.apply(ToInt);
+ return {VF.getValue(FromInt), VF.getValue(ToInt)};
+ };
+ // Phase 1. Fill the first array.
+ APSInt LastConvertedInt = Ty.getMinValue();
+ const auto *It = What.begin();
+ const auto *E = What.end();
+ while (It != E) {
+ Bounds NewBounds = CastRange(*(It++));
+ // If values stop going acsending order, go to the second phase(loop).
+ if (NewBounds.first < LastConvertedInt) {
+ DescendArray.emplace_back(NewBounds.first, NewBounds.second);
+ break;
+ }
+ // If the range contains a midpoint, then split the range.
+ // E.g. char(-5, 5) -> uchar(251, 5)
+ // Here we shall add a range (251, 255) to the first array and (0, 5) to the
+ // second one.
+ if (NewBounds.first > NewBounds.second) {
+ DescendArray.emplace_back(ValueFactory.getMinValue(Ty), NewBounds.second);
+ AscendArray.emplace_back(NewBounds.first, ValueFactory.getMaxValue(Ty));
+ } else
+ // Values are going acsending order.
+ AscendArray.emplace_back(NewBounds.first, NewBounds.second);
+ LastConvertedInt = NewBounds.first;
+ }
+ // Phase 2. Fill the second array.
+ while (It != E) {
+ Bounds NewBounds = CastRange(*(It++));
+ DescendArray.emplace_back(NewBounds.first, NewBounds.second);
+ }
+ // Unite both arrays.
+ return unite(AscendArray, DescendArray);
+}
+
+/// Promotion from unsigneds to signeds/unsigneds left.
+RangeSet::ContainerType RangeSet::Factory::promoteTo(RangeSet What,
+ APSIntType Ty) {
+ ContainerType Result;
+ // We definitely know the size of the result set.
+ Result.reserve(What.size());
+
+ // Each unsigned value fits every larger type without any changes,
+ // whether the larger type is signed or unsigned. So just promote and push
+ // back each range one by one.
+ for (const Range &R : What) {
+ // Get bounds of the given range.
+ llvm::APSInt FromInt = R.From();
+ llvm::APSInt ToInt = R.To();
+ // Cast the bounds.
+ Ty.apply(FromInt);
+ Ty.apply(ToInt);
+ Result.emplace_back(ValueFactory.getValue(FromInt),
+ ValueFactory.getValue(ToInt));
+ }
+ return Result;
+}
+
+RangeSet RangeSet::Factory::deletePoint(RangeSet From,
+ const llvm::APSInt &Point) {
+ if (!From.contains(Point))
+ return From;
+
+ llvm::APSInt Upper = Point;
+ llvm::APSInt Lower = Point;
+
+ ++Upper;
+ --Lower;
+
+ // Notice that the lower bound is greater than the upper bound.
+ return intersect(From, Upper, Lower);
+}
+
+LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const {
+ OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
+}
+LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); }
+
+LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const {
+ OS << "{ ";
+ llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); });
+ OS << " }";
+}
+LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); }
+
+REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef)
+
+namespace {
+class EquivalenceClass;
+} // end anonymous namespace
+
+REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass)
+REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers, EquivalenceClass, SymbolSet)
+REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange, EquivalenceClass, RangeSet)
+
+REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass)
+REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet)
+
+namespace {
+/// This class encapsulates a set of symbols equal to each other.
+///
+/// The main idea of the approach requiring such classes is in narrowing
+/// and sharing constraints between symbols within the class. Also we can
+/// conclude that there is no practical need in storing constraints for
+/// every member of the class separately.
+///
+/// Main terminology:
+///
+/// * "Equivalence class" is an object of this class, which can be efficiently
+/// compared to other classes. It represents the whole class without
+/// storing the actual in it. The members of the class however can be
+/// retrieved from the state.
+///
+/// * "Class members" are the symbols corresponding to the class. This means
+/// that A == B for every member symbols A and B from the class. Members of
+/// each class are stored in the state.
+///
+/// * "Trivial class" is a class that has and ever had only one same symbol.
+///
+/// * "Merge operation" merges two classes into one. It is the main operation
+/// to produce non-trivial classes.
+/// If, at some point, we can assume that two symbols from two distinct
+/// classes are equal, we can merge these classes.
+class EquivalenceClass : public llvm::FoldingSetNode {
+public:
+ /// Find equivalence class for the given symbol in the given state.
+ [[nodiscard]] static inline EquivalenceClass find(ProgramStateRef State,
+ SymbolRef Sym);
+
+ /// Merge classes for the given symbols and return a new state.
+ [[nodiscard]] static inline ProgramStateRef merge(RangeSet::Factory &F,
+ ProgramStateRef State,
+ SymbolRef First,
+ SymbolRef Second);
+ // Merge this class with the given class and return a new state.
+ [[nodiscard]] inline ProgramStateRef
+ merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other);
+
+ /// Return a set of class members for the given state.
+ [[nodiscard]] inline SymbolSet getClassMembers(ProgramStateRef State) const;
+
+ /// Return true if the current class is trivial in the given state.
+ /// A class is trivial if and only if there is not any member relations stored
+ /// to it in State/ClassMembers.
+ /// An equivalence class with one member might seem as it does not hold any
+ /// meaningful information, i.e. that is a tautology. However, during the
+ /// removal of dead symbols we do not remove classes with one member for
+ /// resource and performance reasons. Consequently, a class with one member is
+ /// not necessarily trivial. It could happen that we have a class with two
+ /// members and then during the removal of dead symbols we remove one of its
+ /// members. In this case, the class is still non-trivial (it still has the
+ /// mappings in ClassMembers), even though it has only one member.
+ [[nodiscard]] inline bool isTrivial(ProgramStateRef State) const;
+
+ /// Return true if the current class is trivial and its only member is dead.
+ [[nodiscard]] inline bool isTriviallyDead(ProgramStateRef State,
+ SymbolReaper &Reaper) const;
+
+ [[nodiscard]] static inline ProgramStateRef
+ markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First,
+ SymbolRef Second);
+ [[nodiscard]] static inline ProgramStateRef
+ markDisequal(RangeSet::Factory &F, ProgramStateRef State,
+ EquivalenceClass First, EquivalenceClass Second);
+ [[nodiscard]] inline ProgramStateRef
+ markDisequal(RangeSet::Factory &F, ProgramStateRef State,
+ EquivalenceClass Other) const;
+ [[nodiscard]] static inline ClassSet getDisequalClasses(ProgramStateRef State,
+ SymbolRef Sym);
+ [[nodiscard]] inline ClassSet getDisequalClasses(ProgramStateRef State) const;
+ [[nodiscard]] inline ClassSet
+ getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const;
+
+ [[nodiscard]] static inline std::optional<bool>
+ areEqual(ProgramStateRef State, EquivalenceClass First,
+ EquivalenceClass Second);
+ [[nodiscard]] static inline std::optional<bool>
+ areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
+
+ /// Remove one member from the class.
+ [[nodiscard]] ProgramStateRef removeMember(ProgramStateRef State,
+ const SymbolRef Old);
+
+ /// Iterate over all symbols and try to simplify them.
+ [[nodiscard]] static inline ProgramStateRef simplify(SValBuilder &SVB,
+ RangeSet::Factory &F,
+ ProgramStateRef State,
+ EquivalenceClass Class);
+
+ void dumpToStream(ProgramStateRef State, raw_ostream &os) const;
+ LLVM_DUMP_METHOD void dump(ProgramStateRef State) const {
+ dumpToStream(State, llvm::errs());
+ }
+
+ /// Check equivalence data for consistency.
+ [[nodiscard]] LLVM_ATTRIBUTE_UNUSED static bool
+ isClassDataConsistent(ProgramStateRef State);
+
+ [[nodiscard]] QualType getType() const {
+ return getRepresentativeSymbol()->getType();
+ }
+
+ EquivalenceClass() = delete;
+ EquivalenceClass(const EquivalenceClass &) = default;
+ EquivalenceClass &operator=(const EquivalenceClass &) = delete;
+ EquivalenceClass(EquivalenceClass &&) = default;
+ EquivalenceClass &operator=(EquivalenceClass &&) = delete;
+
+ bool operator==(const EquivalenceClass &Other) const {
+ return ID == Other.ID;
+ }
+ bool operator<(const EquivalenceClass &Other) const { return ID < Other.ID; }
+ bool operator!=(const EquivalenceClass &Other) const {
+ return !operator==(Other);
+ }
+
+ static void Profile(llvm::FoldingSetNodeID &ID, uintptr_t CID) {
+ ID.AddInteger(CID);
+ }
+
+ void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, this->ID); }
+
+private:
+ /* implicit */ EquivalenceClass(SymbolRef Sym)
+ : ID(reinterpret_cast<uintptr_t>(Sym)) {}
+
+ /// This function is intended to be used ONLY within the class.
+ /// The fact that ID is a pointer to a symbol is an implementation detail
+ /// and should stay that way.
+ /// In the current implementation, we use it to retrieve the only member
+ /// of the trivial class.
+ SymbolRef getRepresentativeSymbol() const {
+ return reinterpret_cast<SymbolRef>(ID);
+ }
+ static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State);
+
+ inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
+ SymbolSet Members, EquivalenceClass Other,
+ SymbolSet OtherMembers);
+
+ static inline bool
+ addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
+ RangeSet::Factory &F, ProgramStateRef State,
+ EquivalenceClass First, EquivalenceClass Second);
+
+ /// This is a unique identifier of the class.
+ uintptr_t ID;
+};
+
+//===----------------------------------------------------------------------===//
+// Constraint functions
+//===----------------------------------------------------------------------===//
+
+[[nodiscard]] LLVM_ATTRIBUTE_UNUSED bool
+areFeasible(ConstraintRangeTy Constraints) {
+ return llvm::none_of(
+ Constraints,
+ [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) {
+ return ClassConstraint.second.isEmpty();
+ });
+}
+
+[[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
+ EquivalenceClass Class) {
+ return State->get<ConstraintRange>(Class);
+}
+
+[[nodiscard]] inline const RangeSet *getConstraint(ProgramStateRef State,
+ SymbolRef Sym) {
+ return getConstraint(State, EquivalenceClass::find(State, Sym));
+}
+
+[[nodiscard]] ProgramStateRef setConstraint(ProgramStateRef State,
+ EquivalenceClass Class,
+ RangeSet Constraint) {
+ return State->set<ConstraintRange>(Class, Constraint);
+}
+
+[[nodiscard]] ProgramStateRef setConstraints(ProgramStateRef State,
+ ConstraintRangeTy Constraints) {
+ return State->set<ConstraintRange>(Constraints);
+}
+
+//===----------------------------------------------------------------------===//
+// Equality/diseqiality abstraction
+//===----------------------------------------------------------------------===//
+
+/// A small helper function for detecting symbolic (dis)equality.
+///
+/// Equality check can have different forms (like a == b or a - b) and this
+/// class encapsulates those away if the only thing the user wants to check -
+/// whether it's equality/diseqiality or not.
+///
+/// \returns true if assuming this Sym to be true means equality of operands
+/// false if it means disequality of operands
+/// std::nullopt otherwise
+std::optional<bool> meansEquality(const SymSymExpr *Sym) {
+ switch (Sym->getOpcode()) {
+ case BO_Sub:
+ // This case is: A - B != 0 -> disequality check.
+ return false;
+ case BO_EQ:
+ // This case is: A == B != 0 -> equality check.
+ return true;
+ case BO_NE:
+ // This case is: A != B != 0 -> diseqiality check.
+ return false;
+ default:
+ return std::nullopt;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Intersection functions
+//===----------------------------------------------------------------------===//
+
+template <class SecondTy, class... RestTy>
+[[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
+ SecondTy Second, RestTy... Tail);
+
+template <class... RangeTy> struct IntersectionTraits;
+
+template <class... TailTy> struct IntersectionTraits<RangeSet, TailTy...> {
+ // Found RangeSet, no need to check any further
+ using Type = RangeSet;
+};
+
+template <> struct IntersectionTraits<> {
+ // We ran out of types, and we didn't find any RangeSet, so the result should
+ // be optional.
+ using Type = std::optional<RangeSet>;
+};
+
+template <class OptionalOrPointer, class... TailTy>
+struct IntersectionTraits<OptionalOrPointer, TailTy...> {
+ // If current type is Optional or a raw pointer, we should keep looking.
+ using Type = typename IntersectionTraits<TailTy...>::Type;
+};
+
+template <class EndTy>
+[[nodiscard]] inline EndTy intersect(RangeSet::Factory &F, EndTy End) {
+ // If the list contains only RangeSet or std::optional<RangeSet>, simply
+ // return that range set.
+ return End;
+}
+
+[[nodiscard]] LLVM_ATTRIBUTE_UNUSED inline std::optional<RangeSet>
+intersect(RangeSet::Factory &F, const RangeSet *End) {
+ // This is an extraneous conversion from a raw pointer into
+ // std::optional<RangeSet>
+ if (End) {
+ return *End;
+ }
+ return std::nullopt;
+}
+
+template <class... RestTy>
+[[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
+ RangeSet Second, RestTy... Tail) {
+ // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version
+ // of the function and can be sure that the result is RangeSet.
+ return intersect(F, F.intersect(Head, Second), Tail...);
+}
+
+template <class SecondTy, class... RestTy>
+[[nodiscard]] inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
+ SecondTy Second, RestTy... Tail) {
+ if (Second) {
+ // Here we call the <RangeSet,RangeSet,...> version of the function...
+ return intersect(F, Head, *Second, Tail...);
+ }
+ // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which
+ // means that the result is definitely RangeSet.
+ return intersect(F, Head, Tail...);
+}
+
+/// Main generic intersect function.
+/// It intersects all of the given range sets. If some of the given arguments
+/// don't hold a range set (nullptr or std::nullopt), the function will skip
+/// them.
+///
+/// Available representations for the arguments are:
+/// * RangeSet
+/// * std::optional<RangeSet>
+/// * RangeSet *
+/// Pointer to a RangeSet is automatically assumed to be nullable and will get
+/// checked as well as the optional version. If this behaviour is undesired,
+/// please dereference the pointer in the call.
+///
+/// Return type depends on the arguments' types. If we can be sure in compile
+/// time that there will be a range set as a result, the returning type is
+/// simply RangeSet, in other cases we have to back off to
+/// std::optional<RangeSet>.
+///
+/// Please, prefer optional range sets to raw pointers. If the last argument is
+/// a raw pointer and all previous arguments are std::nullopt, it will cost one
+/// additional check to convert RangeSet * into std::optional<RangeSet>.
+template <class HeadTy, class SecondTy, class... RestTy>
+[[nodiscard]] inline
+ typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type
+ intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second,
+ RestTy... Tail) {
+ if (Head) {
+ return intersect(F, *Head, Second, Tail...);
+ }
+ return intersect(F, Second, Tail...);
+}
+
+//===----------------------------------------------------------------------===//
+// Symbolic reasoning logic
+//===----------------------------------------------------------------------===//
+
+/// A little component aggregating all of the reasoning we have about
+/// the ranges of symbolic expressions.
+///
+/// Even when we don't know the exact values of the operands, we still
+/// can get a pretty good estimate of the result's range.
+class SymbolicRangeInferrer
+ : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> {
+public:
+ template <class SourceType>
+ static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State,
+ SourceType Origin) {
+ SymbolicRangeInferrer Inferrer(F, State);
+ return Inferrer.infer(Origin);
+ }
+
+ RangeSet VisitSymExpr(SymbolRef Sym) {
+ if (std::optional<RangeSet> RS = getRangeForNegatedSym(Sym))
+ return *RS;
+ // If we've reached this line, the actual type of the symbolic
+ // expression is not supported for advanced inference.
+ // In this case, we simply backoff to the default "let's simply
+ // infer the range from the expression's type".
+ return infer(Sym->getType());
+ }
+
+ RangeSet VisitUnarySymExpr(const UnarySymExpr *USE) {
+ if (std::optional<RangeSet> RS = getRangeForNegatedUnarySym(USE))
+ return *RS;
+ return infer(USE->getType());
+ }
+
+ RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
+ return VisitBinaryOperator(Sym);
+ }
+
+ RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
+ return VisitBinaryOperator(Sym);
+ }
+
+ RangeSet VisitSymSymExpr(const SymSymExpr *SSE) {
+ return intersect(
+ RangeFactory,
+ // If Sym is a difference of symbols A - B, then maybe we have range
+ // set stored for B - A.
+ //
+ // If we have range set stored for both A - B and B - A then
+ // calculate the effective range set by intersecting the range set
+ // for A - B and the negated range set of B - A.
+ getRangeForNegatedSymSym(SSE),
+ // If Sym is a comparison expression (except <=>),
+ // find any other comparisons with the same operands.
+ // See function description.
+ getRangeForComparisonSymbol(SSE),
+ // If Sym is (dis)equality, we might have some information
+ // on that in our equality classes data structure.
+ getRangeForEqualities(SSE),
+ // And we should always check what we can get from the operands.
+ VisitBinaryOperator(SSE));
+ }
+
+private:
+ SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
+ : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {}
+
+ /// Infer range information from the given integer constant.
+ ///
+ /// It's not a real "inference", but is here for operating with
+ /// sub-expressions in a more polymorphic manner.
+ RangeSet inferAs(const llvm::APSInt &Val, QualType) {
+ return {RangeFactory, Val};
+ }
+
+ /// Infer range information from symbol in the context of the given type.
+ RangeSet inferAs(SymbolRef Sym, QualType DestType) {
+ QualType ActualType = Sym->getType();
+ // Check that we can reason about the symbol at all.
+ if (ActualType->isIntegralOrEnumerationType() ||
+ Loc::isLocType(ActualType)) {
+ return infer(Sym);
+ }
+ // Otherwise, let's simply infer from the destination type.
+ // We couldn't figure out nothing else about that expression.
+ return infer(DestType);
+ }
+
+ RangeSet infer(SymbolRef Sym) {
+ return intersect(RangeFactory,
+ // Of course, we should take the constraint directly
+ // associated with this symbol into consideration.
+ getConstraint(State, Sym),
+ // Apart from the Sym itself, we can infer quite a lot if
+ // we look into subexpressions of Sym.
+ Visit(Sym));
+ }
+
+ RangeSet infer(EquivalenceClass Class) {
+ if (const RangeSet *AssociatedConstraint = getConstraint(State, Class))
+ return *AssociatedConstraint;
+
+ return infer(Class.getType());
+ }
+
+ /// Infer range information solely from the type.
+ RangeSet infer(QualType T) {
+ // Lazily generate a new RangeSet representing all possible values for the
+ // given symbol type.
+ RangeSet Result(RangeFactory, ValueFactory.getMinValue(T),
+ ValueFactory.getMaxValue(T));
+
+ // References are known to be non-zero.
+ if (T->isReferenceType())
+ return assumeNonZero(Result, T);
+
+ return Result;
+ }
+
+ template <class BinarySymExprTy>
+ RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) {
+ // TODO #1: VisitBinaryOperator implementation might not make a good
+ // use of the inferred ranges. In this case, we might be calculating
+ // everything for nothing. This being said, we should introduce some
+ // sort of laziness mechanism here.
+ //
+ // TODO #2: We didn't go into the nested expressions before, so it
+ // might cause us spending much more time doing the inference.
+ // This can be a problem for deeply nested expressions that are
+ // involved in conditions and get tested continuously. We definitely
+ // need to address this issue and introduce some sort of caching
+ // in here.
+ QualType ResultType = Sym->getType();
+ return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType),
+ Sym->getOpcode(),
+ inferAs(Sym->getRHS(), ResultType), ResultType);
+ }
+
+ RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
+ RangeSet RHS, QualType T);
+
+ //===----------------------------------------------------------------------===//
+ // Ranges and operators
+ //===----------------------------------------------------------------------===//
+
+ /// Return a rough approximation of the given range set.
+ ///
+ /// For the range set:
+ /// { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
+ /// it will return the range [x_0, y_N].
+ static Range fillGaps(RangeSet Origin) {
+ assert(!Origin.isEmpty());
+ return {Origin.getMinValue(), Origin.getMaxValue()};
+ }
+
+ /// Try to convert given range into the given type.
+ ///
+ /// It will return std::nullopt only when the trivial conversion is possible.
+ std::optional<Range> convert(const Range &Origin, APSIntType To) {
+ if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within ||
+ To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) {
+ return std::nullopt;
+ }
+ return Range(ValueFactory.Convert(To, Origin.From()),
+ ValueFactory.Convert(To, Origin.To()));
+ }
+
+ template <BinaryOperator::Opcode Op>
+ RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) {
+ assert(!LHS.isEmpty() && !RHS.isEmpty());
+
+ Range CoarseLHS = fillGaps(LHS);
+ Range CoarseRHS = fillGaps(RHS);
+
+ APSIntType ResultType = ValueFactory.getAPSIntType(T);
+
+ // We need to convert ranges to the resulting type, so we can compare values
+ // and combine them in a meaningful (in terms of the given operation) way.
+ auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
+ auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType);
+
+ // It is hard to reason about ranges when conversion changes
+ // borders of the ranges.
+ if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
+ return infer(T);
+ }
+
+ return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T);
+ }
+
+ template <BinaryOperator::Opcode Op>
+ RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) {
+ return infer(T);
+ }
+
+ /// Return a symmetrical range for the given range and type.
+ ///
+ /// If T is signed, return the smallest range [-x..x] that covers the original
+ /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't
+ /// exist due to original range covering min(T)).
+ ///
+ /// If T is unsigned, return the smallest range [0..x] that covers the
+ /// original range.
+ Range getSymmetricalRange(Range Origin, QualType T) {
+ APSIntType RangeType = ValueFactory.getAPSIntType(T);
+
+ if (RangeType.isUnsigned()) {
+ return Range(ValueFactory.getMinValue(RangeType), Origin.To());
+ }
+
+ if (Origin.From().isMinSignedValue()) {
+ // If mini is a minimal signed value, absolute value of it is greater
+ // than the maximal signed value. In order to avoid these
+ // complications, we simply return the whole range.
+ return {ValueFactory.getMinValue(RangeType),
+ ValueFactory.getMaxValue(RangeType)};
+ }
+
+ // At this point, we are sure that the type is signed and we can safely
+ // use unary - operator.
+ //
+ // While calculating absolute maximum, we can use the following formula
+ // because of these reasons:
+ // * If From >= 0 then To >= From and To >= -From.
+ // AbsMax == To == max(To, -From)
+ // * If To <= 0 then -From >= -To and -From >= From.
+ // AbsMax == -From == max(-From, To)
+ // * Otherwise, From <= 0, To >= 0, and
+ // AbsMax == max(abs(From), abs(To))
+ llvm::APSInt AbsMax = std::max(-Origin.From(), Origin.To());
+
+ // Intersection is guaranteed to be non-empty.
+ return {ValueFactory.getValue(-AbsMax), ValueFactory.getValue(AbsMax)};
+ }
+
+ /// Return a range set subtracting zero from \p Domain.
+ RangeSet assumeNonZero(RangeSet Domain, QualType T) {
+ APSIntType IntType = ValueFactory.getAPSIntType(T);
+ return RangeFactory.deletePoint(Domain, IntType.getZeroValue());
+ }
+
+ template <typename ProduceNegatedSymFunc>
+ std::optional<RangeSet> getRangeForNegatedExpr(ProduceNegatedSymFunc F,
+ QualType T) {
+ // Do not negate if the type cannot be meaningfully negated.
+ if (!T->isUnsignedIntegerOrEnumerationType() &&
+ !T->isSignedIntegerOrEnumerationType())
+ return std::nullopt;
+
+ if (SymbolRef NegatedSym = F())
+ if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym))
+ return RangeFactory.negate(*NegatedRange);
+
+ return std::nullopt;
+ }
+
+ std::optional<RangeSet> getRangeForNegatedUnarySym(const UnarySymExpr *USE) {
+ // Just get the operand when we negate a symbol that is already negated.
+ // -(-a) == a
+ return getRangeForNegatedExpr(
+ [USE]() -> SymbolRef {
+ if (USE->getOpcode() == UO_Minus)
+ return USE->getOperand();
+ return nullptr;
+ },
+ USE->getType());
+ }
+
+ std::optional<RangeSet> getRangeForNegatedSymSym(const SymSymExpr *SSE) {
+ return getRangeForNegatedExpr(
+ [SSE, State = this->State]() -> SymbolRef {
+ if (SSE->getOpcode() == BO_Sub)
+ return State->getSymbolManager().getSymSymExpr(
+ SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType());
+ return nullptr;
+ },
+ SSE->getType());
+ }
+
+ std::optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) {
+ return getRangeForNegatedExpr(
+ [Sym, State = this->State]() {
+ return State->getSymbolManager().getUnarySymExpr(Sym, UO_Minus,
+ Sym->getType());
+ },
+ Sym->getType());
+ }
+
+ // Returns ranges only for binary comparison operators (except <=>)
+ // when left and right operands are symbolic values.
+ // Finds any other comparisons with the same operands.
+ // Then do logical calculations and refuse impossible branches.
+ // E.g. (x < y) and (x > y) at the same time are impossible.
+ // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only.
+ // E.g. (x == y) and (y == x) are just reversed but the same.
+ // It covers all possible combinations (see CmpOpTable description).
+ // Note that `x` and `y` can also stand for subexpressions,
+ // not only for actual symbols.
+ std::optional<RangeSet> getRangeForComparisonSymbol(const SymSymExpr *SSE) {
+ const BinaryOperatorKind CurrentOP = SSE->getOpcode();
+
+ // We currently do not support <=> (C++20).
+ if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp))
+ return std::nullopt;
+
+ static const OperatorRelationsTable CmpOpTable{};
+
+ const SymExpr *LHS = SSE->getLHS();
+ const SymExpr *RHS = SSE->getRHS();
+ QualType T = SSE->getType();
+
+ SymbolManager &SymMgr = State->getSymbolManager();
+
+ // We use this variable to store the last queried operator (`QueriedOP`)
+ // for which the `getCmpOpState` returned with `Unknown`. If there are two
+ // different OPs that returned `Unknown` then we have to query the special
+ // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)`
+ // never returns `Unknown`, so `CurrentOP` is a good initial value.
+ BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP;
+
+ // Loop goes through all of the columns exept the last one ('UnknownX2').
+ // We treat `UnknownX2` column separately at the end of the loop body.
+ for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) {
+
+ // Let's find an expression e.g. (x < y).
+ BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i);
+ const SymSymExpr *SymSym = SymMgr.getSymSymExpr(LHS, QueriedOP, RHS, T);
+ const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);
+
+ // If ranges were not previously found,
+ // try to find a reversed expression (y > x).
+ if (!QueriedRangeSet) {
+ const BinaryOperatorKind ROP =
+ BinaryOperator::reverseComparisonOp(QueriedOP);
+ SymSym = SymMgr.getSymSymExpr(RHS, ROP, LHS, T);
+ QueriedRangeSet = getConstraint(State, SymSym);
+ }
+
+ if (!QueriedRangeSet || QueriedRangeSet->isEmpty())
+ continue;
+
+ const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue();
+ const bool isInFalseBranch =
+ ConcreteValue ? (*ConcreteValue == 0) : false;
+
+ // If it is a false branch, we shall be guided by opposite operator,
+ // because the table is made assuming we are in the true branch.
+ // E.g. when (x <= y) is false, then (x > y) is true.
+ if (isInFalseBranch)
+ QueriedOP = BinaryOperator::negateComparisonOp(QueriedOP);
+
+ OperatorRelationsTable::TriStateKind BranchState =
+ CmpOpTable.getCmpOpState(CurrentOP, QueriedOP);
+
+ if (BranchState == OperatorRelationsTable::Unknown) {
+ if (LastQueriedOpToUnknown != CurrentOP &&
+ LastQueriedOpToUnknown != QueriedOP) {
+ // If we got the Unknown state for both different operators.
+ // if (x <= y) // assume true
+ // if (x != y) // assume true
+ // if (x < y) // would be also true
+ // Get a state from `UnknownX2` column.
+ BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP);
+ } else {
+ LastQueriedOpToUnknown = QueriedOP;
+ continue;
+ }
+ }
+
+ return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T)
+ : getFalseRange(T);
+ }
+
+ return std::nullopt;
+ }
+
+ std::optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) {
+ std::optional<bool> Equality = meansEquality(Sym);
+
+ if (!Equality)
+ return std::nullopt;
+
+ if (std::optional<bool> AreEqual =
+ EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) {
+ // Here we cover two cases at once:
+ // * if Sym is equality and its operands are known to be equal -> true
+ // * if Sym is disequality and its operands are disequal -> true
+ if (*AreEqual == *Equality) {
+ return getTrueRange(Sym->getType());
+ }
+ // Opposite combinations result in false.
+ return getFalseRange(Sym->getType());
+ }
+
+ return std::nullopt;
+ }
+
+ RangeSet getTrueRange(QualType T) {
+ RangeSet TypeRange = infer(T);
+ return assumeNonZero(TypeRange, T);
+ }
+
+ RangeSet getFalseRange(QualType T) {
+ const llvm::APSInt &Zero = ValueFactory.getValue(0, T);
+ return RangeSet(RangeFactory, Zero);
+ }
+
+ BasicValueFactory &ValueFactory;
+ RangeSet::Factory &RangeFactory;
+ ProgramStateRef State;
+};
+
+//===----------------------------------------------------------------------===//
+// Range-based reasoning about symbolic operations
+//===----------------------------------------------------------------------===//
+
+template <>
+RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_NE>(RangeSet LHS,
+ RangeSet RHS,
+ QualType T) {
+ assert(!LHS.isEmpty() && !RHS.isEmpty());
+
+ if (LHS.getAPSIntType() == RHS.getAPSIntType()) {
+ if (intersect(RangeFactory, LHS, RHS).isEmpty())
+ return getTrueRange(T);
+
+ } else {
+ // We can only lose information if we are casting smaller signed type to
+ // bigger unsigned type. For e.g.,
+ // LHS (unsigned short): [2, USHRT_MAX]
+ // RHS (signed short): [SHRT_MIN, 0]
+ //
+ // Casting RHS to LHS type will leave us with overlapping values
+ // CastedRHS : [0, 0] U [SHRT_MAX + 1, USHRT_MAX]
+ //
+ // We can avoid this by checking if signed type's maximum value is lesser
+ // than unsigned type's minimum value.
+
+ // If both have different signs then only we can get more information.
+ if (LHS.isUnsigned() != RHS.isUnsigned()) {
+ if (LHS.isUnsigned() && (LHS.getBitWidth() >= RHS.getBitWidth())) {
+ if (RHS.getMaxValue().isNegative() ||
+ LHS.getAPSIntType().convert(RHS.getMaxValue()) < LHS.getMinValue())
+ return getTrueRange(T);
+
+ } else if (RHS.isUnsigned() && (LHS.getBitWidth() <= RHS.getBitWidth())) {
+ if (LHS.getMaxValue().isNegative() ||
+ RHS.getAPSIntType().convert(LHS.getMaxValue()) < RHS.getMinValue())
+ return getTrueRange(T);
+ }
+ }
+
+ // Both RangeSets should be casted to bigger unsigned type.
+ APSIntType CastingType(std::max(LHS.getBitWidth(), RHS.getBitWidth()),
+ LHS.isUnsigned() || RHS.isUnsigned());
+
+ RangeSet CastedLHS = RangeFactory.castTo(LHS, CastingType);
+ RangeSet CastedRHS = RangeFactory.castTo(RHS, CastingType);
+
+ if (intersect(RangeFactory, CastedLHS, CastedRHS).isEmpty())
+ return getTrueRange(T);
+ }
+
+ // In all other cases, the resulting range cannot be deduced.
+ return infer(T);
+}
+
+template <>
+RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS,
+ QualType T) {
+ APSIntType ResultType = ValueFactory.getAPSIntType(T);
+ llvm::APSInt Zero = ResultType.getZeroValue();
+
+ bool IsLHSPositiveOrZero = LHS.From() >= Zero;
+ bool IsRHSPositiveOrZero = RHS.From() >= Zero;
+
+ bool IsLHSNegative = LHS.To() < Zero;
+ bool IsRHSNegative = RHS.To() < Zero;
+
+ // Check if both ranges have the same sign.
+ if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
+ (IsLHSNegative && IsRHSNegative)) {
+ // The result is definitely greater or equal than any of the operands.
+ const llvm::APSInt &Min = std::max(LHS.From(), RHS.From());
+
+ // We estimate maximal value for positives as the maximal value for the
+ // given type. For negatives, we estimate it with -1 (e.g. 0x11111111).
+ //
+ // TODO: We basically, limit the resulting range from below, but don't do
+ // anything with the upper bound.
+ //
+ // For positive operands, it can be done as follows: for the upper
+ // bound of LHS and RHS we calculate the most significant bit set.
+ // Let's call it the N-th bit. Then we can estimate the maximal
+ // number to be 2^(N+1)-1, i.e. the number with all the bits up to
+ // the N-th bit set.
+ const llvm::APSInt &Max = IsLHSNegative
+ ? ValueFactory.getValue(--Zero)
+ : ValueFactory.getMaxValue(ResultType);
+
+ return {RangeFactory, ValueFactory.getValue(Min), Max};
+ }
+
+ // Otherwise, let's check if at least one of the operands is negative.
+ if (IsLHSNegative || IsRHSNegative) {
+ // This means that the result is definitely negative as well.
+ return {RangeFactory, ValueFactory.getMinValue(ResultType),
+ ValueFactory.getValue(--Zero)};
+ }
+
+ RangeSet DefaultRange = infer(T);
+
+ // It is pretty hard to reason about operands with different signs
+ // (and especially with possibly different signs). We simply check if it
+ // can be zero. In order to conclude that the result could not be zero,
+ // at least one of the operands should be definitely not zero itself.
+ if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) {
+ return assumeNonZero(DefaultRange, T);
+ }
+
+ // Nothing much else to do here.
+ return DefaultRange;
+}
+
+template <>
+RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS,
+ Range RHS,
+ QualType T) {
+ APSIntType ResultType = ValueFactory.getAPSIntType(T);
+ llvm::APSInt Zero = ResultType.getZeroValue();
+
+ bool IsLHSPositiveOrZero = LHS.From() >= Zero;
+ bool IsRHSPositiveOrZero = RHS.From() >= Zero;
+
+ bool IsLHSNegative = LHS.To() < Zero;
+ bool IsRHSNegative = RHS.To() < Zero;
+
+ // Check if both ranges have the same sign.
+ if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
+ (IsLHSNegative && IsRHSNegative)) {
+ // The result is definitely less or equal than any of the operands.
+ const llvm::APSInt &Max = std::min(LHS.To(), RHS.To());
+
+ // We conservatively estimate lower bound to be the smallest positive
+ // or negative value corresponding to the sign of the operands.
+ const llvm::APSInt &Min = IsLHSNegative
+ ? ValueFactory.getMinValue(ResultType)
+ : ValueFactory.getValue(Zero);
+
+ return {RangeFactory, Min, Max};
+ }
+
+ // Otherwise, let's check if at least one of the operands is positive.
+ if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) {
+ // This makes result definitely positive.
+ //
+ // We can also reason about a maximal value by finding the maximal
+ // value of the positive operand.
+ const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To();
+
+ // The minimal value on the other hand is much harder to reason about.
+ // The only thing we know for sure is that the result is positive.
+ return {RangeFactory, ValueFactory.getValue(Zero),
+ ValueFactory.getValue(Max)};
+ }
+
+ // Nothing much else to do here.
+ return infer(T);
+}
+
+template <>
+RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
+ Range RHS,
+ QualType T) {
+ llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();
+
+ Range ConservativeRange = getSymmetricalRange(RHS, T);
+
+ llvm::APSInt Max = ConservativeRange.To();
+ llvm::APSInt Min = ConservativeRange.From();
+
+ if (Max == Zero) {
+ // It's an undefined behaviour to divide by 0 and it seems like we know
+ // for sure that RHS is 0. Let's say that the resulting range is
+ // simply infeasible for that matter.
+ return RangeFactory.getEmptySet();
+ }
+
+ // At this point, our conservative range is closed. The result, however,
+ // couldn't be greater than the RHS' maximal absolute value. Because of
+ // this reason, we turn the range into open (or half-open in case of
+ // unsigned integers).
+ //
+ // While we operate on integer values, an open interval (a, b) can be easily
+ // represented by the closed interval [a + 1, b - 1]. And this is exactly
+ // what we do next.
+ //
+ // If we are dealing with unsigned case, we shouldn't move the lower bound.
+ if (Min.isSigned()) {
+ ++Min;
+ }
+ --Max;
+
+ bool IsLHSPositiveOrZero = LHS.From() >= Zero;
+ bool IsRHSPositiveOrZero = RHS.From() >= Zero;
+
+ // Remainder operator results with negative operands is implementation
+ // defined. Positive cases are much easier to reason about though.
+ if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) {
+ // If maximal value of LHS is less than maximal value of RHS,
+ // the result won't get greater than LHS.To().
+ Max = std::min(LHS.To(), Max);
+ // We want to check if it is a situation similar to the following:
+ //
+ // <------------|---[ LHS ]--------[ RHS ]----->
+ // -INF 0 +INF
+ //
+ // In this situation, we can conclude that (LHS / RHS) == 0 and
+ // (LHS % RHS) == LHS.
+ Min = LHS.To() < RHS.From() ? LHS.From() : Zero;
+ }
+
+ // Nevertheless, the symmetrical range for RHS is a conservative estimate
+ // for any sign of either LHS, or RHS.
+ return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
+}
+
+RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS,
+ BinaryOperator::Opcode Op,
+ RangeSet RHS, QualType T) {
+ // We should propagate information about unfeasbility of one of the
+ // operands to the resulting range.
+ if (LHS.isEmpty() || RHS.isEmpty()) {
+ return RangeFactory.getEmptySet();
+ }
+
+ switch (Op) {
+ case BO_NE:
+ return VisitBinaryOperator<BO_NE>(LHS, RHS, T);
+ case BO_Or:
+ return VisitBinaryOperator<BO_Or>(LHS, RHS, T);
+ case BO_And:
+ return VisitBinaryOperator<BO_And>(LHS, RHS, T);
+ case BO_Rem:
+ return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
+ default:
+ return infer(T);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint manager implementation details
+//===----------------------------------------------------------------------===//
+
+class RangeConstraintManager : public RangedConstraintManager {
+public:
+ RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
+ : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
+
+ //===------------------------------------------------------------------===//
+ // Implementation for interface from ConstraintManager.
+ //===------------------------------------------------------------------===//
+
+ bool haveEqualConstraints(ProgramStateRef S1,
+ ProgramStateRef S2) const override {
+ // NOTE: ClassMembers are as simple as back pointers for ClassMap,
+ // so comparing constraint ranges and class maps should be
+ // sufficient.
+ return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
+ S1->get<ClassMap>() == S2->get<ClassMap>();
+ }
+
+ bool canReasonAbout(SVal X) const override;
+
+ ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
+
+ const llvm::APSInt *getSymVal(ProgramStateRef State,
+ SymbolRef Sym) const override;
+
+ const llvm::APSInt *getSymMinVal(ProgramStateRef State,
+ SymbolRef Sym) const override;
+
+ const llvm::APSInt *getSymMaxVal(ProgramStateRef State,
+ SymbolRef Sym) const override;
+
+ ProgramStateRef removeDeadBindings(ProgramStateRef State,
+ SymbolReaper &SymReaper) override;
+
+ void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
+ unsigned int Space = 0, bool IsDot = false) const override;
+ void printValue(raw_ostream &Out, ProgramStateRef State,
+ SymbolRef Sym) override;
+ void printConstraints(raw_ostream &Out, ProgramStateRef State,
+ const char *NL = "\n", unsigned int Space = 0,
+ bool IsDot = false) const;
+ void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
+ const char *NL = "\n", unsigned int Space = 0,
+ bool IsDot = false) const;
+ void printDisequalities(raw_ostream &Out, ProgramStateRef State,
+ const char *NL = "\n", unsigned int Space = 0,
+ bool IsDot = false) const;
+
+ //===------------------------------------------------------------------===//
+ // Implementation for interface from RangedConstraintManager.
+ //===------------------------------------------------------------------===//
+
+ ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymWithinInclusiveRange(
+ ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
+ const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymOutsideInclusiveRange(
+ ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
+ const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
+
+private:
+ RangeSet::Factory F;
+
+ RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
+ RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
+ ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
+ RangeSet Range);
+ ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
+ RangeSet Range);
+
+ RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment);
+ RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment);
+ RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment);
+ RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment);
+ RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment);
+};
+
+//===----------------------------------------------------------------------===//
+// Constraint assignment logic
+//===----------------------------------------------------------------------===//
+
+/// ConstraintAssignorBase is a small utility class that unifies visitor
+/// for ranges with a visitor for constraints (rangeset/range/constant).
+///
+/// It is designed to have one derived class, but generally it can have more.
+/// Derived class can control which types we handle by defining methods of the
+/// following form:
+///
+/// bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
+/// CONSTRAINT Constraint);
+///
+/// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
+/// CONSTRAINT is the type of constraint (RangeSet/Range/Const)
+/// return value signifies whether we should try other handle methods
+/// (i.e. false would mean to stop right after calling this method)
+template <class Derived> class ConstraintAssignorBase {
+public:
+ using Const = const llvm::APSInt &;
+
+#define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint)
+
+#define ASSIGN(CLASS, TO, SYM, CONSTRAINT) \
+ if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT)) \
+ return false
+
+ void assign(SymbolRef Sym, RangeSet Constraint) {
+ assignImpl(Sym, Constraint);
+ }
+
+ bool assignImpl(SymbolRef Sym, RangeSet Constraint) {
+ switch (Sym->getKind()) {
+#define SYMBOL(Id, Parent) \
+ case SymExpr::Id##Kind: \
+ DISPATCH(Id);
+#include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
+ }
+ llvm_unreachable("Unknown SymExpr kind!");
+ }
+
+#define DEFAULT_ASSIGN(Id) \
+ bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) { \
+ return true; \
+ } \
+ bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \
+ bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; }
+
+ // When we dispatch for constraint types, we first try to check
+ // if the new constraint is the constant and try the corresponding
+ // assignor methods. If it didn't interrupt, we can proceed to the
+ // range, and finally to the range set.
+#define CONSTRAINT_DISPATCH(Id) \
+ if (const llvm::APSInt *Const = Constraint.getConcreteValue()) { \
+ ASSIGN(Id, Const, Sym, *Const); \
+ } \
+ if (Constraint.size() == 1) { \
+ ASSIGN(Id, Range, Sym, *Constraint.begin()); \
+ } \
+ ASSIGN(Id, RangeSet, Sym, Constraint)
+
+ // Our internal assign method first tries to call assignor methods for all
+ // constraint types that apply. And if not interrupted, continues with its
+ // parent class.
+#define SYMBOL(Id, Parent) \
+ bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) { \
+ CONSTRAINT_DISPATCH(Id); \
+ DISPATCH(Parent); \
+ } \
+ DEFAULT_ASSIGN(Id)
+#define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent)
+#include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
+
+ // Default implementations for the top class that doesn't have parents.
+ bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) {
+ CONSTRAINT_DISPATCH(SymExpr);
+ return true;
+ }
+ DEFAULT_ASSIGN(SymExpr);
+
+#undef DISPATCH
+#undef CONSTRAINT_DISPATCH
+#undef DEFAULT_ASSIGN
+#undef ASSIGN
+};
+
+/// A little component aggregating all of the reasoning we have about
+/// assigning new constraints to symbols.
+///
+/// The main purpose of this class is to associate constraints to symbols,
+/// and impose additional constraints on other symbols, when we can imply
+/// them.
+///
+/// It has a nice symmetry with SymbolicRangeInferrer. When the latter
+/// can provide more precise ranges by looking into the operands of the
+/// expression in question, ConstraintAssignor looks into the operands
+/// to see if we can imply more from the new constraint.
+class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
+public:
+ template <class ClassOrSymbol>
+ [[nodiscard]] static ProgramStateRef
+ assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F,
+ ClassOrSymbol CoS, RangeSet NewConstraint) {
+ if (!State || NewConstraint.isEmpty())
+ return nullptr;
+
+ ConstraintAssignor Assignor{State, Builder, F};
+ return Assignor.assign(CoS, NewConstraint);
+ }
+
+ /// Handle expressions like: a % b != 0.
+ template <typename SymT>
+ bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) {
+ if (Sym->getOpcode() != BO_Rem)
+ return true;
+ // a % b != 0 implies that a != 0.
+ if (!Constraint.containsZero()) {
+ SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
+ if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
+ State = State->assume(*NonLocSymSVal, true);
+ if (!State)
+ return false;
+ }
+ }
+ return true;
+ }
+
+ inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
+ inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
+ RangeSet Constraint) {
+ return handleRemainderOp(Sym, Constraint);
+ }
+ inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
+ RangeSet Constraint);
+
+private:
+ ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder,
+ RangeSet::Factory &F)
+ : State(State), Builder(Builder), RangeFactory(F) {}
+ using Base = ConstraintAssignorBase<ConstraintAssignor>;
+
+ /// Base method for handling new constraints for symbols.
+ [[nodiscard]] ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) {
+ // All constraints are actually associated with equivalence classes, and
+ // that's what we are going to do first.
+ State = assign(EquivalenceClass::find(State, Sym), NewConstraint);
+ if (!State)
+ return nullptr;
+
+ // And after that we can check what other things we can get from this
+ // constraint.
+ Base::assign(Sym, NewConstraint);
+ return State;
+ }
+
+ /// Base method for handling new constraints for classes.
+ [[nodiscard]] ProgramStateRef assign(EquivalenceClass Class,
+ RangeSet NewConstraint) {
+ // There is a chance that we might need to update constraints for the
+ // classes that are known to be disequal to Class.
+ //
+ // In order for this to be even possible, the new constraint should
+ // be simply a constant because we can't reason about range disequalities.
+ if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) {
+
+ ConstraintRangeTy Constraints = State->get<ConstraintRange>();
+ ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>();
+
+ // Add new constraint.
+ Constraints = CF.add(Constraints, Class, NewConstraint);
+
+ for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) {
+ RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange(
+ RangeFactory, State, DisequalClass);
+
+ UpdatedConstraint = RangeFactory.deletePoint(UpdatedConstraint, *Point);
+
+ // If we end up with at least one of the disequal classes to be
+ // constrained with an empty range-set, the state is infeasible.
+ if (UpdatedConstraint.isEmpty())
+ return nullptr;
+
+ Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint);
+ }
+ assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
+ "a state with infeasible constraints");
+
+ return setConstraints(State, Constraints);
+ }
+
+ return setConstraint(State, Class, NewConstraint);
+ }
+
+ ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS,
+ SymbolRef RHS) {
+ return EquivalenceClass::markDisequal(RangeFactory, State, LHS, RHS);
+ }
+
+ ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS,
+ SymbolRef RHS) {
+ return EquivalenceClass::merge(RangeFactory, State, LHS, RHS);
+ }
+
+ [[nodiscard]] std::optional<bool> interpreteAsBool(RangeSet Constraint) {
+ assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
+
+ if (Constraint.getConcreteValue())
+ return !Constraint.getConcreteValue()->isZero();
+
+ if (!Constraint.containsZero())
+ return true;
+
+ return std::nullopt;
+ }
+
+ ProgramStateRef State;
+ SValBuilder &Builder;
+ RangeSet::Factory &RangeFactory;
+};
+
+bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
+ const llvm::APSInt &Constraint) {
+ llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
+ // Iterate over all equivalence classes and try to simplify them.
+ ClassMembersTy Members = State->get<ClassMembers>();
+ for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
+ EquivalenceClass Class = ClassToSymbolSet.first;
+ State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
+ if (!State)
+ return false;
+ SimplifiedClasses.insert(Class);
+ }
+
+ // Trivial equivalence classes (those that have only one symbol member) are
+ // not stored in the State. Thus, we must skim through the constraints as
+ // well. And we try to simplify symbols in the constraints.
+ ConstraintRangeTy Constraints = State->get<ConstraintRange>();
+ for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
+ EquivalenceClass Class = ClassConstraint.first;
+ if (SimplifiedClasses.count(Class)) // Already simplified.
+ continue;
+ State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
+ if (!State)
+ return false;
+ }
+
+ // We may have trivial equivalence classes in the disequality info as
+ // well, and we need to simplify them.
+ DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
+ for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
+ DisequalityInfo) {
+ EquivalenceClass Class = DisequalityEntry.first;
+ ClassSet DisequalClasses = DisequalityEntry.second;
+ State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
+ if (!State)
+ return false;
+ }
+
+ return true;
+}
+
+bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
+ RangeSet Constraint) {
+ if (!handleRemainderOp(Sym, Constraint))
+ return false;
+
+ std::optional<bool> ConstraintAsBool = interpreteAsBool(Constraint);
+
+ if (!ConstraintAsBool)
+ return true;
+
+ if (std::optional<bool> Equality = meansEquality(Sym)) {
+ // Here we cover two cases:
+ // * if Sym is equality and the new constraint is true -> Sym's operands
+ // should be marked as equal
+ // * if Sym is disequality and the new constraint is false -> Sym's
+ // operands should be also marked as equal
+ if (*Equality == *ConstraintAsBool) {
+ State = trackEquality(State, Sym->getLHS(), Sym->getRHS());
+ } else {
+ // Other combinations leave as with disequal operands.
+ State = trackDisequality(State, Sym->getLHS(), Sym->getRHS());
+ }
+
+ if (!State)
+ return false;
+ }
+
+ return true;
+}
+
+} // end anonymous namespace
+
+std::unique_ptr<ConstraintManager>
+ento::CreateRangeConstraintManager(ProgramStateManager &StMgr,
+ ExprEngine *Eng) {
+ return std::make_unique<RangeConstraintManager>(Eng, StMgr.getSValBuilder());
+}
+
+ConstraintMap ento::getConstraintMap(ProgramStateRef State) {
+ ConstraintMap::Factory &F = State->get_context<ConstraintMap>();
+ ConstraintMap Result = F.getEmptyMap();
+
+ ConstraintRangeTy Constraints = State->get<ConstraintRange>();
+ for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
+ EquivalenceClass Class = ClassConstraint.first;
+ SymbolSet ClassMembers = Class.getClassMembers(State);
+ assert(!ClassMembers.isEmpty() &&
+ "Class must always have at least one member!");
+
+ SymbolRef Representative = *ClassMembers.begin();
+ Result = F.add(Result, Representative, ClassConstraint.second);
+ }
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// EqualityClass implementation details
+//===----------------------------------------------------------------------===//
+
+LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State,
+ raw_ostream &os) const {
+ SymbolSet ClassMembers = getClassMembers(State);
+ for (const SymbolRef &MemberSym : ClassMembers) {
+ MemberSym->dump();
+ os << "\n";
+ }
+}
+
+inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State,
+ SymbolRef Sym) {
+ assert(State && "State should not be null");
+ assert(Sym && "Symbol should not be null");
+ // We store far from all Symbol -> Class mappings
+ if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym))
+ return *NontrivialClass;
+
+ // This is a trivial class of Sym.
+ return Sym;
+}
+
+inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
+ ProgramStateRef State,
+ SymbolRef First,
+ SymbolRef Second) {
+ EquivalenceClass FirstClass = find(State, First);
+ EquivalenceClass SecondClass = find(State, Second);
+
+ return FirstClass.merge(F, State, SecondClass);
+}
+
+inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
+ ProgramStateRef State,
+ EquivalenceClass Other) {
+ // It is already the same class.
+ if (*this == Other)
+ return State;
+
+ // FIXME: As of now, we support only equivalence classes of the same type.
+ // This limitation is connected to the lack of explicit casts in
+ // our symbolic expression model.
+ //
+ // That means that for `int x` and `char y` we don't distinguish
+ // between these two very different cases:
+ // * `x == y`
+ // * `(char)x == y`
+ //
+ // The moment we introduce symbolic casts, this restriction can be
+ // lifted.
+ if (getType()->getCanonicalTypeUnqualified() !=
+ Other.getType()->getCanonicalTypeUnqualified())
+ return State;
+
+ SymbolSet Members = getClassMembers(State);
+ SymbolSet OtherMembers = Other.getClassMembers(State);
+
+ // We estimate the size of the class by the height of tree containing
+ // its members. Merging is not a trivial operation, so it's easier to
+ // merge the smaller class into the bigger one.
+ if (Members.getHeight() >= OtherMembers.getHeight()) {
+ return mergeImpl(F, State, Members, Other, OtherMembers);
+ } else {
+ return Other.mergeImpl(F, State, OtherMembers, *this, Members);
+ }
+}
+
+inline ProgramStateRef
+EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory,
+ ProgramStateRef State, SymbolSet MyMembers,
+ EquivalenceClass Other, SymbolSet OtherMembers) {
+ // Essentially what we try to recreate here is some kind of union-find
+ // data structure. It does have certain limitations due to persistence
+ // and the need to remove elements from classes.
+ //
+ // In this setting, EquialityClass object is the representative of the class
+ // or the parent element. ClassMap is a mapping of class members to their
+ // parent. Unlike the union-find structure, they all point directly to the
+ // class representative because we don't have an opportunity to actually do
+ // path compression when dealing with immutability. This means that we
+ // compress paths every time we do merges. It also means that we lose
+ // the main amortized complexity benefit from the original data structure.
+ ConstraintRangeTy Constraints = State->get<ConstraintRange>();
+ ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
+
+ // 1. If the merged classes have any constraints associated with them, we
+ // need to transfer them to the class we have left.
+ //
+ // Intersection here makes perfect sense because both of these constraints
+ // must hold for the whole new class.
+ if (std::optional<RangeSet> NewClassConstraint =
+ intersect(RangeFactory, getConstraint(State, *this),
+ getConstraint(State, Other))) {
+ // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because
+ // range inferrer shouldn't generate ranges incompatible with
+ // equivalence classes. However, at the moment, due to imperfections
+ // in the solver, it is possible and the merge function can also
+ // return infeasible states aka null states.
+ if (NewClassConstraint->isEmpty())
+ // Infeasible state
+ return nullptr;
+
+ // No need in tracking constraints of a now-dissolved class.
+ Constraints = CRF.remove(Constraints, Other);
+ // Assign new constraints for this class.
+ Constraints = CRF.add(Constraints, *this, *NewClassConstraint);
+
+ assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
+ "a state with infeasible constraints");
+
+ State = State->set<ConstraintRange>(Constraints);
+ }
+
+ // 2. Get ALL equivalence-related maps
+ ClassMapTy Classes = State->get<ClassMap>();
+ ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
+
+ ClassMembersTy Members = State->get<ClassMembers>();
+ ClassMembersTy::Factory &MF = State->get_context<ClassMembers>();
+
+ DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
+ DisequalityMapTy::Factory &DF = State->get_context<DisequalityMap>();
+
+ ClassSet::Factory &CF = State->get_context<ClassSet>();
+ SymbolSet::Factory &F = getMembersFactory(State);
+
+ // 2. Merge members of the Other class into the current class.
+ SymbolSet NewClassMembers = MyMembers;
+ for (SymbolRef Sym : OtherMembers) {
+ NewClassMembers = F.add(NewClassMembers, Sym);
+ // *this is now the class for all these new symbols.
+ Classes = CMF.add(Classes, Sym, *this);
+ }
+
+ // 3. Adjust member mapping.
+ //
+ // No need in tracking members of a now-dissolved class.
+ Members = MF.remove(Members, Other);
+ // Now only the current class is mapped to all the symbols.
+ Members = MF.add(Members, *this, NewClassMembers);
+
+ // 4. Update disequality relations
+ ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF);
+ // We are about to merge two classes but they are already known to be
+ // non-equal. This is a contradiction.
+ if (DisequalToOther.contains(*this))
+ return nullptr;
+
+ if (!DisequalToOther.isEmpty()) {
+ ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF);
+ DisequalityInfo = DF.remove(DisequalityInfo, Other);
+
+ for (EquivalenceClass DisequalClass : DisequalToOther) {
+ DisequalToThis = CF.add(DisequalToThis, DisequalClass);
+
+ // Disequality is a symmetric relation meaning that if
+ // DisequalToOther not null then the set for DisequalClass is not
+ // empty and has at least Other.
+ ClassSet OriginalSetLinkedToOther =
+ *DisequalityInfo.lookup(DisequalClass);
+
+ // Other will be eliminated and we should replace it with the bigger
+ // united class.
+ ClassSet NewSet = CF.remove(OriginalSetLinkedToOther, Other);
+ NewSet = CF.add(NewSet, *this);
+
+ DisequalityInfo = DF.add(DisequalityInfo, DisequalClass, NewSet);
+ }
+
+ DisequalityInfo = DF.add(DisequalityInfo, *this, DisequalToThis);
+ State = State->set<DisequalityMap>(DisequalityInfo);
+ }
+
+ // 5. Update the state
+ State = State->set<ClassMap>(Classes);
+ State = State->set<ClassMembers>(Members);
+
+ return State;
+}
+
+inline SymbolSet::Factory &
+EquivalenceClass::getMembersFactory(ProgramStateRef State) {
+ return State->get_context<SymbolSet>();
+}
+
+SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const {
+ if (const SymbolSet *Members = State->get<ClassMembers>(*this))
+ return *Members;
+
+ // This class is trivial, so we need to construct a set
+ // with just that one symbol from the class.
+ SymbolSet::Factory &F = getMembersFactory(State);
+ return F.add(F.getEmptySet(), getRepresentativeSymbol());
+}
+
+bool EquivalenceClass::isTrivial(ProgramStateRef State) const {
+ return State->get<ClassMembers>(*this) == nullptr;
+}
+
+bool EquivalenceClass::isTriviallyDead(ProgramStateRef State,
+ SymbolReaper &Reaper) const {
+ return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol());
+}
+
+inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
+ ProgramStateRef State,
+ SymbolRef First,
+ SymbolRef Second) {
+ return markDisequal(RF, State, find(State, First), find(State, Second));
+}
+
+inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
+ ProgramStateRef State,
+ EquivalenceClass First,
+ EquivalenceClass Second) {
+ return First.markDisequal(RF, State, Second);
+}
+
+inline ProgramStateRef
+EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State,
+ EquivalenceClass Other) const {
+ // If we know that two classes are equal, we can only produce an infeasible
+ // state.
+ if (*this == Other) {
+ return nullptr;
+ }
+
+ DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
+ ConstraintRangeTy Constraints = State->get<ConstraintRange>();
+
+ // Disequality is a symmetric relation, so if we mark A as disequal to B,
+ // we should also mark B as disequalt to A.
+ if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this,
+ Other) ||
+ !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other,
+ *this))
+ return nullptr;
+
+ assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
+ "a state with infeasible constraints");
+
+ State = State->set<DisequalityMap>(DisequalityInfo);
+ State = State->set<ConstraintRange>(Constraints);
+
+ return State;
+}
+
+inline bool EquivalenceClass::addToDisequalityInfo(
+ DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
+ RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First,
+ EquivalenceClass Second) {
+
+ // 1. Get all of the required factories.
+ DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>();
+ ClassSet::Factory &CF = State->get_context<ClassSet>();
+ ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
+
+ // 2. Add Second to the set of classes disequal to First.
+ const ClassSet *CurrentSet = Info.lookup(First);
+ ClassSet NewSet = CurrentSet ? *CurrentSet : CF.getEmptySet();
+ NewSet = CF.add(NewSet, Second);
+
+ Info = F.add(Info, First, NewSet);
+
+ // 3. If Second is known to be a constant, we can delete this point
+ // from the constraint asociated with First.
+ //
+ // So, if Second == 10, it means that First != 10.
+ // At the same time, the same logic does not apply to ranges.
+ if (const RangeSet *SecondConstraint = Constraints.lookup(Second))
+ if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) {
+
+ RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange(
+ RF, State, First.getRepresentativeSymbol());
+
+ FirstConstraint = RF.deletePoint(FirstConstraint, *Point);
+
+ // If the First class is about to be constrained with an empty
+ // range-set, the state is infeasible.
+ if (FirstConstraint.isEmpty())
+ return false;
+
+ Constraints = CRF.add(Constraints, First, FirstConstraint);
+ }
+
+ return true;
+}
+
+inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
+ SymbolRef FirstSym,
+ SymbolRef SecondSym) {
+ return EquivalenceClass::areEqual(State, find(State, FirstSym),
+ find(State, SecondSym));
+}
+
+inline std::optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
+ EquivalenceClass First,
+ EquivalenceClass Second) {
+ // The same equivalence class => symbols are equal.
+ if (First == Second)
+ return true;
+
+ // Let's check if we know anything about these two classes being not equal to
+ // each other.
+ ClassSet DisequalToFirst = First.getDisequalClasses(State);
+ if (DisequalToFirst.contains(Second))
+ return false;
+
+ // It is not clear.
+ return std::nullopt;
+}
+
+[[nodiscard]] ProgramStateRef
+EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
+
+ SymbolSet ClsMembers = getClassMembers(State);
+ assert(ClsMembers.contains(Old));
+
+ // Remove `Old`'s Class->Sym relation.
+ SymbolSet::Factory &F = getMembersFactory(State);
+ ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
+ ClsMembers = F.remove(ClsMembers, Old);
+ // Ensure another precondition of the removeMember function (we can check
+ // this only with isEmpty, thus we have to do the remove first).
+ assert(!ClsMembers.isEmpty() &&
+ "Class should have had at least two members before member removal");
+ // Overwrite the existing members assigned to this class.
+ ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
+ ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
+ State = State->set<ClassMembers>(ClassMembersMap);
+
+ // Remove `Old`'s Sym->Class relation.
+ ClassMapTy Classes = State->get<ClassMap>();
+ ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
+ Classes = CMF.remove(Classes, Old);
+ State = State->set<ClassMap>(Classes);
+
+ return State;
+}
+
+// Re-evaluate an SVal with top-level `State->assume` logic.
+[[nodiscard]] ProgramStateRef
+reAssume(ProgramStateRef State, const RangeSet *Constraint, SVal TheValue) {
+ if (!Constraint)
+ return State;
+
+ const auto DefinedVal = TheValue.castAs<DefinedSVal>();
+
+ // If the SVal is 0, we can simply interpret that as `false`.
+ if (Constraint->encodesFalseRange())
+ return State->assume(DefinedVal, false);
+
+ // If the constraint does not encode 0 then we can interpret that as `true`
+ // AND as a Range(Set).
+ if (Constraint->encodesTrueRange()) {
+ State = State->assume(DefinedVal, true);
+ if (!State)
+ return nullptr;
+ // Fall through, re-assume based on the range values as well.
+ }
+ // Overestimate the individual Ranges with the RangeSet' lowest and
+ // highest values.
+ return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
+ Constraint->getMaxValue(), true);
+}
+
+// Iterate over all symbols and try to simplify them. Once a symbol is
+// simplified then we check if we can merge the simplified symbol's equivalence
+// class to this class. This way, we simplify not just the symbols but the
+// classes as well: we strive to keep the number of the classes to be the
+// absolute minimum.
+[[nodiscard]] ProgramStateRef
+EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
+ ProgramStateRef State, EquivalenceClass Class) {
+ SymbolSet ClassMembers = Class.getClassMembers(State);
+ for (const SymbolRef &MemberSym : ClassMembers) {
+
+ const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
+ const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
+
+ // The symbol is collapsed to a constant, check if the current State is
+ // still feasible.
+ if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) {
+ const llvm::APSInt &SV = CI->getValue();
+ const RangeSet *ClassConstraint = getConstraint(State, Class);
+ // We have found a contradiction.
+ if (ClassConstraint && !ClassConstraint->contains(SV))
+ return nullptr;
+ }
+
+ if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
+ // The simplified symbol should be the member of the original Class,
+ // however, it might be in another existing class at the moment. We
+ // have to merge these classes.
+ ProgramStateRef OldState = State;
+ State = merge(F, State, MemberSym, SimplifiedMemberSym);
+ if (!State)
+ return nullptr;
+ // No state change, no merge happened actually.
+ if (OldState == State)
+ continue;
+
+ // Be aware that `SimplifiedMemberSym` might refer to an already dead
+ // symbol. In that case, the eqclass of that might not be the same as the
+ // eqclass of `MemberSym`. This is because the dead symbols are not
+ // preserved in the `ClassMap`, hence
+ // `find(State, SimplifiedMemberSym)` will result in a trivial eqclass
+ // compared to the eqclass of `MemberSym`.
+ // These eqclasses should be the same if `SimplifiedMemberSym` is alive.
+ // --> assert(find(State, MemberSym) == find(State, SimplifiedMemberSym))
+ //
+ // Note that `MemberSym` must be alive here since that is from the
+ // `ClassMembers` where all the symbols are alive.
+
+ // Remove the old and more complex symbol.
+ State = find(State, MemberSym).removeMember(State, MemberSym);
+
+ // Query the class constraint again b/c that may have changed during the
+ // merge above.
+ const RangeSet *ClassConstraint = getConstraint(State, Class);
+
+ // Re-evaluate an SVal with top-level `State->assume`, this ignites
+ // a RECURSIVE algorithm that will reach a FIXPOINT.
+ //
+ // About performance and complexity: Let us assume that in a State we
+ // have N non-trivial equivalence classes and that all constraints and
+ // disequality info is related to non-trivial classes. In the worst case,
+ // we can simplify only one symbol of one class in each iteration. The
+ // number of symbols in one class cannot grow b/c we replace the old
+ // symbol with the simplified one. Also, the number of the equivalence
+ // classes can decrease only, b/c the algorithm does a merge operation
+ // optionally. We need N iterations in this case to reach the fixpoint.
+ // Thus, the steps needed to be done in the worst case is proportional to
+ // N*N.
+ //
+ // This worst case scenario can be extended to that case when we have
+ // trivial classes in the constraints and in the disequality map. This
+ // case can be reduced to the case with a State where there are only
+ // non-trivial classes. This is because a merge operation on two trivial
+ // classes results in one non-trivial class.
+ State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
+ if (!State)
+ return nullptr;
+ }
+ }
+ return State;
+}
+
+inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State,
+ SymbolRef Sym) {
+ return find(State, Sym).getDisequalClasses(State);
+}
+
+inline ClassSet
+EquivalenceClass::getDisequalClasses(ProgramStateRef State) const {
+ return getDisequalClasses(State->get<DisequalityMap>(),
+ State->get_context<ClassSet>());
+}
+
+inline ClassSet
+EquivalenceClass::getDisequalClasses(DisequalityMapTy Map,
+ ClassSet::Factory &Factory) const {
+ if (const ClassSet *DisequalClasses = Map.lookup(*this))
+ return *DisequalClasses;
+
+ return Factory.getEmptySet();
+}
+
+bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State) {
+ ClassMembersTy Members = State->get<ClassMembers>();
+
+ for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : Members) {
+ for (SymbolRef Member : ClassMembersPair.second) {
+ // Every member of the class should have a mapping back to the class.
+ if (find(State, Member) == ClassMembersPair.first) {
+ continue;
+ }
+
+ return false;
+ }
+ }
+
+ DisequalityMapTy Disequalities = State->get<DisequalityMap>();
+ for (std::pair<EquivalenceClass, ClassSet> DisequalityInfo : Disequalities) {
+ EquivalenceClass Class = DisequalityInfo.first;
+ ClassSet DisequalClasses = DisequalityInfo.second;
+
+ // There is no use in keeping empty sets in the map.
+ if (DisequalClasses.isEmpty())
+ return false;
+
+ // Disequality is symmetrical, i.e. for every Class A and B that A != B,
+ // B != A should also be true.
+ for (EquivalenceClass DisequalClass : DisequalClasses) {
+ const ClassSet *DisequalToDisequalClasses =
+ Disequalities.lookup(DisequalClass);
+
+ // It should be a set of at least one element: Class
+ if (!DisequalToDisequalClasses ||
+ !DisequalToDisequalClasses->contains(Class))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// RangeConstraintManager implementation
+//===----------------------------------------------------------------------===//
+
+bool RangeConstraintManager::canReasonAbout(SVal X) const {
+ std::optional<nonloc::SymbolVal> SymVal = X.getAs<nonloc::SymbolVal>();
+ if (SymVal && SymVal->isExpression()) {
+ const SymExpr *SE = SymVal->getSymbol();
+
+ if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SE)) {
+ switch (SIE->getOpcode()) {
+ // We don't reason yet about bitwise-constraints on symbolic values.
+ case BO_And:
+ case BO_Or:
+ case BO_Xor:
+ return false;
+ // We don't reason yet about these arithmetic constraints on
+ // symbolic values.
+ case BO_Mul:
+ case BO_Div:
+ case BO_Rem:
+ case BO_Shl:
+ case BO_Shr:
+ return false;
+ // All other cases.
+ default:
+ return true;
+ }
+ }
+
+ if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(SE)) {
+ // FIXME: Handle <=> here.
+ if (BinaryOperator::isEqualityOp(SSE->getOpcode()) ||
+ BinaryOperator::isRelationalOp(SSE->getOpcode())) {
+ // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
+ // We've recently started producing Loc <> NonLoc comparisons (that
+ // result from casts of one of the operands between eg. intptr_t and
+ // void *), but we can't reason about them yet.
+ if (Loc::isLocType(SSE->getLHS()->getType())) {
+ return Loc::isLocType(SSE->getRHS()->getType());
+ }
+ }
+ }
+
+ return false;
+ }
+
+ return true;
+}
+
+ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
+ SymbolRef Sym) {
+ const RangeSet *Ranges = getConstraint(State, Sym);
+
+ // If we don't have any information about this symbol, it's underconstrained.
+ if (!Ranges)
+ return ConditionTruthVal();
+
+ // If we have a concrete value, see if it's zero.
+ if (const llvm::APSInt *Value = Ranges->getConcreteValue())
+ return *Value == 0;
+
+ BasicValueFactory &BV = getBasicVals();
+ APSIntType IntType = BV.getAPSIntType(Sym->getType());
+ llvm::APSInt Zero = IntType.getZeroValue();
+
+ // Check if zero is in the set of possible values.
+ if (!Ranges->contains(Zero))
+ return false;
+
+ // Zero is a possible value, but it is not the /only/ possible value.
+ return ConditionTruthVal();
+}
+
+const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St,
+ SymbolRef Sym) const {
+ const RangeSet *T = getConstraint(St, Sym);
+ return T ? T->getConcreteValue() : nullptr;
+}
+
+const llvm::APSInt *RangeConstraintManager::getSymMinVal(ProgramStateRef St,
+ SymbolRef Sym) const {
+ const RangeSet *T = getConstraint(St, Sym);
+ if (!T || T->isEmpty())
+ return nullptr;
+ return &T->getMinValue();
+}
+
+const llvm::APSInt *RangeConstraintManager::getSymMaxVal(ProgramStateRef St,
+ SymbolRef Sym) const {
+ const RangeSet *T = getConstraint(St, Sym);
+ if (!T || T->isEmpty())
+ return nullptr;
+ return &T->getMaxValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Remove dead symbols from existing constraints
+//===----------------------------------------------------------------------===//
+
+/// Scan all symbols referenced by the constraints. If the symbol is not alive
+/// as marked in LSymbols, mark it as dead in DSymbols.
+ProgramStateRef
+RangeConstraintManager::removeDeadBindings(ProgramStateRef State,
+ SymbolReaper &SymReaper) {
+ ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
+ ClassMembersTy NewClassMembersMap = ClassMembersMap;
+ ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
+ SymbolSet::Factory &SetFactory = State->get_context<SymbolSet>();
+
+ ConstraintRangeTy Constraints = State->get<ConstraintRange>();
+ ConstraintRangeTy NewConstraints = Constraints;
+ ConstraintRangeTy::Factory &ConstraintFactory =
+ State->get_context<ConstraintRange>();
+
+ ClassMapTy Map = State->get<ClassMap>();
+ ClassMapTy NewMap = Map;
+ ClassMapTy::Factory &ClassFactory = State->get_context<ClassMap>();
+
+ DisequalityMapTy Disequalities = State->get<DisequalityMap>();
+ DisequalityMapTy::Factory &DisequalityFactory =
+ State->get_context<DisequalityMap>();
+ ClassSet::Factory &ClassSetFactory = State->get_context<ClassSet>();
+
+ bool ClassMapChanged = false;
+ bool MembersMapChanged = false;
+ bool ConstraintMapChanged = false;
+ bool DisequalitiesChanged = false;
+
+ auto removeDeadClass = [&](EquivalenceClass Class) {
+ // Remove associated constraint ranges.
+ Constraints = ConstraintFactory.remove(Constraints, Class);
+ ConstraintMapChanged = true;
+
+ // Update disequality information to not hold any information on the
+ // removed class.
+ ClassSet DisequalClasses =
+ Class.getDisequalClasses(Disequalities, ClassSetFactory);
+ if (!DisequalClasses.isEmpty()) {
+ for (EquivalenceClass DisequalClass : DisequalClasses) {
+ ClassSet DisequalToDisequalSet =
+ DisequalClass.getDisequalClasses(Disequalities, ClassSetFactory);
+ // DisequalToDisequalSet is guaranteed to be non-empty for consistent
+ // disequality info.
+ assert(!DisequalToDisequalSet.isEmpty());
+ ClassSet NewSet = ClassSetFactory.remove(DisequalToDisequalSet, Class);
+
+ // No need in keeping an empty set.
+ if (NewSet.isEmpty()) {
+ Disequalities =
+ DisequalityFactory.remove(Disequalities, DisequalClass);
+ } else {
+ Disequalities =
+ DisequalityFactory.add(Disequalities, DisequalClass, NewSet);
+ }
+ }
+ // Remove the data for the class
+ Disequalities = DisequalityFactory.remove(Disequalities, Class);
+ DisequalitiesChanged = true;
+ }
+ };
+
+ // 1. Let's see if dead symbols are trivial and have associated constraints.
+ for (std::pair<EquivalenceClass, RangeSet> ClassConstraintPair :
+ Constraints) {
+ EquivalenceClass Class = ClassConstraintPair.first;
+ if (Class.isTriviallyDead(State, SymReaper)) {
+ // If this class is trivial, we can remove its constraints right away.
+ removeDeadClass(Class);
+ }
+ }
+
+ // 2. We don't need to track classes for dead symbols.
+ for (std::pair<SymbolRef, EquivalenceClass> SymbolClassPair : Map) {
+ SymbolRef Sym = SymbolClassPair.first;
+
+ if (SymReaper.isDead(Sym)) {
+ ClassMapChanged = true;
+ NewMap = ClassFactory.remove(NewMap, Sym);
+ }
+ }
+
+ // 3. Remove dead members from classes and remove dead non-trivial classes
+ // and their constraints.
+ for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair :
+ ClassMembersMap) {
+ EquivalenceClass Class = ClassMembersPair.first;
+ SymbolSet LiveMembers = ClassMembersPair.second;
+ bool MembersChanged = false;
+
+ for (SymbolRef Member : ClassMembersPair.second) {
+ if (SymReaper.isDead(Member)) {
+ MembersChanged = true;
+ LiveMembers = SetFactory.remove(LiveMembers, Member);
+ }
+ }
+
+ // Check if the class changed.
+ if (!MembersChanged)
+ continue;
+
+ MembersMapChanged = true;
+
+ if (LiveMembers.isEmpty()) {
+ // The class is dead now, we need to wipe it out of the members map...
+ NewClassMembersMap = EMFactory.remove(NewClassMembersMap, Class);
+
+ // ...and remove all of its constraints.
+ removeDeadClass(Class);
+ } else {
+ // We need to change the members associated with the class.
+ NewClassMembersMap =
+ EMFactory.add(NewClassMembersMap, Class, LiveMembers);
+ }
+ }
+
+ // 4. Update the state with new maps.
+ //
+ // Here we try to be humble and update a map only if it really changed.
+ if (ClassMapChanged)
+ State = State->set<ClassMap>(NewMap);
+
+ if (MembersMapChanged)
+ State = State->set<ClassMembers>(NewClassMembersMap);
+
+ if (ConstraintMapChanged)
+ State = State->set<ConstraintRange>(Constraints);
+
+ if (DisequalitiesChanged)
+ State = State->set<DisequalityMap>(Disequalities);
+
+ assert(EquivalenceClass::isClassDataConsistent(State));
+
+ return State;
+}
+
+RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
+ SymbolRef Sym) {
+ return SymbolicRangeInferrer::inferRange(F, State, Sym);
+}
+
+ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State,
+ SymbolRef Sym,
+ RangeSet Range) {
+ return ConstraintAssignor::assign(State, getSValBuilder(), F, Sym, Range);
+}
+
+//===------------------------------------------------------------------------===
+// assumeSymX methods: protected interface for RangeConstraintManager.
+//===------------------------------------------------------------------------===
+
+// The syntax for ranges below is mathematical, using [x, y] for closed ranges
+// and (x, y) for open ranges. These ranges are modular, corresponding with
+// a common treatment of C integer overflow. This means that these methods
+// do not have to worry about overflow; RangeSet::Intersect can handle such a
+// "wraparound" range.
+// As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
+// UINT_MAX, 0, 1, and 2.
+
+ProgramStateRef
+RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment) {
+ // Before we do any real work, see if the value can even show up.
+ APSIntType AdjustmentType(Adjustment);
+ if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
+ return St;
+
+ llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment;
+ RangeSet New = getRange(St, Sym);
+ New = F.deletePoint(New, Point);
+
+ return setRange(St, Sym, New);
+}
+
+ProgramStateRef
+RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment) {
+ // Before we do any real work, see if the value can even show up.
+ APSIntType AdjustmentType(Adjustment);
+ if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
+ return nullptr;
+
+ // [Int-Adjustment, Int-Adjustment]
+ llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
+ RangeSet New = getRange(St, Sym);
+ New = F.intersect(New, AdjInt);
+
+ return setRange(St, Sym, New);
+}
+
+RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St,
+ SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment) {
+ // Before we do any real work, see if the value can even show up.
+ APSIntType AdjustmentType(Adjustment);
+ switch (AdjustmentType.testInRange(Int, true)) {
+ case APSIntType::RTR_Below:
+ return F.getEmptySet();
+ case APSIntType::RTR_Within:
+ break;
+ case APSIntType::RTR_Above:
+ return getRange(St, Sym);
+ }
+
+ // Special case for Int == Min. This is always false.
+ llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
+ llvm::APSInt Min = AdjustmentType.getMinValue();
+ if (ComparisonVal == Min)
+ return F.getEmptySet();
+
+ llvm::APSInt Lower = Min - Adjustment;
+ llvm::APSInt Upper = ComparisonVal - Adjustment;
+ --Upper;
+
+ RangeSet Result = getRange(St, Sym);
+ return F.intersect(Result, Lower, Upper);
+}
+
+ProgramStateRef
+RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment) {
+ RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
+ return setRange(St, Sym, New);
+}
+
+RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St,
+ SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment) {
+ // Before we do any real work, see if the value can even show up.
+ APSIntType AdjustmentType(Adjustment);
+ switch (AdjustmentType.testInRange(Int, true)) {
+ case APSIntType::RTR_Below:
+ return getRange(St, Sym);
+ case APSIntType::RTR_Within:
+ break;
+ case APSIntType::RTR_Above:
+ return F.getEmptySet();
+ }
+
+ // Special case for Int == Max. This is always false.
+ llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
+ llvm::APSInt Max = AdjustmentType.getMaxValue();
+ if (ComparisonVal == Max)
+ return F.getEmptySet();
+
+ llvm::APSInt Lower = ComparisonVal - Adjustment;
+ llvm::APSInt Upper = Max - Adjustment;
+ ++Lower;
+
+ RangeSet SymRange = getRange(St, Sym);
+ return F.intersect(SymRange, Lower, Upper);
+}
+
+ProgramStateRef
+RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment) {
+ RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
+ return setRange(St, Sym, New);
+}
+
+RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St,
+ SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment) {
+ // Before we do any real work, see if the value can even show up.
+ APSIntType AdjustmentType(Adjustment);
+ switch (AdjustmentType.testInRange(Int, true)) {
+ case APSIntType::RTR_Below:
+ return getRange(St, Sym);
+ case APSIntType::RTR_Within:
+ break;
+ case APSIntType::RTR_Above:
+ return F.getEmptySet();
+ }
+
+ // Special case for Int == Min. This is always feasible.
+ llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
+ llvm::APSInt Min = AdjustmentType.getMinValue();
+ if (ComparisonVal == Min)
+ return getRange(St, Sym);
+
+ llvm::APSInt Max = AdjustmentType.getMaxValue();
+ llvm::APSInt Lower = ComparisonVal - Adjustment;
+ llvm::APSInt Upper = Max - Adjustment;
+
+ RangeSet SymRange = getRange(St, Sym);
+ return F.intersect(SymRange, Lower, Upper);
+}
+
+ProgramStateRef
+RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment) {
+ RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
+ return setRange(St, Sym, New);
+}
+
+RangeSet
+RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment) {
+ // Before we do any real work, see if the value can even show up.
+ APSIntType AdjustmentType(Adjustment);
+ switch (AdjustmentType.testInRange(Int, true)) {
+ case APSIntType::RTR_Below:
+ return F.getEmptySet();
+ case APSIntType::RTR_Within:
+ break;
+ case APSIntType::RTR_Above:
+ return RS();
+ }
+
+ // Special case for Int == Max. This is always feasible.
+ llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
+ llvm::APSInt Max = AdjustmentType.getMaxValue();
+ if (ComparisonVal == Max)
+ return RS();
+
+ llvm::APSInt Min = AdjustmentType.getMinValue();
+ llvm::APSInt Lower = Min - Adjustment;
+ llvm::APSInt Upper = ComparisonVal - Adjustment;
+
+ RangeSet Default = RS();
+ return F.intersect(Default, Lower, Upper);
+}
+
+RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St,
+ SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment) {
+ return getSymLERange([&] { return getRange(St, Sym); }, Int, Adjustment);
+}
+
+ProgramStateRef
+RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment) {
+ RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
+ return setRange(St, Sym, New);
+}
+
+ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
+ ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
+ const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
+ RangeSet New = getSymGERange(State, Sym, From, Adjustment);
+ if (New.isEmpty())
+ return nullptr;
+ RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment);
+ return setRange(State, Sym, Out);
+}
+
+ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
+ ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
+ const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
+ RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment);
+ RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment);
+ RangeSet New(F.add(RangeLT, RangeGT));
+ return setRange(State, Sym, New);
+}
+
+//===----------------------------------------------------------------------===//
+// Pretty-printing.
+//===----------------------------------------------------------------------===//
+
+void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State,
+ const char *NL, unsigned int Space,
+ bool IsDot) const {
+ printConstraints(Out, State, NL, Space, IsDot);
+ printEquivalenceClasses(Out, State, NL, Space, IsDot);
+ printDisequalities(Out, State, NL, Space, IsDot);
+}
+
+void RangeConstraintManager::printValue(raw_ostream &Out, ProgramStateRef State,
+ SymbolRef Sym) {
+ const RangeSet RS = getRange(State, Sym);
+ if (RS.isEmpty()) {
+ Out << "<empty rangeset>";
+ return;
+ }
+ Out << RS.getBitWidth() << (RS.isUnsigned() ? "u:" : "s:");
+ RS.dump(Out);
+}
+
+static std::string toString(const SymbolRef &Sym) {
+ std::string S;
+ llvm::raw_string_ostream O(S);
+ Sym->dumpToStream(O);
+ return S;
+}
+
+void RangeConstraintManager::printConstraints(raw_ostream &Out,
+ ProgramStateRef State,
+ const char *NL,
+ unsigned int Space,
+ bool IsDot) const {
+ ConstraintRangeTy Constraints = State->get<ConstraintRange>();
+
+ Indent(Out, Space, IsDot) << "\"constraints\": ";
+ if (Constraints.isEmpty()) {
+ Out << "null," << NL;
+ return;
+ }
+
+ std::map<std::string, RangeSet> OrderedConstraints;
+ for (std::pair<EquivalenceClass, RangeSet> P : Constraints) {
+ SymbolSet ClassMembers = P.first.getClassMembers(State);
+ for (const SymbolRef &ClassMember : ClassMembers) {
+ bool insertion_took_place;
+ std::tie(std::ignore, insertion_took_place) =
+ OrderedConstraints.insert({toString(ClassMember), P.second});
+ assert(insertion_took_place &&
+ "two symbols should not have the same dump");
+ }
+ }
+
+ ++Space;
+ Out << '[' << NL;
+ bool First = true;
+ for (std::pair<std::string, RangeSet> P : OrderedConstraints) {
+ if (First) {
+ First = false;
+ } else {
+ Out << ',';
+ Out << NL;
+ }
+ Indent(Out, Space, IsDot)
+ << "{ \"symbol\": \"" << P.first << "\", \"range\": \"";
+ P.second.dump(Out);
+ Out << "\" }";
+ }
+ Out << NL;
+
+ --Space;
+ Indent(Out, Space, IsDot) << "]," << NL;
+}
+
+static std::string toString(ProgramStateRef State, EquivalenceClass Class) {
+ SymbolSet ClassMembers = Class.getClassMembers(State);
+ llvm::SmallVector<SymbolRef, 8> ClassMembersSorted(ClassMembers.begin(),
+ ClassMembers.end());
+ llvm::sort(ClassMembersSorted,
+ [](const SymbolRef &LHS, const SymbolRef &RHS) {
+ return toString(LHS) < toString(RHS);
+ });
+
+ bool FirstMember = true;
+
+ std::string Str;
+ llvm::raw_string_ostream Out(Str);
+ Out << "[ ";
+ for (SymbolRef ClassMember : ClassMembersSorted) {
+ if (FirstMember)
+ FirstMember = false;
+ else
+ Out << ", ";
+ Out << "\"" << ClassMember << "\"";
+ }
+ Out << " ]";
+ return Str;
+}
+
+void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out,
+ ProgramStateRef State,
+ const char *NL,
+ unsigned int Space,
+ bool IsDot) const {
+ ClassMembersTy Members = State->get<ClassMembers>();
+
+ Indent(Out, Space, IsDot) << "\"equivalence_classes\": ";
+ if (Members.isEmpty()) {
+ Out << "null," << NL;
+ return;
+ }
+
+ std::set<std::string> MembersStr;
+ for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members)
+ MembersStr.insert(toString(State, ClassToSymbolSet.first));
+
+ ++Space;
+ Out << '[' << NL;
+ bool FirstClass = true;
+ for (const std::string &Str : MembersStr) {
+ if (FirstClass) {
+ FirstClass = false;
+ } else {
+ Out << ',';
+ Out << NL;
+ }
+ Indent(Out, Space, IsDot);
+ Out << Str;
+ }
+ Out << NL;
+
+ --Space;
+ Indent(Out, Space, IsDot) << "]," << NL;
+}
+
+void RangeConstraintManager::printDisequalities(raw_ostream &Out,
+ ProgramStateRef State,
+ const char *NL,
+ unsigned int Space,
+ bool IsDot) const {
+ DisequalityMapTy Disequalities = State->get<DisequalityMap>();
+
+ Indent(Out, Space, IsDot) << "\"disequality_info\": ";
+ if (Disequalities.isEmpty()) {
+ Out << "null," << NL;
+ return;
+ }
+
+ // Transform the disequality info to an ordered map of
+ // [string -> (ordered set of strings)]
+ using EqClassesStrTy = std::set<std::string>;
+ using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>;
+ DisequalityInfoStrTy DisequalityInfoStr;
+ for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) {
+ EquivalenceClass Class = ClassToDisEqSet.first;
+ ClassSet DisequalClasses = ClassToDisEqSet.second;
+ EqClassesStrTy MembersStr;
+ for (EquivalenceClass DisEqClass : DisequalClasses)
+ MembersStr.insert(toString(State, DisEqClass));
+ DisequalityInfoStr.insert({toString(State, Class), MembersStr});
+ }
+
+ ++Space;
+ Out << '[' << NL;
+ bool FirstClass = true;
+ for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet :
+ DisequalityInfoStr) {
+ const std::string &Class = ClassToDisEqSet.first;
+ if (FirstClass) {
+ FirstClass = false;
+ } else {
+ Out << ',';
+ Out << NL;
+ }
+ Indent(Out, Space, IsDot) << "{" << NL;
+ unsigned int DisEqSpace = Space + 1;
+ Indent(Out, DisEqSpace, IsDot) << "\"class\": ";
+ Out << Class;
+ const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second;
+ if (!DisequalClasses.empty()) {
+ Out << "," << NL;
+ Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL;
+ unsigned int DisEqClassSpace = DisEqSpace + 1;
+ Indent(Out, DisEqClassSpace, IsDot);
+ bool FirstDisEqClass = true;
+ for (const std::string &DisEqClass : DisequalClasses) {
+ if (FirstDisEqClass) {
+ FirstDisEqClass = false;
+ } else {
+ Out << ',' << NL;
+ Indent(Out, DisEqClassSpace, IsDot);
+ }
+ Out << DisEqClass;
+ }
+ Out << "]" << NL;
+ }
+ Indent(Out, Space, IsDot) << "}";
+ }
+ Out << NL;
+
+ --Space;
+ Indent(Out, Space, IsDot) << "]," << NL;
+}