1 files changed, 357 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
new file mode 100644
index 000000000000..81d92e724c7d
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
@@ -0,0 +1,357 @@
+//===- PoisonChecking.cpp - -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a transform pass which instruments IR such that poison semantics
+// are made explicit.  That is, it provides a (possibly partial) executable
+// semantics for every instruction w.r.t. poison as specified in the LLVM
+// LangRef.  There are obvious parallels to the sanitizer tools, but this pass
+// is focused purely on the semantics of LLVM IR, not any particular source
+// language.   If you're looking for something to see if your C/C++ contains
+// UB, this is not it.  
+// 
+// The rewritten semantics of each instruction will include the following
+// components: 
+//
+// 1) The original instruction, unmodified.
+// 2) A propagation rule which translates dynamic information about the poison
+//    state of each input to whether the dynamic output of the instruction
+//    produces poison.
+// 3) A flag validation rule which validates any poison producing flags on the
+//    instruction itself (e.g. checks for overflow on nsw).
+// 4) A check rule which traps (to a handler function) if this instruction must
+//    execute undefined behavior given the poison state of it's inputs.
+//
+// At the moment, the UB detection is done in a best effort manner; that is,
+// the resulting code may produce a false negative result (not report UB when
+// it actually exists according to the LangRef spec), but should never produce
+// a false positive (report UB where it doesn't exist).  The intention is to
+// eventually support a "strict" mode which never dynamically reports a false
+// negative at the cost of rejecting some valid inputs to translation.
+//
+// Use cases for this pass include:
+// - Understanding (and testing!) the implications of the definition of poison
+//   from the LangRef.
+// - Validating the output of a IR fuzzer to ensure that all programs produced
+//   are well defined on the specific input used.
+// - Finding/confirming poison specific miscompiles by checking the poison
+//   status of an input/IR pair is the same before and after an optimization
+//   transform. 
+// - Checking that a bugpoint reduction does not introduce UB which didn't
+//   exist in the original program being reduced.
+//
+// The major sources of inaccuracy are currently:
+// - Most validation rules not yet implemented for instructions with poison
+//   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
+// - UB which is control dependent on a branch on poison is not yet
+//   reported. Currently, only data flow dependence is modeled.
+// - Poison which is propagated through memory is not modeled.  As such,
+//   storing poison to memory and then reloading it will cause a false negative
+//   as we consider the reloaded value to not be poisoned.
+// - Poison propagation across function boundaries is not modeled.  At the
+//   moment, all arguments and return values are assumed not to be poison.
+// - Undef is not modeled.  In particular, the optimizer's freedom to pick
+//   concrete values for undef bits so as to maximize potential for producing
+//   poison is not modeled.  
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "poison-checking"
+
+static cl::opt<bool>
+LocalCheck("poison-checking-function-local",
+           cl::init(false),
+           cl::desc("Check that returns are non-poison (for testing)"));
+
+
+static bool isConstantFalse(Value* V) {
+  assert(V->getType()->isIntegerTy(1));
+  if (auto *CI = dyn_cast<ConstantInt>(V))
+    return CI->isZero();
+  return false;
+}
+
+static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
+  if (Ops.size() == 0)
+    return B.getFalse();
+  unsigned i = 0;
+  for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
+  if (i == Ops.size())
+    return B.getFalse();
+  Value *Accum = Ops[i++];
+  for (; i < Ops.size(); i++)
+    if (!isConstantFalse(Ops[i]))
+      Accum = B.CreateOr(Accum, Ops[i]);
+  return Accum;
+}
+
+static void generatePoisonChecksForBinOp(Instruction &I,
+                                         SmallVector<Value*, 2> &Checks) {
+  assert(isa<BinaryOperator>(I));
+  
+  IRBuilder<> B(&I);
+  Value *LHS = I.getOperand(0);
+  Value *RHS = I.getOperand(1);
+  switch (I.getOpcode()) {
+  default:
+    return;
+  case Instruction::Add: {
+    if (I.hasNoSignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    if (I.hasNoUnsignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    break;
+  }
+  case Instruction::Sub: {
+    if (I.hasNoSignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    if (I.hasNoUnsignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    break;
+  }
+  case Instruction::Mul: {
+    if (I.hasNoSignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    if (I.hasNoUnsignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    break;
+  }
+  case Instruction::UDiv: {
+    if (I.isExact()) {
+      auto *Check =
+        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
+                     ConstantInt::get(LHS->getType(), 0));
+      Checks.push_back(Check);
+    }
+    break;
+  }
+  case Instruction::SDiv: {
+    if (I.isExact()) {
+      auto *Check =
+        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
+                     ConstantInt::get(LHS->getType(), 0));
+      Checks.push_back(Check);
+    }
+    break;
+  }
+  case Instruction::AShr:
+  case Instruction::LShr:
+  case Instruction::Shl: {
+    Value *ShiftCheck =
+      B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
+                   ConstantInt::get(RHS->getType(),
+                                    LHS->getType()->getScalarSizeInBits()));
+    Checks.push_back(ShiftCheck);
+    break;
+  }
+  };
+}
+
+static Value* generatePoisonChecks(Instruction &I) {
+  IRBuilder<> B(&I);
+  SmallVector<Value*, 2> Checks;
+  if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
+    generatePoisonChecksForBinOp(I, Checks);
+
+  // Handle non-binops seperately
+  switch (I.getOpcode()) {
+  default:
+    break;
+  case Instruction::ExtractElement: {
+    Value *Vec = I.getOperand(0);
+    if (Vec->getType()->getVectorIsScalable())
+      break;
+    Value *Idx = I.getOperand(1);
+    unsigned NumElts = Vec->getType()->getVectorNumElements();
+    Value *Check =
+      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+                   ConstantInt::get(Idx->getType(), NumElts));
+    Checks.push_back(Check);
+    break;
+  }
+  case Instruction::InsertElement: {
+    Value *Vec = I.getOperand(0);
+    if (Vec->getType()->getVectorIsScalable())
+      break;
+    Value *Idx = I.getOperand(2);
+    unsigned NumElts = Vec->getType()->getVectorNumElements();
+    Value *Check =
+      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+                   ConstantInt::get(Idx->getType(), NumElts));
+    Checks.push_back(Check);
+    break;
+  }
+  };
+  return buildOrChain(B, Checks);
+}
+
+static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
+  auto Itr = ValToPoison.find(V);
+  if (Itr != ValToPoison.end())
+    return Itr->second;
+  if (isa<Constant>(V)) {
+    return ConstantInt::getFalse(V->getContext());
+  }
+  // Return false for unknwon values - this implements a non-strict mode where
+  // unhandled IR constructs are simply considered to never produce poison.  At
+  // some point in the future, we probably want a "strict mode" for testing if
+  // nothing else.
+  return ConstantInt::getFalse(V->getContext());
+}
+
+static void CreateAssert(IRBuilder<> &B, Value *Cond) {
+  assert(Cond->getType()->isIntegerTy(1));
+  if (auto *CI = dyn_cast<ConstantInt>(Cond))
+    if (CI->isAllOnesValue())
+      return;
+
+  Module *M = B.GetInsertBlock()->getModule();
+  M->getOrInsertFunction("__poison_checker_assert",
+                         Type::getVoidTy(M->getContext()),
+                         Type::getInt1Ty(M->getContext()));
+  Function *TrapFunc = M->getFunction("__poison_checker_assert");
+  B.CreateCall(TrapFunc, Cond);
+}
+
+static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
+  assert(Cond->getType()->isIntegerTy(1));
+  CreateAssert(B, B.CreateNot(Cond));
+}
+
+static bool rewrite(Function &F) {
+  auto * const Int1Ty = Type::getInt1Ty(F.getContext());
+
+  DenseMap<Value *, Value *> ValToPoison;
+
+  for (BasicBlock &BB : F)
+    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+      auto *OldPHI = cast<PHINode>(&*I);
+      auto *NewPHI = PHINode::Create(Int1Ty, 
+                                     OldPHI->getNumIncomingValues());
+      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
+        NewPHI->addIncoming(UndefValue::get(Int1Ty),
+                            OldPHI->getIncomingBlock(i));
+      NewPHI->insertBefore(OldPHI);
+      ValToPoison[OldPHI] = NewPHI;
+    }
+  
+  for (BasicBlock &BB : F)
+    for (Instruction &I : BB) {
+      if (isa<PHINode>(I)) continue;
+
+      IRBuilder<> B(cast<Instruction>(&I));
+      
+      // Note: There are many more sources of documented UB, but this pass only
+      // attempts to find UB triggered by propagation of poison.
+      if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
+        CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+
+      if (LocalCheck)
+        if (auto *RI = dyn_cast<ReturnInst>(&I))
+          if (RI->getNumOperands() != 0) {
+            Value *Op = RI->getOperand(0);
+            CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+          }
+
+      SmallVector<Value*, 4> Checks;
+      if (propagatesFullPoison(&I))
+        for (Value *V : I.operands())
+          Checks.push_back(getPoisonFor(ValToPoison, V));
+
+      if (auto *Check = generatePoisonChecks(I))
+        Checks.push_back(Check);
+      ValToPoison[&I] = buildOrChain(B, Checks);
+    }
+
+  for (BasicBlock &BB : F)
+    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+      auto *OldPHI = cast<PHINode>(&*I);
+      if (!ValToPoison.count(OldPHI))
+        continue; // skip the newly inserted phis
+      auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
+      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
+        auto *OldVal = OldPHI->getIncomingValue(i);
+        NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
+      }
+    }
+  return true;
+}
+
+
+PreservedAnalyses PoisonCheckingPass::run(Module &M,
+                                          ModuleAnalysisManager &AM) {
+  bool Changed = false;
+  for (auto &F : M)
+    Changed |= rewrite(F);
+
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+PreservedAnalyses PoisonCheckingPass::run(Function &F,
+                                          FunctionAnalysisManager &AM) {
+  return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+
+/* Major TODO Items:
+   - Control dependent poison UB
+   - Strict mode - (i.e. must analyze every operand)
+     - Poison through memory
+     - Function ABIs
+     - Full coverage of intrinsics, etc.. (ouch)
+
+   Instructions w/Unclear Semantics:
+   - shufflevector - It would seem reasonable for an out of bounds mask element
+     to produce poison, but the LangRef does not state.  
+   - and/or - It would seem reasonable for poison to propagate from both
+     arguments, but LangRef doesn't state and propagatesFullPoison doesn't
+     include these two.
+   - all binary ops w/vector operands - The likely interpretation would be that
+     any element overflowing should produce poison for the entire result, but
+     the LangRef does not state.
+   - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
+     strange that only certian flags should be documented as producing poison.
+
+   Cases of clear poison semantics not yet implemented:
+   - Exact flags on ashr/lshr produce poison
+   - NSW/NUW flags on shl produce poison
+   - Inbounds flag on getelementptr produce poison
+   - fptosi/fptoui (out of bounds input) produce poison
+   - Scalable vector types for insertelement/extractelement
+   - Floating point binary ops w/fmf nnan/noinfs flags produce poison
+ */