diff options
Diffstat (limited to 'llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp | 357 | 
1 files changed, 357 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp new file mode 100644 index 000000000000..81d92e724c7d --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp @@ -0,0 +1,357 @@ +//===- PoisonChecking.cpp - -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements a transform pass which instruments IR such that poison semantics +// are made explicit.  That is, it provides a (possibly partial) executable +// semantics for every instruction w.r.t. poison as specified in the LLVM +// LangRef.  There are obvious parallels to the sanitizer tools, but this pass +// is focused purely on the semantics of LLVM IR, not any particular source +// language.   If you're looking for something to see if your C/C++ contains +// UB, this is not it.   +//  +// The rewritten semantics of each instruction will include the following +// components:  +// +// 1) The original instruction, unmodified. +// 2) A propagation rule which translates dynamic information about the poison +//    state of each input to whether the dynamic output of the instruction +//    produces poison. +// 3) A flag validation rule which validates any poison producing flags on the +//    instruction itself (e.g. checks for overflow on nsw). +// 4) A check rule which traps (to a handler function) if this instruction must +//    execute undefined behavior given the poison state of it's inputs. +// +// At the moment, the UB detection is done in a best effort manner; that is, +// the resulting code may produce a false negative result (not report UB when +// it actually exists according to the LangRef spec), but should never produce +// a false positive (report UB where it doesn't exist).  The intention is to +// eventually support a "strict" mode which never dynamically reports a false +// negative at the cost of rejecting some valid inputs to translation. +// +// Use cases for this pass include: +// - Understanding (and testing!) the implications of the definition of poison +//   from the LangRef. +// - Validating the output of a IR fuzzer to ensure that all programs produced +//   are well defined on the specific input used. +// - Finding/confirming poison specific miscompiles by checking the poison +//   status of an input/IR pair is the same before and after an optimization +//   transform.  +// - Checking that a bugpoint reduction does not introduce UB which didn't +//   exist in the original program being reduced. +// +// The major sources of inaccuracy are currently: +// - Most validation rules not yet implemented for instructions with poison +//   relavant flags.  At the moment, only nsw/nuw on add/sub are supported. +// - UB which is control dependent on a branch on poison is not yet +//   reported. Currently, only data flow dependence is modeled. +// - Poison which is propagated through memory is not modeled.  As such, +//   storing poison to memory and then reloading it will cause a false negative +//   as we consider the reloaded value to not be poisoned. +// - Poison propagation across function boundaries is not modeled.  At the +//   moment, all arguments and return values are assumed not to be poison. +// - Undef is not modeled.  In particular, the optimizer's freedom to pick +//   concrete values for undef bits so as to maximize potential for producing +//   poison is not modeled.   +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/PoisonChecking.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "poison-checking" + +static cl::opt<bool> +LocalCheck("poison-checking-function-local", +           cl::init(false), +           cl::desc("Check that returns are non-poison (for testing)")); + + +static bool isConstantFalse(Value* V) { +  assert(V->getType()->isIntegerTy(1)); +  if (auto *CI = dyn_cast<ConstantInt>(V)) +    return CI->isZero(); +  return false; +} + +static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { +  if (Ops.size() == 0) +    return B.getFalse(); +  unsigned i = 0; +  for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {} +  if (i == Ops.size()) +    return B.getFalse(); +  Value *Accum = Ops[i++]; +  for (; i < Ops.size(); i++) +    if (!isConstantFalse(Ops[i])) +      Accum = B.CreateOr(Accum, Ops[i]); +  return Accum; +} + +static void generatePoisonChecksForBinOp(Instruction &I, +                                         SmallVector<Value*, 2> &Checks) { +  assert(isa<BinaryOperator>(I)); +   +  IRBuilder<> B(&I); +  Value *LHS = I.getOperand(0); +  Value *RHS = I.getOperand(1); +  switch (I.getOpcode()) { +  default: +    return; +  case Instruction::Add: { +    if (I.hasNoSignedWrap()) { +      auto *OverflowOp = +        B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS); +      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); +    } +    if (I.hasNoUnsignedWrap()) { +      auto *OverflowOp = +        B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS); +      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); +    } +    break; +  } +  case Instruction::Sub: { +    if (I.hasNoSignedWrap()) { +      auto *OverflowOp = +        B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS); +      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); +    } +    if (I.hasNoUnsignedWrap()) { +      auto *OverflowOp = +        B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS); +      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); +    } +    break; +  } +  case Instruction::Mul: { +    if (I.hasNoSignedWrap()) { +      auto *OverflowOp = +        B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS); +      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); +    } +    if (I.hasNoUnsignedWrap()) { +      auto *OverflowOp = +        B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS); +      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); +    } +    break; +  } +  case Instruction::UDiv: { +    if (I.isExact()) { +      auto *Check = +        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS), +                     ConstantInt::get(LHS->getType(), 0)); +      Checks.push_back(Check); +    } +    break; +  } +  case Instruction::SDiv: { +    if (I.isExact()) { +      auto *Check = +        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS), +                     ConstantInt::get(LHS->getType(), 0)); +      Checks.push_back(Check); +    } +    break; +  } +  case Instruction::AShr: +  case Instruction::LShr: +  case Instruction::Shl: { +    Value *ShiftCheck = +      B.CreateICmp(ICmpInst::ICMP_UGE, RHS, +                   ConstantInt::get(RHS->getType(), +                                    LHS->getType()->getScalarSizeInBits())); +    Checks.push_back(ShiftCheck); +    break; +  } +  }; +} + +static Value* generatePoisonChecks(Instruction &I) { +  IRBuilder<> B(&I); +  SmallVector<Value*, 2> Checks; +  if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy()) +    generatePoisonChecksForBinOp(I, Checks); + +  // Handle non-binops seperately +  switch (I.getOpcode()) { +  default: +    break; +  case Instruction::ExtractElement: { +    Value *Vec = I.getOperand(0); +    if (Vec->getType()->getVectorIsScalable()) +      break; +    Value *Idx = I.getOperand(1); +    unsigned NumElts = Vec->getType()->getVectorNumElements(); +    Value *Check = +      B.CreateICmp(ICmpInst::ICMP_UGE, Idx, +                   ConstantInt::get(Idx->getType(), NumElts)); +    Checks.push_back(Check); +    break; +  } +  case Instruction::InsertElement: { +    Value *Vec = I.getOperand(0); +    if (Vec->getType()->getVectorIsScalable()) +      break; +    Value *Idx = I.getOperand(2); +    unsigned NumElts = Vec->getType()->getVectorNumElements(); +    Value *Check = +      B.CreateICmp(ICmpInst::ICMP_UGE, Idx, +                   ConstantInt::get(Idx->getType(), NumElts)); +    Checks.push_back(Check); +    break; +  } +  }; +  return buildOrChain(B, Checks); +} + +static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { +  auto Itr = ValToPoison.find(V); +  if (Itr != ValToPoison.end()) +    return Itr->second; +  if (isa<Constant>(V)) { +    return ConstantInt::getFalse(V->getContext()); +  } +  // Return false for unknwon values - this implements a non-strict mode where +  // unhandled IR constructs are simply considered to never produce poison.  At +  // some point in the future, we probably want a "strict mode" for testing if +  // nothing else. +  return ConstantInt::getFalse(V->getContext()); +} + +static void CreateAssert(IRBuilder<> &B, Value *Cond) { +  assert(Cond->getType()->isIntegerTy(1)); +  if (auto *CI = dyn_cast<ConstantInt>(Cond)) +    if (CI->isAllOnesValue()) +      return; + +  Module *M = B.GetInsertBlock()->getModule(); +  M->getOrInsertFunction("__poison_checker_assert", +                         Type::getVoidTy(M->getContext()), +                         Type::getInt1Ty(M->getContext())); +  Function *TrapFunc = M->getFunction("__poison_checker_assert"); +  B.CreateCall(TrapFunc, Cond); +} + +static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { +  assert(Cond->getType()->isIntegerTy(1)); +  CreateAssert(B, B.CreateNot(Cond)); +} + +static bool rewrite(Function &F) { +  auto * const Int1Ty = Type::getInt1Ty(F.getContext()); + +  DenseMap<Value *, Value *> ValToPoison; + +  for (BasicBlock &BB : F) +    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { +      auto *OldPHI = cast<PHINode>(&*I); +      auto *NewPHI = PHINode::Create(Int1Ty,  +                                     OldPHI->getNumIncomingValues()); +      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) +        NewPHI->addIncoming(UndefValue::get(Int1Ty), +                            OldPHI->getIncomingBlock(i)); +      NewPHI->insertBefore(OldPHI); +      ValToPoison[OldPHI] = NewPHI; +    } +   +  for (BasicBlock &BB : F) +    for (Instruction &I : BB) { +      if (isa<PHINode>(I)) continue; + +      IRBuilder<> B(cast<Instruction>(&I)); +       +      // Note: There are many more sources of documented UB, but this pass only +      // attempts to find UB triggered by propagation of poison. +      if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I))) +        CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); + +      if (LocalCheck) +        if (auto *RI = dyn_cast<ReturnInst>(&I)) +          if (RI->getNumOperands() != 0) { +            Value *Op = RI->getOperand(0); +            CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); +          } + +      SmallVector<Value*, 4> Checks; +      if (propagatesFullPoison(&I)) +        for (Value *V : I.operands()) +          Checks.push_back(getPoisonFor(ValToPoison, V)); + +      if (auto *Check = generatePoisonChecks(I)) +        Checks.push_back(Check); +      ValToPoison[&I] = buildOrChain(B, Checks); +    } + +  for (BasicBlock &BB : F) +    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { +      auto *OldPHI = cast<PHINode>(&*I); +      if (!ValToPoison.count(OldPHI)) +        continue; // skip the newly inserted phis +      auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]); +      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { +        auto *OldVal = OldPHI->getIncomingValue(i); +        NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal)); +      } +    } +  return true; +} + + +PreservedAnalyses PoisonCheckingPass::run(Module &M, +                                          ModuleAnalysisManager &AM) { +  bool Changed = false; +  for (auto &F : M) +    Changed |= rewrite(F); + +  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} + +PreservedAnalyses PoisonCheckingPass::run(Function &F, +                                          FunctionAnalysisManager &AM) { +  return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} + + +/* Major TODO Items: +   - Control dependent poison UB +   - Strict mode - (i.e. must analyze every operand) +     - Poison through memory +     - Function ABIs +     - Full coverage of intrinsics, etc.. (ouch) + +   Instructions w/Unclear Semantics: +   - shufflevector - It would seem reasonable for an out of bounds mask element +     to produce poison, but the LangRef does not state.   +   - and/or - It would seem reasonable for poison to propagate from both +     arguments, but LangRef doesn't state and propagatesFullPoison doesn't +     include these two. +   - all binary ops w/vector operands - The likely interpretation would be that +     any element overflowing should produce poison for the entire result, but +     the LangRef does not state. +   - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems +     strange that only certian flags should be documented as producing poison. + +   Cases of clear poison semantics not yet implemented: +   - Exact flags on ashr/lshr produce poison +   - NSW/NUW flags on shl produce poison +   - Inbounds flag on getelementptr produce poison +   - fptosi/fptoui (out of bounds input) produce poison +   - Scalable vector types for insertelement/extractelement +   - Floating point binary ops w/fmf nnan/noinfs flags produce poison + */  | 
