diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp')
| -rw-r--r-- | contrib/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp | 357 |
1 files changed, 0 insertions, 357 deletions
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp deleted file mode 100644 index 81d92e724c7d..000000000000 --- a/contrib/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp +++ /dev/null @@ -1,357 +0,0 @@ -//===- PoisonChecking.cpp - -----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Implements a transform pass which instruments IR such that poison semantics -// are made explicit. That is, it provides a (possibly partial) executable -// semantics for every instruction w.r.t. poison as specified in the LLVM -// LangRef. There are obvious parallels to the sanitizer tools, but this pass -// is focused purely on the semantics of LLVM IR, not any particular source -// language. If you're looking for something to see if your C/C++ contains -// UB, this is not it. -// -// The rewritten semantics of each instruction will include the following -// components: -// -// 1) The original instruction, unmodified. -// 2) A propagation rule which translates dynamic information about the poison -// state of each input to whether the dynamic output of the instruction -// produces poison. -// 3) A flag validation rule which validates any poison producing flags on the -// instruction itself (e.g. checks for overflow on nsw). -// 4) A check rule which traps (to a handler function) if this instruction must -// execute undefined behavior given the poison state of it's inputs. -// -// At the moment, the UB detection is done in a best effort manner; that is, -// the resulting code may produce a false negative result (not report UB when -// it actually exists according to the LangRef spec), but should never produce -// a false positive (report UB where it doesn't exist). The intention is to -// eventually support a "strict" mode which never dynamically reports a false -// negative at the cost of rejecting some valid inputs to translation. -// -// Use cases for this pass include: -// - Understanding (and testing!) the implications of the definition of poison -// from the LangRef. -// - Validating the output of a IR fuzzer to ensure that all programs produced -// are well defined on the specific input used. -// - Finding/confirming poison specific miscompiles by checking the poison -// status of an input/IR pair is the same before and after an optimization -// transform. -// - Checking that a bugpoint reduction does not introduce UB which didn't -// exist in the original program being reduced. -// -// The major sources of inaccuracy are currently: -// - Most validation rules not yet implemented for instructions with poison -// relavant flags. At the moment, only nsw/nuw on add/sub are supported. -// - UB which is control dependent on a branch on poison is not yet -// reported. Currently, only data flow dependence is modeled. -// - Poison which is propagated through memory is not modeled. As such, -// storing poison to memory and then reloading it will cause a false negative -// as we consider the reloaded value to not be poisoned. -// - Poison propagation across function boundaries is not modeled. At the -// moment, all arguments and return values are assumed not to be poison. -// - Undef is not modeled. In particular, the optimizer's freedom to pick -// concrete values for undef bits so as to maximize potential for producing -// poison is not modeled. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Instrumentation/PoisonChecking.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/InstVisitor.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -#define DEBUG_TYPE "poison-checking" - -static cl::opt<bool> -LocalCheck("poison-checking-function-local", - cl::init(false), - cl::desc("Check that returns are non-poison (for testing)")); - - -static bool isConstantFalse(Value* V) { - assert(V->getType()->isIntegerTy(1)); - if (auto *CI = dyn_cast<ConstantInt>(V)) - return CI->isZero(); - return false; -} - -static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { - if (Ops.size() == 0) - return B.getFalse(); - unsigned i = 0; - for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {} - if (i == Ops.size()) - return B.getFalse(); - Value *Accum = Ops[i++]; - for (; i < Ops.size(); i++) - if (!isConstantFalse(Ops[i])) - Accum = B.CreateOr(Accum, Ops[i]); - return Accum; -} - -static void generatePoisonChecksForBinOp(Instruction &I, - SmallVector<Value*, 2> &Checks) { - assert(isa<BinaryOperator>(I)); - - IRBuilder<> B(&I); - Value *LHS = I.getOperand(0); - Value *RHS = I.getOperand(1); - switch (I.getOpcode()) { - default: - return; - case Instruction::Add: { - if (I.hasNoSignedWrap()) { - auto *OverflowOp = - B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS); - Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); - } - if (I.hasNoUnsignedWrap()) { - auto *OverflowOp = - B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS); - Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); - } - break; - } - case Instruction::Sub: { - if (I.hasNoSignedWrap()) { - auto *OverflowOp = - B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS); - Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); - } - if (I.hasNoUnsignedWrap()) { - auto *OverflowOp = - B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS); - Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); - } - break; - } - case Instruction::Mul: { - if (I.hasNoSignedWrap()) { - auto *OverflowOp = - B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS); - Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); - } - if (I.hasNoUnsignedWrap()) { - auto *OverflowOp = - B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS); - Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); - } - break; - } - case Instruction::UDiv: { - if (I.isExact()) { - auto *Check = - B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS), - ConstantInt::get(LHS->getType(), 0)); - Checks.push_back(Check); - } - break; - } - case Instruction::SDiv: { - if (I.isExact()) { - auto *Check = - B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS), - ConstantInt::get(LHS->getType(), 0)); - Checks.push_back(Check); - } - break; - } - case Instruction::AShr: - case Instruction::LShr: - case Instruction::Shl: { - Value *ShiftCheck = - B.CreateICmp(ICmpInst::ICMP_UGE, RHS, - ConstantInt::get(RHS->getType(), - LHS->getType()->getScalarSizeInBits())); - Checks.push_back(ShiftCheck); - break; - } - }; -} - -static Value* generatePoisonChecks(Instruction &I) { - IRBuilder<> B(&I); - SmallVector<Value*, 2> Checks; - if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy()) - generatePoisonChecksForBinOp(I, Checks); - - // Handle non-binops seperately - switch (I.getOpcode()) { - default: - break; - case Instruction::ExtractElement: { - Value *Vec = I.getOperand(0); - if (Vec->getType()->getVectorIsScalable()) - break; - Value *Idx = I.getOperand(1); - unsigned NumElts = Vec->getType()->getVectorNumElements(); - Value *Check = - B.CreateICmp(ICmpInst::ICMP_UGE, Idx, - ConstantInt::get(Idx->getType(), NumElts)); - Checks.push_back(Check); - break; - } - case Instruction::InsertElement: { - Value *Vec = I.getOperand(0); - if (Vec->getType()->getVectorIsScalable()) - break; - Value *Idx = I.getOperand(2); - unsigned NumElts = Vec->getType()->getVectorNumElements(); - Value *Check = - B.CreateICmp(ICmpInst::ICMP_UGE, Idx, - ConstantInt::get(Idx->getType(), NumElts)); - Checks.push_back(Check); - break; - } - }; - return buildOrChain(B, Checks); -} - -static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { - auto Itr = ValToPoison.find(V); - if (Itr != ValToPoison.end()) - return Itr->second; - if (isa<Constant>(V)) { - return ConstantInt::getFalse(V->getContext()); - } - // Return false for unknwon values - this implements a non-strict mode where - // unhandled IR constructs are simply considered to never produce poison. At - // some point in the future, we probably want a "strict mode" for testing if - // nothing else. - return ConstantInt::getFalse(V->getContext()); -} - -static void CreateAssert(IRBuilder<> &B, Value *Cond) { - assert(Cond->getType()->isIntegerTy(1)); - if (auto *CI = dyn_cast<ConstantInt>(Cond)) - if (CI->isAllOnesValue()) - return; - - Module *M = B.GetInsertBlock()->getModule(); - M->getOrInsertFunction("__poison_checker_assert", - Type::getVoidTy(M->getContext()), - Type::getInt1Ty(M->getContext())); - Function *TrapFunc = M->getFunction("__poison_checker_assert"); - B.CreateCall(TrapFunc, Cond); -} - -static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { - assert(Cond->getType()->isIntegerTy(1)); - CreateAssert(B, B.CreateNot(Cond)); -} - -static bool rewrite(Function &F) { - auto * const Int1Ty = Type::getInt1Ty(F.getContext()); - - DenseMap<Value *, Value *> ValToPoison; - - for (BasicBlock &BB : F) - for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { - auto *OldPHI = cast<PHINode>(&*I); - auto *NewPHI = PHINode::Create(Int1Ty, - OldPHI->getNumIncomingValues()); - for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) - NewPHI->addIncoming(UndefValue::get(Int1Ty), - OldPHI->getIncomingBlock(i)); - NewPHI->insertBefore(OldPHI); - ValToPoison[OldPHI] = NewPHI; - } - - for (BasicBlock &BB : F) - for (Instruction &I : BB) { - if (isa<PHINode>(I)) continue; - - IRBuilder<> B(cast<Instruction>(&I)); - - // Note: There are many more sources of documented UB, but this pass only - // attempts to find UB triggered by propagation of poison. - if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I))) - CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); - - if (LocalCheck) - if (auto *RI = dyn_cast<ReturnInst>(&I)) - if (RI->getNumOperands() != 0) { - Value *Op = RI->getOperand(0); - CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); - } - - SmallVector<Value*, 4> Checks; - if (propagatesFullPoison(&I)) - for (Value *V : I.operands()) - Checks.push_back(getPoisonFor(ValToPoison, V)); - - if (auto *Check = generatePoisonChecks(I)) - Checks.push_back(Check); - ValToPoison[&I] = buildOrChain(B, Checks); - } - - for (BasicBlock &BB : F) - for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { - auto *OldPHI = cast<PHINode>(&*I); - if (!ValToPoison.count(OldPHI)) - continue; // skip the newly inserted phis - auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]); - for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { - auto *OldVal = OldPHI->getIncomingValue(i); - NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal)); - } - } - return true; -} - - -PreservedAnalyses PoisonCheckingPass::run(Module &M, - ModuleAnalysisManager &AM) { - bool Changed = false; - for (auto &F : M) - Changed |= rewrite(F); - - return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); -} - -PreservedAnalyses PoisonCheckingPass::run(Function &F, - FunctionAnalysisManager &AM) { - return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); -} - - -/* Major TODO Items: - - Control dependent poison UB - - Strict mode - (i.e. must analyze every operand) - - Poison through memory - - Function ABIs - - Full coverage of intrinsics, etc.. (ouch) - - Instructions w/Unclear Semantics: - - shufflevector - It would seem reasonable for an out of bounds mask element - to produce poison, but the LangRef does not state. - - and/or - It would seem reasonable for poison to propagate from both - arguments, but LangRef doesn't state and propagatesFullPoison doesn't - include these two. - - all binary ops w/vector operands - The likely interpretation would be that - any element overflowing should produce poison for the entire result, but - the LangRef does not state. - - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems - strange that only certian flags should be documented as producing poison. - - Cases of clear poison semantics not yet implemented: - - Exact flags on ashr/lshr produce poison - - NSW/NUW flags on shl produce poison - - Inbounds flag on getelementptr produce poison - - fptosi/fptoui (out of bounds input) produce poison - - Scalable vector types for insertelement/extractelement - - Floating point binary ops w/fmf nnan/noinfs flags produce poison - */ |
