diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp | 105 |
1 files changed, 91 insertions, 14 deletions
diff --git a/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp b/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp index 6ea108d831a1..ce2b5a38b2f3 100644 --- a/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp +++ b/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp @@ -7,8 +7,12 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/StructuralHash.h" +#include "llvm/ADT/Hashing.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" using namespace llvm; @@ -16,23 +20,89 @@ using namespace llvm; namespace { // Basic hashing mechanism to detect structural change to the IR, used to verify -// pass return status consistency with actual change. Loosely copied from -// llvm/lib/Transforms/Utils/FunctionComparator.cpp +// pass return status consistency with actual change. In addition to being used +// by the MergeFunctions pass. class StructuralHashImpl { - hash_code Hash; + uint64_t Hash; - template <typename T> void hash(const T &V) { Hash = hash_combine(Hash, V); } + void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } + + // This will produce different values on 32-bit and 64-bit systens as + // hash_combine returns a size_t. However, this is only used for + // detailed hashing which, in-tree, only needs to distinguish between + // differences in functions. + template <typename T> void hashArbitaryType(const T &V) { + hash(hash_combine(V)); + } + + void hashType(Type *ValueType) { + hash(ValueType->getTypeID()); + if (ValueType->isIntegerTy()) + hash(ValueType->getIntegerBitWidth()); + } public: StructuralHashImpl() : Hash(4) {} - void update(const Function &F) { + void updateOperand(Value *Operand) { + hashType(Operand->getType()); + + // The cases enumerated below are not exhaustive and are only aimed to + // get decent coverage over the function. + if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) { + hashArbitaryType(ConstInt->getValue()); + } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) { + hashArbitaryType(ConstFP->getValue()); + } else if (Argument *Arg = dyn_cast<Argument>(Operand)) { + hash(Arg->getArgNo()); + } else if (Function *Func = dyn_cast<Function>(Operand)) { + // Hashing the name will be deterministic as LLVM's hashing infrastructure + // has explicit support for hashing strings and will not simply hash + // the pointer. + hashArbitaryType(Func->getName()); + } + } + + void updateInstruction(const Instruction &Inst, bool DetailedHash) { + hash(Inst.getOpcode()); + + if (!DetailedHash) + return; + + hashType(Inst.getType()); + + // Handle additional properties of specific instructions that cause + // semantic differences in the IR. + if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst)) + hash(ComparisonInstruction->getPredicate()); + + for (const auto &Op : Inst.operands()) + updateOperand(Op); + } + + // A function hash is calculated by considering only the number of arguments + // and whether a function is varargs, the order of basic blocks (given by the + // successors of each basic block in depth first order), and the order of + // opcodes of each instruction within each of these basic blocks. This mirrors + // the strategy FunctionComparator::compare() uses to compare functions by + // walking the BBs in depth first order and comparing each instruction in + // sequence. Because this hash currently does not look at the operands, it is + // insensitive to things such as the target of calls and the constants used in + // the function, which makes it useful when possibly merging functions which + // are the same modulo constants and call targets. + // + // Note that different users of StructuralHash will want different behavior + // out of it (i.e., MergeFunctions will want something different from PM + // expensive checks for pass modification status). When modifying this + // function, most changes should be gated behind an option and enabled + // selectively. + void update(const Function &F, bool DetailedHash) { // Declarations don't affect analyses. if (F.isDeclaration()) return; - hash(12345); // Function header + hash(0x62642d6b6b2d6b72); // Function header hash(F.isVarArg()); hash(F.arg_size()); @@ -40,13 +110,20 @@ public: SmallVector<const BasicBlock *, 8> BBs; SmallPtrSet<const BasicBlock *, 16> VisitedBBs; + // Walk the blocks in the same order as + // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the + // function "structure." (BB and opcode sequence) BBs.push_back(&F.getEntryBlock()); VisitedBBs.insert(BBs[0]); while (!BBs.empty()) { const BasicBlock *BB = BBs.pop_back_val(); - hash(45798); // Block header + + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the + // opcodes + hash(45798); for (auto &Inst : *BB) - hash(Inst.getOpcode()); + updateInstruction(Inst, DetailedHash); const Instruction *Term = BB->getTerminator(); for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { @@ -67,11 +144,11 @@ public: hash(GV.getValueType()->getTypeID()); } - void update(const Module &M) { + void update(const Module &M, bool DetailedHash) { for (const GlobalVariable &GV : M.globals()) update(GV); for (const Function &F : M) - update(F); + update(F, DetailedHash); } uint64_t getHash() const { return Hash; } @@ -79,14 +156,14 @@ public: } // namespace -uint64_t llvm::StructuralHash(const Function &F) { +IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { StructuralHashImpl H; - H.update(F); + H.update(F, DetailedHash); return H.getHash(); } -uint64_t llvm::StructuralHash(const Module &M) { +IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { StructuralHashImpl H; - H.update(M); + H.update(M, DetailedHash); return H.getHash(); } |
