diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2010-09-17 15:48:55 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2010-09-17 15:48:55 +0000 | 
| commit | d39c594d39df7f283c2fb8a704a3f31c501180d9 (patch) | |
| tree | 36453626c792cccd91f783a38a169d610a6b9db9 /tools/llvm-diff/DifferenceEngine.cpp | |
| parent | 6144c1de6a7674dad94290650e4e14f24d42e421 (diff) | |
Notes
Diffstat (limited to 'tools/llvm-diff/DifferenceEngine.cpp')
| -rw-r--r-- | tools/llvm-diff/DifferenceEngine.cpp | 676 | 
1 files changed, 676 insertions, 0 deletions
| diff --git a/tools/llvm-diff/DifferenceEngine.cpp b/tools/llvm-diff/DifferenceEngine.cpp new file mode 100644 index 0000000000000..b0a24d0737eca --- /dev/null +++ b/tools/llvm-diff/DifferenceEngine.cpp @@ -0,0 +1,676 @@ +//===-- DifferenceEngine.cpp - Structural function/module comparison ------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the implementation of the LLVM difference +// engine, which structurally compares global values within a module. +// +//===----------------------------------------------------------------------===// + +#include "DifferenceEngine.h" + +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/type_traits.h" + +#include <utility> + +using namespace llvm; + +namespace { + +/// A priority queue, implemented as a heap. +template <class T, class Sorter, unsigned InlineCapacity> +class PriorityQueue { +  Sorter Precedes; +  llvm::SmallVector<T, InlineCapacity> Storage; + +public: +  PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {} + +  /// Checks whether the heap is empty. +  bool empty() const { return Storage.empty(); } + +  /// Insert a new value on the heap. +  void insert(const T &V) { +    unsigned Index = Storage.size(); +    Storage.push_back(V); +    if (Index == 0) return; + +    T *data = Storage.data(); +    while (true) { +      unsigned Target = (Index + 1) / 2 - 1; +      if (!Precedes(data[Index], data[Target])) return; +      std::swap(data[Index], data[Target]); +      if (Target == 0) return; +      Index = Target; +    } +  } + +  /// Remove the minimum value in the heap.  Only valid on a non-empty heap. +  T remove_min() { +    assert(!empty()); +    T tmp = Storage[0]; +     +    unsigned NewSize = Storage.size() - 1; +    if (NewSize) { +      // Move the slot at the end to the beginning. +      if (isPodLike<T>::value) +        Storage[0] = Storage[NewSize]; +      else +        std::swap(Storage[0], Storage[NewSize]); + +      // Bubble the root up as necessary. +      unsigned Index = 0; +      while (true) { +        // With a 1-based index, the children would be Index*2 and Index*2+1. +        unsigned R = (Index + 1) * 2; +        unsigned L = R - 1; + +        // If R is out of bounds, we're done after this in any case. +        if (R >= NewSize) { +          // If L is also out of bounds, we're done immediately. +          if (L >= NewSize) break; + +          // Otherwise, test whether we should swap L and Index. +          if (Precedes(Storage[L], Storage[Index])) +            std::swap(Storage[L], Storage[Index]); +          break; +        } + +        // Otherwise, we need to compare with the smaller of L and R. +        // Prefer R because it's closer to the end of the array. +        unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R); + +        // If Index is >= the min of L and R, then heap ordering is restored. +        if (!Precedes(Storage[IndexToTest], Storage[Index])) +          break; + +        // Otherwise, keep bubbling up. +        std::swap(Storage[IndexToTest], Storage[Index]); +        Index = IndexToTest; +      } +    } +    Storage.pop_back(); + +    return tmp; +  } +}; + +/// A function-scope difference engine. +class FunctionDifferenceEngine { +  DifferenceEngine &Engine; + +  /// The current mapping from old local values to new local values. +  DenseMap<Value*, Value*> Values; + +  /// The current mapping from old blocks to new blocks. +  DenseMap<BasicBlock*, BasicBlock*> Blocks; + +  DenseSet<std::pair<Value*, Value*> > TentativeValues; + +  unsigned getUnprocPredCount(BasicBlock *Block) const { +    unsigned Count = 0; +    for (pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E; ++I) +      if (!Blocks.count(*I)) Count++; +    return Count; +  } + +  typedef std::pair<BasicBlock*, BasicBlock*> BlockPair; + +  /// A type which sorts a priority queue by the number of unprocessed +  /// predecessor blocks it has remaining. +  /// +  /// This is actually really expensive to calculate. +  struct QueueSorter { +    const FunctionDifferenceEngine &fde; +    explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {} + +    bool operator()(const BlockPair &Old, const BlockPair &New) { +      return fde.getUnprocPredCount(Old.first) +           < fde.getUnprocPredCount(New.first); +    } +  }; + +  /// A queue of unified blocks to process. +  PriorityQueue<BlockPair, QueueSorter, 20> Queue; + +  /// Try to unify the given two blocks.  Enqueues them for processing +  /// if they haven't already been processed. +  /// +  /// Returns true if there was a problem unifying them. +  bool tryUnify(BasicBlock *L, BasicBlock *R) { +    BasicBlock *&Ref = Blocks[L]; + +    if (Ref) { +      if (Ref == R) return false; + +      Engine.logf("successor %l cannot be equivalent to %r; " +                  "it's already equivalent to %r") +        << L << R << Ref; +      return true; +    } + +    Ref = R; +    Queue.insert(BlockPair(L, R)); +    return false; +  } +   +  /// Unifies two instructions, given that they're known not to have +  /// structural differences. +  void unify(Instruction *L, Instruction *R) { +    DifferenceEngine::Context C(Engine, L, R); + +    bool Result = diff(L, R, true, true); +    assert(!Result && "structural differences second time around?"); +    (void) Result; +    if (!L->use_empty()) +      Values[L] = R; +  } + +  void processQueue() { +    while (!Queue.empty()) { +      BlockPair Pair = Queue.remove_min(); +      diff(Pair.first, Pair.second); +    } +  } + +  void diff(BasicBlock *L, BasicBlock *R) { +    DifferenceEngine::Context C(Engine, L, R); + +    BasicBlock::iterator LI = L->begin(), LE = L->end(); +    BasicBlock::iterator RI = R->begin(), RE = R->end(); + +    llvm::SmallVector<std::pair<Instruction*,Instruction*>, 20> TentativePairs; + +    do { +      assert(LI != LE && RI != RE); +      Instruction *LeftI = &*LI, *RightI = &*RI; + +      // If the instructions differ, start the more sophisticated diff +      // algorithm at the start of the block. +      if (diff(LeftI, RightI, false, false)) { +        TentativeValues.clear(); +        return runBlockDiff(L->begin(), R->begin()); +      } + +      // Otherwise, tentatively unify them. +      if (!LeftI->use_empty()) +        TentativeValues.insert(std::make_pair(LeftI, RightI)); + +      ++LI, ++RI; +    } while (LI != LE); // This is sufficient: we can't get equality of +                        // terminators if there are residual instructions. + +    // Unify everything in the block, non-tentatively this time. +    TentativeValues.clear(); +    for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI) +      unify(&*LI, &*RI); +  } + +  bool matchForBlockDiff(Instruction *L, Instruction *R); +  void runBlockDiff(BasicBlock::iterator LI, BasicBlock::iterator RI); + +  bool diffCallSites(CallSite L, CallSite R, bool Complain) { +    // FIXME: call attributes +    if (!equivalentAsOperands(L.getCalledValue(), R.getCalledValue())) { +      if (Complain) Engine.log("called functions differ"); +      return true; +    } +    if (L.arg_size() != R.arg_size()) { +      if (Complain) Engine.log("argument counts differ"); +      return true; +    } +    for (unsigned I = 0, E = L.arg_size(); I != E; ++I) +      if (!equivalentAsOperands(L.getArgument(I), R.getArgument(I))) { +        if (Complain) +          Engine.logf("arguments %l and %r differ") +            << L.getArgument(I) << R.getArgument(I); +        return true; +      } +    return false; +  } + +  bool diff(Instruction *L, Instruction *R, bool Complain, bool TryUnify) { +    // FIXME: metadata (if Complain is set) + +    // Different opcodes always imply different operations. +    if (L->getOpcode() != R->getOpcode()) { +      if (Complain) Engine.log("different instruction types"); +      return true; +    } + +    if (isa<CmpInst>(L)) { +      if (cast<CmpInst>(L)->getPredicate() +            != cast<CmpInst>(R)->getPredicate()) { +        if (Complain) Engine.log("different predicates"); +        return true; +      } +    } else if (isa<CallInst>(L)) { +      return diffCallSites(CallSite(L), CallSite(R), Complain); +    } else if (isa<PHINode>(L)) { +      // FIXME: implement. + +      // This is really wierd;  type uniquing is broken? +      if (L->getType() != R->getType()) { +        if (!L->getType()->isPointerTy() || !R->getType()->isPointerTy()) { +          if (Complain) Engine.log("different phi types"); +          return true; +        } +      } +      return false; + +    // Terminators. +    } else if (isa<InvokeInst>(L)) { +      InvokeInst *LI = cast<InvokeInst>(L); +      InvokeInst *RI = cast<InvokeInst>(R); +      if (diffCallSites(CallSite(LI), CallSite(RI), Complain)) +        return true; + +      if (TryUnify) { +        tryUnify(LI->getNormalDest(), RI->getNormalDest()); +        tryUnify(LI->getUnwindDest(), RI->getUnwindDest()); +      } +      return false; + +    } else if (isa<BranchInst>(L)) { +      BranchInst *LI = cast<BranchInst>(L); +      BranchInst *RI = cast<BranchInst>(R); +      if (LI->isConditional() != RI->isConditional()) { +        if (Complain) Engine.log("branch conditionality differs"); +        return true; +      } + +      if (LI->isConditional()) { +        if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) { +          if (Complain) Engine.log("branch conditions differ"); +          return true; +        } +        if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1)); +      } +      if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0)); +      return false; + +    } else if (isa<SwitchInst>(L)) { +      SwitchInst *LI = cast<SwitchInst>(L); +      SwitchInst *RI = cast<SwitchInst>(R); +      if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) { +        if (Complain) Engine.log("switch conditions differ"); +        return true; +      } +      if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest()); + +      bool Difference = false; + +      DenseMap<ConstantInt*,BasicBlock*> LCases; +      for (unsigned I = 1, E = LI->getNumCases(); I != E; ++I) +        LCases[LI->getCaseValue(I)] = LI->getSuccessor(I); +      for (unsigned I = 1, E = RI->getNumCases(); I != E; ++I) { +        ConstantInt *CaseValue = RI->getCaseValue(I); +        BasicBlock *LCase = LCases[CaseValue]; +        if (LCase) { +          if (TryUnify) tryUnify(LCase, RI->getSuccessor(I)); +          LCases.erase(CaseValue); +        } else if (!Difference) { +          if (Complain) +            Engine.logf("right switch has extra case %r") << CaseValue; +          Difference = true; +        } +      } +      if (!Difference) +        for (DenseMap<ConstantInt*,BasicBlock*>::iterator +               I = LCases.begin(), E = LCases.end(); I != E; ++I) { +          if (Complain) +            Engine.logf("left switch has extra case %l") << I->first; +          Difference = true; +        } +      return Difference; +    } else if (isa<UnreachableInst>(L)) { +      return false; +    } + +    if (L->getNumOperands() != R->getNumOperands()) { +      if (Complain) Engine.log("instructions have different operand counts"); +      return true; +    } + +    for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { +      Value *LO = L->getOperand(I), *RO = R->getOperand(I); +      if (!equivalentAsOperands(LO, RO)) { +        if (Complain) Engine.logf("operands %l and %r differ") << LO << RO; +        return true; +      } +    } + +    return false; +  } + +  bool equivalentAsOperands(Constant *L, Constant *R) { +    // Use equality as a preliminary filter. +    if (L == R) +      return true; + +    if (L->getValueID() != R->getValueID()) +      return false; +     +    // Ask the engine about global values. +    if (isa<GlobalValue>(L)) +      return Engine.equivalentAsOperands(cast<GlobalValue>(L), +                                         cast<GlobalValue>(R)); + +    // Compare constant expressions structurally. +    if (isa<ConstantExpr>(L)) +      return equivalentAsOperands(cast<ConstantExpr>(L), +                                  cast<ConstantExpr>(R)); + +    // Nulls of the "same type" don't always actually have the same +    // type; I don't know why.  Just white-list them. +    if (isa<ConstantPointerNull>(L)) +      return true; + +    // Block addresses only match if we've already encountered the +    // block.  FIXME: tentative matches? +    if (isa<BlockAddress>(L)) +      return Blocks[cast<BlockAddress>(L)->getBasicBlock()] +                 == cast<BlockAddress>(R)->getBasicBlock(); + +    return false; +  } + +  bool equivalentAsOperands(ConstantExpr *L, ConstantExpr *R) { +    if (L == R) +      return true; +    if (L->getOpcode() != R->getOpcode()) +      return false; + +    switch (L->getOpcode()) { +    case Instruction::ICmp: +    case Instruction::FCmp: +      if (L->getPredicate() != R->getPredicate()) +        return false; +      break; + +    case Instruction::GetElementPtr: +      // FIXME: inbounds? +      break; + +    default: +      break; +    } + +    if (L->getNumOperands() != R->getNumOperands()) +      return false; + +    for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) +      if (!equivalentAsOperands(L->getOperand(I), R->getOperand(I))) +        return false; + +    return true; +  } + +  bool equivalentAsOperands(Value *L, Value *R) { +    // Fall out if the values have different kind. +    // This possibly shouldn't take priority over oracles. +    if (L->getValueID() != R->getValueID()) +      return false; + +    // Value subtypes:  Argument, Constant, Instruction, BasicBlock, +    //                  InlineAsm, MDNode, MDString, PseudoSourceValue + +    if (isa<Constant>(L)) +      return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R)); + +    if (isa<Instruction>(L)) +      return Values[L] == R || TentativeValues.count(std::make_pair(L, R)); + +    if (isa<Argument>(L)) +      return Values[L] == R; + +    if (isa<BasicBlock>(L)) +      return Blocks[cast<BasicBlock>(L)] != R; + +    // Pretend everything else is identical. +    return true; +  } + +  // Avoid a gcc warning about accessing 'this' in an initializer. +  FunctionDifferenceEngine *this_() { return this; } + +public: +  FunctionDifferenceEngine(DifferenceEngine &Engine) : +    Engine(Engine), Queue(QueueSorter(*this_())) {} + +  void diff(Function *L, Function *R) { +    if (L->arg_size() != R->arg_size()) +      Engine.log("different argument counts"); + +    // Map the arguments. +    for (Function::arg_iterator +           LI = L->arg_begin(), LE = L->arg_end(), +           RI = R->arg_begin(), RE = R->arg_end(); +         LI != LE && RI != RE; ++LI, ++RI) +      Values[&*LI] = &*RI; + +    tryUnify(&*L->begin(), &*R->begin()); +    processQueue(); +  } +}; + +struct DiffEntry { +  DiffEntry() : Cost(0) {} + +  unsigned Cost; +  llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange +}; + +bool FunctionDifferenceEngine::matchForBlockDiff(Instruction *L, +                                                 Instruction *R) { +  return !diff(L, R, false, false); +} + +void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart, +                                            BasicBlock::iterator RStart) { +  BasicBlock::iterator LE = LStart->getParent()->end(); +  BasicBlock::iterator RE = RStart->getParent()->end(); + +  unsigned NL = std::distance(LStart, LE); + +  SmallVector<DiffEntry, 20> Paths1(NL+1); +  SmallVector<DiffEntry, 20> Paths2(NL+1); + +  DiffEntry *Cur = Paths1.data(); +  DiffEntry *Next = Paths2.data(); + +  const unsigned LeftCost = 2; +  const unsigned RightCost = 2; +  const unsigned MatchCost = 0; + +  assert(TentativeValues.empty()); + +  // Initialize the first column. +  for (unsigned I = 0; I != NL+1; ++I) { +    Cur[I].Cost = I * LeftCost; +    for (unsigned J = 0; J != I; ++J) +      Cur[I].Path.push_back(DifferenceEngine::DC_left); +  } + +  for (BasicBlock::iterator RI = RStart; RI != RE; ++RI) { +    // Initialize the first row. +    Next[0] = Cur[0]; +    Next[0].Cost += RightCost; +    Next[0].Path.push_back(DifferenceEngine::DC_right); + +    unsigned Index = 1; +    for (BasicBlock::iterator LI = LStart; LI != LE; ++LI, ++Index) { +      if (matchForBlockDiff(&*LI, &*RI)) { +        Next[Index] = Cur[Index-1]; +        Next[Index].Cost += MatchCost; +        Next[Index].Path.push_back(DifferenceEngine::DC_match); +        TentativeValues.insert(std::make_pair(&*LI, &*RI)); +      } else if (Next[Index-1].Cost <= Cur[Index].Cost) { +        Next[Index] = Next[Index-1]; +        Next[Index].Cost += LeftCost; +        Next[Index].Path.push_back(DifferenceEngine::DC_left); +      } else { +        Next[Index] = Cur[Index]; +        Next[Index].Cost += RightCost; +        Next[Index].Path.push_back(DifferenceEngine::DC_right); +      } +    } + +    std::swap(Cur, Next); +  } + +  // We don't need the tentative values anymore; everything from here +  // on out should be non-tentative. +  TentativeValues.clear(); + +  SmallVectorImpl<char> &Path = Cur[NL].Path; +  BasicBlock::iterator LI = LStart, RI = RStart; + +  DifferenceEngine::DiffLogBuilder Diff(Engine); + +  // Drop trailing matches. +  while (Path.back() == DifferenceEngine::DC_match) +    Path.pop_back(); + +  // Skip leading matches. +  SmallVectorImpl<char>::iterator +    PI = Path.begin(), PE = Path.end(); +  while (PI != PE && *PI == DifferenceEngine::DC_match) { +    unify(&*LI, &*RI); +    ++PI, ++LI, ++RI; +  } + +  for (; PI != PE; ++PI) { +    switch (static_cast<DifferenceEngine::DiffChange>(*PI)) { +    case DifferenceEngine::DC_match: +      assert(LI != LE && RI != RE); +      { +        Instruction *L = &*LI, *R = &*RI; +        unify(L, R); +        Diff.addMatch(L, R); +      } +      ++LI; ++RI; +      break; + +    case DifferenceEngine::DC_left: +      assert(LI != LE); +      Diff.addLeft(&*LI); +      ++LI; +      break; + +    case DifferenceEngine::DC_right: +      assert(RI != RE); +      Diff.addRight(&*RI); +      ++RI; +      break; +    } +  } + +  // Finishing unifying and complaining about the tails of the block, +  // which should be matches all the way through. +  while (LI != LE) { +    assert(RI != RE); +    unify(&*LI, &*RI); +    ++LI, ++RI; +  } + +  // If the terminators have different kinds, but one is an invoke and the +  // other is an unconditional branch immediately following a call, unify +  // the results and the destinations. +  TerminatorInst *LTerm = LStart->getParent()->getTerminator(); +  TerminatorInst *RTerm = RStart->getParent()->getTerminator(); +  if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) { +    if (cast<BranchInst>(LTerm)->isConditional()) return; +    BasicBlock::iterator I = LTerm; +    if (I == LStart->getParent()->begin()) return; +    --I; +    if (!isa<CallInst>(*I)) return; +    CallInst *LCall = cast<CallInst>(&*I); +    InvokeInst *RInvoke = cast<InvokeInst>(RTerm); +    if (!equivalentAsOperands(LCall->getCalledValue(), RInvoke->getCalledValue())) +      return; +    if (!LCall->use_empty()) +      Values[LCall] = RInvoke; +    tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest()); +  } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) { +    if (cast<BranchInst>(RTerm)->isConditional()) return; +    BasicBlock::iterator I = RTerm; +    if (I == RStart->getParent()->begin()) return; +    --I; +    if (!isa<CallInst>(*I)) return; +    CallInst *RCall = cast<CallInst>(I); +    InvokeInst *LInvoke = cast<InvokeInst>(LTerm); +    if (!equivalentAsOperands(LInvoke->getCalledValue(), RCall->getCalledValue())) +      return; +    if (!LInvoke->use_empty()) +      Values[LInvoke] = RCall; +    tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0)); +  } +} + +} + +void DifferenceEngine::diff(Function *L, Function *R) { +  Context C(*this, L, R); + +  // FIXME: types +  // FIXME: attributes and CC +  // FIXME: parameter attributes +   +  // If both are declarations, we're done. +  if (L->empty() && R->empty()) +    return; +  else if (L->empty()) +    log("left function is declaration, right function is definition"); +  else if (R->empty()) +    log("right function is declaration, left function is definition"); +  else +    FunctionDifferenceEngine(*this).diff(L, R); +} + +void DifferenceEngine::diff(Module *L, Module *R) { +  StringSet<> LNames; +  SmallVector<std::pair<Function*,Function*>, 20> Queue; + +  for (Module::iterator I = L->begin(), E = L->end(); I != E; ++I) { +    Function *LFn = &*I; +    LNames.insert(LFn->getName()); + +    if (Function *RFn = R->getFunction(LFn->getName())) +      Queue.push_back(std::make_pair(LFn, RFn)); +    else +      logf("function %l exists only in left module") << LFn; +  } + +  for (Module::iterator I = R->begin(), E = R->end(); I != E; ++I) { +    Function *RFn = &*I; +    if (!LNames.count(RFn->getName())) +      logf("function %r exists only in right module") << RFn; +  } + +  for (SmallVectorImpl<std::pair<Function*,Function*> >::iterator +         I = Queue.begin(), E = Queue.end(); I != E; ++I) +    diff(I->first, I->second); +} + +bool DifferenceEngine::equivalentAsOperands(GlobalValue *L, GlobalValue *R) { +  if (globalValueOracle) return (*globalValueOracle)(L, R); +  return L->getName() == R->getName(); +} | 
