diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 | 
| commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
| tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | |
| parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) | |
Notes
Diffstat (limited to 'lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp')
| -rw-r--r-- | lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 109 | 
1 files changed, 102 insertions, 7 deletions
| diff --git a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index b622d018478a..c795866ec0f2 100644 --- a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -16,7 +16,7 @@  #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"  #include "AggressiveInstCombineInternal.h"  #include "llvm-c/Initialization.h" -#include "llvm-c/Transforms/Scalar.h" +#include "llvm-c/Transforms/AggressiveInstCombine.h"  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/BasicAliasAnalysis.h"  #include "llvm/Analysis/GlobalsModRef.h" @@ -59,6 +59,99 @@ public:  };  } // namespace +/// Match a pattern for a bitwise rotate operation that partially guards +/// against undefined behavior by branching around the rotation when the shift +/// amount is 0. +static bool foldGuardedRotateToFunnelShift(Instruction &I) { +  if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2) +    return false; + +  // As with the one-use checks below, this is not strictly necessary, but we +  // are being cautious to avoid potential perf regressions on targets that +  // do not actually have a rotate instruction (where the funnel shift would be +  // expanded back into math/shift/logic ops). +  if (!isPowerOf2_32(I.getType()->getScalarSizeInBits())) +    return false; + +  // Match V to funnel shift left/right and capture the source operand and +  // shift amount in X and Y. +  auto matchRotate = [](Value *V, Value *&X, Value *&Y) { +    Value *L0, *L1, *R0, *R1; +    unsigned Width = V->getType()->getScalarSizeInBits(); +    auto Sub = m_Sub(m_SpecificInt(Width), m_Value(R1)); + +    // rotate_left(X, Y) == (X << Y) | (X >> (Width - Y)) +    auto RotL = m_OneUse( +        m_c_Or(m_Shl(m_Value(L0), m_Value(L1)), m_LShr(m_Value(R0), Sub))); +    if (RotL.match(V) && L0 == R0 && L1 == R1) { +      X = L0; +      Y = L1; +      return Intrinsic::fshl; +    } + +    // rotate_right(X, Y) == (X >> Y) | (X << (Width - Y)) +    auto RotR = m_OneUse( +        m_c_Or(m_LShr(m_Value(L0), m_Value(L1)), m_Shl(m_Value(R0), Sub))); +    if (RotR.match(V) && L0 == R0 && L1 == R1) { +      X = L0; +      Y = L1; +      return Intrinsic::fshr; +    } + +    return Intrinsic::not_intrinsic; +  }; + +  // One phi operand must be a rotate operation, and the other phi operand must +  // be the source value of that rotate operation: +  // phi [ rotate(RotSrc, RotAmt), RotBB ], [ RotSrc, GuardBB ] +  PHINode &Phi = cast<PHINode>(I); +  Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1); +  Value *RotSrc, *RotAmt; +  Intrinsic::ID IID = matchRotate(P0, RotSrc, RotAmt); +  if (IID == Intrinsic::not_intrinsic || RotSrc != P1) { +    IID = matchRotate(P1, RotSrc, RotAmt); +    if (IID == Intrinsic::not_intrinsic || RotSrc != P0) +      return false; +    assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) && +           "Pattern must match funnel shift left or right"); +  } + +  // The incoming block with our source operand must be the "guard" block. +  // That must contain a cmp+branch to avoid the rotate when the shift amount +  // is equal to 0. The other incoming block is the block with the rotate. +  BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1); +  BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1); +  Instruction *TermI = GuardBB->getTerminator(); +  BasicBlock *TrueBB, *FalseBB; +  ICmpInst::Predicate Pred; +  if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()), TrueBB, +                         FalseBB))) +    return false; + +  BasicBlock *PhiBB = Phi.getParent(); +  if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB) +    return false; + +  // We matched a variation of this IR pattern: +  // GuardBB: +  //   %cmp = icmp eq i32 %RotAmt, 0 +  //   br i1 %cmp, label %PhiBB, label %RotBB +  // RotBB: +  //   %sub = sub i32 32, %RotAmt +  //   %shr = lshr i32 %X, %sub +  //   %shl = shl i32 %X, %RotAmt +  //   %rot = or i32 %shr, %shl +  //   br label %PhiBB +  // PhiBB: +  //   %cond = phi i32 [ %rot, %RotBB ], [ %X, %GuardBB ] +  // --> +  // llvm.fshl.i32(i32 %X, i32 %RotAmt) +  IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt()); +  Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType()); +  Phi.replaceAllUsesWith(Builder.CreateCall(F, {RotSrc, RotSrc, RotAmt})); +  return true; +} +  /// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and  /// the bit indexes (Mask) needed by a masked compare. If we're matching a chain  /// of 'and' ops, then we also need to capture the fact that we saw an @@ -69,9 +162,9 @@ struct MaskOps {    bool MatchAndChain;    bool FoundAnd1; -  MaskOps(unsigned BitWidth, bool MatchAnds) : -      Root(nullptr), Mask(APInt::getNullValue(BitWidth)), -      MatchAndChain(MatchAnds), FoundAnd1(false) {} +  MaskOps(unsigned BitWidth, bool MatchAnds) +      : Root(nullptr), Mask(APInt::getNullValue(BitWidth)), +        MatchAndChain(MatchAnds), FoundAnd1(false) {}  };  /// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a @@ -152,8 +245,8 @@ static bool foldAnyOrAllBitsSet(Instruction &I) {    IRBuilder<> Builder(&I);    Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);    Value *And = Builder.CreateAnd(MOps.Root, Mask); -  Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask) : -                                 Builder.CreateIsNotNull(And); +  Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask) +                               : Builder.CreateIsNotNull(And);    Value *Zext = Builder.CreateZExt(Cmp, I.getType());    I.replaceAllUsesWith(Zext);    return true; @@ -174,8 +267,10 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {      // Also, we want to avoid matching partial patterns.      // TODO: It would be more efficient if we removed dead instructions      // iteratively in this loop rather than waiting until the end. -    for (Instruction &I : make_range(BB.rbegin(), BB.rend())) +    for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {        MadeChange |= foldAnyOrAllBitsSet(I); +      MadeChange |= foldGuardedRotateToFunnelShift(I); +    }    }    // We're done with transforms, so remove dead instructions. | 
