Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r--   lib/Transforms/InstCombine/InstCombineCalls.cpp | 174
1 file changed, 84 insertions, 90 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 391c430dab75..aa055121e710 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -16,16 +16,20 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
@@ -40,18 +44,26 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Statepoint.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
 #include <cstring>
+#include <utility>
 #include <vector>

 using namespace llvm;
@@ -94,8 +106,8 @@ static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
   return ConstantVector::get(BoolVec);
 }

-Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy(
-    ElementUnorderedAtomicMemCpyInst *AMI) {
+Instruction *
+InstCombiner::SimplifyElementUnorderedAtomicMemCpy(AtomicMemCpyInst *AMI) {
   // Try to unfold this intrinsic into sequence of explicit atomic loads and
   // stores.
   // First check that number of elements is compile time constant.
@@ -515,7 +527,7 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
   // If all elements out of range or UNDEF, return vector of zeros/undefs.
   // ArithmeticShift should only hit this if they are all UNDEF.
   auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
-  if (all_of(ShiftAmts, OutOfRange)) {
+  if (llvm::all_of(ShiftAmts, OutOfRange)) {
     SmallVector<Constant *, 8> ConstantVec;
     for (int Idx : ShiftAmts) {
       if (Idx < 0) {
@@ -1094,72 +1106,6 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
   return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
 }

-/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
-/// source vectors, unless a zero bit is set. If a zero bit is set,
-/// then ignore that half of the mask and clear that half of the vector.
-static Value *simplifyX86vperm2(const IntrinsicInst &II,
-                                InstCombiner::BuilderTy &Builder) {
-  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
-  if (!CInt)
-    return nullptr;
-
-  VectorType *VecTy = cast<VectorType>(II.getType());
-  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
-
-  // The immediate permute control byte looks like this:
-  //    [1:0] - select 128 bits from sources for low half of destination
-  //    [2]   - ignore
-  //    [3]   - zero low half of destination
-  //    [5:4] - select 128 bits from sources for high half of destination
-  //    [6]   - ignore
-  //    [7]   - zero high half of destination
-
-  uint8_t Imm = CInt->getZExtValue();
-
-  bool LowHalfZero = Imm & 0x08;
-  bool HighHalfZero = Imm & 0x80;
-
-  // If both zero mask bits are set, this was just a weird way to
-  // generate a zero vector.
-  if (LowHalfZero && HighHalfZero)
-    return ZeroVector;
-
-  // If 0 or 1 zero mask bits are set, this is a simple shuffle.
-  unsigned NumElts = VecTy->getNumElements();
-  unsigned HalfSize = NumElts / 2;
-  SmallVector<uint32_t, 8> ShuffleMask(NumElts);
-
-  // The high bit of the selection field chooses the 1st or 2nd operand.
-  bool LowInputSelect = Imm & 0x02;
-  bool HighInputSelect = Imm & 0x20;
-
-  // The low bit of the selection field chooses the low or high half
-  // of the selected operand.
-  bool LowHalfSelect = Imm & 0x01;
-  bool HighHalfSelect = Imm & 0x10;
-
-  // Determine which operand(s) are actually in use for this instruction.
-  Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
-  Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
-
-  // If needed, replace operands based on zero mask.
-  V0 = LowHalfZero ? ZeroVector : V0;
-  V1 = HighHalfZero ? ZeroVector : V1;
-
-  // Permute low half of result.
-  unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
-  for (unsigned i = 0; i < HalfSize; ++i)
-    ShuffleMask[i] = StartIndex + i;
-
-  // Permute high half of result.
-  StartIndex = HighHalfSelect ? HalfSize : 0;
-  StartIndex += NumElts;
-  for (unsigned i = 0; i < HalfSize; ++i)
-    ShuffleMask[i + HalfSize] = StartIndex + i;
-
-  return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
-}
-
 /// Decode XOP integer vector comparison intrinsics.
 static Value *simplifyX86vpcom(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder,
@@ -1650,7 +1596,6 @@ static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
   // IntrinsicInstr with target-generic LLVM IR.
   const SimplifyAction Action = [II]() -> SimplifyAction {
     switch (II->getIntrinsicID()) {
-      // NVVM intrinsics that map directly to LLVM intrinsics.
     case Intrinsic::nvvm_ceil_d:
       return {Intrinsic::ceil, FTZ_Any};

@@ -1932,7 +1877,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     if (Changed) return II;
   }

-  if (auto *AMI = dyn_cast<ElementUnorderedAtomicMemCpyInst>(II)) {
+  if (auto *AMI = dyn_cast<AtomicMemCpyInst>(II)) {
     if (Constant *C = dyn_cast<Constant>(AMI->getLength()))
       if (C->isNullValue())
         return eraseInstFromFunction(*AMI);
@@ -2072,7 +2017,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   }
   case Intrinsic::fmuladd: {
     // Canonicalize fast fmuladd to the separate fmul + fadd.
-    if (II->hasUnsafeAlgebra()) {
+    if (II->isFast()) {
       BuilderTy::FastMathFlagGuard Guard(Builder);
       Builder.setFastMathFlags(II->getFastMathFlags());
       Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
@@ -2248,6 +2193,52 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     }
     break;

+  case Intrinsic::x86_bmi_bextr_32:
+  case Intrinsic::x86_bmi_bextr_64:
+  case Intrinsic::x86_tbm_bextri_u32:
+  case Intrinsic::x86_tbm_bextri_u64:
+    // If the RHS is a constant we can try some simplifications.
+    if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+      uint64_t Shift = C->getZExtValue();
+      uint64_t Length = (Shift >> 8) & 0xff;
+      Shift &= 0xff;
+      unsigned BitWidth = II->getType()->getIntegerBitWidth();
+      // If the length is 0 or the shift is out of range, replace with zero.
+      if (Length == 0 || Shift >= BitWidth)
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
+      // If the LHS is also a constant, we can completely constant fold this.
+      if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
+        uint64_t Result = InC->getZExtValue() >> Shift;
+        if (Length > BitWidth)
+          Length = BitWidth;
+        Result &= maskTrailingOnes<uint64_t>(Length);
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
+      }
+      // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
+      // are only masking bits that a shift already cleared?
+    }
+    break;
+
+  case Intrinsic::x86_bmi_bzhi_32:
+  case Intrinsic::x86_bmi_bzhi_64:
+    // If the RHS is a constant we can try some simplifications.
+    if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+      uint64_t Index = C->getZExtValue() & 0xff;
+      unsigned BitWidth = II->getType()->getIntegerBitWidth();
+      if (Index >= BitWidth)
+        return replaceInstUsesWith(CI, II->getArgOperand(0));
+      if (Index == 0)
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
+      // If the LHS is also a constant, we can completely constant fold this.
+      if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
+        uint64_t Result = InC->getZExtValue();
+        Result &= maskTrailingOnes<uint64_t>(Index);
+        return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
+      }
+      // TODO should we convert this to an AND if the RHS is constant?
+    }
+    break;
+
   case Intrinsic::x86_vcvtph2ps_128:
   case Intrinsic::x86_vcvtph2ps_256: {
     auto Arg = II->getArgOperand(0);
@@ -2333,11 +2324,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::x86_sse2_pmovmskb_128:
   case Intrinsic::x86_avx_movmsk_pd_256:
   case Intrinsic::x86_avx_movmsk_ps_256:
-  case Intrinsic::x86_avx2_pmovmskb: {
+  case Intrinsic::x86_avx2_pmovmskb:
     if (Value *V = simplifyX86movmsk(*II))
       return replaceInstUsesWith(*II, V);
     break;
-  }

   case Intrinsic::x86_sse_comieq_ss:
   case Intrinsic::x86_sse_comige_ss:
@@ -2972,14 +2962,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     }
     break;

-  case Intrinsic::x86_avx_vperm2f128_pd_256:
-  case Intrinsic::x86_avx_vperm2f128_ps_256:
-  case Intrinsic::x86_avx_vperm2f128_si_256:
-  case Intrinsic::x86_avx2_vperm2i128:
-    if (Value *V = simplifyX86vperm2(*II, Builder))
-      return replaceInstUsesWith(*II, V);
-    break;
-
   case Intrinsic::x86_avx_maskload_ps:
   case Intrinsic::x86_avx_maskload_pd:
   case Intrinsic::x86_avx_maskload_ps_256:
@@ -3399,7 +3381,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return II;

    break;
-
  }
  case Intrinsic::amdgcn_fmed3: {
    // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
@@ -3560,6 +3541,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }

+  case Intrinsic::amdgcn_wqm_vote: {
+    // wqm_vote is identity when the argument is constant.
+    if (!isa<Constant>(II->getArgOperand(0)))
+      break;
+
+    return replaceInstUsesWith(*II, II->getArgOperand(0));
+  }
+  case Intrinsic::amdgcn_kill: {
+    const ConstantInt *C = dyn_cast<ConstantInt>(II->getArgOperand(0));
+    if (!C || !C->getZExtValue())
+      break;
+
+    // amdgcn.kill(i1 1) is a no-op
+    return eraseInstFromFunction(CI);
+  }
   case Intrinsic::stackrestore: {
     // If the save is right next to the restore, remove the restore.  This can
     // happen when variable allocas are DCE'd.
@@ -3611,7 +3607,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::lifetime_start:
     // Asan needs to poison memory to detect invalid access which is possible
     // even for empty lifetime range.
-    if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
+    if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
+        II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
       break;

     if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
@@ -3697,7 +3694,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));

       // isKnownNonNull -> nonnull attribute
-      if (isKnownNonNullAt(DerivedPtr, II, &DT))
+      if (isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT))
         II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
     }

@@ -3740,7 +3737,6 @@ Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
 }

 // InvokeInst simplification
-//
 Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
   return visitCallSite(&II);
 }
@@ -3784,7 +3780,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
   auto InstCombineRAUW = [this](Instruction *From, Value *With) {
     replaceInstUsesWith(*From, With);
   };
-  LibCallSimplifier Simplifier(DL, &TLI, InstCombineRAUW);
+  LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW);
   if (Value *With = Simplifier.optimizeCall(CI)) {
     ++NumSimplified;
     return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
@@ -3853,7 +3849,6 @@ static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
 // Given a call to llvm.adjust.trampoline, find and return the corresponding
 // call to llvm.init.trampoline if the call to the trampoline can be optimized
 // to a direct call to a function.  Otherwise return NULL.
-
 static IntrinsicInst *findInitTrampoline(Value *Callee) {
   Callee = Callee->stripPointerCasts();
   IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
@@ -3886,7 +3881,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
   for (Value *V : CS.args()) {
     if (V->getType()->isPointerTy() &&
         !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
-        isKnownNonNullAt(V, CS.getInstruction(), &DT))
+        isKnownNonZero(V, DL, 0, &AC, CS.getInstruction(), &DT))
       ArgNos.push_back(ArgNo);
     ArgNo++;
   }
@@ -4021,7 +4016,6 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   // Okay, this is a cast from a function to a different type.  Unless doing so
   // would cause a type conversion of one of our arguments, change this call to
   // be a direct call with arguments casted to the appropriate types.
-  //
   FunctionType *FT = Callee->getFunctionType();
   Type *OldRetTy = Caller->getType();
   Type *NewRetTy = FT->getReturnType();
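Side note on the new BEXTR/BZHI folds (hunk @@ -2248,6 +2193,52 above): when the control operand is a ConstantInt, the result can be computed directly from the shift/length byte fields (BEXTR/BEXTRI) or the index byte (BZHI). The standalone C++ sketch below mirrors that bit arithmetic; the foldBextr/foldBzhi helper names are illustrative only, not LLVM APIs, and maskTrailingOnes is approximated here with plain shifts.

#include <cassert>
#include <cstdint>

// Illustration only: the same constant folding the patch performs for
// llvm.x86.bmi.bextr.* / llvm.x86.tbm.bextri.u* when both operands are known.
static uint64_t foldBextr(uint64_t Src, uint64_t Ctl, unsigned BitWidth) {
  uint64_t Shift = Ctl & 0xff;          // control bits [7:0]: starting bit
  uint64_t Length = (Ctl >> 8) & 0xff;  // control bits [15:8]: number of bits
  if (Length == 0 || Shift >= BitWidth)
    return 0;                           // out of range folds to zero
  if (Length > BitWidth)
    Length = BitWidth;
  uint64_t Mask =
      Length == 64 ? ~0ULL : ((1ULL << Length) - 1); // maskTrailingOnes(Length)
  return (Src >> Shift) & Mask;
}

// Illustration only: the folding for llvm.x86.bmi.bzhi.* with a constant index.
static uint64_t foldBzhi(uint64_t Src, uint64_t Index, unsigned BitWidth) {
  Index &= 0xff;
  if (Index >= BitWidth)
    return Src;                         // index covers every bit: identity
  if (Index == 0)
    return 0;                           // keep zero low bits: result is zero
  return Src & ((1ULL << Index) - 1);   // keep only the low Index bits
}

int main() {
  // bextr(0xABCD, start = 4, length = 8) extracts bits [11:4] -> 0xBC.
  assert(foldBextr(0xABCD, (8u << 8) | 4u, 32) == 0xBC);
  // bzhi(0xFFFF, index = 8) keeps the low 8 bits -> 0xFF.
  assert(foldBzhi(0xFFFF, 8, 32) == 0xFF);
  return 0;
}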
