| author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
| commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 | |
| tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | |
| parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b | |
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 983 |

1 file changed, 589 insertions, 394 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index f463c5fa1138a..c734c9a68fb2d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -15,12 +15,15 @@  #include "llvm/ADT/APInt.h"  #include "llvm/ADT/APSInt.h"  #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FloatingPointMode.h"  #include "llvm/ADT/None.h"  #include "llvm/ADT/Optional.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumeBundleQueries.h"  #include "llvm/Analysis/AssumptionCache.h"  #include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/Loads.h" @@ -40,12 +43,13 @@  #include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h"  #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IntrinsicsX86.h" -#include "llvm/IR/IntrinsicsARM.h"  #include "llvm/IR/IntrinsicsAArch64.h" -#include "llvm/IR/IntrinsicsNVPTX.h"  #include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/IntrinsicsARM.h" +#include "llvm/IR/IntrinsicsHexagon.h" +#include "llvm/IR/IntrinsicsNVPTX.h"  #include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/IR/IntrinsicsX86.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/Metadata.h"  #include "llvm/IR/PatternMatch.h" @@ -114,16 +118,16 @@ static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {  }  Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { -  unsigned DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT); -  unsigned CopyDstAlign = MI->getDestAlignment(); -  if (CopyDstAlign < DstAlign){ +  Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT); +  MaybeAlign CopyDstAlign = MI->getDestAlign(); +  if (!CopyDstAlign || *CopyDstAlign < DstAlign) {      MI->setDestAlignment(DstAlign);      return MI;    } -  unsigned SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT); -  unsigned CopySrcAlign = MI->getSourceAlignment(); -  if (CopySrcAlign < SrcAlign) { +  Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT); +  MaybeAlign CopySrcAlign = MI->getSourceAlign(); +  if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {      MI->setSourceAlignment(SrcAlign);      return MI;    } @@ -157,7 +161,7 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {    // into libcall in CodeGen. This is not evident performance gain so disable    // it now.    if (isa<AtomicMemTransferInst>(MI)) -    if (CopyDstAlign < Size || CopySrcAlign < Size) +    if (*CopyDstAlign < Size || *CopySrcAlign < Size)        return nullptr;    // Use an integer load+store unless we can find something better. @@ -191,8 +195,7 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {    Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);    LoadInst *L = Builder.CreateLoad(IntType, Src);    // Alignment from the mem intrinsic will be better, so use it. -  L->setAlignment( -      MaybeAlign(CopySrcAlign)); // FIXME: Check if we can use Align instead. 
+  L->setAlignment(*CopySrcAlign);    if (CopyMD)      L->setMetadata(LLVMContext::MD_tbaa, CopyMD);    MDNode *LoopMemParallelMD = @@ -205,8 +208,7 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {    StoreInst *S = Builder.CreateStore(L, Dest);    // Alignment from the mem intrinsic will be better, so use it. -  S->setAlignment( -      MaybeAlign(CopyDstAlign)); // FIXME: Check if we can use Align instead. +  S->setAlignment(*CopyDstAlign);    if (CopyMD)      S->setMetadata(LLVMContext::MD_tbaa, CopyMD);    if (LoopMemParallelMD) @@ -231,9 +233,10 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {  }  Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) { -  const unsigned KnownAlignment = +  const Align KnownAlignment =        getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); -  if (MI->getDestAlignment() < KnownAlignment) { +  MaybeAlign MemSetAlign = MI->getDestAlign(); +  if (!MemSetAlign || *MemSetAlign < KnownAlignment) {      MI->setDestAlignment(KnownAlignment);      return MI;    } @@ -293,106 +296,154 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,                                    InstCombiner::BuilderTy &Builder) {    bool LogicalShift = false;    bool ShiftLeft = false; +  bool IsImm = false;    switch (II.getIntrinsicID()) {    default: llvm_unreachable("Unexpected intrinsic!"); -  case Intrinsic::x86_sse2_psra_d: -  case Intrinsic::x86_sse2_psra_w:    case Intrinsic::x86_sse2_psrai_d:    case Intrinsic::x86_sse2_psrai_w: -  case Intrinsic::x86_avx2_psra_d: -  case Intrinsic::x86_avx2_psra_w:    case Intrinsic::x86_avx2_psrai_d:    case Intrinsic::x86_avx2_psrai_w: -  case Intrinsic::x86_avx512_psra_q_128:    case Intrinsic::x86_avx512_psrai_q_128: -  case Intrinsic::x86_avx512_psra_q_256:    case Intrinsic::x86_avx512_psrai_q_256: -  case Intrinsic::x86_avx512_psra_d_512: -  case Intrinsic::x86_avx512_psra_q_512: -  case Intrinsic::x86_avx512_psra_w_512:    case Intrinsic::x86_avx512_psrai_d_512:    case Intrinsic::x86_avx512_psrai_q_512:    case Intrinsic::x86_avx512_psrai_w_512: -    LogicalShift = false; ShiftLeft = false; +    IsImm = true; +    LLVM_FALLTHROUGH; +  case Intrinsic::x86_sse2_psra_d: +  case Intrinsic::x86_sse2_psra_w: +  case Intrinsic::x86_avx2_psra_d: +  case Intrinsic::x86_avx2_psra_w: +  case Intrinsic::x86_avx512_psra_q_128: +  case Intrinsic::x86_avx512_psra_q_256: +  case Intrinsic::x86_avx512_psra_d_512: +  case Intrinsic::x86_avx512_psra_q_512: +  case Intrinsic::x86_avx512_psra_w_512: +    LogicalShift = false; +    ShiftLeft = false;      break; -  case Intrinsic::x86_sse2_psrl_d: -  case Intrinsic::x86_sse2_psrl_q: -  case Intrinsic::x86_sse2_psrl_w:    case Intrinsic::x86_sse2_psrli_d:    case Intrinsic::x86_sse2_psrli_q:    case Intrinsic::x86_sse2_psrli_w: -  case Intrinsic::x86_avx2_psrl_d: -  case Intrinsic::x86_avx2_psrl_q: -  case Intrinsic::x86_avx2_psrl_w:    case Intrinsic::x86_avx2_psrli_d:    case Intrinsic::x86_avx2_psrli_q:    case Intrinsic::x86_avx2_psrli_w: -  case Intrinsic::x86_avx512_psrl_d_512: -  case Intrinsic::x86_avx512_psrl_q_512: -  case Intrinsic::x86_avx512_psrl_w_512:    case Intrinsic::x86_avx512_psrli_d_512:    case Intrinsic::x86_avx512_psrli_q_512:    case Intrinsic::x86_avx512_psrli_w_512: -    LogicalShift = true; ShiftLeft = false; +    IsImm = true; +    LLVM_FALLTHROUGH; +  case Intrinsic::x86_sse2_psrl_d: +  case Intrinsic::x86_sse2_psrl_q: +  case Intrinsic::x86_sse2_psrl_w: +  case Intrinsic::x86_avx2_psrl_d: +  
case Intrinsic::x86_avx2_psrl_q: +  case Intrinsic::x86_avx2_psrl_w: +  case Intrinsic::x86_avx512_psrl_d_512: +  case Intrinsic::x86_avx512_psrl_q_512: +  case Intrinsic::x86_avx512_psrl_w_512: +    LogicalShift = true; +    ShiftLeft = false;      break; -  case Intrinsic::x86_sse2_psll_d: -  case Intrinsic::x86_sse2_psll_q: -  case Intrinsic::x86_sse2_psll_w:    case Intrinsic::x86_sse2_pslli_d:    case Intrinsic::x86_sse2_pslli_q:    case Intrinsic::x86_sse2_pslli_w: -  case Intrinsic::x86_avx2_psll_d: -  case Intrinsic::x86_avx2_psll_q: -  case Intrinsic::x86_avx2_psll_w:    case Intrinsic::x86_avx2_pslli_d:    case Intrinsic::x86_avx2_pslli_q:    case Intrinsic::x86_avx2_pslli_w: -  case Intrinsic::x86_avx512_psll_d_512: -  case Intrinsic::x86_avx512_psll_q_512: -  case Intrinsic::x86_avx512_psll_w_512:    case Intrinsic::x86_avx512_pslli_d_512:    case Intrinsic::x86_avx512_pslli_q_512:    case Intrinsic::x86_avx512_pslli_w_512: -    LogicalShift = true; ShiftLeft = true; +    IsImm = true; +    LLVM_FALLTHROUGH; +  case Intrinsic::x86_sse2_psll_d: +  case Intrinsic::x86_sse2_psll_q: +  case Intrinsic::x86_sse2_psll_w: +  case Intrinsic::x86_avx2_psll_d: +  case Intrinsic::x86_avx2_psll_q: +  case Intrinsic::x86_avx2_psll_w: +  case Intrinsic::x86_avx512_psll_d_512: +  case Intrinsic::x86_avx512_psll_q_512: +  case Intrinsic::x86_avx512_psll_w_512: +    LogicalShift = true; +    ShiftLeft = true;      break;    }    assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left"); -  // Simplify if count is constant. -  auto Arg1 = II.getArgOperand(1); -  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1); -  auto CDV = dyn_cast<ConstantDataVector>(Arg1); -  auto CInt = dyn_cast<ConstantInt>(Arg1); -  if (!CAZ && !CDV && !CInt) -    return nullptr; - -  APInt Count(64, 0); -  if (CDV) { -    // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector -    // operand to compute the shift amount. -    auto VT = cast<VectorType>(CDV->getType()); -    unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits(); -    assert((64 % BitWidth) == 0 && "Unexpected packed shift size"); -    unsigned NumSubElts = 64 / BitWidth; - -    // Concatenate the sub-elements to create the 64-bit value. -    for (unsigned i = 0; i != NumSubElts; ++i) { -      unsigned SubEltIdx = (NumSubElts - 1) - i; -      auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx)); -      Count <<= BitWidth; -      Count |= SubElt->getValue().zextOrTrunc(64); -    } -  } -  else if (CInt) -    Count = CInt->getValue(); -    auto Vec = II.getArgOperand(0); +  auto Amt = II.getArgOperand(1);    auto VT = cast<VectorType>(Vec->getType());    auto SVT = VT->getElementType(); +  auto AmtVT = Amt->getType();    unsigned VWidth = VT->getNumElements();    unsigned BitWidth = SVT->getPrimitiveSizeInBits(); +  // If the shift amount is guaranteed to be in-range we can replace it with a +  // generic shift. If its guaranteed to be out of range, logical shifts combine to +  // zero and arithmetic shifts are clamped to (BitWidth - 1). +  if (IsImm) { +    assert(AmtVT ->isIntegerTy(32) && +           "Unexpected shift-by-immediate type"); +    KnownBits KnownAmtBits = +        llvm::computeKnownBits(Amt, II.getModule()->getDataLayout()); +    if (KnownAmtBits.getMaxValue().ult(BitWidth)) { +      Amt = Builder.CreateZExtOrTrunc(Amt, SVT); +      Amt = Builder.CreateVectorSplat(VWidth, Amt); +      return (LogicalShift ? (ShiftLeft ? 
Builder.CreateShl(Vec, Amt) +                                        : Builder.CreateLShr(Vec, Amt)) +                           : Builder.CreateAShr(Vec, Amt)); +    } +    if (KnownAmtBits.getMinValue().uge(BitWidth)) { +      if (LogicalShift) +        return ConstantAggregateZero::get(VT); +      Amt = ConstantInt::get(SVT, BitWidth - 1); +      return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt)); +    } +  } else { +    // Ensure the first element has an in-range value and the rest of the +    // elements in the bottom 64 bits are zero. +    assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 && +           cast<VectorType>(AmtVT)->getElementType() == SVT && +           "Unexpected shift-by-scalar type"); +    unsigned NumAmtElts = cast<VectorType>(AmtVT)->getNumElements(); +    APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0); +    APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2); +    KnownBits KnownLowerBits = llvm::computeKnownBits( +        Amt, DemandedLower, II.getModule()->getDataLayout()); +    KnownBits KnownUpperBits = llvm::computeKnownBits( +        Amt, DemandedUpper, II.getModule()->getDataLayout()); +    if (KnownLowerBits.getMaxValue().ult(BitWidth) && +        (DemandedUpper.isNullValue() || KnownUpperBits.isZero())) { +      SmallVector<int, 16> ZeroSplat(VWidth, 0); +      Amt = Builder.CreateShuffleVector(Amt, Amt, ZeroSplat); +      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt) +                                        : Builder.CreateLShr(Vec, Amt)) +                           : Builder.CreateAShr(Vec, Amt)); +    } +  } + +  // Simplify if count is constant vector. +  auto CDV = dyn_cast<ConstantDataVector>(Amt); +  if (!CDV) +    return nullptr; + +  // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector +  // operand to compute the shift amount. +  assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 && +         cast<VectorType>(AmtVT)->getElementType() == SVT && +         "Unexpected shift-by-scalar type"); + +  // Concatenate the sub-elements to create the 64-bit value. +  APInt Count(64, 0); +  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) { +    unsigned SubEltIdx = (NumSubElts - 1) - i; +    auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx)); +    Count <<= BitWidth; +    Count |= SubElt->getValue().zextOrTrunc(64); +  } +    // If shift-by-zero then just return the original value.    if (Count.isNullValue())      return Vec; @@ -469,17 +520,29 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,    }    assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left"); -  // Simplify if all shift amounts are constant/undef. -  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1)); -  if (!CShift) -    return nullptr; -    auto Vec = II.getArgOperand(0); +  auto Amt = II.getArgOperand(1);    auto VT = cast<VectorType>(II.getType()); -  auto SVT = VT->getVectorElementType(); +  auto SVT = VT->getElementType();    int NumElts = VT->getNumElements();    int BitWidth = SVT->getIntegerBitWidth(); +  // If the shift amount is guaranteed to be in-range we can replace it with a +  // generic shift. +  APInt UpperBits = +      APInt::getHighBitsSet(BitWidth, BitWidth - Log2_32(BitWidth)); +  if (llvm::MaskedValueIsZero(Amt, UpperBits, +                              II.getModule()->getDataLayout())) { +    return (LogicalShift ? (ShiftLeft ? 
Builder.CreateShl(Vec, Amt) +                                      : Builder.CreateLShr(Vec, Amt)) +                         : Builder.CreateAShr(Vec, Amt)); +  } + +  // Simplify if all shift amounts are constant/undef. +  auto *CShift = dyn_cast<Constant>(Amt); +  if (!CShift) +    return nullptr; +    // Collect each element's shift amount.    // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.    bool AnyOutOfRange = false; @@ -557,10 +620,10 @@ static Value *simplifyX86pack(IntrinsicInst &II,    if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))      return UndefValue::get(ResTy); -  Type *ArgTy = Arg0->getType(); +  auto *ArgTy = cast<VectorType>(Arg0->getType());    unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128; -  unsigned NumSrcElts = ArgTy->getVectorNumElements(); -  assert(ResTy->getVectorNumElements() == (2 * NumSrcElts) && +  unsigned NumSrcElts = ArgTy->getNumElements(); +  assert(cast<VectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&           "Unexpected packing types");    unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes; @@ -600,7 +663,7 @@ static Value *simplifyX86pack(IntrinsicInst &II,    Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);    // Shuffle clamped args together at the lane level. -  SmallVector<unsigned, 32> PackMask; +  SmallVector<int, 32> PackMask;    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {      for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)        PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane)); @@ -617,14 +680,14 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II,                                  InstCombiner::BuilderTy &Builder) {    Value *Arg = II.getArgOperand(0);    Type *ResTy = II.getType(); -  Type *ArgTy = Arg->getType();    // movmsk(undef) -> zero as we must ensure the upper bits are zero.    if (isa<UndefValue>(Arg))      return Constant::getNullValue(ResTy); +  auto *ArgTy = dyn_cast<VectorType>(Arg->getType());    // We can't easily peek through x86_mmx types. -  if (!ArgTy->isVectorTy()) +  if (!ArgTy)      return nullptr;    // Expand MOVMSK to compare/bitcast/zext: @@ -632,8 +695,8 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II,    // %cmp = icmp slt <16 x i8> %x, zeroinitializer    // %int = bitcast <16 x i1> %cmp to i16    // %res = zext i16 %int to i32 -  unsigned NumElts = ArgTy->getVectorNumElements(); -  Type *IntegerVecTy = VectorType::getInteger(cast<VectorType>(ArgTy)); +  unsigned NumElts = ArgTy->getNumElements(); +  Type *IntegerVecTy = VectorType::getInteger(ArgTy);    Type *IntegerTy = Builder.getIntNTy(NumElts);    Value *Res = Builder.CreateBitCast(Arg, IntegerVecTy); @@ -697,7 +760,7 @@ static Value *simplifyX86insertps(const IntrinsicInst &II,      return ZeroVector;    // Initialize by passing all of the first source bits through. -  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 }; +  int ShuffleMask[4] = {0, 1, 2, 3};    // We may replace the second operand with the zero vector.    
Value *V1 = II.getArgOperand(1); @@ -777,22 +840,19 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,        Index /= 8;        Type *IntTy8 = Type::getInt8Ty(II.getContext()); -      Type *IntTy32 = Type::getInt32Ty(II.getContext()); -      VectorType *ShufTy = VectorType::get(IntTy8, 16); +      auto *ShufTy = FixedVectorType::get(IntTy8, 16); -      SmallVector<Constant *, 16> ShuffleMask; +      SmallVector<int, 16> ShuffleMask;        for (int i = 0; i != (int)Length; ++i) -        ShuffleMask.push_back( -            Constant::getIntegerValue(IntTy32, APInt(32, i + Index))); +        ShuffleMask.push_back(i + Index);        for (int i = Length; i != 8; ++i) -        ShuffleMask.push_back( -            Constant::getIntegerValue(IntTy32, APInt(32, i + 16))); +        ShuffleMask.push_back(i + 16);        for (int i = 8; i != 16; ++i) -        ShuffleMask.push_back(UndefValue::get(IntTy32)); +        ShuffleMask.push_back(-1);        Value *SV = Builder.CreateShuffleVector(            Builder.CreateBitCast(Op0, ShufTy), -          ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask)); +          ConstantAggregateZero::get(ShufTy), ShuffleMask);        return Builder.CreateBitCast(SV, II.getType());      } @@ -857,23 +917,21 @@ static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,      Index /= 8;      Type *IntTy8 = Type::getInt8Ty(II.getContext()); -    Type *IntTy32 = Type::getInt32Ty(II.getContext()); -    VectorType *ShufTy = VectorType::get(IntTy8, 16); +    auto *ShufTy = FixedVectorType::get(IntTy8, 16); -    SmallVector<Constant *, 16> ShuffleMask; +    SmallVector<int, 16> ShuffleMask;      for (int i = 0; i != (int)Index; ++i) -      ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i))); +      ShuffleMask.push_back(i);      for (int i = 0; i != (int)Length; ++i) -      ShuffleMask.push_back( -          Constant::getIntegerValue(IntTy32, APInt(32, i + 16))); +      ShuffleMask.push_back(i + 16);      for (int i = Index + Length; i != 8; ++i) -      ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i))); +      ShuffleMask.push_back(i);      for (int i = 8; i != 16; ++i) -      ShuffleMask.push_back(UndefValue::get(IntTy32)); +      ShuffleMask.push_back(-1);      Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),                                              Builder.CreateBitCast(Op1, ShufTy), -                                            ConstantVector::get(ShuffleMask)); +                                            ShuffleMask);      return Builder.CreateBitCast(SV, II.getType());    } @@ -925,13 +983,12 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II,      return nullptr;    auto *VecTy = cast<VectorType>(II.getType()); -  auto *MaskEltTy = Type::getInt32Ty(II.getContext());    unsigned NumElts = VecTy->getNumElements();    assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&           "Unexpected number of elements in shuffle mask!");    // Construct a shuffle mask from constant integers or UNDEFs. -  Constant *Indexes[64] = {nullptr}; +  int Indexes[64];    // Each byte in the shuffle control mask forms an index to permute the    // corresponding byte in the destination operand. 
@@ -941,7 +998,7 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II,        return nullptr;      if (isa<UndefValue>(COp)) { -      Indexes[I] = UndefValue::get(MaskEltTy); +      Indexes[I] = -1;        continue;      } @@ -955,13 +1012,12 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II,      // The value of each index for the high 128-bit lane is the least      // significant 4 bits of the respective shuffle control byte.      Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0); -    Indexes[I] = ConstantInt::get(MaskEltTy, Index); +    Indexes[I] = Index;    } -  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));    auto V1 = II.getArgOperand(0);    auto V2 = Constant::getNullValue(VecTy); -  return Builder.CreateShuffleVector(V1, V2, ShuffleMask); +  return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes, NumElts));  }  /// Attempt to convert vpermilvar* to shufflevector if the mask is constant. @@ -972,14 +1028,13 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II,      return nullptr;    auto *VecTy = cast<VectorType>(II.getType()); -  auto *MaskEltTy = Type::getInt32Ty(II.getContext()); -  unsigned NumElts = VecTy->getVectorNumElements(); +  unsigned NumElts = VecTy->getNumElements();    bool IsPD = VecTy->getScalarType()->isDoubleTy();    unsigned NumLaneElts = IsPD ? 2 : 4;    assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);    // Construct a shuffle mask from constant integers or UNDEFs. -  Constant *Indexes[16] = {nullptr}; +  int Indexes[16];    // The intrinsics only read one or two bits, clear the rest.    for (unsigned I = 0; I < NumElts; ++I) { @@ -988,7 +1043,7 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II,        return nullptr;      if (isa<UndefValue>(COp)) { -      Indexes[I] = UndefValue::get(MaskEltTy); +      Indexes[I] = -1;        continue;      } @@ -1005,13 +1060,12 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II,      // shuffle, we have to make that explicit.      Index += APInt(32, (I / NumLaneElts) * NumLaneElts); -    Indexes[I] = ConstantInt::get(MaskEltTy, Index); +    Indexes[I] = Index.getZExtValue();    } -  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));    auto V1 = II.getArgOperand(0);    auto V2 = UndefValue::get(V1->getType()); -  return Builder.CreateShuffleVector(V1, V2, ShuffleMask); +  return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes, NumElts));  }  /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant. @@ -1022,13 +1076,12 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,      return nullptr;    auto *VecTy = cast<VectorType>(II.getType()); -  auto *MaskEltTy = Type::getInt32Ty(II.getContext());    unsigned Size = VecTy->getNumElements();    assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&           "Unexpected shuffle mask size");    // Construct a shuffle mask from constant integers or UNDEFs. 
-  Constant *Indexes[64] = {nullptr}; +  int Indexes[64];    for (unsigned I = 0; I < Size; ++I) {      Constant *COp = V->getAggregateElement(I); @@ -1036,26 +1089,26 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,        return nullptr;      if (isa<UndefValue>(COp)) { -      Indexes[I] = UndefValue::get(MaskEltTy); +      Indexes[I] = -1;        continue;      }      uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();      Index &= Size - 1; -    Indexes[I] = ConstantInt::get(MaskEltTy, Index); +    Indexes[I] = Index;    } -  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));    auto V1 = II.getArgOperand(0);    auto V2 = UndefValue::get(VecTy); -  return Builder.CreateShuffleVector(V1, V2, ShuffleMask); +  return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes, Size));  }  // TODO, Obvious Missing Transforms:  // * Narrow width by halfs excluding zero/undef lanes  Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) {    Value *LoadPtr = II.getArgOperand(0); -  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue(); +  const Align Alignment = +      cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();    // If the mask is all ones or undefs, this is a plain vector load of the 1st    // argument. @@ -1065,9 +1118,9 @@ Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) {    // If we can unconditionally load from this address, replace with a    // load/select idiom. TODO: use DT for context sensitive query -  if (isDereferenceableAndAlignedPointer( -          LoadPtr, II.getType(), MaybeAlign(Alignment), -          II.getModule()->getDataLayout(), &II, nullptr)) { +  if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment, +                                         II.getModule()->getDataLayout(), &II, +                                         nullptr)) {      Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,                                           "unmaskedload");      return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3)); @@ -1091,8 +1144,7 @@ Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) {    // If the mask is all ones, this is a plain vector store of the 1st argument.    
if (ConstMask->isAllOnesValue()) {      Value *StorePtr = II.getArgOperand(1); -    MaybeAlign Alignment( -        cast<ConstantInt>(II.getArgOperand(2))->getZExtValue()); +    Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();      return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);    } @@ -1100,10 +1152,8 @@ Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) {    APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);    APInt UndefElts(DemandedElts.getBitWidth(), 0);    if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), -                                            DemandedElts, UndefElts)) { -    II.setOperand(0, V); -    return &II; -  } +                                            DemandedElts, UndefElts)) +    return replaceOperand(II, 0, V);    return nullptr;  } @@ -1138,15 +1188,11 @@ Instruction *InstCombiner::simplifyMaskedScatter(IntrinsicInst &II) {    APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);    APInt UndefElts(DemandedElts.getBitWidth(), 0);    if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), -                                            DemandedElts, UndefElts)) { -    II.setOperand(0, V); -    return &II; -  } +                                            DemandedElts, UndefElts)) +    return replaceOperand(II, 0, V);    if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), -                                            DemandedElts, UndefElts)) { -    II.setOperand(1, V); -    return &II; -  } +                                            DemandedElts, UndefElts)) +    return replaceOperand(II, 1, V);    return nullptr;  } @@ -1202,19 +1248,15 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {    if (IsTZ) {      // cttz(-x) -> cttz(x) -    if (match(Op0, m_Neg(m_Value(X)))) { -      II.setOperand(0, X); -      return &II; -    } +    if (match(Op0, m_Neg(m_Value(X)))) +      return IC.replaceOperand(II, 0, X);      // cttz(abs(x)) -> cttz(x)      // cttz(nabs(x)) -> cttz(x)      Value *Y;      SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor; -    if (SPF == SPF_ABS || SPF == SPF_NABS) { -      II.setOperand(0, X); -      return &II; -    } +    if (SPF == SPF_ABS || SPF == SPF_NABS) +      return IC.replaceOperand(II, 0, X);    }    KnownBits Known = IC.computeKnownBits(Op0, 0, &II); @@ -1240,10 +1282,8 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {    if (!Known.One.isNullValue() ||        isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,                       &IC.getDominatorTree())) { -    if (!match(II.getArgOperand(1), m_One())) { -      II.setOperand(1, IC.Builder.getTrue()); -      return &II; -    } +    if (!match(II.getArgOperand(1), m_One())) +      return IC.replaceOperand(II, 1, IC.Builder.getTrue());    }    // Add range metadata since known bits can't completely reflect what we know. 
@@ -1264,21 +1304,39 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {  static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {    assert(II.getIntrinsicID() == Intrinsic::ctpop &&           "Expected ctpop intrinsic"); +  Type *Ty = II.getType(); +  unsigned BitWidth = Ty->getScalarSizeInBits();    Value *Op0 = II.getArgOperand(0);    Value *X; +    // ctpop(bitreverse(x)) -> ctpop(x)    // ctpop(bswap(x)) -> ctpop(x) -  if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X)))) { -    II.setOperand(0, X); -    return &II; +  if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X)))) +    return IC.replaceOperand(II, 0, X); + +  // ctpop(x | -x) -> bitwidth - cttz(x, false) +  if (Op0->hasOneUse() && +      match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) { +    Function *F = +        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty); +    auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()}); +    auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth)); +    return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz)); +  } + +  // ctpop(~x & (x - 1)) -> cttz(x, false) +  if (match(Op0, +            m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) { +    Function *F = +        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty); +    return CallInst::Create(F, {X, IC.Builder.getFalse()});    }    // FIXME: Try to simplify vectors of integers. -  auto *IT = dyn_cast<IntegerType>(Op0->getType()); +  auto *IT = dyn_cast<IntegerType>(Ty);    if (!IT)      return nullptr; -  unsigned BitWidth = IT->getBitWidth();    KnownBits Known(BitWidth);    IC.computeKnownBits(Op0, Known, 0, &II); @@ -1330,7 +1388,7 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {    // The pass-through vector for an x86 masked load is a zero vector.    CallInst *NewMaskedLoad = -      IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec); +      IC.Builder.CreateMaskedLoad(PtrCast, Align(1), BoolMask, ZeroVec);    return IC.replaceInstUsesWith(II, NewMaskedLoad);  } @@ -1371,7 +1429,7 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {    // on each element's most significant bit (the sign bit).    Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask); -  IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask); +  IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);    // 'Replace uses' doesn't work for stores. Erase the original masked store.    IC.eraseInstFromFunction(II); @@ -1417,7 +1475,7 @@ static Value *simplifyNeonTbl1(const IntrinsicInst &II,    if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)      return nullptr; -  uint32_t Indexes[8]; +  int Indexes[8];    for (unsigned I = 0; I < NumElts; ++I) {      Constant *COp = C->getAggregateElement(I); @@ -1428,15 +1486,13 @@ static Value *simplifyNeonTbl1(const IntrinsicInst &II,      Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();      // Make sure the mask indices are in range. 
-    if (Indexes[I] >= NumElts) +    if ((unsigned)Indexes[I] >= NumElts)        return nullptr;    } -  auto *ShuffleMask = ConstantDataVector::get(II.getContext(), -                                              makeArrayRef(Indexes));    auto *V1 = II.getArgOperand(0);    auto *V2 = Constant::getNullValue(V1->getType()); -  return Builder.CreateShuffleVector(V1, V2, ShuffleMask); +  return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes));  }  /// Convert a vector load intrinsic into a simple llvm load instruction. @@ -1458,7 +1514,7 @@ static Value *simplifyNeonVld1(const IntrinsicInst &II,    auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),                                            PointerType::get(II.getType(), 0)); -  return Builder.CreateAlignedLoad(II.getType(), BCastInst, Alignment); +  return Builder.CreateAlignedLoad(II.getType(), BCastInst, Align(Alignment));  }  // Returns true iff the 2 intrinsics have the same operands, limiting the @@ -1478,24 +1534,30 @@ static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,  // start/end intrinsics in between). As this handles only the most trivial  // cases, tracking the nesting level is not needed:  // -//   call @llvm.foo.start(i1 0) ; &I  //   call @llvm.foo.start(i1 0) -//   call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed +//   call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed  //   call @llvm.foo.end(i1 0) -static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID, -                                      unsigned EndID, InstCombiner &IC) { -  assert(I.getIntrinsicID() == StartID && -         "Start intrinsic does not have expected ID"); -  BasicBlock::iterator BI(I), BE(I.getParent()->end()); -  for (++BI; BI != BE; ++BI) { -    if (auto *E = dyn_cast<IntrinsicInst>(BI)) { -      if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID) +//   call @llvm.foo.end(i1 0) ; &I +static bool removeTriviallyEmptyRange( +    IntrinsicInst &EndI, InstCombiner &IC, +    std::function<bool(const IntrinsicInst &)> IsStart) { +  // We start from the end intrinsic and scan backwards, so that InstCombine +  // has already processed (and potentially removed) all the instructions +  // before the end intrinsic. +  BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend()); +  for (; BI != BE; ++BI) { +    if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) { +      if (isa<DbgInfoIntrinsic>(I) || +          I->getIntrinsicID() == EndI.getIntrinsicID()) +        continue; +      if (IsStart(*I)) { +        if (haveSameOperands(EndI, *I, EndI.getNumArgOperands())) { +          IC.eraseInstFromFunction(*I); +          IC.eraseInstFromFunction(EndI); +          return true; +        } +        // Skip start intrinsics that don't pair with this end intrinsic.          continue; -      if (E->getIntrinsicID() == EndID && -          haveSameOperands(I, *E, E->getNumArgOperands())) { -        IC.eraseInstFromFunction(*E); -        IC.eraseInstFromFunction(I); -        return true;        }      }      break; @@ -1709,9 +1771,11 @@ static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {    // intrinsic, we don't have to look up any module metadata, as    // FtzRequirementTy will be FTZ_Any.)    
if (Action.FtzRequirement != FTZ_Any) { -    bool FtzEnabled = -        II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() == -        "true"; +    StringRef Attr = II->getFunction() +                         ->getFnAttribute("denormal-fp-math-f32") +                         .getValueAsString(); +    DenormalMode Mode = parseDenormalFPAttribute(Attr); +    bool FtzEnabled = Mode.Output != DenormalMode::IEEE;      if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))        return nullptr; @@ -1751,13 +1815,11 @@ static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {    llvm_unreachable("All SpecialCase enumerators should be handled in switch.");  } -Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) { -  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this); -  return nullptr; -} - -Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { -  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this); +Instruction *InstCombiner::visitVAEndInst(VAEndInst &I) { +  removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) { +    return I.getIntrinsicID() == Intrinsic::vastart || +           I.getIntrinsicID() == Intrinsic::vacopy; +  });    return nullptr;  } @@ -1786,8 +1848,11 @@ Instruction *InstCombiner::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {  /// instructions. For normal calls, it allows visitCallBase to do the heavy  /// lifting.  Instruction *InstCombiner::visitCallInst(CallInst &CI) { -  if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI))) -    return replaceInstUsesWith(CI, V); +  // Don't try to simplify calls without uses. It will not do anything useful, +  // but will result in the following folds being skipped. +  if (!CI.use_empty()) +    if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI))) +      return replaceInstUsesWith(CI, V);    if (isFreeCall(&CI, &TLI))      return visitFree(CI); @@ -1802,6 +1867,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {    IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);    if (!II) return visitCallBase(CI); +  // For atomic unordered mem intrinsics if len is not a positive or +  // not a multiple of element size then behavior is undefined. +  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(II)) +    if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(AMI->getLength())) +      if (NumBytes->getSExtValue() < 0 || +          (NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != 0)) { +        CreateNonTerminatorUnreachable(AMI); +        assert(AMI->getType()->isVoidTy() && +               "non void atomic unordered mem intrinsic"); +        return eraseInstFromFunction(*AMI); +      } +    // Intrinsics cannot occur in an invoke or a callbr, so handle them here    // instead of in visitCallBase.    if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) { @@ -1863,9 +1940,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      if (Changed) return II;    } -  // For vector result intrinsics, use the generic demanded vector support. -  if (II->getType()->isVectorTy()) { -    auto VWidth = II->getType()->getVectorNumElements(); +  // For fixed width vector result intrinsics, use the generic demanded vector +  // support. 
+  if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) { +    auto VWidth = IIFVTy->getNumElements();      APInt UndefElts(VWidth, 0);      APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));      if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) { @@ -1958,10 +2036,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {        // Canonicalize a shift amount constant operand to modulo the bit-width.        Constant *WidthC = ConstantInt::get(Ty, BitWidth);        Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC); -      if (ModuloC != ShAmtC) { -        II->setArgOperand(2, ModuloC); -        return II; -      } +      if (ModuloC != ShAmtC) +        return replaceOperand(*II, 2, ModuloC); +        assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==                   ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&               "Shift amount expected to be modulo bitwidth"); @@ -2189,7 +2266,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {          llvm_unreachable("unexpected intrinsic ID");        }        Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II); -      Instruction *FNeg = BinaryOperator::CreateFNeg(NewCall); +      Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);        FNeg->copyIRFlags(II);        return FNeg;      } @@ -2220,12 +2297,31 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {            llvm_unreachable("unexpected intrinsic ID");          }          Instruction *NewCall = Builder.CreateBinaryIntrinsic( -            IID, X, ConstantFP::get(Arg0->getType(), Res)); -        NewCall->copyIRFlags(II); +            IID, X, ConstantFP::get(Arg0->getType(), Res), II); +        // TODO: Conservatively intersecting FMF. If Res == C2, the transform +        //       was a simplification (so Arg0 and its original flags could +        //       propagate?) 
+        NewCall->andIRFlags(M);          return replaceInstUsesWith(*II, NewCall);        }      } +    Value *ExtSrc0; +    Value *ExtSrc1; + +    // minnum (fpext x), (fpext y) -> minnum x, y +    // maxnum (fpext x), (fpext y) -> maxnum x, y +    if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc0)))) && +        match(II->getArgOperand(1), m_OneUse(m_FPExt(m_Value(ExtSrc1)))) && +        ExtSrc0->getType() == ExtSrc1->getType()) { +      Function *F = Intrinsic::getDeclaration( +          II->getModule(), II->getIntrinsicID(), {ExtSrc0->getType()}); +      CallInst *NewCall = Builder.CreateCall(F, { ExtSrc0, ExtSrc1 }); +      NewCall->copyFastMathFlags(II); +      NewCall->takeName(II); +      return new FPExtInst(NewCall, II->getType()); +    } +      break;    }    case Intrinsic::fmuladd: { @@ -2260,16 +2356,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      Value *Src1 = II->getArgOperand(1);      Value *X, *Y;      if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) { -      II->setArgOperand(0, X); -      II->setArgOperand(1, Y); +      replaceOperand(*II, 0, X); +      replaceOperand(*II, 1, Y);        return II;      }      // fma fabs(x), fabs(x), z -> fma x, x, z      if (match(Src0, m_FAbs(m_Value(X))) &&          match(Src1, m_FAbs(m_Specific(X)))) { -      II->setArgOperand(0, X); -      II->setArgOperand(1, X); +      replaceOperand(*II, 0, X); +      replaceOperand(*II, 1, X);        return II;      } @@ -2283,6 +2379,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {        return FAdd;      } +    // fma x, y, 0 -> fmul x, y +    // This is always valid for -0.0, but requires nsz for +0.0 as +    // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own. 
+    if (match(II->getArgOperand(2), m_NegZeroFP()) || +        (match(II->getArgOperand(2), m_PosZeroFP()) && +         II->getFastMathFlags().noSignedZeros())) +      return BinaryOperator::CreateFMulFMF(Src0, Src1, II); +      break;    }    case Intrinsic::copysign: { @@ -2307,10 +2411,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // copysign X, (copysign ?, SignArg) --> copysign X, SignArg      Value *SignArg;      if (match(II->getArgOperand(1), -              m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(SignArg)))) { -      II->setArgOperand(1, SignArg); -      return II; -    } +              m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(SignArg)))) +      return replaceOperand(*II, 1, SignArg);      break;    } @@ -2329,6 +2431,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {    case Intrinsic::ceil:    case Intrinsic::floor:    case Intrinsic::round: +  case Intrinsic::roundeven:    case Intrinsic::nearbyint:    case Intrinsic::rint:    case Intrinsic::trunc: { @@ -2347,8 +2450,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {        // cos(-x) -> cos(x)        // cos(fabs(x)) -> cos(x) -      II->setArgOperand(0, X); -      return II; +      return replaceOperand(*II, 0, X);      }      break;    } @@ -2357,7 +2459,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {        // sin(-x) --> -sin(x)        Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II); -      Instruction *FNeg = BinaryOperator::CreateFNeg(NewSin); +      Instruction *FNeg = UnaryOperator::CreateFNeg(NewSin);        FNeg->copyFastMathFlags(II);        return FNeg;      } @@ -2366,11 +2468,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {    case Intrinsic::ppc_altivec_lvx:    case Intrinsic::ppc_altivec_lvxl:      // Turn PPC lvx -> load if the pointer is known aligned. -    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, +    if (getOrEnforceKnownAlignment(II->getArgOperand(0), Align(16), DL, II, &AC,                                     &DT) >= 16) {        Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),                                           PointerType::getUnqual(II->getType())); -      return new LoadInst(II->getType(), Ptr); +      return new LoadInst(II->getType(), Ptr, "", false, Align(16));      }      break;    case Intrinsic::ppc_vsx_lxvw4x: @@ -2378,17 +2480,17 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // Turn PPC VSX loads into normal loads.      Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),                                         PointerType::getUnqual(II->getType())); -    return new LoadInst(II->getType(), Ptr, Twine(""), false, Align::None()); +    return new LoadInst(II->getType(), Ptr, Twine(""), false, Align(1));    }    case Intrinsic::ppc_altivec_stvx:    case Intrinsic::ppc_altivec_stvxl:      // Turn stvx -> store if the pointer is known aligned. 
-    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, +    if (getOrEnforceKnownAlignment(II->getArgOperand(1), Align(16), DL, II, &AC,                                     &DT) >= 16) {        Type *OpPtrTy =          PointerType::getUnqual(II->getArgOperand(0)->getType());        Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); -      return new StoreInst(II->getArgOperand(0), Ptr); +      return new StoreInst(II->getArgOperand(0), Ptr, false, Align(16));      }      break;    case Intrinsic::ppc_vsx_stxvw4x: @@ -2396,14 +2498,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // Turn PPC VSX stores into normal stores.      Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());      Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); -    return new StoreInst(II->getArgOperand(0), Ptr, false, Align::None()); +    return new StoreInst(II->getArgOperand(0), Ptr, false, Align(1));    }    case Intrinsic::ppc_qpx_qvlfs:      // Turn PPC QPX qvlfs -> load if the pointer is known aligned. -    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, +    if (getOrEnforceKnownAlignment(II->getArgOperand(0), Align(16), DL, II, &AC,                                     &DT) >= 16) { -      Type *VTy = VectorType::get(Builder.getFloatTy(), -                                  II->getType()->getVectorNumElements()); +      Type *VTy = +          VectorType::get(Builder.getFloatTy(), +                          cast<VectorType>(II->getType())->getElementCount());        Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),                                           PointerType::getUnqual(VTy));        Value *Load = Builder.CreateLoad(VTy, Ptr); @@ -2412,33 +2515,34 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      break;    case Intrinsic::ppc_qpx_qvlfd:      // Turn PPC QPX qvlfd -> load if the pointer is known aligned. -    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC, +    if (getOrEnforceKnownAlignment(II->getArgOperand(0), Align(32), DL, II, &AC,                                     &DT) >= 32) {        Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),                                           PointerType::getUnqual(II->getType())); -      return new LoadInst(II->getType(), Ptr); +      return new LoadInst(II->getType(), Ptr, "", false, Align(32));      }      break;    case Intrinsic::ppc_qpx_qvstfs:      // Turn PPC QPX qvstfs -> store if the pointer is known aligned. -    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, +    if (getOrEnforceKnownAlignment(II->getArgOperand(1), Align(16), DL, II, &AC,                                     &DT) >= 16) { -      Type *VTy = VectorType::get(Builder.getFloatTy(), -          II->getArgOperand(0)->getType()->getVectorNumElements()); +      Type *VTy = VectorType::get( +          Builder.getFloatTy(), +          cast<VectorType>(II->getArgOperand(0)->getType())->getElementCount());        Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);        Type *OpPtrTy = PointerType::getUnqual(VTy);        Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); -      return new StoreInst(TOp, Ptr); +      return new StoreInst(TOp, Ptr, false, Align(16));      }      break;    case Intrinsic::ppc_qpx_qvstfd:      // Turn PPC QPX qvstfd -> store if the pointer is known aligned. 
-    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC, +    if (getOrEnforceKnownAlignment(II->getArgOperand(1), Align(32), DL, II, &AC,                                     &DT) >= 32) {        Type *OpPtrTy =          PointerType::getUnqual(II->getArgOperand(0)->getType());        Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); -      return new StoreInst(II->getArgOperand(0), Ptr); +      return new StoreInst(II->getArgOperand(0), Ptr, false, Align(32));      }      break; @@ -2546,50 +2650,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      }      break; -  case Intrinsic::x86_vcvtph2ps_128: -  case Intrinsic::x86_vcvtph2ps_256: { -    auto Arg = II->getArgOperand(0); -    auto ArgType = cast<VectorType>(Arg->getType()); -    auto RetType = cast<VectorType>(II->getType()); -    unsigned ArgWidth = ArgType->getNumElements(); -    unsigned RetWidth = RetType->getNumElements(); -    assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths"); -    assert(ArgType->isIntOrIntVectorTy() && -           ArgType->getScalarSizeInBits() == 16 && -           "CVTPH2PS input type should be 16-bit integer vector"); -    assert(RetType->getScalarType()->isFloatTy() && -           "CVTPH2PS output type should be 32-bit float vector"); - -    // Constant folding: Convert to generic half to single conversion. -    if (isa<ConstantAggregateZero>(Arg)) -      return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType)); - -    if (isa<ConstantDataVector>(Arg)) { -      auto VectorHalfAsShorts = Arg; -      if (RetWidth < ArgWidth) { -        SmallVector<uint32_t, 8> SubVecMask; -        for (unsigned i = 0; i != RetWidth; ++i) -          SubVecMask.push_back((int)i); -        VectorHalfAsShorts = Builder.CreateShuffleVector( -            Arg, UndefValue::get(ArgType), SubVecMask); -      } - -      auto VectorHalfType = -          VectorType::get(Type::getHalfTy(II->getContext()), RetWidth); -      auto VectorHalfs = -          Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType); -      auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType); -      return replaceInstUsesWith(*II, VectorFloats); -    } - -    // We only use the lowest lanes of the argument. -    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) { -      II->setArgOperand(0, V); -      return II; -    } -    break; -  } -    case Intrinsic::x86_sse_cvtss2si:    case Intrinsic::x86_sse_cvtss2si64:    case Intrinsic::x86_sse_cvttss2si: @@ -2617,11 +2677,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // These intrinsics only demand the 0th element of their input vectors. If      // we can simplify the input based on that, do so now.      
Value *Arg = II->getArgOperand(0); -    unsigned VWidth = Arg->getType()->getVectorNumElements(); -    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) { -      II->setArgOperand(0, V); -      return II; -    } +    unsigned VWidth = cast<VectorType>(Arg->getType())->getNumElements(); +    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) +      return replaceOperand(*II, 0, V);      break;    } @@ -2669,13 +2727,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      bool MadeChange = false;      Value *Arg0 = II->getArgOperand(0);      Value *Arg1 = II->getArgOperand(1); -    unsigned VWidth = Arg0->getType()->getVectorNumElements(); +    unsigned VWidth = cast<VectorType>(Arg0->getType())->getNumElements();      if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) { -      II->setArgOperand(0, V); +      replaceOperand(*II, 0, V);        MadeChange = true;      }      if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) { -      II->setArgOperand(1, V); +      replaceOperand(*II, 1, V);        MadeChange = true;      }      if (MadeChange) @@ -2707,8 +2765,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {           cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {        if (Arg0IsZero)          std::swap(A, B); -      II->setArgOperand(0, A); -      II->setArgOperand(1, B); +      replaceOperand(*II, 0, A); +      replaceOperand(*II, 1, B);        return II;      }      break; @@ -2800,8 +2858,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {          // We don't need a select if we know the mask bit is a 1.          if (!C || !C->getValue()[0]) {            // Cast the mask to an i1 vector and then extract the lowest element. -          auto *MaskTy = VectorType::get(Builder.getInt1Ty(), -                             cast<IntegerType>(Mask->getType())->getBitWidth()); +          auto *MaskTy = FixedVectorType::get( +              Builder.getInt1Ty(), +              cast<IntegerType>(Mask->getType())->getBitWidth());            Mask = Builder.CreateBitCast(Mask, MaskTy);            Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);            // Extract the lowest element from the passthru operand. @@ -2887,12 +2946,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      Value *Arg1 = II->getArgOperand(1);      assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&             "Unexpected packed shift size"); -    unsigned VWidth = Arg1->getType()->getVectorNumElements(); +    unsigned VWidth = cast<VectorType>(Arg1->getType())->getNumElements(); -    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) { -      II->setArgOperand(1, V); -      return II; -    } +    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) +      return replaceOperand(*II, 1, V);      break;    } @@ -2956,14 +3013,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {        bool MadeChange = false;        Value *Arg0 = II->getArgOperand(0);        Value *Arg1 = II->getArgOperand(1); -      unsigned VWidth = Arg0->getType()->getVectorNumElements(); +      unsigned VWidth = cast<VectorType>(Arg0->getType())->getNumElements();        APInt UndefElts1(VWidth, 0);        APInt DemandedElts1 = APInt::getSplat(VWidth,                                              APInt(2, (Imm & 0x01) ? 
2 : 1));        if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts1,                                                  UndefElts1)) { -        II->setArgOperand(0, V); +        replaceOperand(*II, 0, V);          MadeChange = true;        } @@ -2972,7 +3029,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {                                              APInt(2, (Imm & 0x10) ? 2 : 1));        if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts2,                                                  UndefElts2)) { -        II->setArgOperand(1, V); +        replaceOperand(*II, 1, V);          MadeChange = true;        } @@ -2996,8 +3053,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {    case Intrinsic::x86_sse4a_extrq: {      Value *Op0 = II->getArgOperand(0);      Value *Op1 = II->getArgOperand(1); -    unsigned VWidth0 = Op0->getType()->getVectorNumElements(); -    unsigned VWidth1 = Op1->getType()->getVectorNumElements(); +    unsigned VWidth0 = cast<VectorType>(Op0->getType())->getNumElements(); +    unsigned VWidth1 = cast<VectorType>(Op1->getType())->getNumElements();      assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&             Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&             VWidth1 == 16 && "Unexpected operand sizes"); @@ -3019,11 +3076,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // operands and the lowest 16-bits of the second.      bool MadeChange = false;      if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) { -      II->setArgOperand(0, V); +      replaceOperand(*II, 0, V);        MadeChange = true;      }      if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) { -      II->setArgOperand(1, V); +      replaceOperand(*II, 1, V);        MadeChange = true;      }      if (MadeChange) @@ -3035,7 +3092,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining      // bits of the lower 64-bits. The upper 64-bits are undefined.      Value *Op0 = II->getArgOperand(0); -    unsigned VWidth = Op0->getType()->getVectorNumElements(); +    unsigned VWidth = cast<VectorType>(Op0->getType())->getNumElements();      assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&             "Unexpected operand size"); @@ -3049,20 +3106,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // EXTRQI only uses the lowest 64-bits of the first 128-bit vector      // operand. -    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) { -      II->setArgOperand(0, V); -      return II; -    } +    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) +      return replaceOperand(*II, 0, V);      break;    }    case Intrinsic::x86_sse4a_insertq: {      Value *Op0 = II->getArgOperand(0);      Value *Op1 = II->getArgOperand(1); -    unsigned VWidth = Op0->getType()->getVectorNumElements(); +    unsigned VWidth = cast<VectorType>(Op0->getType())->getNumElements();      assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&             Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 && -           Op1->getType()->getVectorNumElements() == 2 && +           cast<VectorType>(Op1->getType())->getNumElements() == 2 &&             "Unexpected operand size");      // See if we're dealing with constant values. 
@@ -3082,10 +3137,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // INSERTQ only uses the lowest 64-bits of the first 128-bit vector      // operand. -    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) { -      II->setArgOperand(0, V); -      return II; -    } +    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) +      return replaceOperand(*II, 0, V);      break;    } @@ -3095,8 +3148,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // undefined.      Value *Op0 = II->getArgOperand(0);      Value *Op1 = II->getArgOperand(1); -    unsigned VWidth0 = Op0->getType()->getVectorNumElements(); -    unsigned VWidth1 = Op1->getType()->getVectorNumElements(); +    unsigned VWidth0 = cast<VectorType>(Op0->getType())->getNumElements(); +    unsigned VWidth1 = cast<VectorType>(Op1->getType())->getNumElements();      assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&             Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&             VWidth1 == 2 && "Unexpected operand sizes"); @@ -3117,11 +3170,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // operands.      bool MadeChange = false;      if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) { -      II->setArgOperand(0, V); +      replaceOperand(*II, 0, V);        MadeChange = true;      }      if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) { -      II->setArgOperand(1, V); +      replaceOperand(*II, 1, V);        MadeChange = true;      }      if (MadeChange) @@ -3163,8 +3216,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {               II->getType()->getPrimitiveSizeInBits() &&               "Not expecting mask and operands with different sizes"); -      unsigned NumMaskElts = Mask->getType()->getVectorNumElements(); -      unsigned NumOperandElts = II->getType()->getVectorNumElements(); +      unsigned NumMaskElts = +          cast<VectorType>(Mask->getType())->getNumElements(); +      unsigned NumOperandElts = +          cast<VectorType>(II->getType())->getNumElements();        if (NumMaskElts == NumOperandElts)          return SelectInst::Create(BoolVec, Op1, Op0); @@ -3255,7 +3310,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // the permutation mask with respect to 31 and reverse the order of      // V1 and V2.      if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) { -      assert(Mask->getType()->getVectorNumElements() == 16 && +      assert(cast<VectorType>(Mask->getType())->getNumElements() == 16 &&               "Bad type for intrinsic!");        // Check that all of the elements are integer constants or undefs. 
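
The blendv hunk above rewrites the intrinsic as a plain IR select once the mask is known to be a vector of sign-extended i1 compares and the mask and operand element counts match. Per-lane reference semantics, assuming the usual blendv rule that the sign bit of the mask lane picks the second source, which is exactly what SelectInst::Create(BoolVec, Op1, Op0) expresses:

#include <cstdint>

// One lane of x86 blendv as plain C++: a set sign bit in the mask lane
// selects the second operand, i.e. select (mask < 0), Op1, Op0.
static uint8_t blendvLane(uint8_t Op0, uint8_t Op1, int8_t MaskLane) {
  return MaskLane < 0 ? Op1 : Op0;
}
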
@@ -3307,9 +3362,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      break;    case Intrinsic::arm_neon_vld1: { -    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), -                                          DL, II, &AC, &DT); -    if (Value *V = simplifyNeonVld1(*II, MemAlign, Builder)) +    Align MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT); +    if (Value *V = simplifyNeonVld1(*II, MemAlign.value(), Builder))        return replaceInstUsesWith(*II, V);      break;    } @@ -3327,16 +3381,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {    case Intrinsic::arm_neon_vst2lane:    case Intrinsic::arm_neon_vst3lane:    case Intrinsic::arm_neon_vst4lane: { -    unsigned MemAlign = -        getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT); +    Align MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);      unsigned AlignArg = II->getNumArgOperands() - 1; -    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg)); -    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) { -      II->setArgOperand(AlignArg, -                        ConstantInt::get(Type::getInt32Ty(II->getContext()), -                                         MemAlign, false)); -      return II; -    } +    Value *AlignArgOp = II->getArgOperand(AlignArg); +    MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue(); +    if (Align && *Align < MemAlign) +      return replaceOperand(*II, AlignArg, +                            ConstantInt::get(Type::getInt32Ty(II->getContext()), +                                             MemAlign.value(), false));      break;    } @@ -3395,8 +3447,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      Value *Data, *Key;      if (match(KeyArg, m_ZeroInt()) &&          match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) { -      II->setArgOperand(0, Data); -      II->setArgOperand(1, Key); +      replaceOperand(*II, 0, Data); +      replaceOperand(*II, 1, Key);        return II;      }      break; @@ -3415,7 +3467,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {        if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {          if (CI->getValue().trunc(16).isAllOnesValue()) {            auto TrueVector = Builder.CreateVectorSplat( -              II->getType()->getVectorNumElements(), Builder.getTrue()); +              cast<VectorType>(II->getType())->getNumElements(), +              Builder.getTrue());            return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);          }        } @@ -3459,18 +3512,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      Value *Src = II->getArgOperand(0);      // TODO: Move to ConstantFolding/InstSimplify? -    if (isa<UndefValue>(Src)) -      return replaceInstUsesWith(CI, Src); +    if (isa<UndefValue>(Src)) { +      Type *Ty = II->getType(); +      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); +      return replaceInstUsesWith(CI, QNaN); +    } + +    if (II->isStrictFP()) +      break;      if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {        const APFloat &ArgVal = C->getValueAPF();        APFloat Val(ArgVal.getSemantics(), 1); -      APFloat::opStatus Status = Val.divide(ArgVal, -                                            APFloat::rmNearestTiesToEven); -      // Only do this if it was exact and therefore not dependent on the -      // rounding mode. 
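
The NEON load/store hunks above migrate from raw unsigned alignments to the Align/MaybeAlign types: the store case now reads the trailing constant alignment argument as a MaybeAlign (treating a constant 0 as "no alignment specified") and only raises it when a larger alignment has been proven for the pointer. A hedged, free-standing sketch of that fold; here the proven alignment is passed in directly, whereas the diff obtains it from getKnownAlignment with the usual DL/AC/DT analyses:

#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// If the alignment we can prove for the pointer operand beats the constant
// alignment argument carried by the intrinsic, rewrite that argument.
static bool raiseNeonAlignArg(IntrinsicInst *II, Align ProvenAlign) {
  unsigned AlignArg = II->getNumArgOperands() - 1;
  auto *AlignC = cast<ConstantInt>(II->getArgOperand(AlignArg));
  MaybeAlign Current = AlignC->getMaybeAlignValue(); // 0 -> unspecified
  if (!Current || *Current >= ProvenAlign)
    return false;
  II->setArgOperand(AlignArg,
                    ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                     ProvenAlign.value()));
  return true;
}
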
-      if (Status == APFloat::opOK) -        return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val)); +      Val.divide(ArgVal, APFloat::rmNearestTiesToEven); + +      // This is more precise than the instruction may give. +      // +      // TODO: The instruction always flushes denormal results (except for f16), +      // should this also? +      return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));      }      break; @@ -3479,8 +3539,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      Value *Src = II->getArgOperand(0);      // TODO: Move to ConstantFolding/InstSimplify? -    if (isa<UndefValue>(Src)) -      return replaceInstUsesWith(CI, Src); +    if (isa<UndefValue>(Src)) { +      Type *Ty = II->getType(); +      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); +      return replaceInstUsesWith(CI, QNaN); +    } +      break;    }    case Intrinsic::amdgcn_frexp_mant: @@ -3563,11 +3627,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      }      // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other -    if (((Mask & S_NAN) || (Mask & Q_NAN)) && isKnownNeverNaN(Src0, &TLI)) { -      II->setArgOperand(1, ConstantInt::get(Src1->getType(), -                                            Mask & ~(S_NAN | Q_NAN))); -      return II; -    } +    if (((Mask & S_NAN) || (Mask & Q_NAN)) && isKnownNeverNaN(Src0, &TLI)) +      return replaceOperand(*II, 1, ConstantInt::get(Src1->getType(), +                                                     Mask & ~(S_NAN | Q_NAN)));      const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);      if (!CVal) { @@ -3657,23 +3719,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {        if ((Width & (IntSize - 1)) == 0)          return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty)); -      if (Width >= IntSize) { -        // Hardware ignores high bits, so remove those. -        II->setArgOperand(2, ConstantInt::get(CWidth->getType(), -                                              Width & (IntSize - 1))); -        return II; -      } +      // Hardware ignores high bits, so remove those. 
+      if (Width >= IntSize) +        return replaceOperand(*II, 2, ConstantInt::get(CWidth->getType(), +                                                       Width & (IntSize - 1)));      }      unsigned Offset;      ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));      if (COffset) {        Offset = COffset->getZExtValue(); -      if (Offset >= IntSize) { -        II->setArgOperand(1, ConstantInt::get(COffset->getType(), -                                              Offset & (IntSize - 1))); -        return II; -      } +      if (Offset >= IntSize) +        return replaceOperand(*II, 1, ConstantInt::get(COffset->getType(), +                                                       Offset & (IntSize - 1)));      }      bool Signed = IID == Intrinsic::amdgcn_sbfe; @@ -3716,7 +3774,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {            (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {          Value *Src = II->getArgOperand(I + 2);          if (!isa<UndefValue>(Src)) { -          II->setArgOperand(I + 2, UndefValue::get(Src->getType())); +          replaceOperand(*II, I + 2, UndefValue::get(Src->getType()));            Changed = true;          }        } @@ -3855,8 +3913,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {          ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||           (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&          ExtSrc->getType()->isIntegerTy(1)) { -      II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType())); -      II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE)); +      replaceOperand(*II, 1, ConstantInt::getNullValue(Src1->getType())); +      replaceOperand(*II, 2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));        return II;      } @@ -3928,6 +3986,35 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      break;    } +  case Intrinsic::amdgcn_ballot: { +    if (auto *Src = dyn_cast<ConstantInt>(II->getArgOperand(0))) { +      if (Src->isZero()) { +        // amdgcn.ballot(i1 0) is zero. +        return replaceInstUsesWith(*II, Constant::getNullValue(II->getType())); +      } + +      if (Src->isOne()) { +        // amdgcn.ballot(i1 1) is exec. +        const char *RegName = "exec"; +        if (II->getType()->isIntegerTy(32)) +          RegName = "exec_lo"; +        else if (!II->getType()->isIntegerTy(64)) +          break; + +        Function *NewF = Intrinsic::getDeclaration( +            II->getModule(), Intrinsic::read_register, II->getType()); +        Metadata *MDArgs[] = {MDString::get(II->getContext(), RegName)}; +        MDNode *MD = MDNode::get(II->getContext(), MDArgs); +        Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; +        CallInst *NewCall = Builder.CreateCall(NewF, Args); +        NewCall->addAttribute(AttributeList::FunctionIndex, +                              Attribute::Convergent); +        NewCall->takeName(II); +        return replaceInstUsesWith(*II, NewCall); +      } +    } +    break; +  }    case Intrinsic::amdgcn_wqm_vote: {      // wqm_vote is identity when the argument is constant.      if (!isa<Constant>(II->getArgOperand(0))) @@ -3956,8 +4043,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {        break;      // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value. 
-    II->setOperand(0, UndefValue::get(Old->getType())); -    return II; +    return replaceOperand(*II, 0, UndefValue::get(Old->getType())); +  } +  case Intrinsic::amdgcn_permlane16: +  case Intrinsic::amdgcn_permlanex16: { +    // Discard vdst_in if it's not going to be read. +    Value *VDstIn = II->getArgOperand(0); +   if (isa<UndefValue>(VDstIn)) +     break; + +    ConstantInt *FetchInvalid = cast<ConstantInt>(II->getArgOperand(4)); +    ConstantInt *BoundCtrl = cast<ConstantInt>(II->getArgOperand(5)); +    if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue()) +      break; + +    return replaceOperand(*II, 0, UndefValue::get(VDstIn->getType()));    }    case Intrinsic::amdgcn_readfirstlane:    case Intrinsic::amdgcn_readlane: { @@ -3990,6 +4090,71 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      break;    } +  case Intrinsic::amdgcn_ldexp: { +    // FIXME: This doesn't introduce new instructions and belongs in +    // InstructionSimplify. +    Type *Ty = II->getType(); +    Value *Op0 = II->getArgOperand(0); +    Value *Op1 = II->getArgOperand(1); + +    // Folding undef to qnan is safe regardless of the FP mode. +    if (isa<UndefValue>(Op0)) { +      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); +      return replaceInstUsesWith(*II, QNaN); +    } + +    const APFloat *C = nullptr; +    match(Op0, m_APFloat(C)); + +    // FIXME: Should flush denorms depending on FP mode, but that's ignored +    // everywhere else. +    // +    // These cases should be safe, even with strictfp. +    // ldexp(0.0, x) -> 0.0 +    // ldexp(-0.0, x) -> -0.0 +    // ldexp(inf, x) -> inf +    // ldexp(-inf, x) -> -inf +    if (C && (C->isZero() || C->isInfinity())) +      return replaceInstUsesWith(*II, Op0); + +    // With strictfp, be more careful about possibly needing to flush denormals +    // or not, and snan behavior depends on ieee_mode. +    if (II->isStrictFP()) +      break; + +    if (C && C->isNaN()) { +      // FIXME: We just need to make the nan quiet here, but that's unavailable +      // on APFloat, only IEEEfloat +      auto *Quieted = ConstantFP::get( +        Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven)); +      return replaceInstUsesWith(*II, Quieted); +    } + +    // ldexp(x, 0) -> x +    // ldexp(x, undef) -> x +    if (isa<UndefValue>(Op1) || match(Op1, m_ZeroInt())) +      return replaceInstUsesWith(*II, Op0); + +    break; +  } +  case Intrinsic::hexagon_V6_vandvrt: +  case Intrinsic::hexagon_V6_vandvrt_128B: { +    // Simplify Q -> V -> Q conversion. +    if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) { +      Intrinsic::ID ID0 = Op0->getIntrinsicID(); +      if (ID0 != Intrinsic::hexagon_V6_vandqrt && +          ID0 != Intrinsic::hexagon_V6_vandqrt_128B) +        break; +      Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1); +      uint64_t Bytes1 = computeKnownBits(Bytes, 0, Op0).One.getZExtValue(); +      uint64_t Mask1 = computeKnownBits(Mask, 0, II).One.getZExtValue(); +      // Check if every byte has common bits in Bytes and Mask. +      uint64_t C = Bytes1 & Mask1; +      if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000)) +        return replaceInstUsesWith(*II, Op0->getArgOperand(0)); +    } +    break; +  }    case Intrinsic::stackrestore: {      // If the save is right next to the restore, remove the restore.  This can      // happen when variable allocas are DCE'd. 
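
The new amdgcn_ldexp handling folds the special values without inspecting the exponent: zero and infinity pass through regardless of the second operand, and a zero (or undef) exponent returns the first operand unchanged. The same identities hold for the ordinary libm ldexp, which allows a cheap scalar sanity check; this only illustrates the value-level identities, and the denormal-flushing and strictfp caveats from the comments above are deliberately ignored:

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double Inf = std::numeric_limits<double>::infinity();
  assert(std::ldexp(0.0, 7) == 0.0);         // ldexp(0.0, x)  -> 0.0
  assert(std::signbit(std::ldexp(-0.0, 7))); // ldexp(-0.0, x) -> -0.0
  assert(std::ldexp(Inf, -3) == Inf);        // ldexp(inf, x)  -> inf
  assert(std::ldexp(1.5, 0) == 1.5);         // ldexp(x, 0)    -> x
  return 0;
}
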
@@ -4040,7 +4205,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {        return eraseInstFromFunction(CI);      break;    } -  case Intrinsic::lifetime_start: +  case Intrinsic::lifetime_end:      // Asan needs to poison memory to detect invalid access which is possible      // even for empty lifetime range.      if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || @@ -4048,34 +4213,41 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {          II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))        break; -    if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start, -                                  Intrinsic::lifetime_end, *this)) +    if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) { +          return I.getIntrinsicID() == Intrinsic::lifetime_start; +        }))        return nullptr;      break;    case Intrinsic::assume: {      Value *IIOperand = II->getArgOperand(0); +    SmallVector<OperandBundleDef, 4> OpBundles; +    II->getOperandBundlesAsDefs(OpBundles); +    bool HasOpBundles = !OpBundles.empty();      // Remove an assume if it is followed by an identical assume.      // TODO: Do we need this? Unless there are conflicting assumptions, the      // computeKnownBits(IIOperand) below here eliminates redundant assumes.      Instruction *Next = II->getNextNonDebugInstruction(); -    if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand)))) +    if (HasOpBundles && +        match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))) && +        !cast<IntrinsicInst>(Next)->hasOperandBundles())        return eraseInstFromFunction(CI);      // Canonicalize assume(a && b) -> assume(a); assume(b);      // Note: New assumption intrinsics created here are registered by      // the InstCombineIRInserter object.      FunctionType *AssumeIntrinsicTy = II->getFunctionType(); -    Value *AssumeIntrinsic = II->getCalledValue(); +    Value *AssumeIntrinsic = II->getCalledOperand();      Value *A, *B;      if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) { -      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, II->getName()); +      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles, +                         II->getName());        Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());        return eraseInstFromFunction(*II);      }      // assume(!(a || b)) -> assume(!a); assume(!b);      if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {        Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, -                         Builder.CreateNot(A), II->getName()); +                         Builder.CreateNot(A), OpBundles, II->getName());        Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,                           Builder.CreateNot(B), II->getName());        return eraseInstFromFunction(*II); @@ -4091,7 +4263,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {          isValidAssumeForContext(II, LHS, &DT)) {        MDNode *MD = MDNode::get(II->getContext(), None);        LHS->setMetadata(LLVMContext::MD_nonnull, MD); -      return eraseInstFromFunction(*II); +      if (!HasOpBundles) +        return eraseInstFromFunction(*II);        // TODO: apply nonnull return attributes to calls and invokes        // TODO: apply range metadata for range check patterns? @@ -4101,7 +4274,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // then this one is redundant, and should be removed.      
KnownBits Known(1);      computeKnownBits(IIOperand, Known, 0, II); -    if (Known.isAllOnes()) +    if (Known.isAllOnes() && isAssumeWithEmptyBundle(*II))        return eraseInstFromFunction(*II);      // Update the cache of affected values for this assumption (we might be @@ -4117,10 +4290,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      if (GCR.getBasePtr() == GCR.getDerivedPtr() &&          GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {        auto *OpIntTy = GCR.getOperand(2)->getType(); -      II->setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex())); -      return II; +      return replaceOperand(*II, 2, +          ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));      } -     +      // Translate facts known about a pointer before relocating into      // facts about the relocate value, while being careful to      // preserve relocation semantics. @@ -4187,7 +4360,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {            MoveI = MoveI->getNextNonDebugInstruction();            Temp->moveBefore(II);          } -        II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond)); +        replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));        }        eraseInstFromFunction(*NextInst);        return II; @@ -4232,13 +4405,14 @@ static bool isSafeToEliminateVarargsCast(const CallBase &Call,    // TODO: This is probably something which should be expanded to all    // intrinsics since the entire point of intrinsics is that    // they are understandable by the optimizer. -  if (isStatepoint(&Call) || isGCRelocate(&Call) || isGCResult(&Call)) +  if (isa<GCStatepointInst>(Call) || isa<GCRelocateInst>(Call) || +      isa<GCResultInst>(Call))      return false;    // The size of ByVal or InAlloca arguments is derived from the type, so we    // can't change to a type with a different size.  If the size were    // passed explicitly we could avoid this check. -  if (!Call.isByValOrInAllocaArgument(ix)) +  if (!Call.isPassPointeeByValueArgument(ix))      return true;    Type* SrcTy = @@ -4264,7 +4438,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {    };    LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,                                 InstCombineErase); -  if (Value *With = Simplifier.optimizeCall(CI)) { +  if (Value *With = Simplifier.optimizeCall(CI, Builder)) {      ++NumSimplified;      return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);    } @@ -4353,7 +4527,8 @@ static void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {    ConstantInt *Op0C = dyn_cast<ConstantInt>(Call.getOperand(0));    ConstantInt *Op1C =        (NumArgs == 1) ? nullptr : dyn_cast<ConstantInt>(Call.getOperand(1)); -  // Bail out if the allocation size is zero. +  // Bail out if the allocation size is zero (or an invalid alignment of zero +  // with aligned_alloc).    
if ((Op0C && Op0C->isNullValue()) || (Op1C && Op1C->isNullValue()))      return; @@ -4366,6 +4541,18 @@ static void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {        Call.addAttribute(AttributeList::ReturnIndex,                          Attribute::getWithDereferenceableOrNullBytes(                              Call.getContext(), Op0C->getZExtValue())); +  } else if (isAlignedAllocLikeFn(&Call, TLI) && Op1C) { +    Call.addAttribute(AttributeList::ReturnIndex, +                      Attribute::getWithDereferenceableOrNullBytes( +                          Call.getContext(), Op1C->getZExtValue())); +    // Add alignment attribute if alignment is a power of two constant. +    if (Op0C && Op0C->getValue().ult(llvm::Value::MaximumAlignment)) { +      uint64_t AlignmentVal = Op0C->getZExtValue(); +      if (llvm::isPowerOf2_64(AlignmentVal)) +        Call.addAttribute(AttributeList::ReturnIndex, +                          Attribute::getWithAlignment(Call.getContext(), +                                                      Align(AlignmentVal))); +    }    } else if (isReallocLikeFn(&Call, TLI) && Op1C) {      Call.addAttribute(AttributeList::ReturnIndex,                        Attribute::getWithDereferenceableOrNullBytes( @@ -4430,7 +4617,7 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) {    // If the callee is a pointer to a function, attempt to move any casts to the    // arguments of the call/callbr/invoke. -  Value *Callee = Call.getCalledValue(); +  Value *Callee = Call.getCalledOperand();    if (!isa<Function>(Callee) && transformConstExprCastCall(Call))      return nullptr; @@ -4500,7 +4687,7 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) {           I != E; ++I, ++ix) {        CastInst *CI = dyn_cast<CastInst>(*I);        if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) { -        *I = CI->getOperand(0); +        replaceUse(*I, CI->getOperand(0));          // Update the byval type to match the argument type.          if (Call.isByValArgument(ix)) { @@ -4531,6 +4718,15 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) {      if (I) return eraseInstFromFunction(*I);    } +  if (!Call.use_empty() && !Call.isMustTailCall()) +    if (Value *ReturnedArg = Call.getReturnedArgOperand()) { +      Type *CallTy = Call.getType(); +      Type *RetArgTy = ReturnedArg->getType(); +      if (RetArgTy->canLosslesslyBitCastTo(CallTy)) +        return replaceInstUsesWith( +            Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy)); +    } +    if (isAllocLikeFn(&Call, &TLI))      return visitAllocSite(Call); @@ -4540,7 +4736,8 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) {  /// If the callee is a constexpr cast of a function, attempt to move the cast to  /// the arguments of the call/callbr/invoke.  bool InstCombiner::transformConstExprCastCall(CallBase &Call) { -  auto *Callee = dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts()); +  auto *Callee = +      dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());    if (!Callee)      return false; @@ -4618,6 +4815,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {    //    //  Similarly, avoid folding away bitcasts of byval calls.    
   if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+      Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated) ||
       Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
     return false;
 
@@ -4658,7 +4856,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
     // If the callee is just a declaration, don't change the varargsness of the
     // call.  We don't want to introduce a varargs call where one doesn't
     // already exist.
-    PointerType *APTy = cast<PointerType>(Call.getCalledValue()->getType());
+    PointerType *APTy = cast<PointerType>(Call.getCalledOperand()->getType());
     if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
       return false;
 
@@ -4774,11 +4972,8 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
   NewCall->setCallingConv(Call.getCallingConv());
   NewCall->setAttributes(NewCallerPAL);
 
-  // Preserve the weight metadata for the new call instruction. The metadata
-  // is used by SamplePGO to check callsite's hotness.
-  uint64_t W;
-  if (Caller->extractProfTotalWeight(W))
-    NewCall->setProfWeight(W);
+  // Preserve prof metadata if any.
+  NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
 
   // Insert a cast of the return type as necessary.
   Instruction *NC = NewCall;
@@ -4800,7 +4995,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
         // Otherwise, it's a call, just insert cast right after the call.
         InsertNewInstBefore(NC, *Caller);
       }
-      Worklist.AddUsersToWorkList(*Caller);
+      Worklist.pushUsersToWorkList(*Caller);
     } else {
       NV = UndefValue::get(Caller->getType());
     }
@@ -4826,7 +5021,7 @@
 Instruction *
 InstCombiner::transformCallThroughTrampoline(CallBase &Call,
                                              IntrinsicInst &Tramp) {
-  Value *Callee = Call.getCalledValue();
+  Value *Callee = Call.getCalledOperand();
   Type *CalleeTy = Callee->getType();
   FunctionType *FTy = Call.getFunctionType();
   AttributeList Attrs = Call.getAttributes();
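
annotateAnyAllocSite now also covers aligned_alloc-like calls: the constant size operand becomes a dereferenceable_or_null attribute on the return value, and the constant alignment operand becomes an align attribute when it is a power of two. A reduced sketch of just that branch, assuming the caller has already matched the allocator, rejected zero operands, and capped the value below Value::MaximumAlignment as the surrounding code does:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

// For aligned_alloc(align, size) with constant operands, annotate the return
// value with what the call guarantees about the pointer it produces.
static void annotateAlignedAlloc(CallBase &Call, uint64_t AlignVal,
                                 uint64_t SizeVal) {
  Call.addAttribute(AttributeList::ReturnIndex,
                    Attribute::getWithDereferenceableOrNullBytes(
                        Call.getContext(), SizeVal));
  if (isPowerOf2_64(AlignVal))
    Call.addAttribute(AttributeList::ReturnIndex,
                      Attribute::getWithAlignment(Call.getContext(),
                                                  Align(AlignVal)));
}
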
