Diffstat (limited to 'contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 112
1 file changed, 78 insertions, 34 deletions
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 9bf87d024607..e0d85c4b49ae 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1,9 +1,8 @@
 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -366,10 +365,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     KnownBits InputKnown(SrcBitWidth);
     if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
       return I;
-    Known = InputKnown.zextOrTrunc(BitWidth);
-    // Any top bits are known to be zero.
-    if (BitWidth > SrcBitWidth)
-      Known.Zero.setBitsFrom(SrcBitWidth);
+    assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
+    Known = InputKnown.zextOrTrunc(BitWidth,
+                                   true /* ExtendedBitsAreKnownZero */);
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
     break;
   }
@@ -967,26 +965,16 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
 }
 
 /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
+///
+/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
+///       struct returns.
 Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
                                                            APInt DemandedElts,
-                                                           int DMaskIdx,
-                                                           int TFCIdx) {
+                                                           int DMaskIdx) {
   unsigned VWidth = II->getType()->getVectorNumElements();
   if (VWidth == 1)
     return nullptr;
 
-  // Need to change to new instruction format
-  ConstantInt *TFC = nullptr;
-  bool TFELWEEnabled = false;
-  if (TFCIdx > 0) {
-    TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx));
-    TFELWEEnabled =    TFC->getZExtValue() & 0x1  // TFE
-                    || TFC->getZExtValue() & 0x2; // LWE
-  }
-
-  if (TFELWEEnabled)
-    return nullptr; // TFE not yet supported
-
   ConstantInt *NewDMask = nullptr;
 
   if (DMaskIdx < 0) {
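The SimplifyDemandedUseBits hunk above replaces the manual setBitsFrom() on the top bits with a single zextOrTrunc() call whose new second argument marks the extended bits as known zero. A minimal standalone sketch of that semantics follows; SimpleKnownBits and zextKnown are illustrative stand-ins, not LLVM's KnownBits API, and the sketch only models widths up to 64 bits:

    #include <cassert>
    #include <cstdint>

    // Illustrative stand-in for a known-bits pair: Zero/One track bits
    // proven 0 or 1; a bit set in neither is simply unknown.
    struct SimpleKnownBits {
      uint64_t Zero = 0;
      uint64_t One = 0;
      unsigned Width = 0;
    };

    // Zero-extension: every bit added above the source width is known zero,
    // which is what passing ExtendedBitsAreKnownZero = true expresses.
    SimpleKnownBits zextKnown(const SimpleKnownBits &In, unsigned DstWidth) {
      assert(DstWidth >= In.Width && "truncation handled separately");
      SimpleKnownBits Out = In;
      Out.Width = DstWidth;
      for (unsigned i = In.Width; i < DstWidth; ++i)
        Out.Zero |= uint64_t(1) << i; // new high bits: known 0
      return Out;
    }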
@@ -994,10 +982,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
     // below.
     DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
   } else {
-    ConstantInt *DMask = dyn_cast<ConstantInt>(II->getArgOperand(DMaskIdx));
-    if (!DMask)
-      return nullptr; // non-constant dmask is not supported by codegen
-
+    ConstantInt *DMask = cast<ConstantInt>(II->getArgOperand(DMaskIdx));
     unsigned DMaskVal = DMask->getZExtValue() & 0xf;
 
     // Mask off values that are undefined because the dmask doesn't cover them
@@ -1018,8 +1003,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
       NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
   }
 
-  // TODO: Handle 3 vectors when supported in code gen.
-  unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countPopulation());
+  unsigned NewNumElts = DemandedElts.countPopulation();
   if (!NewNumElts)
     return UndefValue::get(II->getType());
 
@@ -1035,13 +1019,12 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
   getIntrinsicInfoTableEntries(IID, Table);
   ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
 
+  // Validate function argument and return types, extracting overloaded types
+  // along the way.
   FunctionType *FTy = II->getCalledFunction()->getFunctionType();
   SmallVector<Type *, 6> OverloadTys;
-  Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, OverloadTys);
-  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
-    Intrinsic::matchIntrinsicType(FTy->getParamType(i), TableRef, OverloadTys);
+  Intrinsic::matchIntrinsicSignature(FTy, TableRef, OverloadTys);
 
-  // Get the new return type overload of the intrinsic.
   Module *M = II->getParent()->getParent()->getParent();
   Type *EltTy = II->getType()->getVectorElementType();
   Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts);
@@ -1184,6 +1167,39 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
   switch (I->getOpcode()) {
   default: break;
 
+  case Instruction::GetElementPtr: {
+    // The LangRef requires that struct geps have all constant indices.  As
+    // such, we can't convert any operand to partial undef.
+    auto mayIndexStructType = [](GetElementPtrInst &GEP) {
+      for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
+           I != E; I++)
+        if (I.isStruct())
+          return true;
+      return false;
+    };
+    if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
+      break;
+
+    // Conservatively track the demanded elements back through any vector
+    // operands we may have.  We know there must be at least one, or we
+    // wouldn't have a vector result to get here. Note that we intentionally
+    // merge the undef bits here since gepping with either an undef base or
+    // index results in undef.
+    for (unsigned i = 0; i < I->getNumOperands(); i++) {
+      if (isa<UndefValue>(I->getOperand(i))) {
+        // If the entire vector is undefined, just return this info.
+        UndefElts = EltMask;
+        return nullptr;
+      }
+      if (I->getOperand(i)->getType()->isVectorTy()) {
+        APInt UndefEltsOp(VWidth, 0);
+        simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
+        UndefElts |= UndefEltsOp;
+      }
+    }
+
+    break;
+  }
   case Instruction::InsertElement: {
     // If this is a variable index, we don't know which element it overwrites.
     // demand exactly the same input as we produce.
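The GetElementPtr case above ORs together the undef lanes of every vector operand, since a result lane is undef as soon as either the base or an index is undef in that lane. A standalone sketch of that merge using std::bitset lane masks; the names are illustrative, not LLVM's:

    #include <bitset>
    #include <vector>

    constexpr unsigned VWidth = 4;        // example vector width
    using LaneMask = std::bitset<VWidth>; // one bit per vector lane

    // UndefPerOperand[i] marks the lanes of operand i proven undef; the GEP
    // result lane is undef if any operand is undef there, hence the OR.
    LaneMask mergeGepUndefLanes(const std::vector<LaneMask> &UndefPerOperand) {
      LaneMask UndefElts; // starts all-zero: no lane proven undef yet
      for (const LaneMask &Op : UndefPerOperand)
        UndefElts |= Op;
      return UndefElts;
    }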
@@ -1430,6 +1446,30 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
     if (!II) break;
     switch (II->getIntrinsicID()) {
+    case Intrinsic::masked_gather: // fallthrough
+    case Intrinsic::masked_load: {
+      // Subtlety: If we load from a pointer, the pointer must be valid
+      // regardless of whether the element is demanded.  Doing otherwise risks
+      // segfaults which didn't exist in the original program.
+      APInt DemandedPtrs(APInt::getAllOnesValue(VWidth)),
+        DemandedPassThrough(DemandedElts);
+      if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
+        for (unsigned i = 0; i < VWidth; i++) {
+          Constant *CElt = CV->getAggregateElement(i);
+          if (CElt->isNullValue())
+            DemandedPtrs.clearBit(i);
+          else if (CElt->isAllOnesValue())
+            DemandedPassThrough.clearBit(i);
+        }
+      if (II->getIntrinsicID() == Intrinsic::masked_gather)
+        simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2);
+      simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3);
+
+      // Output elements are undefined if the element from both sources are.
+      // TODO: can strengthen via mask as well.
+      UndefElts = UndefElts2 & UndefElts3;
+      break;
+    }
     case Intrinsic::x86_xop_vfrcz_ss:
     case Intrinsic::x86_xop_vfrcz_sd:
       // The instructions for these intrinsics are speced to zero upper bits not
@@ -1639,8 +1679,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
     default: {
       if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
-        return simplifyAMDGCNMemoryIntrinsicDemanded(
-            II, DemandedElts, 0, II->getNumArgOperands() - 2);
+        return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
       break;
     }
 
@@ -1667,5 +1706,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     UndefElts &= UndefElts2;
   }
 
+  // If we've proven all of the lanes undef, return an undef value.
+  // TODO: Intersect w/demanded lanes
+  if (UndefElts.isAllOnesValue())
+    return UndefValue::get(I->getType());
+
   return MadeChange ? I : nullptr;
 }
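The masked_gather/masked_load hunk above splits the demanded lanes between the pointer and passthrough operands: with a constant mask, a disabled lane never dereferences its pointer and an enabled lane never reads its passthrough, while pointers in unknown mask lanes must stay demanded to avoid introducing faults. A standalone model of that split (illustrative names, not LLVM's API):

    #include <bitset>
    #include <optional>
    #include <vector>

    constexpr unsigned VWidth = 4;
    using LaneMask = std::bitset<VWidth>;

    struct DemandedSplit {
      LaneMask DemandedPtrs;        // lanes whose address is still needed
      LaneMask DemandedPassThrough; // lanes whose passthrough is still needed
    };

    // Mask[i] is nullopt when that mask lane is not a known constant.
    DemandedSplit splitDemanded(const std::vector<std::optional<bool>> &Mask,
                                LaneMask DemandedElts) {
      // Pointers start fully demanded even for undemanded elements: dropping
      // a live pointer could introduce a fault the original program lacked.
      DemandedSplit S{LaneMask().set(), DemandedElts};
      for (unsigned i = 0; i < VWidth; ++i) {
        if (!Mask[i])
          continue;                       // unknown lane: keep both demanded
        if (*Mask[i])
          S.DemandedPassThrough.reset(i); // lane loads: passthrough unused
        else
          S.DemandedPtrs.reset(i);        // lane masked off: pointer unused
      }
      return S;
    }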
