Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
 lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 112
 1 file changed, 78 insertions(+), 34 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 9bf87d024607..e0d85c4b49ae 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1,9 +1,8 @@
//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -366,10 +365,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownBits InputKnown(SrcBitWidth);
if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
return I;
- Known = InputKnown.zextOrTrunc(BitWidth);
- // Any top bits are known to be zero.
- if (BitWidth > SrcBitWidth)
- Known.Zero.setBitsFrom(SrcBitWidth);
+ assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
+ Known = InputKnown.zextOrTrunc(BitWidth,
+ true /* ExtendedBitsAreKnownZero */);
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
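
A worked example of what the new two-argument zextOrTrunc call computes (the bit values are illustrative, not from the patch): extending known bits from an 8-bit source to 16 bits with ExtendedBitsAreKnownZero set marks every bit at and above bit 8 as known zero, which is exactly what the removed setBitsFrom(SrcBitWidth) line did by hand.

    InputKnown (8 bits):   Zero = 11110000          One = 00000001
    Known     (16 bits):   Zero = 1111111111110000  One = 0000000000000001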
@@ -967,26 +965,16 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
}
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
+///
+/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
+/// struct returns.
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DMaskIdx,
- int TFCIdx) {
+ int DMaskIdx) {
unsigned VWidth = II->getType()->getVectorNumElements();
if (VWidth == 1)
return nullptr;
- // Need to change to new instruction format
- ConstantInt *TFC = nullptr;
- bool TFELWEEnabled = false;
- if (TFCIdx > 0) {
- TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx));
- TFELWEEnabled = TFC->getZExtValue() & 0x1 // TFE
- || TFC->getZExtValue() & 0x2; // LWE
- }
-
- if (TFELWEEnabled)
- return nullptr; // TFE not yet supported
-
ConstantInt *NewDMask = nullptr;
if (DMaskIdx < 0) {
@@ -994,10 +982,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
// below.
DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
} else {
- ConstantInt *DMask = dyn_cast<ConstantInt>(II->getArgOperand(DMaskIdx));
- if (!DMask)
- return nullptr; // non-constant dmask is not supported by codegen
-
+ ConstantInt *DMask = cast<ConstantInt>(II->getArgOperand(DMaskIdx));
unsigned DMaskVal = DMask->getZExtValue() & 0xf;
// Mask off values that are undefined because the dmask doesn't cover them
@@ -1018,8 +1003,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
}
- // TODO: Handle 3 vectors when supported in code gen.
- unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countPopulation());
+ unsigned NewNumElts = DemandedElts.countPopulation();
if (!NewNumElts)
return UndefValue::get(II->getType());
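
A hypothetical sketch of what dropping the PowerOf2Ceil rounding buys (the intrinsic name and operand list are abbreviated and assumed, not taken from the patch): if only the first three channels of a four-channel image sample are demanded, the call can now be rewritten to a three-element return type with dmask 0x7, where the old code would have rounded back up to four elements.

    ; before: dmask = 0xf, four channels returned
    %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, ...)
    ; after, with only lanes 0-2 demanded: dmask = 0x7, three channels returned
    %v = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32(i32 7, ...)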
@@ -1035,13 +1019,12 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
getIntrinsicInfoTableEntries(IID, Table);
ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
+ // Validate function argument and return types, extracting overloaded types
+ // along the way.
FunctionType *FTy = II->getCalledFunction()->getFunctionType();
SmallVector<Type *, 6> OverloadTys;
- Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, OverloadTys);
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Intrinsic::matchIntrinsicType(FTy->getParamType(i), TableRef, OverloadTys);
+ Intrinsic::matchIntrinsicSignature(FTy, TableRef, OverloadTys);
- // Get the new return type overload of the intrinsic.
Module *M = II->getParent()->getParent()->getParent();
Type *EltTy = II->getType()->getVectorElementType();
Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts);
@@ -1184,6 +1167,39 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
switch (I->getOpcode()) {
default: break;
+ case Instruction::GetElementPtr: {
+ // The LangRef requires that struct geps have all constant indices. As
+ // such, we can't convert any operand to partial undef.
+ auto mayIndexStructType = [](GetElementPtrInst &GEP) {
+ for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
+ I != E; I++)
+ if (I.isStruct())
+ return true;
+ return false;
+ };
+ if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
+ break;
+
+ // Conservatively track the demanded elements back through any vector
+ // operands we may have. We know there must be at least one, or we
+ // wouldn't have a vector result to get here. Note that we intentionally
+ // merge the undef bits here since gepping with either an undef base or
+ // index results in undef.
+ for (unsigned i = 0; i < I->getNumOperands(); i++) {
+ if (isa<UndefValue>(I->getOperand(i))) {
+ // If the entire vector is undefined, just return this info.
+ UndefElts = EltMask;
+ return nullptr;
+ }
+ if (I->getOperand(i)->getType()->isVectorTy()) {
+ APInt UndefEltsOp(VWidth, 0);
+ simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
+ UndefElts |= UndefEltsOp;
+ }
+ }
+
+ break;
+ }
case Instruction::InsertElement: {
// If this is a variable index, we don't know which element it overwrites,
// so demand exactly the same input as we produce.
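
A minimal IR sketch of the new GetElementPtr case above (the names are illustrative): a GEP with a vector index produces a vector of pointers, so demanded lanes of the result map one-to-one onto lanes of the vector index, and an undef base or index lane makes the corresponding result lane undef.

    ; only lane 0 of the <4 x float*> result is demanded by the extract below,
    ; so lanes 1-3 of %idx are not demanded and may be simplified
    %ptrs = getelementptr float, float* %base, <4 x i32> %idx
    %p0   = extractelement <4 x float*> %ptrs, i32 0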
@@ -1430,6 +1446,30 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
if (!II) break;
switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_gather: // fallthrough
+ case Intrinsic::masked_load: {
+ // Subtlety: If we load from a pointer, the pointer must be valid
+ // regardless of whether the element is demanded. Doing otherwise risks
+ // segfaults which didn't exist in the original program.
+ APInt DemandedPtrs(APInt::getAllOnesValue(VWidth)),
+ DemandedPassThrough(DemandedElts);
+ if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
+ for (unsigned i = 0; i < VWidth; i++) {
+ Constant *CElt = CV->getAggregateElement(i);
+ if (CElt->isNullValue())
+ DemandedPtrs.clearBit(i);
+ else if (CElt->isAllOnesValue())
+ DemandedPassThrough.clearBit(i);
+ }
+ if (II->getIntrinsicID() == Intrinsic::masked_gather)
+ simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2);
+ simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3);
+
+ // Output elements are undefined if the corresponding elements from both
+ // sources are undefined.
+ // TODO: This can be strengthened via the mask as well.
+ UndefElts = UndefElts2 & UndefElts3;
+ break;
+ }
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
// The instructions for these intrinsics are speced to zero upper bits not
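
A minimal sketch of the lane accounting done for masked.gather above (the mangled intrinsic name is assumed and the alignment and mask values are illustrative): with a constant mask, a lane whose mask bit is true never reads the passthru, and a lane whose mask bit is false never dereferences its pointer. Pointer lanes are dropped only when the mask proves them unused, never merely because the result lane is undemanded, so no new faults can be introduced.

    %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(
             <4 x i32*> %ptrs, i32 4,
             <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %pass)
    ; lanes 0 and 2 come from memory, so passthru lanes 0 and 2 are not demanded;
    ; lanes 1 and 3 come from %pass, so pointer lanes 1 and 3 are not demanded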
@@ -1639,8 +1679,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
default: {
if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
- return simplifyAMDGCNMemoryIntrinsicDemanded(
- II, DemandedElts, 0, II->getNumArgOperands() - 2);
+ return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
break;
}
@@ -1667,5 +1706,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts &= UndefElts2;
}
+ // If we've proven all of the lanes undef, return an undef value.
+ // TODO: Intersect w/demanded lanes
+ if (UndefElts.isAllOnesValue())
+ return UndefValue::get(I->getType());
+
return MadeChange ? I : nullptr;
}
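
As an illustration of the new all-lanes-undef early out (the example is not from the patch): a shuffle whose selected lanes all come from an undef operand ends up with every lane marked undef, so the whole value can now be replaced directly.

    %s = shufflevector <4 x i32> undef, <4 x i32> %x,
                       <4 x i32> <i32 0, i32 1, i32 0, i32 1>
    ; every result lane reads the undef first operand, so %s folds to undef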