Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp  255
1 file changed, 150 insertions, 105 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 425f5ce384be..9bf87d024607 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -314,11 +314,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known.One = std::move(IKnownOne);
break;
}
- case Instruction::Select:
- // If this is a select as part of a min/max pattern, don't simplify any
- // further in case we break the structure.
+ case Instruction::Select: {
Value *LHS, *RHS;
- if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
+ SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
+ if (SPF == SPF_UMAX) {
+ // UMax(A, C) == A if the lowest non-zero bit of DemandedMask is higher
+ // than the highest non-zero bit of C.
+ const APInt *C;
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ if (match(RHS, m_APInt(C)) && CTZ >= C->getActiveBits())
+ return LHS;
+ } else if (SPF == SPF_UMIN) {
+ // UMin(A, C) == A if the lowest non-zero bit of DemandedMask is higher
+ // than the highest non-one bit of C.
+ // This follows from applying De Morgan's laws to the umax case above.
+ const APInt *C;
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ if (match(RHS, m_APInt(C)) &&
+ CTZ >= C->getBitWidth() - C->countLeadingOnes())
+ return LHS;
+ }
+
+ // If this is a select as part of any other min/max pattern, don't simplify
+ // any further in case we break the structure.
+ if (SPF != SPF_UNKNOWN)
return nullptr;
if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
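For intuition, the two folds above can be checked exhaustively at a small bit width. The following is a standalone sketch (plain C++, not part of the patch) that brute-forces both claims over 8-bit values, using loop-based stand-ins for the APInt helpers countTrailingZeros, getActiveBits, and countLeadingOnes:

```cpp
// Exhaustive 8-bit check of the umax/umin demanded-bits folds (illustrative
// sketch only; plain unsigned arithmetic stands in for APInt).
#include <cassert>
#include <cstdint>

// Trailing zero count of an 8-bit value (8 for zero), as in APInt.
static unsigned countTrailingZeros8(uint8_t V) {
  unsigned N = 0;
  while (N < 8 && !(V & (1u << N)))
    ++N;
  return N;
}

// Index of the highest set bit plus one, as in APInt::getActiveBits.
static unsigned activeBits8(uint8_t V) {
  unsigned N = 8;
  while (N > 0 && !(V & (1u << (N - 1))))
    --N;
  return N;
}

// Leading one count, as in APInt::countLeadingOnes.
static unsigned countLeadingOnes8(uint8_t V) {
  unsigned N = 0;
  while (N < 8 && (V & (1u << (7 - N))))
    ++N;
  return N;
}

int main() {
  for (unsigned Mask = 0; Mask <= 0xFF; ++Mask) {
    unsigned CTZ = countTrailingZeros8(Mask);
    for (unsigned C = 0; C <= 0xFF; ++C) {
      bool UMaxFolds = CTZ >= activeBits8(C);
      bool UMinFolds = CTZ >= 8u - countLeadingOnes8(C);
      for (unsigned A = 0; A <= 0xFF; ++A) {
        unsigned UMax = A > C ? A : C;
        unsigned UMin = A < C ? A : C;
        // If the fold condition holds, replacing the min/max with A must not
        // change any demanded bit.
        if (UMaxFolds)
          assert((UMax & Mask) == (A & Mask));
        if (UMinFolds)
          assert((UMin & Mask) == (A & Mask));
      }
    }
  }
  return 0;
}
```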
@@ -336,6 +357,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known.One = RHSKnown.One & LHSKnown.One;
Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
break;
+ }
case Instruction::ZExt:
case Instruction::Trunc: {
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
@@ -668,6 +690,30 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// TODO: Could compute known zero/one bits based on the input.
break;
}
+ case Intrinsic::fshr:
+ case Intrinsic::fshl: {
+ const APInt *SA;
+ if (!match(I->getOperand(2), m_APInt(SA)))
+ break;
+
+ // Normalize to funnel shift left. APInt shifts of BitWidth are well-
+ // defined, so no need to special-case zero shifts here.
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
+
+ APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
+ APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
+ if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
+ return I;
+
+ Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
+ RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
+ Known.One = LHSKnown.One.shl(ShiftAmt) |
+ RHSKnown.One.lshr(BitWidth - ShiftAmt);
+ break;
+ }
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
@@ -923,11 +969,24 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DMaskIdx) {
+ int DMaskIdx,
+ int TFCIdx) {
unsigned VWidth = II->getType()->getVectorNumElements();
if (VWidth == 1)
return nullptr;
+ // If TFE or LWE is enabled, the intrinsic returns an extra status element,
+ // which would require rewriting to a new instruction format; bail out then.
+ ConstantInt *TFC = nullptr;
+ bool TFELWEEnabled = false;
+ if (TFCIdx > 0) {
+ TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx));
+ if (!TFC)
+ return nullptr;
+ TFELWEEnabled = (TFC->getZExtValue() & 0x1) // TFE
+ || (TFC->getZExtValue() & 0x2); // LWE
+ }
+
+ if (TFELWEEnabled)
+ return nullptr; // TFE/LWE not yet supported
+
ConstantInt *NewDMask = nullptr;
if (DMaskIdx < 0) {
@@ -1052,8 +1111,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts = 0;
- // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
- if (Constant *C = dyn_cast<Constant>(V)) {
+ if (auto *C = dyn_cast<Constant>(V)) {
// Check if this is identity. If so, return 0 since we are not simplifying
// anything.
if (DemandedElts.isAllOnesValue())
@@ -1061,7 +1119,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Type *EltTy = cast<VectorType>(V->getType())->getElementType();
Constant *Undef = UndefValue::get(EltTy);
-
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0; i != VWidth; ++i) {
if (!DemandedElts[i]) { // If not demanded, set to undef.
@@ -1109,9 +1166,21 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (!I) return nullptr; // Only analyze instructions.
bool MadeChange = false;
+ auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
+ APInt Demanded, APInt &Undef) {
+ auto *II = dyn_cast<IntrinsicInst>(Inst);
+ Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
+ if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
+ if (II)
+ II->setArgOperand(OpNum, V);
+ else
+ Inst->setOperand(OpNum, V);
+ MadeChange = true;
+ }
+ };
+
APInt UndefElts2(VWidth, 0);
APInt UndefElts3(VWidth, 0);
- Value *TmpV;
switch (I->getOpcode()) {
default: break;
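The simplifyAndSetOp lambda above replaces the repeated TmpV/setOperand stanza that the rest of this patch deletes. A rough, self-contained illustration of the pattern (hypothetical Node type and simplifyValue helper, not the LLVM API):

```cpp
// Self-contained illustration of the helper-lambda refactor above: a lambda
// capturing MadeChange by reference replaces the repeated
// "simplify; if changed, write the operand back" boilerplate.
// Node and simplifyValue are hypothetical stand-ins, not LLVM types.
#include <cstdio>
#include <vector>

struct Node {
  std::vector<int> Operands;
};

// Hypothetical simplifier: returns a new value if the operand can be
// simplified under the demanded mask, or -1 if nothing changed.
static int simplifyValue(int Op, unsigned Demanded) {
  int Simplified = Op & (int)Demanded;
  return Simplified != Op ? Simplified : -1;
}

static bool simplifyOperands(Node &N, unsigned Demanded) {
  bool MadeChange = false;
  auto simplifyAndSetOp = [&](unsigned OpNum, unsigned DemandedBits) {
    if (int V = simplifyValue(N.Operands[OpNum], DemandedBits); V != -1) {
      N.Operands[OpNum] = V;
      MadeChange = true;
    }
  };
  for (unsigned i = 0, e = N.Operands.size(); i != e; ++i)
    simplifyAndSetOp(i, Demanded);
  return MadeChange;
}

int main() {
  Node N{{0xF3, 0x0F}};
  std::printf("changed: %d\n", simplifyOperands(N, 0x0F));
  return 0;
}
```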
@@ -1122,9 +1191,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (!Idx) {
// Note that we can't propagate undef elt info, because we don't know
// which elt is getting updated.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts2);
break;
}
@@ -1134,9 +1201,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt PreInsertDemandedElts = DemandedElts;
if (IdxNo < VWidth)
PreInsertDemandedElts.clearBit(IdxNo);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), PreInsertDemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts);
// If this is inserting an element that isn't demanded, remove this
// insertelement.
@@ -1169,14 +1235,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
APInt LHSUndefElts(LHSVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- LHSUndefElts, Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
APInt RHSUndefElts(LHSVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
- RHSUndefElts, Depth + 1);
- if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);
bool NewUndefElts = false;
unsigned LHSIdx = -1u, LHSValIdx = -1u;
@@ -1260,32 +1322,43 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
case Instruction::Select: {
- APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
- if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
+ // If this is a vector select, try to transform the select condition based
+ // on the current demanded elements.
+ SelectInst *Sel = cast<SelectInst>(I);
+ if (Sel->getCondition()->getType()->isVectorTy()) {
+ // TODO: We are not doing anything with UndefElts based on this call.
+ // It is overwritten below based on the other select operands. If an
+ // element of the select condition is known undef, then we are free to
+ // choose the output value from either arm of the select. If we know that
+ // one of those values is undef, then the output can be undef.
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
+ }
+
+ // Next, see if we can transform the arms of the select.
+ APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
+ if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
for (unsigned i = 0; i < VWidth; i++) {
+ // isNullValue() always returns false when called on a ConstantExpr.
+ // Skip constant expressions to avoid propagating incorrect information.
Constant *CElt = CV->getAggregateElement(i);
- // Method isNullValue always returns false when called on a
- // ConstantExpr. If CElt is a ConstantExpr then skip it in order to
- // to avoid propagating incorrect information.
if (isa<ConstantExpr>(CElt))
continue;
+ // TODO: If a select condition element is undef, we can demand from
+ // either side. If one side is known undef, choosing that side would
+ // propagate undef.
if (CElt->isNullValue())
- LeftDemanded.clearBit(i);
+ DemandedLHS.clearBit(i);
else
- RightDemanded.clearBit(i);
+ DemandedRHS.clearBit(i);
}
}
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
- Depth + 1);
- if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
-
- TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
- UndefElts2, Depth + 1);
- if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2);
+ simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3);
- // Output elements are undefined if both are undefined.
- UndefElts &= UndefElts2;
+ // Output elements are undefined if the element from each arm is undefined.
+ // TODO: This can be improved. See comment in select condition handling.
+ UndefElts = UndefElts2 & UndefElts3;
break;
}
case Instruction::BitCast: {
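To make the DemandedLHS/DemandedRHS split above concrete, here is a tiny standalone model (4 lanes, bitmask arithmetic, not LLVM code) of how a constant vector condition narrows what each select arm must provide:

```cpp
// Tiny model of the select-condition demanded-elements split above:
// a lane whose condition bit is true is read only from the true arm, so the
// false arm does not need to provide it, and vice versa. Not LLVM code.
#include <cassert>

int main() {
  const bool Cond[4] = {true, false, true, false}; // constant <4 x i1> condition
  unsigned DemandedElts = 0b1011;                  // caller uses lanes 0, 1, 3

  unsigned DemandedLHS = DemandedElts; // lanes needed from the true arm
  unsigned DemandedRHS = DemandedElts; // lanes needed from the false arm
  for (unsigned i = 0; i != 4; ++i) {
    if (Cond[i])
      DemandedRHS &= ~(1u << i); // lane taken from the true arm
    else
      DemandedLHS &= ~(1u << i); // lane taken from the false arm
  }

  // Lane 0 comes from the true arm; lanes 1 and 3 come from the false arm.
  assert(DemandedLHS == 0b0001 && DemandedRHS == 0b1010);
  return 0;
}
```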
@@ -1323,12 +1396,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) {
- I->setOperand(0, TmpV);
- MadeChange = true;
- }
+ simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2);
if (VWidth == InVWidth) {
UndefElts = UndefElts2;
@@ -1353,29 +1421,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
break;
}
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // div/rem demand all inputs, because they don't want divide by zero.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
- Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
-
- // Output elements are undefined if both are undefined. Consider things
- // like undef&0. The result is known zero, not undef.
- UndefElts &= UndefElts2;
- break;
case Instruction::FPTrunc:
case Instruction::FPExt:
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
- Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
break;
case Instruction::Call: {
@@ -1395,9 +1443,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only the lower element is used.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// Only the lower element is undefined. The high elements are zero.
UndefElts = UndefElts[0];
@@ -1406,9 +1452,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Unary scalar-as-vector operations that work column-wise.
case Intrinsic::x86_sse_rcp_ss:
case Intrinsic::x86_sse_rsqrt_ss:
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1428,9 +1472,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
case Intrinsic::x86_sse2_cmp_sd: {
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1440,9 +1482,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only lower element is used for operand 1.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
// Lower element is undefined if both lower elements are undefined.
// Consider things like undef&0. The result is known zero, not undef.
@@ -1459,9 +1499,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Don't use the low element of operand 0.
APInt DemandedElts2 = DemandedElts;
DemandedElts2.clearBit(0);
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts2, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1471,9 +1509,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only lower element is used for operand 1.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
// Take the high undef elements from operand 0 and take the lower element
// from operand 1.
@@ -1497,9 +1533,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_avx512_mask_sub_sd_round:
case Intrinsic::x86_avx512_mask_max_sd_round:
case Intrinsic::x86_avx512_mask_min_sd_round:
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1509,12 +1543,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only lower element is used for operand 1 and 2.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
- UndefElts3, Depth + 1);
- if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
+ simplifyAndSetOp(II, 2, DemandedElts, UndefElts3);
// Lower element is undefined if all three lower elements are undefined.
// Consider things like undef&0. The result is known zero, not undef.
@@ -1559,14 +1589,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
// Demand elements from the operand.
- auto *Op = II->getArgOperand(OpNum);
APInt OpUndefElts(InnerVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(Op, OpDemandedElts, OpUndefElts,
- Depth + 1);
- if (TmpV) {
- II->setArgOperand(OpNum, TmpV);
- MadeChange = true;
- }
+ simplifyAndSetOp(II, OpNum, OpDemandedElts, OpUndefElts);
// Pack the operand's UNDEF elements, one lane at a time.
OpUndefElts = OpUndefElts.zext(VWidth);
@@ -1594,10 +1618,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// PERMV
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps: {
- Value *Op1 = II->getArgOperand(1);
- TmpV = SimplifyDemandedVectorElts(Op1, DemandedElts, UndefElts,
- Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts);
break;
}
@@ -1611,16 +1632,40 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format:
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_buffer_load_format:
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_load_format:
return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
default: {
if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
- return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
+ return simplifyAMDGCNMemoryIntrinsicDemanded(
+ II, DemandedElts, 0, II->getNumArgOperands() - 2);
break;
}
- }
+ } // switch on IntrinsicID
break;
+ } // case Call
+ } // switch on Opcode
+
+ // TODO: We bail completely on integer div/rem and shifts because they have
+ // UB/poison potential, but that should be refined.
+ BinaryOperator *BO;
+ if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
+ simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);
+
+ // Any change to an instruction with potential poison must clear those flags
+ // because we cannot guarantee those constraints now. Later analysis may
+ // determine that it is safe to re-apply the flags.
+ if (MadeChange)
+ BO->dropPoisonGeneratingFlags();
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef & 0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
}
- }
+
return MadeChange ? I : nullptr;
}
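
A closing note on the UndefElts bookkeeping used throughout: the per-lane undef sets of the two operands are intersected, not unioned, because a lane with only one undef input can still have a fully known value (undef & 0 is 0, not undef), and any rewrite of a binary operator additionally drops its poison-generating flags since the simplified operands may no longer justify them. A minimal sketch of the intersection rule (bitmask model, not LLVM code):

```cpp
// Why the code intersects (rather than unions) per-lane undef sets: a lane
// with only one undef operand may still be fully determined (e.g. undef & 0
// is 0), so only lanes where *both* operands are undef may be reported undef.
// Bitmask model, not LLVM code.
#include <cassert>

int main() {
  unsigned UndefEltsLHS = 0b0011; // lanes 0 and 1 of the LHS are undef
  unsigned UndefEltsRHS = 0b0110; // lanes 1 and 2 of the RHS are undef

  unsigned UndefElts = UndefEltsLHS & UndefEltsRHS;
  assert(UndefElts == 0b0010); // only lane 1 is undef in the result
  return 0;
}
```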