summaryrefslogtreecommitdiff
path: root/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp146
1 files changed, 110 insertions, 36 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 743d51483ea16..f3268d2c34714 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -22,10 +22,9 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// ShrinkDemandedConstant - Check to see if the specified operand of the
-/// specified instruction is a constant integer. If so, check to see if there
-/// are any bits set in the constant that are not demanded. If so, shrink the
-/// constant and return true.
+/// Check to see if the specified operand of the specified instruction is a
+/// constant integer. If so, check to see if there are any bits set in the
+/// constant that are not demanded. If so, shrink the constant and return true.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
APInt Demanded) {
assert(I && "No instruction?");
@@ -49,9 +48,8 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
-/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
-/// SimplifyDemandedBits knows about. See if the instruction has any
-/// properties that allow us to simplify its operands.
+/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
+/// the instruction has any properties that allow us to simplify its operands.
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
@@ -61,14 +59,14 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
0, &Inst);
if (!V) return false;
if (V == &Inst) return true;
- ReplaceInstUsesWith(Inst, V);
+ replaceInstUsesWith(Inst, V);
return true;
}
-/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
-/// specified instruction operand if possible, updating it in place. It returns
-/// true if it made any change and false otherwise.
-bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
+/// This form of SimplifyDemandedBits simplifies the specified instruction
+/// operand if possible, updating it in place. It returns true if it made any
+/// change and false otherwise.
+bool InstCombiner::SimplifyDemandedBits(Use &U, const APInt &DemandedMask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) {
auto *UserI = dyn_cast<Instruction>(U.getUser());
@@ -80,21 +78,22 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
}
-/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
-/// value based on the demanded bits. When this function is called, it is known
-/// that only the bits set in DemandedMask of the result of V are ever used
-/// downstream. Consequently, depending on the mask and V, it may be possible
-/// to replace V with a constant or one of its operands. In such cases, this
-/// function does the replacement and returns true. In all other cases, it
-/// returns false after analyzing the expression and setting KnownOne and known
-/// to be one in the expression. KnownZero contains all the bits that are known
-/// to be zero in the expression. These are provided to potentially allow the
-/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
-/// the expression. KnownOne and KnownZero always follow the invariant that
-/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
-/// the bits in KnownOne and KnownZero may only be accurate for those bits set
-/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
-/// and KnownOne must all be the same.
+/// This function attempts to replace V with a simpler value based on the
+/// demanded bits. When this function is called, it is known that only the bits
+/// set in DemandedMask of the result of V are ever used downstream.
+/// Consequently, depending on the mask and V, it may be possible to replace V
+/// with a constant or one of its operands. In such cases, this function does
+/// the replacement and returns true. In all other cases, it returns false after
+/// analyzing the expression and setting KnownOne and known to be one in the
+/// expression. KnownZero contains all the bits that are known to be zero in the
+/// expression. These are provided to potentially allow the caller (which might
+/// recursively be SimplifyDemandedBits itself) to simplify the expression.
+/// KnownOne and KnownZero always follow the invariant that:
+/// KnownOne & KnownZero == 0.
+/// That is, a bit can't be both 1 and 0. Note that the bits in KnownOne and
+/// KnownZero may only be accurate for those bits set in DemandedMask. Note also
+/// that the bitwidth of V, DemandedMask, KnownZero and KnownOne must all be the
+/// same.
///
/// This returns null if it did not change anything and it permits no
/// simplification. This returns V itself if it did some simplification of V's
@@ -768,6 +767,34 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// TODO: Could compute known zero/one bits based on the input.
break;
}
+ case Intrinsic::x86_mmx_pmovmskb:
+ case Intrinsic::x86_sse_movmsk_ps:
+ case Intrinsic::x86_sse2_movmsk_pd:
+ case Intrinsic::x86_sse2_pmovmskb_128:
+ case Intrinsic::x86_avx_movmsk_ps_256:
+ case Intrinsic::x86_avx_movmsk_pd_256:
+ case Intrinsic::x86_avx2_pmovmskb: {
+ // MOVMSK copies the vector elements' sign bits to the low bits
+ // and zeros the high bits.
+ unsigned ArgWidth;
+ if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
+ ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
+ } else {
+ auto Arg = II->getArgOperand(0);
+ auto ArgType = cast<VectorType>(Arg->getType());
+ ArgWidth = ArgType->getNumElements();
+ }
+
+ // If we don't need any of low bits then return zero,
+ // we know that DemandedMask is non-zero already.
+ APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
+ if (DemandedElts == 0)
+ return ConstantInt::getNullValue(VTy);
+
+ // We know that the upper bits are set to zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - ArgWidth);
+ return nullptr;
+ }
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
return nullptr;
@@ -802,7 +829,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
- Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
+ Instruction *Shl,
+ const APInt &DemandedMask,
+ APInt &KnownZero,
+ APInt &KnownOne) {
const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
@@ -865,10 +895,10 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
return nullptr;
}
-/// SimplifyDemandedVectorElts - The specified value produces a vector with
-/// any number of elements. DemandedElts contains the set of elements that are
-/// actually used by the caller. This method analyzes which elements of the
-/// operand are undef and returns that information in UndefElts.
+/// The specified value produces a vector with any number of elements.
+/// DemandedElts contains the set of elements that are actually used by the
+/// caller. This method analyzes which elements of the operand are undef and
+/// returns that information in UndefElts.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified, then the resultant value is
@@ -876,7 +906,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt &UndefElts,
unsigned Depth) {
- unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+ unsigned VWidth = V->getType()->getVectorNumElements();
APInt EltMask(APInt::getAllOnesValue(VWidth));
assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
@@ -1179,16 +1209,42 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
switch (II->getIntrinsicID()) {
default: break;
- // Binary vector operations that work column-wise. A dest element is a
- // function of the corresponding input elements from the two inputs.
+ // Unary scalar-as-vector operations that work column-wise.
+ case Intrinsic::x86_sse_rcp_ss:
+ case Intrinsic::x86_sse_rsqrt_ss:
+ case Intrinsic::x86_sse_sqrt_ss:
+ case Intrinsic::x86_sse2_sqrt_sd:
+ case Intrinsic::x86_xop_vfrcz_ss:
+ case Intrinsic::x86_xop_vfrcz_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (DemandedElts.getLoBits(1) != 1)
+ return II->getArgOperand(0);
+ // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
+ // checks).
+ break;
+
+ // Binary scalar-as-vector operations that work column-wise. A dest element
+ // is a function of the corresponding input elements from the two inputs.
+ case Intrinsic::x86_sse_add_ss:
case Intrinsic::x86_sse_sub_ss:
case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_div_ss:
case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse2_add_sd:
case Intrinsic::x86_sse2_sub_sd:
case Intrinsic::x86_sse2_mul_sd:
+ case Intrinsic::x86_sse2_div_sd:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse41_round_ss:
+ case Intrinsic::x86_sse41_round_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
@@ -1201,11 +1257,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (DemandedElts == 1) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::x86_sse_add_ss:
case Intrinsic::x86_sse_sub_ss:
case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_div_ss:
+ case Intrinsic::x86_sse2_add_sd:
case Intrinsic::x86_sse2_sub_sd:
case Intrinsic::x86_sse2_mul_sd:
- // TODO: Lower MIN/MAX/ABS/etc
+ case Intrinsic::x86_sse2_div_sd:
+ // TODO: Lower MIN/MAX/etc.
Value *LHS = II->getArgOperand(0);
Value *RHS = II->getArgOperand(1);
// Extract the element as scalars.
@@ -1216,6 +1276,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_sse_add_ss:
+ case Intrinsic::x86_sse2_add_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFAdd(LHS, RHS,
+ II->getName()), *II);
+ break;
case Intrinsic::x86_sse_sub_ss:
case Intrinsic::x86_sse2_sub_sd:
TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
@@ -1226,6 +1291,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
II->getName()), *II);
break;
+ case Intrinsic::x86_sse_div_ss:
+ case Intrinsic::x86_sse2_div_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFDiv(LHS, RHS,
+ II->getName()), *II);
+ break;
}
Instruction *New =
@@ -1238,6 +1308,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
}
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (DemandedElts.getLoBits(1) != 1)
+ return II->getArgOperand(0);
+
// Output elements are undefined if both are undefined. Consider things
// like undef&0. The result is known zero, not undef.
UndefElts &= UndefElts2;