diff options
Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 562 |
1 files changed, 383 insertions, 179 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 8b930bd95dfe..4e6f02058d83 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -30,18 +30,20 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, assert(I && "No instruction?"); assert(OpNo < I->getNumOperands() && "Operand index too large"); - // If the operand is not a constant integer, nothing to do. - ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo)); - if (!OpC) return false; + // The operand must be a constant integer or splat integer. + Value *Op = I->getOperand(OpNo); + const APInt *C; + if (!match(Op, m_APInt(C))) + return false; // If there are no bits set that aren't demanded, nothing to do. - Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); - if ((~Demanded & OpC->getValue()) == 0) + Demanded = Demanded.zextOrTrunc(C->getBitWidth()); + if ((~Demanded & *C) == 0) return false; // This instruction is producing bits that are not demanded. Shrink the RHS. - Demanded &= OpC->getValue(); - I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded)); + Demanded &= *C; + I->setOperand(OpNo, ConstantInt::get(Op->getType(), Demanded)); return true; } @@ -66,12 +68,13 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { /// This form of SimplifyDemandedBits simplifies the specified instruction /// operand if possible, updating it in place. It returns true if it made any /// change and false otherwise. -bool InstCombiner::SimplifyDemandedBits(Use &U, const APInt &DemandedMask, +bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo, + const APInt &DemandedMask, APInt &KnownZero, APInt &KnownOne, unsigned Depth) { - auto *UserI = dyn_cast<Instruction>(U.getUser()); + Use &U = I->getOperandUse(OpNo); Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, - KnownOne, Depth, UserI); + KnownOne, Depth, I); if (!NewVal) return false; U = NewVal; return true; @@ -114,9 +117,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownOne.getBitWidth() == BitWidth && "Value *V, DemandedMask, KnownZero and KnownOne " "must have same BitWidth"); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - // We know all of the bits for a constant! - KnownOne = CI->getValue() & DemandedMask; + const APInt *C; + if (match(V, m_APInt(C))) { + // We know all of the bits for a scalar constant or a splat vector constant! + KnownOne = *C & DemandedMask; KnownZero = ~KnownOne & DemandedMask; return nullptr; } @@ -138,9 +142,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (Depth == 6) // Limit search depth. return nullptr; - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - Instruction *I = dyn_cast<Instruction>(V); if (!I) { computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); @@ -151,107 +152,43 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // we can't do any simplifications of the operands, because DemandedMask // only reflects the bits demanded by *one* of the users. if (Depth != 0 && !I->hasOneUse()) { - // Despite the fact that we can't simplify this instruction in all User's - // context, we can at least compute the knownzero/knownone bits, and we can - // do simplifications that apply to *just* the one user if we know that - // this instruction has a simpler value in that context. - if (I->getOpcode() == Instruction::And) { - // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, - CxtI); - - // If all of the demanded bits are known 1 on one side, return the other. - // These bits cannot contribute to the result of the 'and' in this - // context. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) - return I->getOperand(1); - - // If all of the demanded bits in the inputs are known zeros, return zero. - if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); - - } else if (I->getOpcode() == Instruction::Or) { - // We can simplify (X|Y) -> X or Y in the user's context if we know that - // only bits from X or Y are demanded. - - // If either the LHS or the RHS are One, the result is One. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, - CxtI); - - // If all of the demanded bits are known zero on one side, return the - // other. These bits cannot contribute to the result of the 'or' in this - // context. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) - return I->getOperand(1); - } else if (I->getOpcode() == Instruction::Xor) { - // We can simplify (X^Y) -> X or Y in the user's context if we know that - // only bits from X or Y are demanded. - - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, - CxtI); - - // If all of the demanded bits are known zero on one side, return the - // other. - if ((DemandedMask & RHSKnownZero) == DemandedMask) - return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) - return I->getOperand(1); - } - - // Compute the KnownZero/KnownOne bits to simplify things downstream. - computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI); - return nullptr; + return SimplifyMultipleUseDemandedBits(I, DemandedMask, KnownZero, KnownOne, + Depth, CxtI); } + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + // If this is the root being simplified, allow it to have multiple uses, // just set the DemandedMask to all bits so that we can try to simplify the // operands. This allows visitTruncInst (for example) to simplify the // operand of a trunc without duplicating all the logic below. if (Depth == 0 && !V->hasOneUse()) - DemandedMask = APInt::getAllOnesValue(BitWidth); + DemandedMask.setAllBits(); switch (I->getOpcode()) { default: computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI); break; - case Instruction::And: + case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, - RHSKnownOne, Depth + 1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, - LHSKnownZero, LHSKnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne, + Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownZero, LHSKnownZero, + LHSKnownOne, Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + // Output known-0 are known to be clear if zero in either the LHS | RHS. + APInt IKnownZero = RHSKnownZero | LHSKnownZero; + // Output known-1 bits are only known if set in both the LHS & RHS. + APInt IKnownOne = RHSKnownOne & LHSKnownOne; + // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)| - (RHSKnownOne & LHSKnownOne))) == DemandedMask) - return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne); + if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and'. @@ -262,34 +199,33 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, (DemandedMask & ~RHSKnownZero)) return I->getOperand(1); - // If all of the demanded bits in the inputs are known zeros, return zero. - if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); - // If the RHS is a constant, see if we can simplify it. if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) return I; - // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne = RHSKnownOne & LHSKnownOne; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero = RHSKnownZero | LHSKnownZero; + KnownZero = std::move(IKnownZero); + KnownOne = std::move(IKnownOne); break; - case Instruction::Or: + } + case Instruction::Or: { // If either the LHS or the RHS are One, the result is One. - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, - RHSKnownOne, Depth + 1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, - LHSKnownZero, LHSKnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne, + Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownOne, LHSKnownZero, + LHSKnownOne, Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + // Output known-0 bits are only known if clear in both the LHS & RHS. + APInt IKnownZero = RHSKnownZero & LHSKnownZero; + // Output known-1 are known to be set if set in either the LHS | RHS. + APInt IKnownOne = RHSKnownOne | LHSKnownOne; + // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)| - (RHSKnownOne | LHSKnownOne))) == DemandedMask) - return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne); + if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. @@ -313,16 +249,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (ShrinkDemandedConstant(I, 1, DemandedMask)) return I; - // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero = RHSKnownZero & LHSKnownZero; - // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne = RHSKnownOne | LHSKnownOne; + KnownZero = std::move(IKnownZero); + KnownOne = std::move(IKnownOne); break; + } case Instruction::Xor: { - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, - RHSKnownOne, Depth + 1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero, - LHSKnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne, + Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedMask, LHSKnownZero, LHSKnownOne, + Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); @@ -400,9 +335,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // Output known-0 bits are known if clear or set in both the LHS & RHS. - KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne); + KnownZero = std::move(IKnownZero); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero); + KnownOne = std::move(IKnownOne); break; } case Instruction::Select: @@ -412,10 +347,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN) return nullptr; - if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero, - RHSKnownOne, Depth + 1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero, - LHSKnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnownZero, RHSKnownOne, + Depth + 1) || + SimplifyDemandedBits(I, 1, DemandedMask, LHSKnownZero, LHSKnownOne, + Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); @@ -434,8 +369,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, DemandedMask = DemandedMask.zext(truncBf); KnownZero = KnownZero.zext(truncBf); KnownOne = KnownOne.zext(truncBf); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, - KnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, + Depth + 1)) return I; DemandedMask = DemandedMask.trunc(BitWidth); KnownZero = KnownZero.trunc(BitWidth); @@ -460,8 +395,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Don't touch a vector-to-scalar bitcast. return nullptr; - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, - KnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, + Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; @@ -472,15 +407,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, DemandedMask = DemandedMask.trunc(SrcBitWidth); KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, - KnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, + Depth + 1)) return I; DemandedMask = DemandedMask.zext(BitWidth); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // The top bits are known to be zero. - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + KnownZero.setBitsFrom(SrcBitWidth); break; } case Instruction::SExt: { @@ -490,7 +425,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt InputDemandedBits = DemandedMask & APInt::getLowBitsSet(BitWidth, SrcBitWidth); - APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth)); + APInt NewBits(APInt::getBitsSetFrom(BitWidth, SrcBitWidth)); // If any of the sign extended bits are demanded, we know that the sign // bit is demanded. if ((NewBits & DemandedMask) != 0) @@ -499,8 +434,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero, - KnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 0, InputDemandedBits, KnownZero, KnownOne, + Depth + 1)) return I; InputDemandedBits = InputDemandedBits.zext(BitWidth); KnownZero = KnownZero.zext(BitWidth); @@ -530,11 +465,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Right fill the mask of bits for this ADD/SUB to demand the most // significant bit and all those below it. APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth + 1) || + if (ShrinkDemandedConstant(I, 0, DemandedFromOps) || + SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnownZero, LHSKnownOne, + Depth + 1) || ShrinkDemandedConstant(I, 1, DemandedFromOps) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth + 1)) { + SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnownZero, RHSKnownOne, + Depth + 1)) { // Disable the nsw and nuw flags here: We can no longer guarantee that // we won't wrap after simplification. Removing the nsw/nuw flags is // legal here because the top bit is not demanded. @@ -543,6 +479,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, BinOP.setHasNoUnsignedWrap(false); return I; } + + // If we are known to be adding/subtracting zeros to every bit below + // the highest demanded bit, we just return the other side. + if ((DemandedFromOps & RHSKnownZero) == DemandedFromOps) + return I->getOperand(0); + // We can't do this with the LHS for subtraction. + if (I->getOpcode() == Instruction::Add && + (DemandedFromOps & LHSKnownZero) == DemandedFromOps) + return I->getOperand(1); } // Otherwise just hand the add/sub off to computeKnownBits to fill in @@ -569,19 +514,19 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the shift is NUW/NSW, then it does demand the high bits. ShlOperator *IOp = cast<ShlOperator>(I); if (IOp->hasNoSignedWrap()) - DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); + DemandedMaskIn.setHighBits(ShiftAmt+1); else if (IOp->hasNoUnsignedWrap()) - DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + DemandedMaskIn.setHighBits(ShiftAmt); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, - KnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, + Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; // low bits known zero. if (ShiftAmt) - KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + KnownZero.setLowBits(ShiftAmt); } break; case Instruction::LShr: @@ -595,19 +540,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the shift is exact, then it does demand the low bits (and knows that // they are zero). if (cast<LShrOperator>(I)->isExact()) - DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + DemandedMaskIn.setLowBits(ShiftAmt); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, - KnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, + Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); - if (ShiftAmt) { - // Compute the new bits that are at the top now. - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - KnownZero |= HighBits; // high bits known zero. - } + KnownZero = KnownZero.lshr(ShiftAmt); + KnownOne = KnownOne.lshr(ShiftAmt); + if (ShiftAmt) + KnownZero.setHighBits(ShiftAmt); // high bits known zero. } break; case Instruction::AShr: @@ -635,26 +577,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If any of the "high bits" are demanded, we should set the sign bit as // demanded. if (DemandedMask.countLeadingZeros() <= ShiftAmt) - DemandedMaskIn.setBit(BitWidth-1); + DemandedMaskIn.setSignBit(); // If the shift is exact, then it does demand the low bits (and knows that // they are zero). if (cast<AShrOperator>(I)->isExact()) - DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + DemandedMaskIn.setLowBits(ShiftAmt); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, - KnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, + Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + KnownZero = KnownZero.lshr(ShiftAmt); + KnownOne = KnownOne.lshr(ShiftAmt); // Handle the sign bits. APInt SignBit(APInt::getSignBit(BitWidth)); // Adjust to where it is now in the mask. - SignBit = APIntOps::lshr(SignBit, ShiftAmt); + SignBit = SignBit.lshr(ShiftAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. @@ -683,8 +625,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero, - LHSKnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne, + Depth + 1)) return I; // The low bits of LHS are unchanged by the srem. @@ -693,12 +635,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If LHS is non-negative or has all low bits zero, then the upper bits // are all zero. - if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits)) + if (LHSKnownZero.isNegative() || ((LHSKnownZero & LowBits) == LowBits)) KnownZero |= ~LowBits; // If LHS is negative and not all low bits are zero, then the upper bits // are all one. - if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0)) + if (LHSKnownOne.isNegative() && ((LHSKnownOne & LowBits) != 0)) KnownOne |= ~LowBits; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); @@ -713,21 +655,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, CxtI); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) - KnownZero.setBit(KnownZero.getBitWidth() - 1); + KnownZero.setSignBit(); } break; case Instruction::URem: { APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); APInt AllOnes = APInt::getAllOnesValue(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2, - KnownOne2, Depth + 1) || - SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2, - KnownOne2, Depth + 1)) + if (SimplifyDemandedBits(I, 0, AllOnes, KnownZero2, KnownOne2, Depth + 1) || + SimplifyDemandedBits(I, 1, AllOnes, KnownZero2, KnownOne2, Depth + 1)) return I; unsigned Leaders = KnownZero2.countLeadingOnes(); - Leaders = std::max(Leaders, - KnownZero2.countLeadingOnes()); KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; break; } @@ -792,11 +730,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return ConstantInt::getNullValue(VTy); // We know that the upper bits are set to zero. - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - ArgWidth); + KnownZero.setBitsFrom(ArgWidth); return nullptr; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero = APInt::getHighBitsSet(64, 32); + KnownZero.setBitsFrom(32); return nullptr; } } @@ -811,6 +749,150 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return nullptr; } +/// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne +/// bits. It also tries to handle simplifications that can be done based on +/// DemandedMask, but without modifying the Instruction. +Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, + const APInt &DemandedMask, + APInt &KnownZero, + APInt &KnownOne, + unsigned Depth, + Instruction *CxtI) { + unsigned BitWidth = DemandedMask.getBitWidth(); + Type *ITy = I->getType(); + + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + + // Despite the fact that we can't simplify this instruction in all User's + // context, we can at least compute the knownzero/knownone bits, and we can + // do simplifications that apply to *just* the one user if we know that + // this instruction has a simpler value in that context. + switch (I->getOpcode()) { + case Instruction::And: { + // If either the LHS or the RHS are Zero, the result is zero. + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, + CxtI); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, + CxtI); + + // Output known-0 are known to be clear if zero in either the LHS | RHS. + APInt IKnownZero = RHSKnownZero | LHSKnownZero; + // Output known-1 bits are only known if set in both the LHS & RHS. + APInt IKnownOne = RHSKnownOne & LHSKnownOne; + + // If the client is only demanding bits that we know, return the known + // constant. + if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + return Constant::getIntegerValue(ITy, IKnownOne); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + (DemandedMask & ~LHSKnownZero)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + (DemandedMask & ~RHSKnownZero)) + return I->getOperand(1); + + KnownZero = std::move(IKnownZero); + KnownOne = std::move(IKnownOne); + break; + } + case Instruction::Or: { + // We can simplify (X|Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + // If either the LHS or the RHS are One, the result is One. + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, + CxtI); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, + CxtI); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + APInt IKnownZero = RHSKnownZero & LHSKnownZero; + // Output known-1 are known to be set if set in either the LHS | RHS. + APInt IKnownOne = RHSKnownOne | LHSKnownOne; + + // If the client is only demanding bits that we know, return the known + // constant. + if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + return Constant::getIntegerValue(ITy, IKnownOne); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. + if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + (DemandedMask & ~LHSKnownOne)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + (DemandedMask & ~RHSKnownOne)) + return I->getOperand(1); + + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + (DemandedMask & (~RHSKnownZero))) + return I->getOperand(0); + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + (DemandedMask & (~LHSKnownZero))) + return I->getOperand(1); + + KnownZero = std::move(IKnownZero); + KnownOne = std::move(IKnownOne); + break; + } + case Instruction::Xor: { + // We can simplify (X^Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, + CxtI); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, + CxtI); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt IKnownZero = (RHSKnownZero & LHSKnownZero) | + (RHSKnownOne & LHSKnownOne); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + APInt IKnownOne = (RHSKnownZero & LHSKnownOne) | + (RHSKnownOne & LHSKnownZero); + + // If the client is only demanding bits that we know, return the known + // constant. + if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + return Constant::getIntegerValue(ITy, IKnownOne); + + // If all of the demanded bits are known zero on one side, return the + // other. + if ((DemandedMask & RHSKnownZero) == DemandedMask) + return I->getOperand(0); + if ((DemandedMask & LHSKnownZero) == DemandedMask) + return I->getOperand(1); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZero = std::move(IKnownZero); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = std::move(IKnownOne); + break; + } + default: + // Compute the KnownZero/KnownOne bits to simplify things downstream. + computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI); + + // If this user is only demanding bits that we know, return the known + // constant. + if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + return Constant::getIntegerValue(ITy, KnownOne); + + break; + } + + return nullptr; +} + + /// Helper routine of SimplifyDemandedUseBits. It tries to simplify /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign @@ -849,7 +931,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, unsigned ShrAmt = ShrOp1.getZExtValue(); KnownOne.clearAllBits(); - KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1); + KnownZero.setLowBits(ShlAmt - 1); KnownZero &= DemandedMask; APInt BitMask1(APInt::getAllOnesValue(BitWidth)); @@ -1472,14 +1554,136 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, break; } + case Intrinsic::x86_sse2_packssdw_128: + case Intrinsic::x86_sse2_packsswb_128: + case Intrinsic::x86_sse2_packuswb_128: + case Intrinsic::x86_sse41_packusdw: + case Intrinsic::x86_avx2_packssdw: + case Intrinsic::x86_avx2_packsswb: + case Intrinsic::x86_avx2_packusdw: + case Intrinsic::x86_avx2_packuswb: + case Intrinsic::x86_avx512_packssdw_512: + case Intrinsic::x86_avx512_packsswb_512: + case Intrinsic::x86_avx512_packusdw_512: + case Intrinsic::x86_avx512_packuswb_512: { + auto *Ty0 = II->getArgOperand(0)->getType(); + unsigned InnerVWidth = Ty0->getVectorNumElements(); + assert(VWidth == (InnerVWidth * 2) && "Unexpected input size"); + + unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128; + unsigned VWidthPerLane = VWidth / NumLanes; + unsigned InnerVWidthPerLane = InnerVWidth / NumLanes; + + // Per lane, pack the elements of the first input and then the second. + // e.g. + // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3]) + // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15]) + for (int OpNum = 0; OpNum != 2; ++OpNum) { + APInt OpDemandedElts(InnerVWidth, 0); + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + unsigned LaneIdx = Lane * VWidthPerLane; + for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) { + unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum; + if (DemandedElts[Idx]) + OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt); + } + } + + // Demand elements from the operand. + auto *Op = II->getArgOperand(OpNum); + APInt OpUndefElts(InnerVWidth, 0); + TmpV = SimplifyDemandedVectorElts(Op, OpDemandedElts, OpUndefElts, + Depth + 1); + if (TmpV) { + II->setArgOperand(OpNum, TmpV); + MadeChange = true; + } + + // Pack the operand's UNDEF elements, one lane at a time. + OpUndefElts = OpUndefElts.zext(VWidth); + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane); + LaneElts = LaneElts.getLoBits(InnerVWidthPerLane); + LaneElts = LaneElts.shl(InnerVWidthPerLane * (2 * Lane + OpNum)); + UndefElts |= LaneElts; + } + } + break; + } + + // PSHUFB + case Intrinsic::x86_ssse3_pshuf_b_128: + case Intrinsic::x86_avx2_pshuf_b: + case Intrinsic::x86_avx512_pshuf_b_512: + // PERMILVAR + case Intrinsic::x86_avx_vpermilvar_ps: + case Intrinsic::x86_avx_vpermilvar_ps_256: + case Intrinsic::x86_avx512_vpermilvar_ps_512: + case Intrinsic::x86_avx_vpermilvar_pd: + case Intrinsic::x86_avx_vpermilvar_pd_256: + case Intrinsic::x86_avx512_vpermilvar_pd_512: + // PERMV + case Intrinsic::x86_avx2_permd: + case Intrinsic::x86_avx2_permps: { + Value *Op1 = II->getArgOperand(1); + TmpV = SimplifyDemandedVectorElts(Op1, DemandedElts, UndefElts, + Depth + 1); + if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } + break; + } + // SSE4A instructions leave the upper 64-bits of the 128-bit result // in an undefined state. case Intrinsic::x86_sse4a_extrq: case Intrinsic::x86_sse4a_extrqi: case Intrinsic::x86_sse4a_insertq: case Intrinsic::x86_sse4a_insertqi: - UndefElts |= APInt::getHighBitsSet(VWidth, VWidth / 2); + UndefElts.setHighBits(VWidth / 2); break; + case Intrinsic::amdgcn_buffer_load: + case Intrinsic::amdgcn_buffer_load_format: { + if (VWidth == 1 || !DemandedElts.isMask()) + return nullptr; + + // TODO: Handle 3 vectors when supported in code gen. + unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countTrailingOnes()); + if (NewNumElts == VWidth) + return nullptr; + + Module *M = II->getParent()->getParent()->getParent(); + Type *EltTy = V->getType()->getVectorElementType(); + + Type *NewTy = (NewNumElts == 1) ? EltTy : + VectorType::get(EltTy, NewNumElts); + + Function *NewIntrin = Intrinsic::getDeclaration(M, II->getIntrinsicID(), + NewTy); + + SmallVector<Value *, 5> Args; + for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I) + Args.push_back(II->getArgOperand(I)); + + IRBuilderBase::InsertPointGuard Guard(*Builder); + Builder->SetInsertPoint(II); + + CallInst *NewCall = Builder->CreateCall(NewIntrin, Args); + NewCall->takeName(II); + NewCall->copyMetadata(*II); + if (NewNumElts == 1) { + return Builder->CreateInsertElement(UndefValue::get(V->getType()), + NewCall, static_cast<uint64_t>(0)); + } + + SmallVector<uint32_t, 8> EltMask; + for (unsigned I = 0; I < VWidth; ++I) + EltMask.push_back(I); + + Value *Shuffle = Builder->CreateShuffleVector( + NewCall, UndefValue::get(NewTy), EltMask); + + MadeChange = true; + return Shuffle; + } } break; } |