summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/ARM/ARMISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp221
1 files changed, 183 insertions, 38 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 33d115945614..3d45db349644 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -391,6 +391,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Legal);
}
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
@@ -428,7 +429,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
}
// Predicate types
- const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
+ const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
for (auto VT : pTypes) {
addRegisterClass(VT, &ARM::VCCRRegClass);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
@@ -445,6 +446,16 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
}
+ setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
+ setOperationAction(ISD::AND, MVT::v2i1, Expand);
+ setOperationAction(ISD::OR, MVT::v2i1, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i1, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);
+
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
@@ -1647,6 +1658,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(ARMISD::CALL_PRED)
MAKE_CASE(ARMISD::CALL_NOLINK)
MAKE_CASE(ARMISD::tSECALL)
+ MAKE_CASE(ARMISD::t2CALL_BTI)
MAKE_CASE(ARMISD::BRCOND)
MAKE_CASE(ARMISD::BR_JT)
MAKE_CASE(ARMISD::BR2_JT)
@@ -1853,8 +1865,10 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
// MVE has a predicate register.
if ((Subtarget->hasMVEIntegerOps() &&
- (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8)) ||
- (Subtarget->hasMVEFloatOps() && (VT == MVT::v4f32 || VT == MVT::v8f16)))
+ (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ VT == MVT::v16i8)) ||
+ (Subtarget->hasMVEFloatOps() &&
+ (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
@@ -2308,6 +2322,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isCmseNSCall = false;
bool isSibCall = false;
bool PreferIndirect = false;
+ bool GuardWithBTI = false;
+
+ // Lower 'returns_twice' calls to a pseudo-instruction.
+ if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+ !Subtarget->getNoBTIAtReturnTwice())
+ GuardWithBTI = AFI->branchTargetEnforcement();
// Determine whether this is a non-secure function call.
if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
@@ -2713,7 +2733,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
if (Subtarget->isThumb()) {
- if (isCmseNSCall)
+ if (GuardWithBTI)
+ CallOpc = ARMISD::t2CALL_BTI;
+ else if (isCmseNSCall)
CallOpc = ARMISD::tSECALL;
else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -2930,9 +2952,17 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
// Indirect tail calls cannot be optimized for Thumb1 if the args
// to the call take up r0-r3. The reason is that there are no legal registers
// left to hold the pointer to the function to be called.
- if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
- (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
- return false;
+ // Similarly, if the function uses return address sign and authentication,
+ // r12 is needed to hold the PAC and is not available to hold the callee
+ // address.
+ if (Outs.size() >= 4 &&
+ (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
+ if (Subtarget->isThumb1Only())
+ return false;
+ // Conservatively assume the function spills LR.
+ if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
+ return false;
+ }
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
@@ -7616,7 +7646,10 @@ static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
unsigned NumElts = VT.getVectorNumElements();
unsigned BoolMask;
unsigned BitsPerBool;
- if (NumElts == 4) {
+ if (NumElts == 2) {
+ BitsPerBool = 8;
+ BoolMask = 0xff;
+ } else if (NumElts == 4) {
BitsPerBool = 4;
BoolMask = 0xf;
} else if (NumElts == 8) {
@@ -7699,6 +7732,46 @@ static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(N, DL, MVT::i32));
}
+// Returns true if the operation N can be treated as a qr instruction variant
+// at operand Op.
+static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
+ switch (N->getOpcode()) {
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ return true;
+ case ISD::SUB:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ return N->getOperand(1).getNode() == Op;
+ case ISD::INTRINSIC_WO_CHAIN:
+ switch (N->getConstantOperandVal(0)) {
+ case Intrinsic::arm_mve_add_predicated:
+ case Intrinsic::arm_mve_mul_predicated:
+ case Intrinsic::arm_mve_qadd_predicated:
+ case Intrinsic::arm_mve_vhadd:
+ case Intrinsic::arm_mve_hadd_predicated:
+ case Intrinsic::arm_mve_vqdmulh:
+ case Intrinsic::arm_mve_qdmulh_predicated:
+ case Intrinsic::arm_mve_vqrdmulh:
+ case Intrinsic::arm_mve_qrdmulh_predicated:
+ case Intrinsic::arm_mve_vqdmull:
+ case Intrinsic::arm_mve_vqdmull_predicated:
+ return true;
+ case Intrinsic::arm_mve_sub_predicated:
+ case Intrinsic::arm_mve_qsub_predicated:
+ case Intrinsic::arm_mve_vhsub:
+ case Intrinsic::arm_mve_hsub_predicated:
+ return N->getOperand(2).getNode() == Op;
+ default:
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
@@ -7720,6 +7793,20 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (SplatUndef.isAllOnes())
return DAG.getUNDEF(VT);
+ // If all the users of this constant splat are qr instruction variants,
+ // generate a vdup of the constant.
+ if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
+ (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
+ all_of(BVN->uses(),
+ [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
+ EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
+ : SplatBitSize == 16 ? MVT::v8i16
+ : MVT::v16i8;
+ SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
+ SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
+ }
+
if ((ST->hasNEON() && SplatBitSize <= 64) ||
(ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
// Check if an immediate VMOV works.
@@ -8313,9 +8400,8 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
SDLoc DL(Op);
SmallVector<SDValue, 8> VTBLMask;
- for (ArrayRef<int>::iterator
- I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
- VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
+ for (int I : ShuffleMask)
+ VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32));
if (V2.getNode()->isUndef())
return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
@@ -8346,6 +8432,8 @@ static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
static EVT getVectorTyFromPredicateVector(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::v2i1:
+ return MVT::v2f64;
case MVT::v4i1:
return MVT::v4i32;
case MVT::v8i1:
@@ -8427,7 +8515,14 @@ static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
DAG.getUNDEF(NewVT), ShuffleMask);
// Now return the result of comparing the shuffled vector with zero,
- // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
+ // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
+ // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
+ if (VT == MVT::v2i1) {
+ SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
+ SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+ return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
+ }
return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
DAG.getConstant(ARMCC::NE, dl, MVT::i32));
}
@@ -8927,8 +9022,15 @@ static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
ConVec = ExtractInto(NewV1, ConVec, j);
ConVec = ExtractInto(NewV2, ConVec, j);
- // Now return the result of comparing the subvector with zero,
- // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
+ // Now return the result of comparing the subvector with zero, which will
+ // generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1 we
+ // convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
+ if (VT == MVT::v2i1) {
+ SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, ConVec);
+ SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+ return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
+ }
return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
DAG.getConstant(ARMCC::NE, dl, MVT::i32));
};
@@ -8993,6 +9095,22 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
+ if (NumElts == 2) {
+ EVT SubVT = MVT::v4i32;
+ SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
+ for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
+ DAG.getIntPtrConstant(i, dl));
+ SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
+ DAG.getConstant(j, dl, MVT::i32));
+ SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
+ DAG.getConstant(j + 1, dl, MVT::i32));
+ }
+ SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+ return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
+ }
+
EVT SubVT = MVT::getVectorVT(ElType, NumElts);
SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
@@ -9839,16 +9957,17 @@ void ARMTargetLowering::ExpandDIV_Windows(
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
EVT MemVT = LD->getMemoryVT();
- assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
+ assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+ MemVT == MVT::v16i1) &&
"Expected a predicate type!");
assert(MemVT == Op.getValueType());
assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
"Expected a non-extending load");
assert(LD->isUnindexed() && "Expected a unindexed load");
- // The basic MVE VLDR on a v4i1/v8i1 actually loads the entire 16bit
+ // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16bit
// predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
- // need to make sure that 8/4 bits are actually loaded into the correct
+ // need to make sure that 8/4/2 bits are actually loaded into the correct
// place, which means loading the value and then shuffling the values into
// the bottom bits of the predicate.
// Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect
@@ -9895,14 +10014,15 @@ void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT MemVT = ST->getMemoryVT();
- assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
+ assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+ MemVT == MVT::v16i1) &&
"Expected a predicate type!");
assert(MemVT == ST->getValue().getValueType());
assert(!ST->isTruncatingStore() && "Expected a non-extending store");
assert(ST->isUnindexed() && "Expected a unindexed store");
- // Only store the v4i1 or v8i1 worth of bits, via a buildvector with top bits
- // unset and a scalar store.
+ // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
+ // top bits unset and a scalar store.
SDLoc dl(Op);
SDValue Build = ST->getValue();
if (MemVT != MVT::v16i1) {
@@ -9953,7 +10073,7 @@ static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
{ST->getChain(), Lo, Hi, ST->getBasePtr()},
MemVT, ST->getMemOperand());
} else if (Subtarget->hasMVEIntegerOps() &&
- ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+ ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
MemVT == MVT::v16i1))) {
return LowerPredicateStore(Op, DAG);
}
@@ -10561,25 +10681,23 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// associated with.
DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
unsigned MaxCSNum = 0;
- for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
- ++BB) {
- if (!BB->isEHPad()) continue;
+ for (MachineBasicBlock &BB : *MF) {
+ if (!BB.isEHPad())
+ continue;
// FIXME: We should assert that the EH_LABEL is the first MI in the landing
// pad.
- for (MachineBasicBlock::iterator
- II = BB->begin(), IE = BB->end(); II != IE; ++II) {
- if (!II->isEHLabel()) continue;
+ for (MachineInstr &II : BB) {
+ if (!II.isEHLabel())
+ continue;
- MCSymbol *Sym = II->getOperand(0).getMCSymbol();
+ MCSymbol *Sym = II.getOperand(0).getMCSymbol();
if (!MF->hasCallSiteLandingPad(Sym)) continue;
SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
- for (SmallVectorImpl<unsigned>::iterator
- CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
- CSI != CSE; ++CSI) {
- CallSiteNumToLPad[*CSI].push_back(&*BB);
- MaxCSNum = std::max(MaxCSNum, *CSI);
+ for (unsigned Idx : CallSiteIdxs) {
+ CallSiteNumToLPad[Idx].push_back(&BB);
+ MaxCSNum = std::max(MaxCSNum, Idx);
}
break;
}
@@ -14002,8 +14120,8 @@ static SDValue PerformANDCombine(SDNode *N,
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v4i1 ||
- VT == MVT::v8i1 || VT == MVT::v16i1)
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
+ VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
return SDValue();
APInt SplatBits, SplatUndef;
@@ -14298,8 +14416,8 @@ static SDValue PerformORCombine(SDNode *N,
if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
- if (Subtarget->hasMVEIntegerOps() &&
- (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1))
+ if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
+ VT == MVT::v8i1 || VT == MVT::v16i1))
return PerformORCombine_i1(N, DAG, Subtarget);
APInt SplatBits, SplatUndef;
@@ -14569,6 +14687,15 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
return SDValue();
SDValue CSInc = Cmp->getOperand(0);
+
+ // Ignore any `And 1` nodes that may not yet have been removed. We are
+ // looking for a value that produces 1/0, so these have no effect on the
+ // code.
+ while (CSInc.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(CSInc.getOperand(1)) &&
+ CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
+ CSInc = CSInc.getOperand(0);
+
if (CSInc.getOpcode() != ARMISD::CSINC ||
!isNullConstant(CSInc.getOperand(0)) ||
!isNullConstant(CSInc.getOperand(1)) || !CSInc->hasOneUse())
@@ -17897,6 +18024,23 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
if (!VT.isInteger())
return SDValue();
+  // Fold away an unnecessary CMPZ/CMOV
+ // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
+ // if C1==EQ -> CMOV A, B, C2, $cpsr, D
+ // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
+ if (N->getConstantOperandVal(2) == ARMCC::EQ ||
+ N->getConstantOperandVal(2) == ARMCC::NE) {
+ ARMCC::CondCodes Cond;
+ if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
+ if (N->getConstantOperandVal(2) == ARMCC::NE)
+ Cond = ARMCC::getOppositeCondition(Cond);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
+ N->getOperand(1),
+ DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
+ N->getOperand(3), C);
+ }
+ }
+
// Materialize a boolean comparison for integers so we can avoid branching.
if (isNullConstant(FalseVal)) {
if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
@@ -18564,7 +18708,8 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
return false;
// These are for predicates
- if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1)) {
+ if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
+ Ty == MVT::v2i1)) {
if (Fast)
*Fast = true;
return true;