diff options
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 221 |
1 file changed, 183 insertions, 38 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 33d115945614..3d45db349644 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -391,6 +391,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Legal); } setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); @@ -428,7 +429,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { } // Predicate types - const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1}; + const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1}; for (auto VT : pTypes) { addRegisterClass(VT, &ARM::VCCRRegClass); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); @@ -445,6 +446,16 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); } + setOperationAction(ISD::SETCC, MVT::v2i1, Expand); + setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand); + setOperationAction(ISD::AND, MVT::v2i1, Expand); + setOperationAction(ISD::OR, MVT::v2i1, Expand); + setOperationAction(ISD::XOR, MVT::v2i1, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); @@ -1647,6 +1658,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(ARMISD::CALL_PRED) MAKE_CASE(ARMISD::CALL_NOLINK) MAKE_CASE(ARMISD::tSECALL) + MAKE_CASE(ARMISD::t2CALL_BTI) 
MAKE_CASE(ARMISD::BRCOND) MAKE_CASE(ARMISD::BR_JT) MAKE_CASE(ARMISD::BR2_JT) @@ -1853,8 +1865,10 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, // MVE has a predicate register. if ((Subtarget->hasMVEIntegerOps() && - (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8)) || - (Subtarget->hasMVEFloatOps() && (VT == MVT::v4f32 || VT == MVT::v8f16))) + (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || + VT == MVT::v16i8)) || + (Subtarget->hasMVEFloatOps() && + (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16))) return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); return VT.changeVectorElementTypeToInteger(); } @@ -2308,6 +2322,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isCmseNSCall = false; bool isSibCall = false; bool PreferIndirect = false; + bool GuardWithBTI = false; + + // Lower 'returns_twice' calls to a pseudo-instruction. + if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) && + !Subtarget->getNoBTIAtReturnTwice()) + GuardWithBTI = AFI->branchTargetEnforcement(); // Determine whether this is a non-secure function call. if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call")) @@ -2713,7 +2733,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; if (Subtarget->isThumb()) { - if (isCmseNSCall) + if (GuardWithBTI) + CallOpc = ARMISD::t2CALL_BTI; + else if (isCmseNSCall) CallOpc = ARMISD::tSECALL; else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -2930,9 +2952,17 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( // Indirect tail calls cannot be optimized for Thumb1 if the args // to the call take up r0-r3. The reason is that there are no legal registers // left to hold the pointer to the function to be called. 
- if (Subtarget->isThumb1Only() && Outs.size() >= 4 && - (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) - return false; + // Similarly, if the function uses return address sign and authentication, + // r12 is needed to hold the PAC and is not available to hold the callee + // address. + if (Outs.size() >= 4 && + (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) { + if (Subtarget->isThumb1Only()) + return false; + // Conservatively assume the function spills LR. + if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)) + return false; + } // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. @@ -7616,7 +7646,10 @@ static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, unsigned NumElts = VT.getVectorNumElements(); unsigned BoolMask; unsigned BitsPerBool; - if (NumElts == 4) { + if (NumElts == 2) { + BitsPerBool = 8; + BoolMask = 0xff; + } else if (NumElts == 4) { BitsPerBool = 4; BoolMask = 0xf; } else if (NumElts == 8) { @@ -7699,6 +7732,46 @@ static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG, DAG.getConstant(N, DL, MVT::i32)); } +// Returns true if the operation N can be treated as qr instruction variant at +// operand Op. 
+static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) { + switch (N->getOpcode()) { + case ISD::ADD: + case ISD::MUL: + case ISD::SADDSAT: + case ISD::UADDSAT: + return true; + case ISD::SUB: + case ISD::SSUBSAT: + case ISD::USUBSAT: + return N->getOperand(1).getNode() == Op; + case ISD::INTRINSIC_WO_CHAIN: + switch (N->getConstantOperandVal(0)) { + case Intrinsic::arm_mve_add_predicated: + case Intrinsic::arm_mve_mul_predicated: + case Intrinsic::arm_mve_qadd_predicated: + case Intrinsic::arm_mve_vhadd: + case Intrinsic::arm_mve_hadd_predicated: + case Intrinsic::arm_mve_vqdmulh: + case Intrinsic::arm_mve_qdmulh_predicated: + case Intrinsic::arm_mve_vqrdmulh: + case Intrinsic::arm_mve_qrdmulh_predicated: + case Intrinsic::arm_mve_vqdmull: + case Intrinsic::arm_mve_vqdmull_predicated: + return true; + case Intrinsic::arm_mve_sub_predicated: + case Intrinsic::arm_mve_qsub_predicated: + case Intrinsic::arm_mve_vhsub: + case Intrinsic::arm_mve_hsub_predicated: + return N->getOperand(2).getNode() == Op; + default: + return false; + } + default: + return false; + } +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, @@ -7720,6 +7793,20 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (SplatUndef.isAllOnes()) return DAG.getUNDEF(VT); + // If all the users of this constant splat are qr instruction variants, + // generate a vdup of the constant. + if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize && + (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) && + all_of(BVN->uses(), + [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) { + EVT DupVT = SplatBitSize == 32 ? MVT::v4i32 + : SplatBitSize == 16 ? 
MVT::v8i16 + : MVT::v16i8; + SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32); + SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const); + return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup); + } + if ((ST->hasNEON() && SplatBitSize <= 64) || (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) { // Check if an immediate VMOV works. @@ -8313,9 +8400,8 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, SDLoc DL(Op); SmallVector<SDValue, 8> VTBLMask; - for (ArrayRef<int>::iterator - I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) - VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32)); + for (int I : ShuffleMask) + VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32)); if (V2.getNode()->isUndef()) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, @@ -8346,6 +8432,8 @@ static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { static EVT getVectorTyFromPredicateVector(EVT VT) { switch (VT.getSimpleVT().SimpleTy) { + case MVT::v2i1: + return MVT::v2f64; case MVT::v4i1: return MVT::v4i32; case MVT::v8i1: @@ -8427,7 +8515,14 @@ static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, DAG.getUNDEF(NewVT), ShuffleMask); // Now return the result of comparing the shuffled vector with zero, - // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. + // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1 + // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s. 
+ if (VT == MVT::v2i1) { + SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled); + SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC, + DAG.getConstant(ARMCC::NE, dl, MVT::i32)); + return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp); + } return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled, DAG.getConstant(ARMCC::NE, dl, MVT::i32)); } @@ -8927,8 +9022,15 @@ static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, ConVec = ExtractInto(NewV1, ConVec, j); ConVec = ExtractInto(NewV2, ConVec, j); - // Now return the result of comparing the subvector with zero, - // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. + // Now return the result of comparing the subvector with zero, which will + // generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1 we + // convert to a v4i1 compare to fill in the two halves of the i64 as i32s. + if (VT == MVT::v2i1) { + SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, ConVec); + SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC, + DAG.getConstant(ARMCC::NE, dl, MVT::i32)); + return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp); + } return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec, DAG.getConstant(ARMCC::NE, dl, MVT::i32)); }; @@ -8993,6 +9095,22 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT(); + if (NumElts == 2) { + EVT SubVT = MVT::v4i32; + SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT); + for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1, + DAG.getIntPtrConstant(i, dl)); + SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt, + DAG.getConstant(j, dl, MVT::i32)); + SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt, + DAG.getConstant(j + 1, dl, MVT::i32)); + } + SDValue Cmp = 
DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec, + DAG.getConstant(ARMCC::NE, dl, MVT::i32)); + return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp); + } + EVT SubVT = MVT::getVectorVT(ElType, NumElts); SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT); for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) { @@ -9839,16 +9957,17 @@ void ARMTargetLowering::ExpandDIV_Windows( static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); EVT MemVT = LD->getMemoryVT(); - assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) && + assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || + MemVT == MVT::v16i1) && "Expected a predicate type!"); assert(MemVT == Op.getValueType()); assert(LD->getExtensionType() == ISD::NON_EXTLOAD && "Expected a non-extending load"); assert(LD->isUnindexed() && "Expected a unindexed load"); - // The basic MVE VLDR on a v4i1/v8i1 actually loads the entire 16bit + // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16bit // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We - // need to make sure that 8/4 bits are actually loaded into the correct + // need to make sure that 8/4/2 bits are actually loaded into the correct // place, which means loading the value and then shuffling the values into // the bottom bits of the predicate. 
// Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect @@ -9895,14 +10014,15 @@ void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results, static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) { StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); EVT MemVT = ST->getMemoryVT(); - assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) && + assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || + MemVT == MVT::v16i1) && "Expected a predicate type!"); assert(MemVT == ST->getValue().getValueType()); assert(!ST->isTruncatingStore() && "Expected a non-extending store"); assert(ST->isUnindexed() && "Expected a unindexed store"); - // Only store the v4i1 or v8i1 worth of bits, via a buildvector with top bits - // unset and a scalar store. + // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with + // top bits unset and a scalar store. SDLoc dl(Op); SDValue Build = ST->getValue(); if (MemVT != MVT::v16i1) { @@ -9953,7 +10073,7 @@ static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, {ST->getChain(), Lo, Hi, ST->getBasePtr()}, MemVT, ST->getMemOperand()); } else if (Subtarget->hasMVEIntegerOps() && - ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || + ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1))) { return LowerPredicateStore(Op, DAG); } @@ -10561,25 +10681,23 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, // associated with. DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad; unsigned MaxCSNum = 0; - for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; - ++BB) { - if (!BB->isEHPad()) continue; + for (MachineBasicBlock &BB : *MF) { + if (!BB.isEHPad()) + continue; // FIXME: We should assert that the EH_LABEL is the first MI in the landing // pad. 
- for (MachineBasicBlock::iterator - II = BB->begin(), IE = BB->end(); II != IE; ++II) { - if (!II->isEHLabel()) continue; + for (MachineInstr &II : BB) { + if (!II.isEHLabel()) + continue; - MCSymbol *Sym = II->getOperand(0).getMCSymbol(); + MCSymbol *Sym = II.getOperand(0).getMCSymbol(); if (!MF->hasCallSiteLandingPad(Sym)) continue; SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym); - for (SmallVectorImpl<unsigned>::iterator - CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end(); - CSI != CSE; ++CSI) { - CallSiteNumToLPad[*CSI].push_back(&*BB); - MaxCSNum = std::max(MaxCSNum, *CSI); + for (unsigned Idx : CallSiteIdxs) { + CallSiteNumToLPad[Idx].push_back(&BB); + MaxCSNum = std::max(MaxCSNum, Idx); } break; } @@ -14002,8 +14120,8 @@ static SDValue PerformANDCombine(SDNode *N, EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; - if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v4i1 || - VT == MVT::v8i1 || VT == MVT::v16i1) + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 || + VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1) return SDValue(); APInt SplatBits, SplatUndef; @@ -14298,8 +14416,8 @@ static SDValue PerformORCombine(SDNode *N, if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); - if (Subtarget->hasMVEIntegerOps() && - (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)) + if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 || + VT == MVT::v8i1 || VT == MVT::v16i1)) return PerformORCombine_i1(N, DAG, Subtarget); APInt SplatBits, SplatUndef; @@ -14569,6 +14687,15 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) { if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1))) return SDValue(); SDValue CSInc = Cmp->getOperand(0); + + // Ignore any `And 1` nodes that may not yet have been removed. We are + // looking for a value that produces 1/0, so these have no effect on the + // code. 
+ while (CSInc.getOpcode() == ISD::AND && + isa<ConstantSDNode>(CSInc.getOperand(1)) && + CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse()) + CSInc = CSInc.getOperand(0); + if (CSInc.getOpcode() != ARMISD::CSINC || !isNullConstant(CSInc.getOperand(0)) || !isNullConstant(CSInc.getOperand(1)) || !CSInc->hasOneUse()) @@ -17897,6 +18024,23 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (!VT.isInteger()) return SDValue(); + // Fold away an unneccessary CMPZ/CMOV + // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) -> + // if C1==EQ -> CMOV A, B, C2, $cpsr, D + // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D + if (N->getConstantOperandVal(2) == ARMCC::EQ || + N->getConstantOperandVal(2) == ARMCC::NE) { + ARMCC::CondCodes Cond; + if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) { + if (N->getConstantOperandVal(2) == ARMCC::NE) + Cond = ARMCC::getOppositeCondition(Cond); + return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0), + N->getOperand(1), + DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32), + N->getOperand(3), C); + } + } + // Materialize a boolean comparison for integers so we can avoid branching. if (isNullConstant(FalseVal)) { if (CC == ARMCC::EQ && isOneConstant(TrueVal)) { @@ -18564,7 +18708,8 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, return false; // These are for predicates - if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1)) { + if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 || + Ty == MVT::v2i1)) { if (Fast) *Fast = true; return true; |
