Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 743
1 file changed, 656 insertions(+), 87 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index a51aa85a931c..10c477853353 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -62,6 +62,9 @@ public:
                                     unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
 
+  template <signed Low, signed High, signed Scale>
+  bool SelectRDVLImm(SDValue N, SDValue &Imm);
+
   bool tryMLAV64LaneV128(SDNode *N);
   bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
   bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
@@ -159,6 +162,24 @@ public:
     return false;
   }
 
+  bool SelectDupZero(SDValue N) {
+    switch(N->getOpcode()) {
+    case AArch64ISD::DUP:
+    case ISD::SPLAT_VECTOR: {
+      auto Opnd0 = N->getOperand(0);
+      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
+        if (CN->isNullValue())
+          return true;
+      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
+        if (CN->isZero())
+          return true;
+      break;
+    }
+    }
+
+    return false;
+  }
+
   template<MVT::SimpleValueType VT>
   bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
     return SelectSVEAddSubImm(N, VT, Imm, Shift);
@@ -169,6 +190,11 @@ public:
     return SelectSVELogicalImm(N, VT, Imm);
   }
 
+  template <unsigned Low, unsigned High>
+  bool SelectSVEShiftImm64(SDValue N, SDValue &Imm) {
+    return SelectSVEShiftImm64(N, Low, High, Imm);
+  }
+
   // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
   template<signed Min, signed Max, signed Scale, bool Shift>
   bool SelectCntImm(SDValue N, SDValue &Imm) {
@@ -197,6 +223,9 @@ public:
   /// unchanged; otherwise a REG_SEQUENCE value is returned.
   SDValue createDTuple(ArrayRef<SDValue> Vecs);
   SDValue createQTuple(ArrayRef<SDValue> Vecs);
+  // Form a sequence of SVE registers for instructions using a list of vectors,
+  // e.g. structured loads and stores (ldN, stN).
+  SDValue createZTuple(ArrayRef<SDValue> Vecs);
 
   /// Generic helper for the createDTuple/createQTuple
   /// functions. Those should almost always be called instead.
@@ -216,11 +245,31 @@ public:
                      unsigned SubRegIdx);
   void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, const unsigned Opc);
+
+  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
+  /// SVE Reg+Imm addressing mode.
+  template <int64_t Min, int64_t Max>
+  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
+                                SDValue &OffImm);
+  /// SVE Reg+Reg address mode.
+  template <unsigned Scale>
+  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
+    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
+  }
 
   void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+  template <unsigned Scale>
+  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned Opc_rr,
+                             const unsigned Opc_ri);
+  template <unsigned Scale>
+  std::tuple<unsigned, SDValue, SDValue>
+  findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
+                           const unsigned Opc_ri, const SDValue &OldBase,
+                           const SDValue &OldOffset);
 
   bool tryBitfieldExtractOp(SDNode *N);
   bool tryBitfieldExtractOpFromSExt(SDNode *N);
@@ -268,13 +317,19 @@ private:
 
   bool SelectCMP_SWAP(SDNode *N);
 
+  bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);
+
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
 
   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
 
   bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
+  bool SelectSVEShiftImm64(SDValue N, uint64_t Low, uint64_t High,
+                           SDValue &Imm);
 
   bool SelectSVEArithImm(SDValue N, SDValue &Imm);
+  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
+                               SDValue &Offset);
 };
 } // end anonymous namespace
 
@@ -679,6 +734,23 @@ static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
   return SDValue(Node, 0);
 }
 
+// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
+template<signed Low, signed High, signed Scale>
+bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
+  if (!isa<ConstantSDNode>(N))
+    return false;
+
+  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
+  if ((MulImm % std::abs(Scale)) == 0) {
+    int64_t RDVLImm = MulImm / Scale;
+    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
+      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
+      return true;
+    }
+  }
+
+  return false;
+}
 
 /// SelectArithExtendedRegister - Select a "extended register" operand. This
 /// operand folds in an extend followed by an optional left shift.
@@ -832,16 +904,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
     if (!GAN)
       return true;
 
-    if (GAN->getOffset() % Size == 0) {
-      const GlobalValue *GV = GAN->getGlobal();
-      unsigned Alignment = GV->getAlignment();
-      Type *Ty = GV->getValueType();
-      if (Alignment == 0 && Ty->isSized())
-        Alignment = DL.getABITypeAlignment(Ty);
-
-      if (Alignment >= Size)
-        return true;
-    }
+    if (GAN->getOffset() % Size == 0 &&
+        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
+      return true;
   }
 
   if (CurDAG->isBaseWithConstantOffset(N)) {
@@ -1132,6 +1197,16 @@ SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
   return createTuple(Regs, RegClassIDs, SubRegs);
 }
 
+SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
+  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
+                                         AArch64::ZPR3RegClassID,
+                                         AArch64::ZPR4RegClassID};
+  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
+                                     AArch64::zsub2, AArch64::zsub3};
+
+  return createTuple(Regs, RegClassIDs, SubRegs);
+}
+
 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                          const unsigned RegClassIDs[],
                                          const unsigned SubRegs[]) {
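Aside: the fold in the SelectRDVLImm hunk above is pure integer arithmetic, so it can be checked in isolation. The sketch below mirrors that logic outside of SelectionDAG; the instantiation values are illustrative assumptions (RDVL measures the vector length in bytes, 16 per unit of vscale, with a signed 6-bit multiplier), not values taken from the TableGen patterns that instantiate the template.

// Stand-alone model of SelectRDVLImm<Low, High, Scale>: fold VSCALE * MulImm
// into a single RDVL/CNT-style immediate when MulImm is an exact multiple of
// Scale and the quotient fits the instruction's range.
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <optional>

template <int Low, int High, int Scale>
std::optional<int64_t> rdvlImm(int64_t MulImm) {
  if (MulImm % std::abs(Scale) != 0)
    return std::nullopt;
  int64_t Imm = MulImm / Scale;
  if (Imm < Low || Imm > High)
    return std::nullopt;
  return Imm;
}

int main() {
  // Assumed instantiation: RDVL reads VL in bytes (16 * vscale), imm in [-32, 31].
  if (auto Imm = rdvlImm<-32, 31, 16>(64))   // VSCALE * 64 bytes
    std::printf("rdvl x0, #%lld\n", (long long)*Imm);   // prints #4
  if (!rdvlImm<-32, 31, 16>(24))             // 24 is not a multiple of 16
    std::printf("no single RDVL immediate; selector returns false\n");
  return 0;
}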
@@ -1240,6 +1315,8 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
     }
   } else if (VT == MVT::f16) {
     Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
+  } else if (VT == MVT::bf16) {
+    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
   } else if (VT == MVT::f32) {
     Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
   } else if (VT == MVT::f64 || VT.is64BitVector()) {
@@ -1334,6 +1411,54 @@ void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
   CurDAG->RemoveDeadNode(N);
 }
 
+/// Optimize \param OldBase and \param OldOffset selecting the best addressing
+/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
+/// new Base and an SDValue representing the new offset.
+template <unsigned Scale>
+std::tuple<unsigned, SDValue, SDValue>
+AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
+                                              const unsigned Opc_ri,
+                                              const SDValue &OldBase,
+                                              const SDValue &OldOffset) {
+  SDValue NewBase = OldBase;
+  SDValue NewOffset = OldOffset;
+  // Detect a possible Reg+Imm addressing mode.
+  const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
+      N, OldBase, NewBase, NewOffset);
+
+  // Detect a possible reg+reg addressing mode, but only if we haven't already
+  // detected a Reg+Imm one.
+  const bool IsRegReg =
+      !IsRegImm && SelectSVERegRegAddrMode<Scale>(OldBase, NewBase, NewOffset);
+
+  // Select the instruction.
+  return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
+}
+
+void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
+                                               const unsigned Opc) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue Chain = N->getOperand(0);
+
+  SDValue Ops[] = {N->getOperand(1), // Predicate
+                   N->getOperand(2), // Memory operand
+                   CurDAG->getTargetConstant(0, DL, MVT::i64), Chain};
+
+  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
+
+  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
+  SDValue SuperReg = SDValue(Load, 0);
+  for (unsigned i = 0; i < NumVecs; ++i)
+    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
+                                   AArch64::zsub0 + i, DL, VT, SuperReg));
+
+  // Copy chain
+  unsigned ChainIdx = NumVecs;
+  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
+  CurDAG->RemoveDeadNode(N);
+}
+
 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                       unsigned Opc) {
   SDLoc dl(N);
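A note on SelectPredicatedLoad above: the LD2/LD3/LD4 machine node produces one Untyped super-register plus a chain, and each of the NumVecs user-visible results is rewritten as an extract of subregister zsub0 + i. A minimal stand-alone sketch of that unpacking, with plain integers standing in for the AArch64::zsub* enumerators:

// Model of the ReplaceUses loop in SelectPredicatedLoad: result i of the
// original node becomes EXTRACT_SUBREG(super, zsub0 + i), and the final
// result (the chain) is taken from the machine node directly.
#include <cstdio>

int main() {
  const unsigned ZSub0 = 0;     // stand-in for AArch64::zsub0
  const unsigned NumVecs = 3;   // an LD3
  for (unsigned i = 0; i < NumVecs; ++i)
    std::printf("result %u <- EXTRACT_SUBREG(super, zsub%u)\n", i, ZSub0 + i);
  std::printf("result %u <- chain of the LD3 machine node\n", NumVecs);
  return 0;
}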
@@ -1354,6 +1479,49 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
   ReplaceNode(N, St);
 }
 
+template <unsigned Scale>
+void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
+                                                const unsigned Opc_rr,
+                                                const unsigned Opc_ri) {
+  SDLoc dl(N);
+
+  // Form a REG_SEQUENCE to force register allocation.
+  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
+  SDValue RegSeq = createZTuple(Regs);
+
+  // Optimize addressing mode.
+  unsigned Opc;
+  SDValue Offset, Base;
+  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore<Scale>(
+      N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
+      CurDAG->getTargetConstant(0, dl, MVT::i64));
+
+  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
+                   Base,                               // address
+                   Offset,                             // offset
+                   N->getOperand(0)};                  // chain
+  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
+
+  ReplaceNode(N, St);
+}
+
+bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
+                                                      SDValue &OffImm) {
+  SDLoc dl(N);
+  const DataLayout &DL = CurDAG->getDataLayout();
+  const TargetLowering *TLI = getTargetLowering();
+
+  // Try to match it for the frame address
+  if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
+    int FI = FINode->getIndex();
+    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+    return true;
+  }
+
+  return false;
+}
+
 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                           unsigned Opc) {
   SDLoc dl(N);
@@ -2632,7 +2800,8 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
   // bits that are implicitly ANDed off by the above opcodes and if so, skip
   // the AND.
   uint64_t MaskImm;
-  if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
+  if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
+      !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
     return false;
 
   if (countTrailingOnes(MaskImm) < Bits)
@@ -2879,6 +3048,32 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   return true;
 }
 
+bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base,
+                                              SDValue &Offset) {
+  auto C = dyn_cast<ConstantSDNode>(N);
+  if (!C)
+    return false;
+
+  auto Ty = N->getValueType(0);
+
+  int64_t Imm = C->getSExtValue();
+  SDLoc DL(N);
+
+  if ((Imm >= -128) && (Imm <= 127)) {
+    Base = CurDAG->getTargetConstant(Imm, DL, Ty);
+    Offset = CurDAG->getTargetConstant(0, DL, Ty);
+    return true;
+  }
+
+  if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) {
+    Base = CurDAG->getTargetConstant(Imm/256, DL, Ty);
+    Offset = CurDAG->getTargetConstant(8, DL, Ty);
+    return true;
+  }
+
+  return false;
+}
+
 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
                                              SDValue &Shift) {
   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
     const int64_t ImmVal = CNode->getZExtValue();
@@ -2917,7 +3112,7 @@ bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
     int64_t ImmVal = CNode->getSExtValue();
     SDLoc DL(N);
-    if (ImmVal >= -127 && ImmVal < 127) {
+    if (ImmVal >= -128 && ImmVal < 128) {
      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
       return true;
     }
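The SelectSVE8BitLslImm hunk above splits a splatted immediate into an 8-bit payload plus an optional left shift by 8. A stand-alone sketch of the same split, with the ranges copied from the code; the mov spelling in the output is only illustrative:

// Model of SelectSVE8BitLslImm: a value is encodable either as a plain
// signed 8-bit immediate (shift 0), or as a multiple of 256 whose top
// byte is itself a signed 8-bit value (shift 8).
#include <cstdint>
#include <cstdio>

bool splitLslImm(int64_t Imm, int64_t &Base, unsigned &Shift) {
  if (Imm >= -128 && Imm <= 127) {
    Base = Imm;
    Shift = 0;
    return true;
  }
  if (Imm % 256 == 0 && Imm >= -32768 && Imm <= 32512) {
    Base = Imm / 256;   // emitted as "#Base, lsl #8"
    Shift = 8;
    return true;
  }
  return false;
}

int main() {
  int64_t Base;
  unsigned Shift;
  if (splitLslImm(0x4500, Base, Shift))   // 17664 == 69 << 8
    std::printf("mov z0.h, #%lld, lsl #%u\n", (long long)Base, Shift);
  if (!splitLslImm(0x4501, Base, Shift))
    std::printf("0x4501 needs a different expansion\n");
  return 0;
}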
@@ -2975,6 +3170,24 @@ bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) {
   return false;
 }
 
+// This method is only needed to "cast" i64s into i32s when the value
+// is a valid shift which has been splatted into a vector with i64 elements.
+// Every other type is fine in tablegen.
+bool AArch64DAGToDAGISel::SelectSVEShiftImm64(SDValue N, uint64_t Low,
+                                              uint64_t High, SDValue &Imm) {
+  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
+    uint64_t ImmVal = CN->getZExtValue();
+    SDLoc DL(N);
+
+    if (ImmVal >= Low && ImmVal <= High) {
+      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
+      return true;
+    }
+  }
+
+  return false;
+}
+
 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
   // tagp(FrameIndex, IRGstack, tag_offset):
   // since the offset between FrameIndex and IRGstack is a compile-time
@@ -3027,6 +3240,63 @@ void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
   ReplaceNode(N, N3);
 }
 
+// NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
+// vector types larger than NEON don't have a matching SubRegIndex.
+static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+  assert(V.getValueType().isScalableVector() &&
+         V.getValueType().getSizeInBits().getKnownMinSize() ==
+             AArch64::SVEBitsPerBlock &&
+         "Expected to extract from a packed scalable vector!");
+  assert(VT.isFixedLengthVector() &&
+         "Expected to extract a fixed length vector!");
+
+  SDLoc DL(V);
+  switch (VT.getSizeInBits()) {
+  case 64: {
+    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+  }
+  case 128: {
+    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+  }
+  default: {
+    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+  }
+  }
+}
+
+// NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
+// vector types larger than NEON don't have a matching SubRegIndex.
+static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+  assert(VT.isScalableVector() &&
+         VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
+         "Expected to insert into a packed scalable vector!");
+  assert(V.getValueType().isFixedLengthVector() &&
+         "Expected to insert a fixed length vector!");
+
+  SDLoc DL(V);
+  switch (V.getValueType().getSizeInBits()) {
+  case 64: {
+    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+                               SDValue(Container, 0), V, SubReg);
+  }
+  case 128: {
+    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+                               SDValue(Container, 0), V, SubReg);
+  }
+  default: {
+    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+  }
+  }
+}
+
 void AArch64DAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
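The extractSubReg/insertSubReg helpers above choose a coercion strategy purely by the fixed-length type's size: 64- and 128-bit vectors have a real subregister index inside a Z register (dsub, zsub), while wider fixed-length types have no NEON subregister and are moved with COPY_TO_REGCLASS. A sketch of that dispatch, assuming only the cases the helpers handle:

// Model of the size switch shared by extractSubReg and insertSubReg.
#include <cstdio>

enum class Coercion { DSub, ZSub, CopyToRegClass };

Coercion coerceFixedLengthVector(unsigned FixedBits) {
  switch (FixedBits) {
  case 64:  return Coercion::DSub;            // EXTRACT/INSERT_SUBREG dsub
  case 128: return Coercion::ZSub;            // EXTRACT/INSERT_SUBREG zsub
  default:  return Coercion::CopyToRegClass;  // COPY_TO_REGCLASS with ZPR
  }
}

int main() {
  std::printf("64-bit:  %d\n", (int)coerceFixedLengthVector(64));
  std::printf("128-bit: %d\n", (int)coerceFixedLengthVector(128));
  std::printf("256-bit: %d\n", (int)coerceFixedLengthVector(256));
  return 0;
}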
@@ -3100,6 +3370,52 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       return;
     break;
 
+  case ISD::EXTRACT_SUBVECTOR: {
+    // Bail when not a "cast" like extract_subvector.
+    if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
+      break;
+
+    // Bail when normal isel can do the job.
+    EVT InVT = Node->getOperand(0).getValueType();
+    if (VT.isScalableVector() || InVT.isFixedLengthVector())
+      break;
+
+    // NOTE: We can only get here when doing fixed length SVE code generation.
+    // We do manual selection because the types involved are not linked to real
+    // registers (despite being legal) and must be coerced into SVE registers.
+    //
+    // NOTE: If the above changes, be aware that selection will still not work
+    // because the td definition of extract_vector does not support extracting
+    // a fixed length vector from a scalable vector.
+
+    ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
+    return;
+  }
+
+  case ISD::INSERT_SUBVECTOR: {
+    // Bail when not a "cast" like insert_subvector.
+    if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
+      break;
+    if (!Node->getOperand(0).isUndef())
+      break;
+
+    // Bail when normal isel should do the job.
+    EVT InVT = Node->getOperand(1).getValueType();
+    if (VT.isFixedLengthVector() || InVT.isScalableVector())
+      break;
+
+    // NOTE: We can only get here when doing fixed length SVE code generation.
+    // We do manual selection because the types involved are not linked to real
+    // registers (despite being legal) and must be coerced into SVE registers.
+    //
+    // NOTE: If the above changes, be aware that selection will still not work
+    // because the td definition of insert_vector does not support inserting a
+    // fixed length vector into a scalable vector.
+
+    ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
+    return;
+  }
+
   case ISD::Constant: {
     // Materialize zero constants as copies from WZR/XZR. This allows
     // the coalescer to propagate these into other instructions.
@@ -3185,10 +3501,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3212,10 +3528,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3239,10 +3555,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3266,10 +3582,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3293,10 +3609,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3320,10 +3636,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3347,10 +3663,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3374,10 +3690,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3401,10 +3717,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3426,7 +3742,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectLoadLane(Node, 2, AArch64::LD2i8);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectLoadLane(Node, 2, AArch64::LD2i16);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3444,7 +3760,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectLoadLane(Node, 3, AArch64::LD3i8);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectLoadLane(Node, 3, AArch64::LD3i16);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3462,7 +3778,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectLoadLane(Node, 4, AArch64::LD4i8);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectLoadLane(Node, 4, AArch64::LD4i16);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3537,10 +3853,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectStore(Node, 2, AArch64::ST1Twov16b);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+               VT == MVT::v4bf16) {
       SelectStore(Node, 2, AArch64::ST1Twov4h);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+               VT == MVT::v8bf16) {
       SelectStore(Node, 2, AArch64::ST1Twov8h);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3565,10 +3883,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectStore(Node, 3, AArch64::ST1Threev16b);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+               VT == MVT::v4bf16) {
       SelectStore(Node, 3, AArch64::ST1Threev4h);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+               VT == MVT::v8bf16) {
       SelectStore(Node, 3, AArch64::ST1Threev8h);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3593,10 +3913,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectStore(Node, 4, AArch64::ST1Fourv16b);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+               VT == MVT::v4bf16) {
       SelectStore(Node, 4, AArch64::ST1Fourv4h);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+               VT == MVT::v8bf16) {
       SelectStore(Node, 4, AArch64::ST1Fourv8h);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3621,10 +3943,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectStore(Node, 2, AArch64::ST2Twov16b);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+               VT == MVT::v4bf16) {
       SelectStore(Node, 2, AArch64::ST2Twov4h);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+               VT == MVT::v8bf16) {
       SelectStore(Node, 2, AArch64::ST2Twov8h);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3649,10 +3973,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectStore(Node, 3, AArch64::ST3Threev16b);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+               VT == MVT::v4bf16) {
       SelectStore(Node, 3, AArch64::ST3Threev4h);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+               VT == MVT::v8bf16) {
       SelectStore(Node, 3, AArch64::ST3Threev8h);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3677,10 +4003,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectStore(Node, 4, AArch64::ST4Fourv16b);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+               VT == MVT::v4bf16) {
       SelectStore(Node, 4, AArch64::ST4Fourv4h);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+               VT == MVT::v8bf16) {
       SelectStore(Node, 4, AArch64::ST4Fourv8h);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3703,7 +4031,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectStoreLane(Node, 2, AArch64::ST2i8);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectStoreLane(Node, 2, AArch64::ST2i16);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3722,7 +4050,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectStoreLane(Node, 3, AArch64::ST3i8);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectStoreLane(Node, 3, AArch64::ST3i16);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3741,7 +4069,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectStoreLane(Node, 4, AArch64::ST4i8);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectStoreLane(Node, 4, AArch64::ST4i16);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3755,6 +4083,69 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       }
       break;
     }
+    case Intrinsic::aarch64_sve_st2: {
+      if (VT == MVT::nxv16i8) {
+        SelectPredicatedStore</*Scale=*/0>(Node, 2, AArch64::ST2B,
+                                           AArch64::ST2B_IMM);
+        return;
+      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+        SelectPredicatedStore</*Scale=*/1>(Node, 2, AArch64::ST2H,
+                                           AArch64::ST2H_IMM);
+        return;
+      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+        SelectPredicatedStore</*Scale=*/2>(Node, 2, AArch64::ST2W,
+                                           AArch64::ST2W_IMM);
+        return;
+      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+        SelectPredicatedStore</*Scale=*/3>(Node, 2, AArch64::ST2D,
+                                           AArch64::ST2D_IMM);
+        return;
+      }
+      break;
+    }
+    case Intrinsic::aarch64_sve_st3: {
+      if (VT == MVT::nxv16i8) {
+        SelectPredicatedStore</*Scale=*/0>(Node, 3, AArch64::ST3B,
+                                           AArch64::ST3B_IMM);
+        return;
+      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+        SelectPredicatedStore</*Scale=*/1>(Node, 3, AArch64::ST3H,
+                                           AArch64::ST3H_IMM);
+        return;
+      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+        SelectPredicatedStore</*Scale=*/2>(Node, 3, AArch64::ST3W,
+                                           AArch64::ST3W_IMM);
+        return;
+      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+        SelectPredicatedStore</*Scale=*/3>(Node, 3, AArch64::ST3D,
+                                           AArch64::ST3D_IMM);
+        return;
+      }
+      break;
+    }
+    case Intrinsic::aarch64_sve_st4: {
+      if (VT == MVT::nxv16i8) {
+        SelectPredicatedStore</*Scale=*/0>(Node, 4, AArch64::ST4B,
+                                           AArch64::ST4B_IMM);
+        return;
+      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+                 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+        SelectPredicatedStore</*Scale=*/1>(Node, 4, AArch64::ST4H,
+                                           AArch64::ST4H_IMM);
+        return;
+      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+        SelectPredicatedStore</*Scale=*/2>(Node, 4, AArch64::ST4W,
+                                           AArch64::ST4W_IMM);
+        return;
+      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+        SelectPredicatedStore</*Scale=*/3>(Node, 4, AArch64::ST4D,
+                                           AArch64::ST4D_IMM);
+        return;
+      }
+      break;
+    }
     }
     break;
   }
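In the aarch64_sve_st2/st3/st4 cases above, the Scale template argument passed to SelectPredicatedStore is log2 of the element width in bytes, which is also the LSL amount that the reg+reg form of these stores applies to the index register. A sketch of that correspondence; the mnemonics in the comments are illustrative:

// Element width in bits -> Scale (= shift amount of the reg+reg form).
#include <cstdio>

unsigned scaleForElementBits(unsigned EltBits) {
  switch (EltBits) {
  case 8:  return 0;   // st2b { z0.b, z1.b }, p0, [x0, x1]
  case 16: return 1;   // st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
  case 32: return 2;   // st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
  default: return 3;   // st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
  }
}

int main() {
  std::printf("nxv8i16 -> Scale %u\n", scaleForElementBits(16));  // prints 1
  return 0;
}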
@@ -3765,10 +4156,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3793,10 +4184,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3821,10 +4212,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3849,10 +4240,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3877,10 +4268,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3905,10 +4296,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3933,10 +4324,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3961,10 +4352,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3989,10 +4380,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4017,10 +4408,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4043,7 +4434,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4062,7 +4453,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4081,7 +4472,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4100,7 +4491,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4122,10 +4513,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4151,10 +4542,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4180,10 +4571,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4209,10 +4600,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4238,10 +4629,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4267,10 +4658,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     } else if (VT == MVT::v16i8) {
       SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
       return;
-    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
       SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
       return;
-    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
       SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
       return;
     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4294,7 +4685,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4314,7 +4705,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4334,7 +4725,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
       return;
     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
-               VT == MVT::v8f16) {
+               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
       SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
       return;
     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4348,6 +4739,57 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     }
     break;
   }
+  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
+    if (VT == MVT::nxv16i8) {
+      SelectPredicatedLoad(Node, 2, AArch64::LD2B_IMM);
+      return;
+    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+      SelectPredicatedLoad(Node, 2, AArch64::LD2H_IMM);
+      return;
+    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+      SelectPredicatedLoad(Node, 2, AArch64::LD2W_IMM);
+      return;
+    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+      SelectPredicatedLoad(Node, 2, AArch64::LD2D_IMM);
+      return;
+    }
+    break;
+  }
+  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
+    if (VT == MVT::nxv16i8) {
+      SelectPredicatedLoad(Node, 3, AArch64::LD3B_IMM);
+      return;
+    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+      SelectPredicatedLoad(Node, 3, AArch64::LD3H_IMM);
+      return;
+    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+      SelectPredicatedLoad(Node, 3, AArch64::LD3W_IMM);
+      return;
+    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+      SelectPredicatedLoad(Node, 3, AArch64::LD3D_IMM);
+      return;
+    }
+    break;
+  }
+  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
+    if (VT == MVT::nxv16i8) {
+      SelectPredicatedLoad(Node, 4, AArch64::LD4B_IMM);
+      return;
+    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+      SelectPredicatedLoad(Node, 4, AArch64::LD4H_IMM);
+      return;
+    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+      SelectPredicatedLoad(Node, 4, AArch64::LD4W_IMM);
+      return;
+    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+      SelectPredicatedLoad(Node, 4, AArch64::LD4D_IMM);
+      return;
+    }
+    break;
+  }
   }
 
   // Select the default instruction
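For the SVE prefetch intrinsic handled by getMemVTFromNode in the final hunk below, the data type has to be inferred from the governing predicate: an nxv<M>xi1 predicate implies packed elements of 128/M bits. A stand-alone sketch of that mapping, assuming only the four packed predicate widths the helper accepts:

// Model of getPackedVectorTypeFromPredicateType: nxv16i1 -> i8 elements,
// nxv8i1 -> i16, nxv4i1 -> i32, nxv2i1 -> i64 (128 bits per SVE block).
#include <cstdio>

unsigned packedElementBits(unsigned PredElts) {
  const unsigned SVEBitsPerBlock = 128;
  switch (PredElts) {
  case 16: case 8: case 4: case 2:
    return SVEBitsPerBlock / PredElts;
  default:
    return 0;   // not a packed predicate form; the helper returns an invalid EVT
  }
}

int main() {
  std::printf("nxv4i1 -> nxv4i%u\n", packedElementBits(4));   // nxv4i32
  return 0;
}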
@@ -4360,3 +4802,130 @@ FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                          CodeGenOpt::Level OptLevel) {
   return new AArch64DAGToDAGISel(TM, OptLevel);
 }
+
+/// When \p PredVT is a scalable vector predicate in the form
+/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
+/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. If the input
+/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
+/// EVT.
+static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT) {
+  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
+    return EVT();
+
+  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
+      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
+    return EVT();
+
+  ElementCount EC = PredVT.getVectorElementCount();
+  EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min);
+  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC);
+  return MemVT;
+}
+
+/// Return the EVT of the data associated to a memory operation in \p
+/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
+static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
+  if (isa<MemSDNode>(Root))
+    return cast<MemSDNode>(Root)->getMemoryVT();
+
+  if (isa<MemIntrinsicSDNode>(Root))
+    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
+
+  const unsigned Opcode = Root->getOpcode();
+  // For custom ISD nodes, we have to look at them individually to extract the
+  // type of the data moved to/from memory.
+  switch (Opcode) {
+  case AArch64ISD::LD1_MERGE_ZERO:
+  case AArch64ISD::LD1S_MERGE_ZERO:
+  case AArch64ISD::LDNF1_MERGE_ZERO:
+  case AArch64ISD::LDNF1S_MERGE_ZERO:
+    return cast<VTSDNode>(Root->getOperand(3))->getVT();
+  case AArch64ISD::ST1_PRED:
+    return cast<VTSDNode>(Root->getOperand(4))->getVT();
+  default:
+    break;
+  }
+
+  if (Opcode != ISD::INTRINSIC_VOID)
+    return EVT();
+
+  const unsigned IntNo =
+      cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
+  if (IntNo != Intrinsic::aarch64_sve_prf)
+    return EVT();
+
+  // We are using an SVE prefetch intrinsic. Type must be inferred
+  // from the width of the predicate.
+  return getPackedVectorTypeFromPredicateType(
+      Ctx, Root->getOperand(2)->getValueType(0));
+}
+
+/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
+/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
+/// where Root is the memory access using N for its address.
+template <int64_t Min, int64_t Max>
+bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
+                                                   SDValue &Base,
+                                                   SDValue &OffImm) {
+  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
+
+  if (MemVT == EVT())
+    return false;
+
+  if (N.getOpcode() != ISD::ADD)
+    return false;
+
+  SDValue VScale = N.getOperand(1);
+  if (VScale.getOpcode() != ISD::VSCALE)
+    return false;
+
+  TypeSize TS = MemVT.getSizeInBits();
+  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
+  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
+
+  if ((MulImm % MemWidthBytes) != 0)
+    return false;
+
+  int64_t Offset = MulImm / MemWidthBytes;
+  if (Offset < Min || Offset > Max)
+    return false;
+
+  Base = N.getOperand(0);
+  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
+  return true;
+}
+
+/// Select register plus register addressing mode for SVE, with scaled
+/// offset.
+bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
+                                                  SDValue &Base,
+                                                  SDValue &Offset) {
+  if (N.getOpcode() != ISD::ADD)
+    return false;
+
+  // Process an ADD node.
+  const SDValue LHS = N.getOperand(0);
+  const SDValue RHS = N.getOperand(1);
+
+  // 8 bit data does not come with the SHL node, so it is treated
+  // separately.
+  if (Scale == 0) {
+    Base = LHS;
+    Offset = RHS;
+    return true;
+  }
+
+  // Check if the RHS is a shift node with a constant.
+  if (RHS.getOpcode() != ISD::SHL)
+    return false;
+
+  const SDValue ShiftRHS = RHS.getOperand(1);
+  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
+    if (C->getZExtValue() == Scale) {
+      Base = LHS;
+      Offset = RHS.getOperand(0);
+      return true;
+    }
+
+  return false;
+}
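A worked example of the reg+imm fold implemented by SelectAddrModeIndexedSVE above, as stand-alone arithmetic: an address of the form base + vscale * MulImm folds to [base, #Off, mul vl] when MulImm is a whole multiple of the minimum transfer width and the quotient is in [Min, Max]. The ld1h spelling and the 16-byte transfer width are illustrative assumptions for a single-vector load of 16-bit elements.

// Model of the offset computation in SelectAddrModeIndexedSVE.
#include <cstdint>
#include <cstdio>

bool foldVScaleOffset(int64_t MulImm, int64_t MemWidthBytes,
                      int64_t Min, int64_t Max, int64_t &Off) {
  if (MulImm % MemWidthBytes != 0)
    return false;
  Off = MulImm / MemWidthBytes;
  return Off >= Min && Off <= Max;
}

int main() {
  int64_t Off;
  // base + vscale * 32 bytes, transfers of vscale * 16 bytes, range [-8, 7]:
  if (foldVScaleOffset(32, 16, -8, 7, Off))
    std::printf("ld1h { z0.h }, p0/z, [x0, #%lld, mul vl]\n", (long long)Off);
  // base + vscale * 24 bytes is not a whole number of transfers:
  if (!foldVScaleOffset(24, 16, -8, 7, Off))
    std::printf("fall back to the reg+reg addressing mode\n");
  return 0;
}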