Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 743
1 file changed, 656 insertions(+), 87 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index a51aa85a931c..10c477853353 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -62,6 +62,9 @@ public:
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
+ template <signed Low, signed High, signed Scale>
+ bool SelectRDVLImm(SDValue N, SDValue &Imm);
+
bool tryMLAV64LaneV128(SDNode *N);
bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
@@ -159,6 +162,24 @@ public:
return false;
}
+ bool SelectDupZero(SDValue N) {
+ switch (N->getOpcode()) {
+ case AArch64ISD::DUP:
+ case ISD::SPLAT_VECTOR: {
+ auto Opnd0 = N->getOperand(0);
+ if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
+ if (CN->isNullValue())
+ return true;
+ if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
+ if (CN->isZero())
+ return true;
+ break;
+ }
+ }
+
+ return false;
+ }
+
template<MVT::SimpleValueType VT>
bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
return SelectSVEAddSubImm(N, VT, Imm, Shift);
@@ -169,6 +190,11 @@ public:
return SelectSVELogicalImm(N, VT, Imm);
}
+ template <unsigned Low, unsigned High>
+ bool SelectSVEShiftImm64(SDValue N, SDValue &Imm) {
+ return SelectSVEShiftImm64(N, Low, High, Imm);
+ }
+
// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template<signed Min, signed Max, signed Scale, bool Shift>
bool SelectCntImm(SDValue N, SDValue &Imm) {
@@ -197,6 +223,9 @@ public:
/// unchanged; otherwise a REG_SEQUENCE value is returned.
SDValue createDTuple(ArrayRef<SDValue> Vecs);
SDValue createQTuple(ArrayRef<SDValue> Vecs);
+ // Form a sequence of SVE registers for instructions that use a list of
+ // vectors, e.g. structured loads and stores (ldN, stN).
+ SDValue createZTuple(ArrayRef<SDValue> Vecs);
/// Generic helper for the createDTuple/createQTuple
/// functions. Those should almost always be called instead.
@@ -216,11 +245,31 @@ public:
unsigned SubRegIdx);
void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+ void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, const unsigned Opc);
+
+ bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
+ /// SVE Reg+Imm addressing mode.
+ template <int64_t Min, int64_t Max>
+ bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ /// SVE Reg+Reg address mode.
+ template <unsigned Scale>
+ bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
+ return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
+ }
void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+ template <unsigned Scale>
+ void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned Opc_rr,
+ const unsigned Opc_ri);
+ template <unsigned Scale>
+ std::tuple<unsigned, SDValue, SDValue>
+ findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
+ const unsigned Opc_ri, const SDValue &OldBase,
+ const SDValue &OldOffset);
bool tryBitfieldExtractOp(SDNode *N);
bool tryBitfieldExtractOpFromSExt(SDNode *N);
@@ -268,13 +317,19 @@ private:
bool SelectCMP_SWAP(SDNode *N);
+ bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);
+
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
+ bool SelectSVEShiftImm64(SDValue N, uint64_t Low, uint64_t High,
+ SDValue &Imm);
bool SelectSVEArithImm(SDValue N, SDValue &Imm);
+ bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
+ SDValue &Offset);
};
} // end anonymous namespace
@@ -679,6 +734,23 @@ static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
return SDValue(Node, 0);
}
+// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
+template<signed Low, signed High, signed Scale>
+bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
+ if (!isa<ConstantSDNode>(N))
+ return false;
+
+ int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
+ if ((MulImm % std::abs(Scale)) == 0) {
+ int64_t RDVLImm = MulImm / Scale;
+ if ((RDVLImm >= Low) && (RDVLImm <= High)) {
+ Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
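
As a standalone sketch of the matcher above (values for Low/High/Scale are
assumed here to mirror RDVL's signed multiplier range; not part of the patch):

    #include <cstdint>
    #include <cstdlib>

    // True when MulImm is an exact multiple of Scale and the resulting
    // multiplier fits in [Low, High]; RDVLImm receives the multiplier.
    static bool matchRDVLImm(int64_t MulImm, int64_t &RDVLImm,
                             int64_t Low = -32, int64_t High = 31,
                             int64_t Scale = 16) {
      if (MulImm % std::abs(Scale) != 0)
        return false;
      RDVLImm = MulImm / Scale;
      return RDVLImm >= Low && RDVLImm <= High;
    }

    // matchRDVLImm(256, I) -> true, I = 16  (e.g. rdvl x0, #16)
    // matchRDVLImm(8, I)   -> false         (8 is not a multiple of 16)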
/// SelectArithExtendedRegister - Select a "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
@@ -832,16 +904,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
if (!GAN)
return true;
- if (GAN->getOffset() % Size == 0) {
- const GlobalValue *GV = GAN->getGlobal();
- unsigned Alignment = GV->getAlignment();
- Type *Ty = GV->getValueType();
- if (Alignment == 0 && Ty->isSized())
- Alignment = DL.getABITypeAlignment(Ty);
-
- if (Alignment >= Size)
- return true;
- }
+ if (GAN->getOffset() % Size == 0 &&
+ GAN->getGlobal()->getPointerAlignment(DL) >= Size)
+ return true;
}
if (CurDAG->isBaseWithConstantOffset(N)) {
@@ -1132,6 +1197,16 @@ SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
return createTuple(Regs, RegClassIDs, SubRegs);
}
+SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
+ static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
+ AArch64::ZPR3RegClassID,
+ AArch64::ZPR4RegClassID};
+ static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
+ AArch64::zsub2, AArch64::zsub3};
+
+ return createTuple(Regs, RegClassIDs, SubRegs);
+}
+
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
const unsigned RegClassIDs[],
const unsigned SubRegs[]) {
@@ -1240,6 +1315,8 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
}
} else if (VT == MVT::f16) {
Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
+ } else if (VT == MVT::bf16) {
+ Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
} else if (VT == MVT::f32) {
Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
} else if (VT == MVT::f64 || VT.is64BitVector()) {
@@ -1334,6 +1411,54 @@ void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
CurDAG->RemoveDeadNode(N);
}
+/// Optimize \param OldBase and \param OldOffset by selecting the best
+/// addressing mode. Returns a tuple consisting of an Opcode, an SDValue
+/// representing the new Base, and an SDValue representing the new offset.
+template <unsigned Scale>
+std::tuple<unsigned, SDValue, SDValue>
+AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
+ const unsigned Opc_ri,
+ const SDValue &OldBase,
+ const SDValue &OldOffset) {
+ SDValue NewBase = OldBase;
+ SDValue NewOffset = OldOffset;
+ // Detect a possible reg+imm addressing mode.
+ const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
+ N, OldBase, NewBase, NewOffset);
+
+ // Detect a possible reg+reg addressing mode, but only if we haven't already
+ // detected a reg+imm one.
+ const bool IsRegReg =
+ !IsRegImm && SelectSVERegRegAddrMode<Scale>(OldBase, NewBase, NewOffset);
+
+ // Select the instruction.
+ return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
+}
+
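The net effect of the order above (reg+imm first, reg+reg only as a fallback)
is visible in the assembly forms selected; operand values here are assumed for
illustration:

    st2w { z0.s, z1.s }, p0, [x0, #2, mul vl]   // reg+imm variant (Opc_ri)
    st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]   // reg+reg variant (Opc_rr)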
+void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
+ const unsigned Opc) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue Chain = N->getOperand(0);
+
+ SDValue Ops[] = {N->getOperand(1), // Predicate
+ N->getOperand(2), // Memory operand
+ CurDAG->getTargetConstant(0, DL, MVT::i64), Chain};
+
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
+
+ SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
+ SDValue SuperReg = SDValue(Load, 0);
+ for (unsigned i = 0; i < NumVecs; ++i)
+ ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
+ AArch64::zsub0 + i, DL, VT, SuperReg));
+
+ // Copy chain
+ unsigned ChainIdx = NumVecs;
+ ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
+ CurDAG->RemoveDeadNode(N);
+}
+
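For example (operands assumed), an SVE_LD2_MERGE_ZERO of two nxv4i32 values
becomes one LD2W_IMM machine node whose MVT::Untyped result is a ZPR2 tuple;
the user-visible results are then peeled off via zsub0 and zsub1:

    ld2w { z0.s, z1.s }, p0/z, [x0]
    // SDValue(N, 0) <- zsub0 (z0), SDValue(N, 1) <- zsub1 (z1),
    // SDValue(N, 2) <- chain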
void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {
SDLoc dl(N);
@@ -1354,6 +1479,49 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
ReplaceNode(N, St);
}
+template <unsigned Scale>
+void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
+ const unsigned Opc_rr,
+ const unsigned Opc_ri) {
+ SDLoc dl(N);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
+ SDValue RegSeq = createZTuple(Regs);
+
+ // Optimize addressing mode.
+ unsigned Opc;
+ SDValue Offset, Base;
+ std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore<Scale>(
+ N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
+ CurDAG->getTargetConstant(0, dl, MVT::i64));
+
+ SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
+ Base, // address
+ Offset, // offset
+ N->getOperand(0)}; // chain
+ SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
+
+ ReplaceNode(N, St);
+}
+
+bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ SDLoc dl(N);
+ const DataLayout &DL = CurDAG->getDataLayout();
+ const TargetLowering *TLI = getTargetLowering();
+
+ // Try to match the address as a frame index.
+ if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
+ int FI = FINode->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+ return true;
+ }
+
+ return false;
+}
+
void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {
SDLoc dl(N);
@@ -2632,7 +2800,8 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
// bits that are implicitly ANDed off by the above opcodes and if so, skip
// the AND.
uint64_t MaskImm;
- if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
+ if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
+ !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
return false;
if (countTrailingOnes(MaskImm) < Bits)
@@ -2879,6 +3048,32 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
return true;
}
+bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base,
+ SDValue &Offset) {
+ auto C = dyn_cast<ConstantSDNode>(N);
+ if (!C)
+ return false;
+
+ auto Ty = N->getValueType(0);
+
+ int64_t Imm = C->getSExtValue();
+ SDLoc DL(N);
+
+ if ((Imm >= -128) && (Imm <= 127)) {
+ Base = CurDAG->getTargetConstant(Imm, DL, Ty);
+ Offset = CurDAG->getTargetConstant(0, DL, Ty);
+ return true;
+ }
+
+ if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) {
+ Base = CurDAG->getTargetConstant(Imm / 256, DL, Ty);
+ Offset = CurDAG->getTargetConstant(8, DL, Ty);
+ return true;
+ }
+
+ return false;
+}
+
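A minimal standalone sketch of the same split (hypothetical helper name,
mirroring the two checks above):

    #include <cstdint>
    #include <optional>

    struct ImmLsl { int64_t Imm; unsigned Shift; };

    // Split a constant into an 8-bit signed immediate plus an optional LSL #8.
    static std::optional<ImmLsl> splitSVE8BitLslImm(int64_t Imm) {
      if (Imm >= -128 && Imm <= 127)
        return ImmLsl{Imm, 0};                   // fits directly
      if (Imm % 256 == 0 && Imm >= -32768 && Imm <= 32512)
        return ImmLsl{Imm / 256, 8};             // e.g. 25600 -> {100, 8}
      return std::nullopt;                       // e.g. 300 has no encoding
    }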
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) {
if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
const int64_t ImmVal = CNode->getZExtValue();
@@ -2917,7 +3112,7 @@ bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
int64_t ImmVal = CNode->getSExtValue();
SDLoc DL(N);
- if (ImmVal >= -127 && ImmVal < 127) {
+ if (ImmVal >= -128 && ImmVal < 128) {
Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
return true;
}
@@ -2975,6 +3170,24 @@ bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) {
return false;
}
+// This method is only needed to "cast" i64s into i32s when the value
+// is a valid shift amount that has been splatted into a vector with i64
+// elements. Every other type can be handled directly in tablegen.
+bool AArch64DAGToDAGISel::SelectSVEShiftImm64(SDValue N, uint64_t Low,
+ uint64_t High, SDValue &Imm) {
+ if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
+ uint64_t ImmVal = CN->getZExtValue();
+ SDLoc DL(N);
+
+ if (ImmVal >= Low && ImmVal <= High) {
+ Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
// tagp(FrameIndex, IRGstack, tag_offset):
// since the offset between FrameIndex and IRGstack is a compile-time
@@ -3027,6 +3240,63 @@ void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
ReplaceNode(N, N3);
}
+// NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
+// vector types larger than NEON don't have a matching SubRegIndex.
+static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+ assert(V.getValueType().isScalableVector() &&
+ V.getValueType().getSizeInBits().getKnownMinSize() ==
+ AArch64::SVEBitsPerBlock &&
+ "Expected to extract from a packed scalable vector!");
+ assert(VT.isFixedLengthVector() &&
+ "Expected to extract a fixed length vector!");
+
+ SDLoc DL(V);
+ switch (VT.getSizeInBits()) {
+ case 64: {
+ auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+ return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+ }
+ case 128: {
+ auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+ return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+ }
+ default: {
+ auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+ return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+ }
+ }
+}
+
+// NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
+// vector types larger than NEON don't have a matching SubRegIndex.
+static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+ assert(VT.isScalableVector() &&
+ VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
+ "Expected to insert into a packed scalable vector!");
+ assert(V.getValueType().isFixedLengthVector() &&
+ "Expected to insert a fixed length vector!");
+
+ SDLoc DL(V);
+ switch (V.getValueType().getSizeInBits()) {
+ case 64: {
+ auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+ auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+ return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+ SDValue(Container, 0), V, SubReg);
+ }
+ case 128: {
+ auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+ auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+ return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+ SDValue(Container, 0), V, SubReg);
+ }
+ default: {
+ auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+ return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+ }
+ }
+}
+
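Taken together, the two helpers coerce fixed-length values in and out of SVE
registers by container size (a summary, following the asserts above):

    //  64-bit fixed vector -> dsub of a Z register
    // 128-bit fixed vector -> zsub of a Z register
    //  larger fixed vector -> COPY_TO_REGCLASS to ZPR (no SubRegIndex exists)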
void AArch64DAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
@@ -3100,6 +3370,52 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
break;
+ case ISD::EXTRACT_SUBVECTOR: {
+ // Bail when not a "cast"-like extract_subvector.
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
+ break;
+
+ // Bail when normal isel can do the job.
+ EVT InVT = Node->getOperand(0).getValueType();
+ if (VT.isScalableVector() || InVT.isFixedLengthVector())
+ break;
+
+ // NOTE: We can only get here when doing fixed length SVE code generation.
+ // We do manual selection because the types involved are not linked to real
+ // registers (despite being legal) and must be coerced into SVE registers.
+ //
+ // NOTE: If the above changes, be aware that selection will still not work
+ // because the td definition of extract_vector does not support extracting
+ // a fixed length vector from a scalable vector.
+
+ ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
+ return;
+ }
+
+ case ISD::INSERT_SUBVECTOR: {
+ // Bail when not a "cast"-like insert_subvector.
+ if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
+ break;
+ if (!Node->getOperand(0).isUndef())
+ break;
+
+ // Bail when normal isel should do the job.
+ EVT InVT = Node->getOperand(1).getValueType();
+ if (VT.isFixedLengthVector() || InVT.isScalableVector())
+ break;
+
+ // NOTE: We can only get here when doing fixed length SVE code generation.
+ // We do manual selection because the types involved are not linked to real
+ // registers (despite being legal) and must be coerced into SVE registers.
+ //
+ // NOTE: If the above changes, be aware that selection will still not work
+ // because the td definition of insert_vector does not support inserting a
+ // fixed length vector into a scalable vector.
+
+ ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
+ return;
+ }
+
case ISD::Constant: {
// Materialize zero constants as copies from WZR/XZR. This allows
// the coalescer to propagate these into other instructions.
@@ -3185,10 +3501,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3212,10 +3528,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3239,10 +3555,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3266,10 +3582,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3293,10 +3609,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3320,10 +3636,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3347,10 +3663,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3374,10 +3690,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3401,10 +3717,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3426,7 +3742,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectLoadLane(Node, 2, AArch64::LD2i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectLoadLane(Node, 2, AArch64::LD2i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3444,7 +3760,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectLoadLane(Node, 3, AArch64::LD3i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectLoadLane(Node, 3, AArch64::LD3i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3462,7 +3778,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectLoadLane(Node, 4, AArch64::LD4i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectLoadLane(Node, 4, AArch64::LD4i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3537,10 +3853,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 2, AArch64::ST1Twov16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 2, AArch64::ST1Twov4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 2, AArch64::ST1Twov8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3565,10 +3883,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 3, AArch64::ST1Threev16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 3, AArch64::ST1Threev4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 3, AArch64::ST1Threev8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3593,10 +3913,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 4, AArch64::ST1Fourv16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 4, AArch64::ST1Fourv4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 4, AArch64::ST1Fourv8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3621,10 +3943,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 2, AArch64::ST2Twov16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 2, AArch64::ST2Twov4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 2, AArch64::ST2Twov8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3649,10 +3973,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 3, AArch64::ST3Threev16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 3, AArch64::ST3Threev4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 3, AArch64::ST3Threev8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3677,10 +4003,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 4, AArch64::ST4Fourv16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 4, AArch64::ST4Fourv4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 4, AArch64::ST4Fourv8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3703,7 +4031,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectStoreLane(Node, 2, AArch64::ST2i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectStoreLane(Node, 2, AArch64::ST2i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3722,7 +4050,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectStoreLane(Node, 3, AArch64::ST3i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectStoreLane(Node, 3, AArch64::ST3i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3741,7 +4069,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectStoreLane(Node, 4, AArch64::ST4i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectStoreLane(Node, 4, AArch64::ST4i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3755,6 +4083,69 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case Intrinsic::aarch64_sve_st2: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedStore</*Scale=*/0>(Node, 2, AArch64::ST2B,
+ AArch64::ST2B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedStore</*Scale=*/1>(Node, 2, AArch64::ST2H,
+ AArch64::ST2H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedStore</*Scale=*/2>(Node, 2, AArch64::ST2W,
+ AArch64::ST2W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedStore</*Scale=*/3>(Node, 2, AArch64::ST2D,
+ AArch64::ST2D_IMM);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sve_st3: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedStore</*Scale=*/0>(Node, 3, AArch64::ST3B,
+ AArch64::ST3B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedStore</*Scale=*/1>(Node, 3, AArch64::ST3H,
+ AArch64::ST3H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedStore</*Scale=*/2>(Node, 3, AArch64::ST3W,
+ AArch64::ST3W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedStore</*Scale=*/3>(Node, 3, AArch64::ST3D,
+ AArch64::ST3D_IMM);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sve_st4: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedStore</*Scale=*/0>(Node, 4, AArch64::ST4B,
+ AArch64::ST4B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedStore</*Scale=*/1>(Node, 4, AArch64::ST4H,
+ AArch64::ST4H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedStore</*Scale=*/2>(Node, 4, AArch64::ST4W,
+ AArch64::ST4W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedStore</*Scale=*/3>(Node, 4, AArch64::ST4D,
+ AArch64::ST4D_IMM);
+ return;
+ }
+ break;
+ }
}
break;
}
@@ -3765,10 +4156,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3793,10 +4184,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3821,10 +4212,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3849,10 +4240,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3877,10 +4268,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3905,10 +4296,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3933,10 +4324,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3961,10 +4352,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3989,10 +4380,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4017,10 +4408,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4043,7 +4434,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4062,7 +4453,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4081,7 +4472,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4100,7 +4491,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4122,10 +4513,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4151,10 +4542,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4180,10 +4571,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4209,10 +4600,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4238,10 +4629,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4267,10 +4658,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4294,7 +4685,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4314,7 +4705,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4334,7 +4725,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4348,6 +4739,57 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case AArch64ISD::SVE_LD2_MERGE_ZERO: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 2, AArch64::LD2B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 2, AArch64::LD2H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 2, AArch64::LD2W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 2, AArch64::LD2D_IMM);
+ return;
+ }
+ break;
+ }
+ case AArch64ISD::SVE_LD3_MERGE_ZERO: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 3, AArch64::LD3B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 3, AArch64::LD3H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 3, AArch64::LD3W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 3, AArch64::LD3D_IMM);
+ return;
+ }
+ break;
+ }
+ case AArch64ISD::SVE_LD4_MERGE_ZERO: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 4, AArch64::LD4B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 4, AArch64::LD4H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 4, AArch64::LD4W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 4, AArch64::LD4D_IMM);
+ return;
+ }
+ break;
+ }
}
// Select the default instruction
@@ -4360,3 +4802,130 @@ FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new AArch64DAGToDAGISel(TM, OptLevel);
}
+
+/// When \p PredVT is a scalable vector predicate in the form
+/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
+/// integers MVT::nx<M>xi<bits> such that M x bits = 128. If the input
+/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
+/// EVT.
+static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT) {
+ if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
+ return EVT();
+
+ if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
+ PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
+ return EVT();
+
+ ElementCount EC = PredVT.getVectorElementCount();
+ EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min);
+ EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC);
+ return MemVT;
+}
+
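Concretely, the four legal predicate types map as follows, each product being
one 128-bit SVE block per vscale increment:

    // nxv16i1 -> nxv16i8   (16 x  8 = 128)
    // nxv8i1  -> nxv8i16   ( 8 x 16 = 128)
    // nxv4i1  -> nxv4i32   ( 4 x 32 = 128)
    // nxv2i1  -> nxv2i64   ( 2 x 64 = 128)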
+/// Return the EVT of the data associated with a memory operation in \p
+/// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
+static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
+ if (isa<MemSDNode>(Root))
+ return cast<MemSDNode>(Root)->getMemoryVT();
+
+ if (isa<MemIntrinsicSDNode>(Root))
+ return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
+
+ const unsigned Opcode = Root->getOpcode();
+ // For custom ISD nodes, we have to look at them individually to extract the
+ // type of the data moved to/from memory.
+ switch (Opcode) {
+ case AArch64ISD::LD1_MERGE_ZERO:
+ case AArch64ISD::LD1S_MERGE_ZERO:
+ case AArch64ISD::LDNF1_MERGE_ZERO:
+ case AArch64ISD::LDNF1S_MERGE_ZERO:
+ return cast<VTSDNode>(Root->getOperand(3))->getVT();
+ case AArch64ISD::ST1_PRED:
+ return cast<VTSDNode>(Root->getOperand(4))->getVT();
+ default:
+ break;
+ }
+
+ if (Opcode != ISD::INTRINSIC_VOID)
+ return EVT();
+
+ const unsigned IntNo =
+ cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
+ if (IntNo != Intrinsic::aarch64_sve_prf)
+ return EVT();
+
+ // We are using an SVE prefetch intrinsic. Type must be inferred
+ // from the width of the predicate.
+ return getPackedVectorTypeFromPredicateType(
+ Ctx, Root->getOperand(2)->getValueType(0));
+}
+
+/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
+/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
+/// where Root is the memory access using N for its address.
+template <int64_t Min, int64_t Max>
+bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
+ SDValue &Base,
+ SDValue &OffImm) {
+ const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
+
+ if (MemVT == EVT())
+ return false;
+
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+
+ SDValue VScale = N.getOperand(1);
+ if (VScale.getOpcode() != ISD::VSCALE)
+ return false;
+
+ TypeSize TS = MemVT.getSizeInBits();
+ int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
+ int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
+
+ if ((MulImm % MemWidthBytes) != 0)
+ return false;
+
+ int64_t Offset = MulImm / MemWidthBytes;
+ if (Offset < Min || Offset > Max)
+ return false;
+
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
+ return true;
+}
+
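Worked example for the fold above (values assumed): with MemVT = nxv4i32 the
minimum memory width is 128 bits, so MemWidthBytes = 16. An address of the
form add(x0, vscale * 32) then gives Offset = 32 / 16 = 2, which for
Min = -8, Max = 7 selects the immediate form [x0, #2, mul vl]. A MulImm of 24
is rejected (24 % 16 != 0) and the address falls through to other modes.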
+/// Select register plus register addressing mode for SVE, with scaled
+/// offset.
+bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
+ SDValue &Base,
+ SDValue &Offset) {
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+
+ // Process an ADD node.
+ const SDValue LHS = N.getOperand(0);
+ const SDValue RHS = N.getOperand(1);
+
+ // 8-bit data does not come with a SHL node, so it is treated
+ // separately.
+ if (Scale == 0) {
+ Base = LHS;
+ Offset = RHS;
+ return true;
+ }
+
+ // Check if the RHS is a shift node with a constant.
+ if (RHS.getOpcode() != ISD::SHL)
+ return false;
+
+ const SDValue ShiftRHS = RHS.getOperand(1);
+ if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
+ if (C->getZExtValue() == Scale) {
+ Base = LHS;
+ Offset = RHS.getOperand(0);
+ return true;
+ }
+
+ return false;
+}
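
For instance (DAG shape assumed), with Scale = 2 a word-sized access whose
address is add(x0, shl(x1, 2)) matches with Base = x0 and Offset = x1,
selecting the [x0, x1, lsl #2] form. With Scale = 0 any add(x0, x1) matches
directly, since byte accesses take an unscaled register offset.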