Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp')
-rw-r--r--   contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 119
1 file changed, 107 insertions(+), 12 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 447ad10ddf22..e070ce2efa6b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -521,6 +521,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i128, Custom);
+ setOperationAction(ISD::PARITY, MVT::i64, Custom);
+ setOperationAction(ISD::PARITY, MVT::i128, Custom);
+
setOperationAction(ISD::ABS, MVT::i32, Custom);
setOperationAction(ISD::ABS, MVT::i64, Custom);
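For context: ISD::PARITY is the generic SelectionDAG node produced for the parity builtins, so these two new lines opt i64/i128 parity into the custom lowering added below. A minimal sketch of source that reaches this path; the function name is illustrative, not part of the patch:

    #include <cstdint>

    // __builtin_parityll is expanded to ISD::PARITY during SelectionDAG
    // construction, which this patch now custom-lowers on AArch64.
    bool parity64(uint64_t X) {
      return __builtin_parityll(X) != 0;
    }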
@@ -5463,7 +5466,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::SRA_PARTS:
return LowerShiftParts(Op, DAG);
case ISD::CTPOP:
- return LowerCTPOP(Op, DAG);
+ case ISD::PARITY:
+ return LowerCTPOP_PARITY(Op, DAG);
case ISD::FCOPYSIGN:
return LowerFCOPYSIGN(Op, DAG);
case ISD::OR:
@@ -7783,7 +7787,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
return BitCast(VT, BSP, DAG);
}
-SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
+ SelectionDAG &DAG) const {
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat))
return SDValue();
@@ -7791,6 +7796,8 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->hasNEON())
return SDValue();
+ bool IsParity = Op.getOpcode() == ISD::PARITY;
+
// While there is no integer popcount instruction, it can
// be more efficiently lowered to the following sequence that uses
// AdvSIMD registers/instructions as long as the copies to/from
@@ -7813,6 +7820,10 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+ if (IsParity)
+ UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
+                         DAG.getConstant(1, DL, MVT::i32));
+
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
return UaddLV;
@@ -7824,9 +7835,15 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+ if (IsParity)
+ UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
+                         DAG.getConstant(1, DL, MVT::i32));
+
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
}
+ assert(!IsParity && "ISD::PARITY of vector types not supported");
+
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
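The parity handling above piggybacks on the CTPOP sequence: compute the NEON CNT + UADDLV population count, then mask off all but the low bit, relying on the identity parity(x) == popcount(x) & 1. A minimal scalar sketch of that identity, with illustrative names:

    #include <bitset>
    #include <cstdint>

    // parity(x) == popcount(x) & 1; the patch ANDs the 32-bit UADDLV
    // result with 1 before the final zero-extension to i64/i128.
    uint32_t parityViaPopcount(uint64_t X) {
      return static_cast<uint32_t>(std::bitset<64>(X).count()) & 1u;
    }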
@@ -11811,6 +11828,12 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
isConcatMask(M, VT, VT.getSizeInBits() == 128));
}
+bool AArch64TargetLowering::isVectorClearMaskLegal(ArrayRef<int> M,
+                                                   EVT VT) const {
+  // Just delegate to the generic legality; clear masks aren't special.
+ return isShuffleMaskLegal(M, VT);
+}
+
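isVectorClearMaskLegal is the hook DAGCombiner consults (in its shuffle-with-zero transform, XformToShuffleWithZero) before rewriting a vector AND with a constant lane mask into a shuffle against a zero vector; delegating to isShuffleMaskLegal keeps that fold enabled whenever the resulting shuffle is legal anyway. A hedged sketch of the candidate pattern, using GNU vector extensions; names and constants are illustrative:

    #include <cstdint>

    typedef uint32_t v4u32 __attribute__((vector_size(16)));

    // Clearing whole lanes with a constant mask is the candidate pattern;
    // as a shuffle it becomes mask <0, 5, 2, 7> with a zero second operand.
    v4u32 clearOddLanes(v4u32 X) {
      return X & (v4u32){~0u, 0u, ~0u, 0u};
    }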
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
@@ -11969,6 +11992,11 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
+ case AArch64CC::LE:
+ if (!NoNans)
+ return SDValue();
+    // If we ignore NaNs then we can use the LS implementation.
+ LLVM_FALLTHROUGH;
case AArch64CC::LS:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
@@ -12073,7 +12101,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
bool ShouldInvert;
changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
- bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
+  bool NoNaNs =
+      getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
SDValue Cmp =
EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
if (!Cmp.getNode())
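Previously the no-NaNs fast path in EmitVectorComparison was reachable only under the global NoNaNsFPMath target option; with this change a per-node nnan fast-math flag suffices. A hedged example of source that can carry that flag when built with something like -ffinite-math-only; the FCMLEz outcome is an expectation, not verified output:

    typedef float v4f32 __attribute__((vector_size(16)));
    typedef int v4i32 __attribute__((vector_size(16)));

    // With no-NaNs the LE compare may take the new LS fallthrough and
    // emit FCMLEz directly instead of an inverted FCMGT sequence.
    v4i32 lessEqualZero(v4f32 X) {
      v4f32 Zero = {0.0f, 0.0f, 0.0f, 0.0f};
      return X <= Zero;
    }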
@@ -13587,21 +13615,50 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
bool
AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const {
- N = N->getOperand(0).getNode();
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ SDValue ShiftLHS = N->getOperand(0);
EVT VT = N->getValueType(0);
- // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
- // it with shift to let it be lowered to UBFX.
- if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
- isa<ConstantSDNode>(N->getOperand(1))) {
- uint64_t TruncMask = N->getConstantOperandVal(1);
+
+  // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
+  // combine it with shift 'N' to let it be lowered to UBFX.
+ if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
+      isa<ConstantSDNode>(ShiftLHS.getOperand(1))) {
+ uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1);
if (isMask_64(TruncMask) &&
- N->getOperand(0).getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
+ ShiftLHS.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(ShiftLHS.getOperand(0).getOperand(1)))
return false;
}
return true;
}
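The restructured check keeps its original purpose: when the shift's LHS is an unsigned bitfield extract, ((x >> C) & mask), commuting the outer shift would destroy the single-instruction UBFX match. A minimal sketch of the protected pattern; the constants are illustrative:

    #include <cstdint>

    // (X >> 16) & 0xFFFF selects a contiguous bitfield and matches a
    // single UBFX; the hook above refuses the commute so it stays that way.
    uint64_t extractField(uint64_t X) {
      return (X >> 16) & 0xFFFFu;
    }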
+bool AArch64TargetLowering::isDesirableToCommuteXorWithShift(
+ const SDNode *N) const {
+ assert(N->getOpcode() == ISD::XOR &&
+ (N->getOperand(0).getOpcode() == ISD::SHL ||
+ N->getOperand(0).getOpcode() == ISD::SRL) &&
+ "Expected XOR(SHIFT) pattern");
+
+ // Only commute if the entire NOT mask is a hidden shifted mask.
+ auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
+ if (XorC && ShiftC) {
+ unsigned MaskIdx, MaskLen;
+ if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
+ unsigned ShiftAmt = ShiftC->getZExtValue();
+ unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
+ if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
+ return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
+ }
+ }
+
+ return false;
+}
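The new hook lets DAGCombiner rewrite xor(shift(X, C1), C2) into shift(xor(X, C2'), C1) only when C2 is a shifted mask lining up with the shift, i.e. when the XOR flips exactly the bits the shift leaves live and so is really a NOT. A hedged illustration; the EON/MVN folding is an expectation rather than verified output:

    #include <cstdint>

    // For SHL the mask must start at the shift amount: ~0ULL << 3 has
    // MaskIdx == 3 and MaskLen == 61, so (X << 3) ^ (~0ULL << 3) equals
    // (~X) << 3, and the NOT can fold into a shifted-register form such
    // as EON/MVN.
    uint64_t notShiftedLeft(uint64_t X) {
      return (X << 3) ^ (~0ULL << 3);
    }

    // For SRL the mask must start at bit 0 with length BitWidth - ShiftAmt.
    uint64_t notShiftedRight(uint64_t X) {
      return (X >> 3) ^ (~0ULL >> 3);
    }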
+
bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {
assert(((N->getOpcode() == ISD::SHL &&
@@ -19221,6 +19278,41 @@ static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv);
}
+static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ SDValue Insert = N->getOperand(0);
+ if (Insert.getOpcode() != ISD::INSERT_SUBVECTOR)
+ return SDValue();
+
+ if (!Insert.getOperand(0).isUndef())
+ return SDValue();
+
+ uint64_t IdxInsert = Insert.getConstantOperandVal(2);
+ uint64_t IdxDupLane = N->getConstantOperandVal(1);
+ if (IdxInsert != IdxDupLane)
+ return SDValue();
+
+ SDValue Bitcast = Insert.getOperand(1);
+ if (Bitcast.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ SDValue Subvec = Bitcast.getOperand(0);
+ EVT SubvecVT = Subvec.getValueType();
+ if (!SubvecVT.is128BitVector())
+ return SDValue();
+ EVT NewSubvecVT =
+      getPackedSVEVectorVT(Subvec.getValueType().getVectorElementType());
+
+ SDLoc DL(N);
+ SDValue NewInsert =
+      DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewSubvecVT,
+                  DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
+ SDValue NewDuplane128 = DAG.getNode(AArch64ISD::DUPLANE128, DL, NewSubvecVT,
+                                      NewInsert, N->getOperand(1));
+ return DAG.getNode(ISD::BITCAST, DL, VT, NewDuplane128);
+}
+
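Reading the combine off the code: it hoists the bitcast out of duplane128(insert_subvector(undef, bitcast(V), Idx), Idx) so the duplicate is performed in V's own element type and the bitcast is applied last. That is sound because DUPLANE128 copies whole 128-bit chunks and therefore commutes with lane-type reinterpretation. A byte-level scalar model of that semantic, with illustrative names:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Model of AArch64ISD::DUPLANE128: broadcast the Idx'th 128-bit chunk
    // of Src across a destination of VecBytes bytes. Broadcasting bytes
    // then reinterpreting lanes equals reinterpreting then broadcasting,
    // which is what justifies moving the bitcast outward.
    void dupLane128(const uint8_t *Src, uint8_t *Dst, size_t VecBytes,
                    size_t Idx) {
      for (size_t Off = 0; Off != VecBytes; Off += 16)
        std::memcpy(Dst + Off, Src + Idx * 16, 16);
    }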
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -19307,6 +19399,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performCSELCombine(N, DCI, DAG);
case AArch64ISD::DUP:
return performDUPCombine(N, DCI);
+ case AArch64ISD::DUPLANE128:
+ return performDupLane128Combine(N, DAG);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
case AArch64ISD::SPLICE:
@@ -19981,7 +20075,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
return;
case ISD::CTPOP:
- if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
+ case ISD::PARITY:
+ if (SDValue Result = LowerCTPOP_PARITY(SDValue(N, 0), DAG))
Results.push_back(Result);
return;
case AArch64ISD::SADDV: