Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp')
 contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 119
 1 file changed, 107 insertions(+), 12 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 447ad10ddf22..e070ce2efa6b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -521,6 +521,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::CTPOP, MVT::i64, Custom);
   setOperationAction(ISD::CTPOP, MVT::i128, Custom);
 
+  setOperationAction(ISD::PARITY, MVT::i64, Custom);
+  setOperationAction(ISD::PARITY, MVT::i128, Custom);
+
   setOperationAction(ISD::ABS, MVT::i32, Custom);
   setOperationAction(ISD::ABS, MVT::i64, Custom);
 
@@ -5463,7 +5466,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::SRA_PARTS:
     return LowerShiftParts(Op, DAG);
   case ISD::CTPOP:
-    return LowerCTPOP(Op, DAG);
+  case ISD::PARITY:
+    return LowerCTPOP_PARITY(Op, DAG);
   case ISD::FCOPYSIGN:
     return LowerFCOPYSIGN(Op, DAG);
   case ISD::OR:
@@ -7783,7 +7787,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
   return BitCast(VT, BSP, DAG);
 }
 
-SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
+                                                 SelectionDAG &DAG) const {
   if (DAG.getMachineFunction().getFunction().hasFnAttribute(
           Attribute::NoImplicitFloat))
     return SDValue();
@@ -7791,6 +7796,8 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
   if (!Subtarget->hasNEON())
     return SDValue();
 
+  bool IsParity = Op.getOpcode() == ISD::PARITY;
+
   // While there is no integer popcount instruction, it can
   // be more efficiently lowered to the following sequence that uses
   // AdvSIMD registers/instructions as long as the copies to/from
@@ -7813,6 +7820,10 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
         ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
         DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
 
+    if (IsParity)
+      UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
+                           DAG.getConstant(1, DL, MVT::i32));
+
     if (VT == MVT::i64)
       UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
     return UaddLV;
@@ -7824,9 +7835,15 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
         ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
         DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
 
+    if (IsParity)
+      UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
+                           DAG.getConstant(1, DL, MVT::i32));
+
     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
   }
 
+  assert(!IsParity && "ISD::PARITY of vector types not supported");
+
   if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
 
@@ -11811,6 +11828,12 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
           isConcatMask(M, VT, VT.getSizeInBits() == 128));
 }
 
+bool AArch64TargetLowering::isVectorClearMaskLegal(ArrayRef<int> M,
+                                                   EVT VT) const {
+  // Just delegate to the generic legality; clear masks aren't special.
+  return isShuffleMaskLegal(M, VT);
+}
+
 /// getVShiftImm - Check if this is a valid build_vector for the immediate
 /// operand of a vector shift operation, where all the elements of the
 /// build_vector must have the same constant integer value.
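Note on the PARITY lowering above: parity is just popcount reduced modulo 2, which is why the IsParity path reuses the existing NEON CNT + UADDLV popcount sequence and only appends a single AND with 1 after the horizontal add. A minimal scalar sketch of that equivalence (the builtin stands in for ISD::CTPOP; this is an illustration, not the lowering itself):

    #include <cstdint>

    // parity(x) == popcount(x) & 1; the lowering adds one ISD::AND node
    // after the UADDLV horizontal add rather than a separate expansion.
    static inline uint32_t parity_u64(uint64_t X) {
      return static_cast<uint32_t>(__builtin_popcountll(X)) & 1u;
    }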
@@ -11969,6 +11992,11 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
     if (IsZero)
       return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
     return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
+  case AArch64CC::LE:
+    if (!NoNans)
+      return SDValue();
+    // If we ignore NaNs then we can use the LS implementation.
+    LLVM_FALLTHROUGH;
   case AArch64CC::LS:
     if (IsZero)
       return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
@@ -12073,7 +12101,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
   bool ShouldInvert;
   changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
 
-  bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
+  bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
   SDValue Cmp = EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
   if (!Cmp.getNode())
     return SDValue();
@@ -13587,21 +13615,50 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
 
 bool AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
                                                           CombineLevel Level) const {
-  N = N->getOperand(0).getNode();
+  assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+          N->getOpcode() == ISD::SRL) &&
+         "Expected shift op");
+
+  SDValue ShiftLHS = N->getOperand(0);
   EVT VT = N->getValueType(0);
-  // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
-  // it with shift to let it be lowered to UBFX.
-  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
-      isa<ConstantSDNode>(N->getOperand(1))) {
-    uint64_t TruncMask = N->getConstantOperandVal(1);
+
+  // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
+  // combine it with shift 'N' to let it be lowered to UBFX.
+  if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
+      isa<ConstantSDNode>(ShiftLHS.getOperand(1))) {
+    uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1);
     if (isMask_64(TruncMask) &&
-        N->getOperand(0).getOpcode() == ISD::SRL &&
-        isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
+        ShiftLHS.getOperand(0).getOpcode() == ISD::SRL &&
+        isa<ConstantSDNode>(ShiftLHS.getOperand(0).getOperand(1)))
       return false;
   }
   return true;
 }
 
+bool AArch64TargetLowering::isDesirableToCommuteXorWithShift(
+    const SDNode *N) const {
+  assert(N->getOpcode() == ISD::XOR &&
+         (N->getOperand(0).getOpcode() == ISD::SHL ||
+          N->getOperand(0).getOpcode() == ISD::SRL) &&
+         "Expected XOR(SHIFT) pattern");
+
+  // Only commute if the entire NOT mask is a hidden shifted mask.
+  auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
+  if (XorC && ShiftC) {
+    unsigned MaskIdx, MaskLen;
+    if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
+      unsigned ShiftAmt = ShiftC->getZExtValue();
+      unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
+      if (N->getOperand(0).getOpcode() == ISD::SHL)
+        return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
+      return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
+    }
+  }
+
+  return false;
+}
+
 bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
     const SDNode *N, CombineLevel Level) const {
   assert(((N->getOpcode() == ISD::SHL &&
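Note on isDesirableToCommuteXorWithShift above: the hook only allows the commute when the XOR constant is a NOT in disguise, i.e. a shifted mask covering exactly the bits that a shift by ShiftAmt can produce. A hedged scalar model of the two accepted shapes (64-bit width assumed; these helper names are illustrative, not LLVM API):

    #include <cstdint>

    // For (x << C) ^ M: MaskIdx == C and MaskLen == 64 - C means the
    // mask is ~0 << C, so the XOR flips every bit the shift produced
    // and the node is really (~x) << C.
    static bool commuteXorWithShl(uint64_t M, unsigned C) {
      return C < 64 && M == (~0ULL << C);
    }

    // For (x >> C) ^ M: MaskIdx == 0 and MaskLen == 64 - C means the
    // mask is ~0 >> C, the logical-shift-right counterpart.
    static bool commuteXorWithSrl(uint64_t M, unsigned C) {
      return C < 64 && M == (~0ULL >> C);
    }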
@@ -19221,6 +19278,41 @@ static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv);
 }
 
+static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+
+  SDValue Insert = N->getOperand(0);
+  if (Insert.getOpcode() != ISD::INSERT_SUBVECTOR)
+    return SDValue();
+
+  if (!Insert.getOperand(0).isUndef())
+    return SDValue();
+
+  uint64_t IdxInsert = Insert.getConstantOperandVal(2);
+  uint64_t IdxDupLane = N->getConstantOperandVal(1);
+  if (IdxInsert != IdxDupLane)
+    return SDValue();
+
+  SDValue Bitcast = Insert.getOperand(1);
+  if (Bitcast.getOpcode() != ISD::BITCAST)
+    return SDValue();
+
+  SDValue Subvec = Bitcast.getOperand(0);
+  EVT SubvecVT = Subvec.getValueType();
+  if (!SubvecVT.is128BitVector())
+    return SDValue();
+  EVT NewSubvecVT =
+      getPackedSVEVectorVT(Subvec.getValueType().getVectorElementType());
+
+  SDLoc DL(N);
+  SDValue NewInsert =
+      DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewSubvecVT,
+                  DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
+  SDValue NewDuplane128 = DAG.getNode(AArch64ISD::DUPLANE128, DL, NewSubvecVT,
+                                      NewInsert, N->getOperand(1));
+  return DAG.getNode(ISD::BITCAST, DL, VT, NewDuplane128);
+}
+
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -19307,6 +19399,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performCSELCombine(N, DCI, DAG);
   case AArch64ISD::DUP:
     return performDUPCombine(N, DCI);
+  case AArch64ISD::DUPLANE128:
+    return performDupLane128Combine(N, DAG);
   case AArch64ISD::NVCAST:
     return performNVCASTCombine(N);
   case AArch64ISD::SPLICE:
@@ -19981,7 +20075,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
     return;
   case ISD::CTPOP:
-    if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
+  case ISD::PARITY:
+    if (SDValue Result = LowerCTPOP_PARITY(SDValue(N, 0), DAG))
       Results.push_back(Result);
     return;
   case AArch64ISD::SADDV:
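Note on performDupLane128Combine above: a DUPLANE128 whose operand is INSERT_SUBVECTOR(undef, BITCAST(fixed 128-bit vector), idx) is the shape that arises when a fixed 128-bit value is broadcast across a scalable register; the combine redoes the insert in the packed SVE container type of the subvector's own element type, so the bitcast moves outside the dup. A small trigger sketch (tying this ACLE intrinsic to the combine is our reading of the pattern, not something the diff states; compile with SVE enabled, e.g. -march=armv8-a+sve):

    #include <arm_sve.h>

    // svdupq_n_f64 materialises the 128-bit pair {X0, X1} as a fixed
    // v2f64, inserts it into an undef scalable register, and splats it,
    // i.e. DUPLANE128(INSERT_SUBVECTOR(undef, BITCAST(..), 0), 0).
    svfloat64_t splat_pair(double X0, double X1) {
      return svdupq_n_f64(X0, X1);
    }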