author     Dimitry Andric <dim@FreeBSD.org>    2022-08-13 15:37:04 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2023-02-08 19:04:56 +0000
commit     61cfbce3347e4372143bcabf7b197577b9f3958a (patch)
tree       a996b7140fcecf4ec110b2ac28983b858e5df637 /contrib/llvm-project/llvm/lib/Target
parent     972a253a57b6f144b0e4a3e2080a2a0076ec55a0 (diff)
parent     677727e8296a802385345db6fa65e68223f4597a (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target')
10 files changed, 144 insertions, 67 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 72f0fc94940c..c28216048d7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -886,7 +886,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setTargetDAGCombine({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND,
                        ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG,
                        ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR,
-                       ISD::INSERT_SUBVECTOR, ISD::STORE});
+                       ISD::INSERT_SUBVECTOR, ISD::STORE, ISD::BUILD_VECTOR});
 
   if (Subtarget->supportsAddressTopByteIgnored())
     setTargetDAGCombine(ISD::LOAD);
@@ -15988,6 +15988,49 @@ static SDValue performVectorAddSubExtCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
+static SDValue performBuildVectorCombine(SDNode *N,
+                                         TargetLowering::DAGCombinerInfo &DCI,
+                                         SelectionDAG &DAG) {
+  SDLoc DL(N);
+
+  // A build vector of two extracted elements is equivalent to an
+  // extract subvector where the inner vector is any-extended to the
+  // extract_vector_elt VT.
+  //    (build_vector (extract_elt_iXX_to_i32 vec Idx+0)
+  //                  (extract_elt_iXX_to_i32 vec Idx+1))
+  // => (extract_subvector (anyext_iXX_to_i32 vec) Idx)
+
+  // For now, only consider the v2i32 case, which arises as a result of
+  // legalization.
+  if (N->getValueType(0) != MVT::v2i32)
+    return SDValue();
+
+  SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
+  // Reminder, EXTRACT_VECTOR_ELT has the effect of any-extending to its VT.
+  if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      Elt1->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      // Constant index.
+      isa<ConstantSDNode>(Elt0->getOperand(1)) &&
+      isa<ConstantSDNode>(Elt1->getOperand(1)) &&
+      // Both EXTRACT_VECTOR_ELT from same vector...
+      Elt0->getOperand(0) == Elt1->getOperand(0) &&
+      // ... and contiguous. First element's index +1 == second element's index.
+      Elt0->getConstantOperandVal(1) + 1 == Elt1->getConstantOperandVal(1)) {
+    SDValue VecToExtend = Elt0->getOperand(0);
+    EVT ExtVT = VecToExtend.getValueType().changeVectorElementType(MVT::i32);
+    if (!DAG.getTargetLoweringInfo().isTypeLegal(ExtVT))
+      return SDValue();
+
+    SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
+
+    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend);
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Ext,
+                       SubvectorIdx);
+  }
+
+  return SDValue();
+}
+
 static SDValue performAddSubCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     SelectionDAG &DAG) {
@@ -19457,6 +19500,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ADD:
   case ISD::SUB:
     return performAddSubCombine(N, DCI, DAG);
+  case ISD::BUILD_VECTOR:
+    return performBuildVectorCombine(N, DCI, DAG);
   case AArch64ISD::ANDS:
     return performFlagSettingCombine(N, DCI, ISD::AND);
   case AArch64ISD::ADC:
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 2231f8705998..0c5eadeffcdb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -334,6 +334,10 @@ public:
     return 2;
   }
 
+  unsigned getMinTripCountTailFoldingThreshold() const {
+    return ST->hasSVE() ? 5 : 0;
+  }
+
   PredicationStyle emitGetActiveLaneMask() const {
     if (ST->hasSVE())
       return PredicationStyle::DataAndControlFlow;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d3617b87a851..380d3621e745 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -662,8 +662,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
 
-  // TODO: Handle vector types.
   getActionDefinitionsBuilder(G_CTTZ)
+      .lowerIf(isVector(0))
       .clampScalar(0, s32, s64)
       .scalarSameSizeAs(1, 0)
       .customFor({s32, s64});
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
index 73970b9c74c5..71388bc4efa4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
@@ -556,6 +556,15 @@ def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf",
                                                   "for leaf functions",
                                                   [FeatureAAPCSFrameChain]>;
 
+// Assume that lock-free 32-bit atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureAtomics32 : SubtargetFeature<
+    "atomics-32", "HasForced32BitAtomics", "true",
+    "Assume that lock-free 32-bit atomics are available">;
+
 //===----------------------------------------------------------------------===//
 // ARM architecture class
 //
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 743cca9ff71f..4c24d7020932 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1370,7 +1370,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
     // encoding; see ARMISD::MEMBARRIER_MCR.)
     setMaxAtomicSizeInBitsSupported(64);
-  } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) {
+  } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
+             Subtarget->hasForced32BitAtomics()) {
     // Cortex-M (besides Cortex-M0) have 32-bit atomics.
     setMaxAtomicSizeInBitsSupported(32);
   } else {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 5c7f0619161c..7b1b9456080e 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -831,7 +831,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
   // FIXME: Operands are not in canonical order at -O0, so an immediate
   // operand in position 1 is a lost opportunity for now.  We are
   // similar to ARM in this regard.
-  long Imm = 0;
+  int64_t Imm = 0;
   bool UseImm = false;
 
   const bool HasSPE = Subtarget->hasSPE();
@@ -841,7 +841,8 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
     if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
         SrcVT == MVT::i8 || SrcVT == MVT::i1) {
       const APInt &CIVal = ConstInt->getValue();
-      Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
+      Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
+                       (int64_t)CIVal.getSExtValue();
       if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
         UseImm = true;
     }
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index baa19e81e436..d0ca325e9c14 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8199,7 +8199,13 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
   if (!isIntEqualitySetCC(Cond))
     return SDValue();
 
-  const APInt &C1 = cast<ConstantSDNode>(N1)->getAPIntValue();
+  // Don't do this if the sign bit is provably zero, it will be turned back into
+  // an AND.
+  APInt SignMask = APInt::getOneBitSet(64, 31);
+  if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
+    return SDValue();
+
+  const APInt &C1 = N1C->getAPIntValue();
 
   SDLoc dl(N);
   // If the constant is larger than 2^32 - 1 it is impossible for both sides
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index fc0a983f6542..5d9bd2f67558 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1022,16 +1022,10 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &M
     return;
   }
 
-  // Two cases involving an AVL resulting from a previous vsetvli.
-  // 1) If the AVL is the result of a previous vsetvli which has the
-  //    same AVL and VLMAX as our current state, we can reuse the AVL
-  //    from the current state for the new one.  This allows us to
-  //    generate 'vsetvli x0, x0, vtype" or possible skip the transition
-  //    entirely.
-  // 2) If AVL is defined by a vsetvli with the same VLMAX, we can
-  //    replace the AVL operand with the AVL of the defining vsetvli.
-  //    We avoid general register AVLs to avoid extending live ranges
-  //    without being sure we can kill the original source reg entirely.
+  // If AVL is defined by a vsetvli with the same VLMAX, we can
+  // replace the AVL operand with the AVL of the defining vsetvli.
+  // We avoid general register AVLs to avoid extending live ranges
+  // without being sure we can kill the original source reg entirely.
   if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
     return;
   MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
@@ -1039,17 +1033,6 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &M
     return;
 
   VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
-  // case 1
-  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
-      DefInfo.hasSameAVL(PrevInfo) &&
-      DefInfo.hasSameVLMAX(PrevInfo)) {
-    if (PrevInfo.hasAVLImm())
-      Info.setAVLImm(PrevInfo.getAVLImm());
-    else
-      Info.setAVLReg(PrevInfo.getAVLReg());
-    return;
-  }
-  // case 2
   if (DefInfo.hasSameVLMAX(Info) &&
       (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
     if (DefInfo.hasAVLImm())
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index b080ab7e138c..7d0fc4e8a8c6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -419,7 +419,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setTruncStoreAction(VT, MVT::bf16, Expand);
     setOperationAction(ISD::BF16_TO_FP, VT, Expand);
-    setOperationAction(ISD::FP_TO_BF16, VT, Expand);
+    setOperationAction(ISD::FP_TO_BF16, VT, Custom);
   }
 
   setOperationAction(ISD::PARITY, MVT::i8, Custom);
@@ -2494,6 +2494,10 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
       !Subtarget.hasX87())
     return MVT::i32;
 
+  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
+    return getRegisterTypeForCallingConv(Context, CC,
+                                         VT.changeVectorElementTypeToInteger());
+
   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
 }
 
@@ -2525,6 +2529,10 @@ unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
     return 3;
   }
 
+  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
+    return getNumRegistersForCallingConv(Context, CC,
+                                         VT.changeVectorElementTypeToInteger());
+
   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
 }
 
@@ -2733,6 +2741,40 @@ unsigned X86TargetLowering::getJumpTableEncoding() const {
   return TargetLowering::getJumpTableEncoding();
 }
 
+bool X86TargetLowering::splitValueIntoRegisterParts(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
+  bool IsABIRegCopy = CC.has_value();
+  EVT ValueVT = Val.getValueType();
+  if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
+    unsigned ValueBits = ValueVT.getSizeInBits();
+    unsigned PartBits = PartVT.getSizeInBits();
+    Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
+    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
+    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+    Parts[0] = Val;
+    return true;
+  }
+  return false;
+}
+
+SDValue X86TargetLowering::joinRegisterPartsIntoValue(
+    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
+    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
+  bool IsABIRegCopy = CC.has_value();
+  if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
+    unsigned ValueBits = ValueVT.getSizeInBits();
+    unsigned PartBits = PartVT.getSizeInBits();
+    SDValue Val = Parts[0];
+
+    Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
+    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
+    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+    return Val;
+  }
+  return SDValue();
+}
+
 bool
 X86TargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); }
 
@@ -19304,44 +19346,6 @@ static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
   return false;
 }
 
-static bool canCombineAsMaskOperation(SDValue V1, SDValue V2,
-                                      const X86Subtarget &Subtarget) {
-  if (!Subtarget.hasAVX512())
-    return false;
-
-  MVT VT = V1.getSimpleValueType().getScalarType();
-  if ((VT == MVT::i16 || VT == MVT::i8) && !Subtarget.hasBWI())
-    return false;
-
-  // i8 is better to be widen to i16, because there is PBLENDW for vXi16
-  // when the vector bit size is 128 or 256.
-  if (VT == MVT::i8 && V1.getSimpleValueType().getSizeInBits() < 512)
-    return false;
-
-  auto HasMaskOperation = [&](SDValue V) {
-    // TODO: Currently we only check limited opcode. We probably extend
-    // it to all binary operation by checking TLI.isBinOp().
-    switch (V->getOpcode()) {
-    default:
-      return false;
-    case ISD::ADD:
-    case ISD::SUB:
-    case ISD::AND:
-    case ISD::XOR:
-      break;
-    }
-    if (!V->hasOneUse())
-      return false;
-
-    return true;
-  };
-
-  if (HasMaskOperation(V1) || HasMaskOperation(V2))
-    return true;
-
-  return false;
-}
-
 // Forward declaration.
 static SDValue canonicalizeShuffleMaskWithHorizOp(
     MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask,
@@ -19417,7 +19421,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
   // integers to handle flipping the low and high halves of AVX 256-bit vectors.
   SmallVector<int, 16> WidenedMask;
   if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
-      !canCombineAsMaskOperation(V1, V2, Subtarget) &&
      canWidenShuffleElements(OrigMask, Zeroable, V2IsZero, WidenedMask)) {
     // Shuffle mask widening should not interfere with a broadcast opportunity
     // by obfuscating the operands with bitcasts.
@@ -23058,6 +23061,18 @@ static SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) {
   return Res;
 }
 
+SDValue X86TargetLowering::LowerFP_TO_BF16(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  MakeLibCallOptions CallOptions;
+  RTLIB::Libcall LC =
+      RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
+  SDValue Res =
+      makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16,
+                     DAG.getBitcast(MVT::i32, Res));
+}
+
 /// Depending on uarch and/or optimizing for size, we might prefer to use a
 /// vector operation in place of the typical scalar operation.
 static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
@@ -32250,6 +32265,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::STRICT_FP16_TO_FP: return LowerFP16_TO_FP(Op, DAG);
   case ISD::FP_TO_FP16:
   case ISD::STRICT_FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
+  case ISD::FP_TO_BF16:         return LowerFP_TO_BF16(Op, DAG);
   case ISD::LOAD:               return LowerLoad(Op, Subtarget, DAG);
   case ISD::STORE:              return LowerStore(Op, Subtarget, DAG);
   case ISD::FADD:
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
index 85e5d0ba4c34..18fb2dbe8d71 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1598,6 +1598,7 @@ namespace llvm {
     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue
     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
@@ -1621,6 +1622,17 @@ namespace llvm {
                             MachineBasicBlock *Entry,
                             const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
 
+    bool
+    splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
+                                SDValue *Parts, unsigned NumParts, MVT PartVT,
+                                Optional<CallingConv::ID> CC) const override;
+
+    SDValue
+    joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
+                               const SDValue *Parts, unsigned NumParts,
+                               MVT PartVT, EVT ValueVT,
+                               Optional<CallingConv::ID> CC) const override;
+
     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
 
     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
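
The new ARM "atomics-32" subtarget feature (ARM.td and ARMISelLowering.cpp above) raises the supported lock-free atomic width to 32 bits even where the target and operating system would not normally provide it. A minimal sketch, not taken from the commit, of the kind of source it affects; it assumes the feature is enabled through a target-feature flag such as llc's -mattr=+atomics-32:

    // Sketch only: with "atomics-32" enabled, a 32-bit atomic RMW like this is
    // assumed to be lock-free and is lowered inline rather than through an
    // __atomic_* library call. Any __sync_* helpers the lowering still needs
    // must be provided by the user, per the feature description above.
    #include <atomic>

    std::atomic<int> counter{0};

    int bump() {
      return counter.fetch_add(1, std::memory_order_relaxed);
    }

Note the ABI caveat in the feature description: objects built with and without the feature must not share atomic variables across the boundary.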
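
The PPCFastISel.cpp change swaps long for int64_t when holding the compare immediate. A small stand-alone illustration (hypothetical values, not from the commit) of why the declared width matters: on an LLP64 host such as 64-bit Windows, long is only 32 bits, so a 64-bit immediate stored into it is truncated before the isInt<16>/isUInt<16> range checks ever run.

    // Minimal sketch: the same zero-extended 64-bit constant stored into
    // `long` versus `int64_t`. On LP64 hosts both keep the value; on LLP64
    // hosts only the int64_t copy does.
    #include <cstdint>
    #include <iostream>

    int main() {
      uint64_t zext = 0x100000000ULL;            // a zero-extended i64 immediate
      long narrow = static_cast<long>(zext);     // typically 0 where long is 32 bits
      int64_t wide = static_cast<int64_t>(zext); // 4294967296 everywhere
      std::cout << narrow << ' ' << wide << '\n';
      return 0;
    }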
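
The AArch64 performBuildVectorCombine added above rewrites a build_vector of two adjacent extract_vector_elt nodes into an extract_subvector of an any-extended vector. A hedged guess, using GCC/Clang vector extensions and an illustrative function name that does not appear in the commit, at C++ source that legalizes into the v2i32 pattern the combine looks for:

    // Illustrative only: widening two adjacent i16 lanes of one vector to i32.
    // After type legalization this becomes
    //   (build_vector (extract_elt v, 0) (extract_elt v, 1))
    // which the new combine can turn into
    //   (extract_subvector (any_extend v), 0).
    typedef short v4i16 __attribute__((vector_size(8)));
    typedef int v2i32 __attribute__((vector_size(8)));

    v2i32 widen_low_lanes(v4i16 v) {
      v2i32 r = {v[0], v[1]}; // same source vector, contiguous indices 0 and 1
      return r;
    }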
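
The X86 LowerFP_TO_BF16 added above expands FP_TO_BF16 into a float-to-bfloat16 libcall whose f32-typed result carries the bf16 payload, then bitcasts that result to i32 and truncates to i16. A plain C++ model of the final bit extraction (a sketch of the data movement only; the rounding is assumed to happen inside the runtime conversion routine):

    // Sketch: recover the 16 bf16 payload bits from the f32-typed libcall
    // result, mirroring the ISD::BITCAST (f32 -> i32) followed by
    // ISD::TRUNCATE (i32 -> i16) in the lowering above.
    #include <cstdint>
    #include <cstring>

    uint16_t bf16_payload(float libcall_result) {
      uint32_t bits = 0;
      std::memcpy(&bits, &libcall_result, sizeof(bits)); // bitcast f32 -> i32
      return static_cast<uint16_t>(bits);                // truncate i32 -> i16
    }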
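
The AArch64TargetTransformInfo.h hook above makes getMinTripCountTailFoldingThreshold return 5 when SVE is available. Roughly, loops whose trip count is known to be below that threshold are steered away from tail-folded (predicated) vectorization. A hypothetical example of such a loop, written only to illustrate the shape and not taken from the commit:

    // Illustrative only: a constant trip count of 4 falls under the SVE
    // threshold of 5, so the vectorizer avoids a predicated (tail-folded)
    // vector body for this loop and falls back to its other strategies.
    void scale4(float *a, float s) {
      for (int i = 0; i < 4; ++i)
        a[i] *= s;
    }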