diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2024-05-04 10:19:32 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-05-04 15:44:49 +0000 |
commit | 5678d1d98a348f315453555377ccb28821a2ffcd (patch) | |
tree | 9dda9eea3f362a43cae6ec4fa369567efa7002fa /contrib/llvm-project/llvm/lib/Target | |
parent | 0ad9b235e1eaef36e07247c8c7635a8eac98f4b1 (diff) | |
parent | 9a7cb8417a2a13bcb7a300c65c43960389b85456 (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target')
6 files changed, 27 insertions, 34 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 03baa7497615..ac61dd8745d4 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4885,19 +4885,9 @@ defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), (zext (v8i8 V64:$opB))))), (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; -def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), - (v8i16 (add (sub (zext (v8i8 V64:$opA)), - (zext (v8i8 V64:$opB))), - (AArch64vashr v8i16:$src, (i32 15))))), - (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), (zext (extract_high_v16i8 (v16i8 V128:$opB)))))), (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; -def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), - (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), - (zext (extract_high_v16i8 (v16i8 V128:$opB)))), - (AArch64vashr v8i16:$src, (i32 15))))), - (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), (zext (v4i16 V64:$opB))))), (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 84b9330ef963..50d8bfa87508 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -2358,6 +2358,11 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal( bool Changed = false; + if (IsNonTemporal) { + // Set non-temporal hint for all cache levels. + Changed |= setTH(MI, AMDGPU::CPol::TH_NT); + } + if (IsVolatile) { Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS); @@ -2370,11 +2375,6 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal( Position::AFTER); } - if (IsNonTemporal) { - // Set non-temporal hint for all cache levels. - Changed |= setTH(MI, AMDGPU::CPol::TH_NT); - } - return Changed; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index a0cec426002b..d46093b9e260 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -14559,7 +14559,7 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); SDLoc DL(N); SDValue OtherOp = TrueVal.getOperand(1 - OpToFold); - EVT OtherOpVT = OtherOp->getValueType(0); + EVT OtherOpVT = OtherOp.getValueType(); SDValue IdentityOperand = DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags()); if (!Commutative) diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 833f058253d8..553d338b7790 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2923,11 +2923,10 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base, } bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { - // Cannot use 32 bit constants to reference objects in kernel code model. - // Cannot use 32 bit constants to reference objects in large PIC mode since - // GOTOFF is 64 bits. + // Cannot use 32 bit constants to reference objects in kernel/large code + // model. if (TM.getCodeModel() == CodeModel::Kernel || - (TM.getCodeModel() == CodeModel::Large && TM.isPositionIndependent())) + TM.getCodeModel() == CodeModel::Large) return false; // In static codegen with small code model, we can get the address of a label diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp index 96bbd981ff24..201dd8382536 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47038,10 +47038,13 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget)) return V; - // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) - // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or - // into (lshr, (sext (a), SarConst - [56,48,32,24,16])) - // depending on sign of (SarConst - [56,48,32,24,16]) + // fold (SRA (SHL X, ShlConst), SraConst) + // into (SHL (sext_in_reg X), ShlConst - SraConst) + // or (sext_in_reg X) + // or (SRA (sext_in_reg X), SraConst - ShlConst) + // depending on relation between SraConst and ShlConst. + // We only do this if (Size - ShlConst) is equal to 8, 16 or 32. That allows + // us to do the sext_in_reg from corresponding bit. // sexts in X86 are MOVs. The MOVs have the same code size // as above SHIFTs (only SHIFT on 1 has lower code size). @@ -47057,29 +47060,29 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); APInt ShlConst = N01->getAsAPIntVal(); - APInt SarConst = N1->getAsAPIntVal(); + APInt SraConst = N1->getAsAPIntVal(); EVT CVT = N1.getValueType(); - if (SarConst.isNegative()) + if (CVT != N01.getValueType()) + return SDValue(); + if (SraConst.isNegative()) return SDValue(); for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { unsigned ShiftSize = SVT.getSizeInBits(); - // skipping types without corresponding sext/zext and - // ShlConst that is not one of [56,48,32,24,16] + // Only deal with (Size - ShlConst) being equal to 8, 16 or 32. if (ShiftSize >= Size || ShlConst != Size - ShiftSize) continue; SDLoc DL(N); SDValue NN = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); - SarConst = SarConst - (Size - ShiftSize); - if (SarConst == 0) + if (SraConst.eq(ShlConst)) return NN; - if (SarConst.isNegative()) + if (SraConst.ult(ShlConst)) return DAG.getNode(ISD::SHL, DL, VT, NN, - DAG.getConstant(-SarConst, DL, CVT)); + DAG.getConstant(ShlConst - SraConst, DL, CVT)); return DAG.getNode(ISD::SRA, DL, VT, NN, - DAG.getConstant(SarConst, DL, CVT)); + DAG.getConstant(SraConst - ShlConst, DL, CVT)); } return SDValue(); } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h index a458b5f9ec8f..4d55a084b730 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h @@ -244,7 +244,8 @@ public: // TODO: Currently we're always allowing widening on CPUs without VLX, // because for many cases we don't have a better option. bool canExtendTo512DQ() const { - return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512); + return hasAVX512() && hasEVEX512() && + (!hasVLX() || getPreferVectorWidth() >= 512); } bool canExtendTo512BW() const { return hasBWI() && canExtendTo512DQ(); |