diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2024-12-06 17:26:41 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-12-06 17:29:48 +0000 |
commit | 71ac745d76c3ba442e753daff1870893f272b29d (patch) | |
tree | 40cf2dab0e5792b1570b82dc58d5cf532ff8136f /contrib/llvm-project/llvm/lib | |
parent | 509300e1ffbe71a6f6608fa188dbcfe4ae1fbbdf (diff) | |
parent | f56b67c46df5f53882fd6d3ace1f7942689b6290 (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib')
20 files changed, 138 insertions, 79 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp index aa550f0b6a7b..94061c949b7f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -1404,8 +1404,17 @@ void MemorySSAUpdater::changeToUnreachable(const Instruction *I) { MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB( Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point) { - MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition); - MSSA->insertIntoListsForBlock(NewAccess, BB, Point); + return createMemoryAccessInBB(I, Definition, BB, Point, + /*CreationMustSucceed=*/true); +} + +MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB( + Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, + MemorySSA::InsertionPlace Point, bool CreationMustSucceed) { + MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess( + I, Definition, /*Template=*/nullptr, CreationMustSucceed); + if (NewAccess) + MSSA->insertIntoListsForBlock(NewAccess, BB, Point); return NewAccess; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp index 51cffac80876..412cfe73d3e5 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6313,8 +6313,10 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) { return getConstantMultiple(Z->getOperand()).zext(BitWidth); } case scSignExtend: { + // Only multiples that are a power of 2 will hold after sext. const SCEVSignExtendExpr *E = cast<SCEVSignExtendExpr>(S); - return getConstantMultiple(E->getOperand()).sext(BitWidth); + uint32_t TZ = getMinTrailingZeros(E->getOperand()); + return GetShiftedByZeros(TZ); } case scMulExpr: { const SCEVMulExpr *M = cast<SCEVMulExpr>(S); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp index f24ab187ef40..21a02a6f0947 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp @@ -1474,7 +1474,7 @@ void MachineLICMBase::InitializeLoadsHoistableLoops() { if (!AllowedToHoistLoads[Loop]) continue; for (auto &MI : *MBB) { - if (!MI.mayStore() && !MI.isCall() && + if (!MI.isLoadFoldBarrier() && !MI.mayStore() && !MI.isCall() && !(MI.mayLoad() && MI.hasOrderedMemoryRef())) continue; for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop()) diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 87e057a468af..c183ffd384c2 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) { !MF.getSubtarget<AArch64Subtarget>().hasSVE(); } +static bool requiresSaveVG(MachineFunction &MF) { + AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + // For Darwin platforms we don't save VG for non-SVE functions, even if SME + // is enabled with streaming mode changes. + if (!AFI->hasStreamingModeChanges()) + return false; + auto &ST = MF.getSubtarget<AArch64Subtarget>(); + if (ST.isTargetDarwin()) + return ST.hasSVE(); + return true; +} + bool isVGInstruction(MachineBasicBlock::iterator MBBI) { unsigned Opc = MBBI->getOpcode(); if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI || @@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( // functions, we need to do this for both the streaming and non-streaming // vector length. Move past these instructions if necessary. MachineFunction &MF = *MBB.getParent(); - AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - if (AFI->hasStreamingModeChanges()) + if (requiresSaveVG(MF)) while (isVGInstruction(MBBI)) ++MBBI; @@ -1936,12 +1947,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // pointer bump above. while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) && !IsSVECalleeSave(MBBI)) { - // Move past instructions generated to calculate VG - if (AFI->hasStreamingModeChanges()) - while (isVGInstruction(MBBI)) - ++MBBI; - - if (CombineSPBump) + if (CombineSPBump && + // Only fix-up frame-setup load/store instructions. + (!requiresSaveVG(MF) || !isVGInstruction(MBBI))) fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI); ++MBBI; @@ -2848,7 +2856,8 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) { return Subtarget.isTargetMachO() && !(Subtarget.getTargetLowering()->supportSwiftError() && Attrs.hasAttrSomewhere(Attribute::SwiftError)) && - MF.getFunction().getCallingConv() != CallingConv::SwiftTail; + MF.getFunction().getCallingConv() != CallingConv::SwiftTail && + !requiresSaveVG(MF); } static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, @@ -3720,7 +3729,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // non-streaming VG value. const Function &F = MF.getFunction(); SMEAttrs Attrs(F); - if (AFI->hasStreamingModeChanges()) { + if (requiresSaveVG(MF)) { if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface()) CSStackSize += 16; else @@ -3873,7 +3882,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( } // Insert VG into the list of CSRs, immediately before LR if saved. - if (AFI->hasStreamingModeChanges()) { + if (requiresSaveVG(MF)) { std::vector<CalleeSavedInfo> VGSaves; SMEAttrs Attrs(MF.getFunction()); @@ -4602,10 +4611,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II, void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced( MachineFunction &MF, RegScavenger *RS = nullptr) const { - AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); for (auto &BB : MF) for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) { - if (AFI->hasStreamingModeChanges()) + if (requiresSaveVG(MF)) II = emitVGSaveRestore(II, this); if (StackTaggingMergeSetTag) II = tryMergeAdjacentSTG(II, this, RS); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 62078822c89b..ef2789e96213 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8732,10 +8732,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue InGlue; if (RequiresSMChange) { - - Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL, - DAG.getVTList(MVT::Other, MVT::Glue), Chain); - InGlue = Chain.getValue(1); + if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) { + Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL, + DAG.getVTList(MVT::Other, MVT::Glue), Chain); + InGlue = Chain.getValue(1); + } SDValue NewChain = changeStreamingMode( DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue, @@ -8914,11 +8915,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Result = changeStreamingMode( DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue, getSMCondition(CallerAttrs, CalleeAttrs), PStateSM); - InGlue = Result.getValue(1); - Result = - DAG.getNode(AArch64ISD::VG_RESTORE, DL, - DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue}); + if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) { + InGlue = Result.getValue(1); + Result = + DAG.getNode(AArch64ISD::VG_RESTORE, DL, + DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue}); + } } if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs)) diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp index c136f5b3e515..e680dda7374d 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -721,7 +721,7 @@ bool LoongArchExpandPseudo::expandFunctionCALL( IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; Register AddrReg = IsTailCall ? LoongArch::R19 : LoongArch::R1; - bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); + bool UseGOT = Func.getTargetFlags() == LoongArchII::MO_CALL_PLT; unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D; expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 0580683c3ce3..0233baecf6dd 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -67,8 +67,7 @@ class VecCond<SDPatternOperator OpNode, ValueType TyNode, let usesCustomInserter = 1; } -def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector), - (bitconvert (v4i32 (build_vector)))], [{ +def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector)], [{ APInt Imm; EVT EltTy = N->getValueType(0).getVectorElementType(); @@ -109,8 +108,7 @@ def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31; }]>; -def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector), - (bitconvert (v4i32 (build_vector)))], [{ +def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector)], [{ APInt Imm; EVT EltTy = N->getValueType(0).getVectorElementType(); diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp index e40981f5b5cd..0712cc01ea03 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -55,7 +55,7 @@ static MCInstrInfo *createLoongArchMCInstrInfo() { static MCSubtargetInfo * createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (CPU.empty() || CPU == "generic") - CPU = TT.isArch64Bit() ? "la464" : "generic-la32"; + CPU = TT.isArch64Bit() ? "generic-la64" : "generic-la32"; return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index f6f32fde3b77..a9ffd2bedf21 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -220,6 +220,10 @@ bool MipsDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { return false; } +bool MipsDAGToDAGISel::selectVSplatImmEq1(SDValue N) const { + llvm_unreachable("Unimplemented function."); +} + /// Convert vector addition with vector subtraction if that allows to encode /// constant as an immediate and thus avoid extra 'ldi' instruction. /// add X, <-1, -1...> --> sub X, <1, 1...> diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h index 6135f9680785..3485300a782c 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h @@ -120,6 +120,9 @@ private: /// starting at bit zero. virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value is 1. + virtual bool selectVSplatImmEq1(SDValue N) const; + /// Convert vector addition with vector subtraction if that allows to encode /// constant as an immediate and thus avoid extra 'ldi' instruction. /// add X, <-1, -1...> --> sub X, <1, 1...> diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td index c4abccb24c6f..f4c32c9dcd42 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td @@ -198,14 +198,8 @@ def vsplati32 : PatFrag<(ops node:$e0), (v4i32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; -def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ - APInt Imm; - SDNode *BV = N->getOperand(0).getNode(); - EVT EltTy = N->getValueType(0).getVectorElementType(); - - return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && - Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; -}]>; +// Any build_vector that is a constant splat with a value that equals 1 +def vsplat_imm_eq_1 : ComplexPattern<vAny, 0, "selectVSplatImmEq1">; def vsplati64 : PatFrag<(ops node:$e0), (v2i64 (build_vector node:$e0, node:$e0))>; @@ -217,7 +211,7 @@ def vsplati64_splat_d : PatFrag<(ops node:$e0), node:$e0, node:$e0, node:$e0)), - vsplati64_imm_eq_1))))>; + (vsplat_imm_eq_1)))))>; def vsplatf32 : PatFrag<(ops node:$e0), (v4f32 (build_vector node:$e0, node:$e0, @@ -352,46 +346,35 @@ def vsplat_maskr_bits_uimm6 : SplatComplexPattern<vsplat_uimm6, vAny, 1, "selectVSplatMaskR", [build_vector, bitconvert]>; -// Any build_vector that is a constant splat with a value that equals 1 -// FIXME: These should be a ComplexPattern but we can't use them because the -// ISel generator requires the uses to have a name, but providing a name -// causes other errors ("used in pattern but not operand list") -def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{ - APInt Imm; - EVT EltTy = N->getValueType(0).getVectorElementType(); - - return selectVSplat(N, Imm, EltTy.getSizeInBits()) && - Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; -}]>; def vbclr_b : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>; + (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>; def vbclr_h : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>; + (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>; def vbclr_w : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>; + (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>; def vbclr_d : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl (v2i64 vsplati64_imm_eq_1), + (and node:$ws, (vnot (shl (v2i64 (vsplat_imm_eq_1)), node:$wt)))>; def vbneg_b : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbneg_h : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbneg_w : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbneg_d : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl (v2i64 vsplati64_imm_eq_1), + (xor node:$ws, (shl (v2i64 (vsplat_imm_eq_1)), node:$wt))>; def vbset_b : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbset_h : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbset_w : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbset_d : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl (v2i64 vsplati64_imm_eq_1), + (or node:$ws, (shl (v2i64 (vsplat_imm_eq_1)), node:$wt))>; def muladd : PatFrag<(ops node:$wd, node:$ws, node:$wt), @@ -3842,7 +3825,7 @@ class MSAShiftPat<SDNode Node, ValueType VT, MSAInst Insn, dag Vec> : (VT (Insn VT:$ws, VT:$wt))>; class MSABitPat<SDNode Node, ValueType VT, MSAInst Insn, PatFrag Frag> : - MSAPat<(VT (Node VT:$ws, (shl vsplat_imm_eq_1, (Frag VT:$wt)))), + MSAPat<(VT (Node VT:$ws, (shl (vsplat_imm_eq_1), (Frag VT:$wt)))), (VT (Insn VT:$ws, VT:$wt))>; multiclass MSAShiftPats<SDNode Node, string Insn> { @@ -3861,7 +3844,7 @@ multiclass MSABitPats<SDNode Node, string Insn> { def : MSABitPat<Node, v16i8, !cast<MSAInst>(Insn#_B), vsplati8imm7>; def : MSABitPat<Node, v8i16, !cast<MSAInst>(Insn#_H), vsplati16imm15>; def : MSABitPat<Node, v4i32, !cast<MSAInst>(Insn#_W), vsplati32imm31>; - def : MSAPat<(Node v2i64:$ws, (shl (v2i64 vsplati64_imm_eq_1), + def : MSAPat<(Node v2i64:$ws, (shl (v2i64 (vsplat_imm_eq_1)), (vsplati64imm63 v2i64:$wt))), (v2i64 (!cast<MSAInst>(Insn#_D) v2i64:$ws, v2i64:$wt))>; } @@ -3872,16 +3855,16 @@ defm : MSAShiftPats<sra, "SRA">; defm : MSABitPats<xor, "BNEG">; defm : MSABitPats<or, "BSET">; -def : MSAPat<(and v16i8:$ws, (vnot (shl vsplat_imm_eq_1, +def : MSAPat<(and v16i8:$ws, (vnot (shl (vsplat_imm_eq_1), (vsplati8imm7 v16i8:$wt)))), (v16i8 (BCLR_B v16i8:$ws, v16i8:$wt))>; -def : MSAPat<(and v8i16:$ws, (vnot (shl vsplat_imm_eq_1, +def : MSAPat<(and v8i16:$ws, (vnot (shl (vsplat_imm_eq_1), (vsplati16imm15 v8i16:$wt)))), (v8i16 (BCLR_H v8i16:$ws, v8i16:$wt))>; -def : MSAPat<(and v4i32:$ws, (vnot (shl vsplat_imm_eq_1, +def : MSAPat<(and v4i32:$ws, (vnot (shl (vsplat_imm_eq_1), (vsplati32imm31 v4i32:$wt)))), (v4i32 (BCLR_W v4i32:$ws, v4i32:$wt))>; -def : MSAPat<(and v2i64:$ws, (vnot (shl (v2i64 vsplati64_imm_eq_1), +def : MSAPat<(and v2i64:$ws, (vnot (shl (v2i64 (vsplat_imm_eq_1)), (vsplati64imm63 v2i64:$wt)))), (v2i64 (BCLR_D v2i64:$ws, v2i64:$wt))>; diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 7ad300c6cccd..66c034a889c6 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -730,6 +730,18 @@ bool MipsSEDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, return false; } +// Select const vector splat of 1. +bool MipsSEDAGToDAGISel::selectVSplatImmEq1(SDValue N) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + return selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits() && ImmValue == 1; +} + bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { unsigned Opcode = Node->getOpcode(); SDLoc DL(Node); diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h index 7b843b0e0b25..22d8e924ac53 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -124,6 +124,9 @@ private: /// starting at bit zero. bool selectVSplatMaskR(SDValue N, SDValue &Imm) const override; + /// Select constant vector splats whose value is 1. + bool selectVSplatImmEq1(SDValue N) const override; + bool trySelect(SDNode *Node) override; // Emits proper ABI for _mcount profiling calls. diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6975412ce5d3..b2153a7afe73 100644 --- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -229,6 +229,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, // v*i8 are formally lowered as v4i8 EltVT = MVT::v4i8; NumElts = (NumElts + 3) / 4; + } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) { + // v2i8 is promoted to v2i16 + NumElts = 1; + EltVT = MVT::v2i16; } for (unsigned j = 0; j != NumElts; ++j) { ValueVTs.push_back(EltVT); diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 9d2990c98ce2..3223fccbcf49 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -506,8 +506,10 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true. if (II.hasOneUse() && match(Op1, m_Zero()) && - match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) + match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) { + II.dropUBImplyingAttrsAndMetadata(); return IC.replaceOperand(II, 1, IC.Builder.getTrue()); + } Constant *C; diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 8a6ec3076ac6..b9d06b593685 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1004,7 +1004,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I, uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex; if (MaskedGEPIndex != GEPIndex) { - auto *GEP = cast<GetElementPtrInst>(II->getArgOperand(0)); + auto *GEP = cast<GEPOperator>(II->getArgOperand(0)); Builder.SetInsertPoint(I); Type *GEPIndexType = DL.getIndexType(GEP->getPointerOperand()->getType()); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 37022104d0a9..d1c80aa67124 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -1033,9 +1033,9 @@ void State::addInfoForInductions(BasicBlock &BB) { DTN, CmpInst::ICMP_SLT, PN, B, ConditionTy(CmpInst::ICMP_SLE, StartValue, B))); - // Try to add condition from header to the exit blocks. When exiting either - // with EQ or NE in the header, we know that the induction value must be u<= - // B, as other exits may only exit earlier. + // Try to add condition from header to the dedicated exit blocks. When exiting + // either with EQ or NE in the header, we know that the induction value must + // be u<= B, as other exits may only exit earlier. assert(!StepOffset.isNegative() && "induction must be increasing"); assert((Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) && "unsupported predicate"); @@ -1043,8 +1043,11 @@ void State::addInfoForInductions(BasicBlock &BB) { SmallVector<BasicBlock *> ExitBBs; L->getExitBlocks(ExitBBs); for (BasicBlock *EB : ExitBBs) { - WorkList.emplace_back(FactOrCheck::getConditionFact( - DT.getNode(EB), CmpInst::ICMP_ULE, A, B, Precond)); + // Bail out on non-dedicated exits. + if (DT.dominates(&BB, EB)) { + WorkList.emplace_back(FactOrCheck::getConditionFact( + DT.getNode(EB), CmpInst::ICMP_ULE, A, B, Precond)); + } } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp index 91ef2b4b7c18..ca03eff7a4e2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1464,8 +1464,11 @@ static Instruction *cloneInstructionInExitBlock( if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) { // Create a new MemoryAccess and let MemorySSA set its defining access. + // After running some passes, MemorySSA might be outdated, and the + // instruction `I` may have become a non-memory touching instruction. MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB( - New, nullptr, New->getParent(), MemorySSA::Beginning); + New, nullptr, New->getParent(), MemorySSA::Beginning, + /*CreationMustSucceed=*/false); if (NewMemAcc) { if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc)) MSSAU.insertDef(MemDef, /*RenameUses=*/true); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp index 7192efe3f16b..f68cbf62b982 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp @@ -1028,7 +1028,13 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ, if (!BB->hasNPredecessorsOrMore(2)) return false; - // Get single common predecessors of both BB and Succ + if (any_of(BBPreds, [](const BasicBlock *Pred) { + return isa<IndirectBrInst>(Pred->getTerminator()); + })) + return false; + + // Get the single common predecessor of both BB and Succ. Return false + // when there are more than one common predecessors. for (BasicBlock *SuccPred : SuccPreds) { if (BBPreds.count(SuccPred)) { if (CommonPred) @@ -1133,7 +1139,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, bool BBKillable = CanPropagatePredecessorsForPHIs(BB, Succ, BBPreds); - // Even if we can not fold bB into Succ, we may be able to redirect the + // Even if we can not fold BB into Succ, we may be able to redirect the // predecessors of BB to Succ. bool BBPhisMergeable = BBKillable || diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ab2b96cdc42d..746ba51a981f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15440,9 +15440,25 @@ bool BoUpSLP::collectValuesToDemote( MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL))); }); }; + auto AbsChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { + assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast<Instruction>(V); + unsigned SignBits = OrigBitWidth - BitWidth; + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1); + unsigned Op0SignBits = + ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT); + return SignBits <= Op0SignBits && + ((SignBits != Op0SignBits && + !isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL))) || + MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL))); + }); + }; if (ID != Intrinsic::abs) { Operands.push_back(getOperandEntry(&E, 1)); CallChecker = CompChecker; + } else { + CallChecker = AbsChecker; } InstructionCost BestCost = std::numeric_limits<InstructionCost::CostType>::max(); |