diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib')
25 files changed, 195 insertions, 135 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp index 324329ce989e..f4324fffc4ed 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -293,6 +293,13 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { // Call the function. CallInst *CI = CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB); + // The verifier requires that all calls of debug-info-bearing functions + // from debug-info-bearing functions have a debug location (for inlining + // purposes). Assign a dummy location to satisfy the constraint. + Function *RewindFn = dyn_cast<Function>(RewindFunction.getCallee()); + if (RewindFn && RewindFn->getSubprogram()) + if (DISubprogram *SP = F.getSubprogram()) + CI->setDebugLoc(DILocation::get(SP->getContext(), 0, 0, SP)); CI->setCallingConv(RewindFunctionCallingConv); // We never expect _Unwind_Resume to return. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp index 497e282bb976..5c68711ff619 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -528,8 +528,16 @@ bool MachinePipeliner::useSwingModuloScheduler() { } bool MachinePipeliner::useWindowScheduler(bool Changed) { - // WindowScheduler does not work when it is off or when SwingModuloScheduler - // is successfully scheduled. + // WindowScheduler does not work for following cases: + // 1. when it is off. + // 2. when SwingModuloScheduler is successfully scheduled. + // 3. when pragma II is enabled. + if (II_setByPragma) { + LLVM_DEBUG(dbgs() << "Window scheduling is disabled when " + "llvm.loop.pipeline.initiationinterval is set.\n"); + return false; + } + return WindowSchedulingOption == WindowSchedulingFlag::WS_Force || (WindowSchedulingOption == WindowSchedulingFlag::WS_On && !Changed); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index aa9032ea2574..71cdec91e5f6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15680,13 +15680,16 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { } } - SmallSetVector<SDValue, 8> MaybePoisonOperands; - for (SDValue Op : N0->ops()) { + SmallSet<SDValue, 8> MaybePoisonOperands; + SmallVector<unsigned, 8> MaybePoisonOperandNumbers; + for (auto [OpNo, Op] : enumerate(N0->ops())) { if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false, /*Depth*/ 1)) continue; bool HadMaybePoisonOperands = !MaybePoisonOperands.empty(); - bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op); + bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second; + if (IsNewMaybePoisonOperand) + MaybePoisonOperandNumbers.push_back(OpNo); if (!HadMaybePoisonOperands) continue; if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) { @@ -15698,7 +15701,18 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { // it could create undef or poison due to it's poison-generating flags. // So not finding any maybe-poison operands is fine. - for (SDValue MaybePoisonOperand : MaybePoisonOperands) { + for (unsigned OpNo : MaybePoisonOperandNumbers) { + // N0 can mutate during iteration, so make sure to refetch the maybe poison + // operands via the operand numbers. The typical scenario is that we have + // something like this + // t262: i32 = freeze t181 + // t150: i32 = ctlz_zero_undef t262 + // t184: i32 = ctlz_zero_undef t181 + // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch + // When freezing the t181 operand we get t262 back, and then the + // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but + // also recursively replace t184 by t150. + SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo); // Don't replace every single UNDEF everywhere with frozen UNDEF, though. if (MaybePoisonOperand.getOpcode() == ISD::UNDEF) continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp index 0777480499e5..f1658e36ae1e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp @@ -232,8 +232,11 @@ bool WindowScheduler::initialize() { return false; } for (auto &Def : MI.all_defs()) - if (Def.isReg() && Def.getReg().isPhysical()) + if (Def.isReg() && Def.getReg().isPhysical()) { + LLVM_DEBUG(dbgs() << "Physical registers are not supported in " + "window scheduling!\n"); return false; + } } if (SchedInstrNum <= WindowRegionLimit) { LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n"); @@ -437,14 +440,17 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, int PredCycle = getOriCycle(PredMI); ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency()); } - // ResourceManager can be used to detect resource conflicts between the - // current MI and the previously inserted MIs. - while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) { - ++CurCycle; - if (CurCycle == (int)WindowIILimit) - return CurCycle; + // Zero cost instructions do not need to check resource. + if (!TII->isZeroCost(MI.getOpcode())) { + // ResourceManager can be used to detect resource conflicts between the + // current MI and the previously inserted MIs. + while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) { + ++CurCycle; + if (CurCycle == (int)WindowIILimit) + return CurCycle; + } + RM.reserveResources(*SU, CurCycle); } - RM.reserveResources(*SU, CurCycle); OriToCycle[getOriMI(&MI)] = CurCycle; LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S." << getOriStage(getOriMI(&MI), Offset) << "]: " << MI); @@ -485,6 +491,7 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, // ======================================== int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { int MaxStallCycle = 0; + int CurrentII = MaxCycle + 1; auto Range = getScheduleRange(Offset, SchedInstrNum); for (auto &MI : Range) { auto *SU = TripleDAG->getSUnit(&MI); @@ -492,8 +499,8 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { for (auto &Succ : SU->Succs) { if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU) continue; - // If the expected cycle does not exceed MaxCycle, no check is needed. - if (DefCycle + (int)Succ.getLatency() <= MaxCycle) + // If the expected cycle does not exceed CurrentII, no check is needed. + if (DefCycle + (int)Succ.getLatency() <= CurrentII) continue; // If the cycle of the scheduled MI A is less than that of the scheduled // MI B, the scheduling will fail because the lifetime of the @@ -503,7 +510,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { if (DefCycle < UseCycle) return WindowIILimit; // Get the stall cycle introduced by the register between two trips. - int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle; + int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle; MaxStallCycle = std::max(MaxStallCycle, StallCycle); } } diff --git a/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp index bf19934da047..0a9498f051cb 100644 --- a/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp +++ b/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp @@ -961,9 +961,13 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src, // Detach the marker at Dest -- this lets us move the "====" DbgRecords // around. DbgMarker *DestMarker = nullptr; - if (Dest != end()) { - if ((DestMarker = getMarker(Dest))) + if ((DestMarker = getMarker(Dest))) { + if (Dest == end()) { + assert(DestMarker == getTrailingDbgRecords()); + deleteTrailingDbgRecords(); + } else { DestMarker->removeFromParent(); + } } // If we're moving the tail range of DbgRecords (":::"), absorb them into the @@ -1005,22 +1009,14 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src, } else { // Insert them right at the start of the range we moved, ahead of First // and the "++++" DbgRecords. + // This also covers the rare circumstance where we insert at end(), and we + // did not generate the iterator with begin() / getFirstInsertionPt(), + // meaning any trailing debug-info at the end of the block would + // "normally" have been pushed in front of "First". We move it there now. DbgMarker *FirstMarker = createMarker(First); FirstMarker->absorbDebugValues(*DestMarker, true); } DestMarker->eraseFromParent(); - } else if (Dest == end() && !InsertAtHead) { - // In the rare circumstance where we insert at end(), and we did not - // generate the iterator with begin() / getFirstInsertionPt(), it means - // any trailing debug-info at the end of the block would "normally" have - // been pushed in front of "First". Move it there now. - DbgMarker *TrailingDbgRecords = getTrailingDbgRecords(); - if (TrailingDbgRecords) { - DbgMarker *FirstMarker = createMarker(First); - FirstMarker->absorbDebugValues(*TrailingDbgRecords, true); - TrailingDbgRecords->eraseFromParent(); - deleteTrailingDbgRecords(); - } } } diff --git a/contrib/llvm-project/llvm/lib/IR/DebugProgramInstruction.cpp b/contrib/llvm-project/llvm/lib/IR/DebugProgramInstruction.cpp index 362d467beeb1..5d2189b54204 100644 --- a/contrib/llvm-project/llvm/lib/IR/DebugProgramInstruction.cpp +++ b/contrib/llvm-project/llvm/lib/IR/DebugProgramInstruction.cpp @@ -473,11 +473,12 @@ DbgLabelRecord::createDebugIntrinsic(Module *M, Value *DbgVariableRecord::getAddress() const { auto *MD = getRawAddress(); - if (auto *V = dyn_cast<ValueAsMetadata>(MD)) + if (auto *V = dyn_cast_or_null<ValueAsMetadata>(MD)) return V->getValue(); // When the value goes to null, it gets replaced by an empty MDNode. - assert(!cast<MDNode>(MD)->getNumOperands() && "Expected an empty MDNode"); + assert(!MD || + !cast<MDNode>(MD)->getNumOperands() && "Expected an empty MDNode"); return nullptr; } diff --git a/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp b/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp index 003155a4af48..963f4b4806e1 100644 --- a/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp +++ b/contrib/llvm-project/llvm/lib/IR/TypeFinder.cpp @@ -88,6 +88,20 @@ void TypeFinder::run(const Module &M, bool onlyNamed) { for (const auto &MD : MDForInst) incorporateMDNode(MD.second); MDForInst.clear(); + + // Incorporate types hiding in variable-location information. + for (const auto &Dbg : I.getDbgRecordRange()) { + // Pick out records that have Values. + if (const DbgVariableRecord *DVI = + dyn_cast<DbgVariableRecord>(&Dbg)) { + for (Value *V : DVI->location_ops()) + incorporateValue(V); + if (DVI->isDbgAssign()) { + if (Value *Addr = DVI->getAddress()) + incorporateValue(Addr); + } + } + } } } diff --git a/contrib/llvm-project/llvm/lib/Support/Z3Solver.cpp b/contrib/llvm-project/llvm/lib/Support/Z3Solver.cpp index 5a34ff160f6c..9aece099b062 100644 --- a/contrib/llvm-project/llvm/lib/Support/Z3Solver.cpp +++ b/contrib/llvm-project/llvm/lib/Support/Z3Solver.cpp @@ -19,6 +19,7 @@ using namespace llvm; #include "llvm/ADT/Twine.h" #include <set> +#include <unordered_map> #include <z3.h> diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td index 6df87fc6a815..410b53e14de2 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td @@ -895,7 +895,12 @@ def ProcessorFeatures { FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS]; - list<SubtargetFeature> AppleM4 = [HasV9_2aOps, FeatureSHA2, FeatureFPARMv8, + // Technically apple-m4 is v9.2a, but we can't use that here. + // Historically, llvm defined v9.0a as requiring SVE, but it's optional + // according to the Arm ARM, and not supported by the core. We decoupled the + // two in the clang driver and in the backend subtarget features, but it's + // still an issue in the clang frontend. v8.7a is the next closest choice. + list<SubtargetFeature> AppleM4 = [HasV8_7aOps, FeatureSHA2, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML, FeatureAES, FeatureBF16, diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td index 7906e0ee9d78..9efdbd751d96 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -953,6 +953,12 @@ def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority", "Export priority must be explicitly manipulated on GFX11.5" >; +def FeatureVmemWriteVgprInOrder : SubtargetFeature<"vmem-write-vgpr-in-order", + "HasVmemWriteVgprInOrder", + "true", + "VMEM instructions of the same type write VGPR results in order" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -1123,7 +1129,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, - FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts + FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, + FeatureVmemWriteVgprInOrder ] >; @@ -1136,7 +1143,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, + FeatureVmemWriteVgprInOrder ] >; @@ -1152,7 +1160,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS, - FeatureDefaultComponentZero + FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder ] >; @@ -1170,7 +1178,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16, FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK, FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, - FeatureNegativeScratchOffsetBug, FeatureGWS, FeatureDefaultComponentZero + FeatureNegativeScratchOffsetBug, FeatureGWS, FeatureDefaultComponentZero, + FeatureVmemWriteVgprInOrder ] >; @@ -1193,7 +1202,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureMaxHardClauseLength63, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts + FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, + FeatureVmemWriteVgprInOrder ] >; @@ -1215,7 +1225,8 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11", FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureMaxHardClauseLength32, - FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts + FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, + FeatureVmemWriteVgprInOrder ] >; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h index def89c785b85..9386bcf0d74b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -239,6 +239,7 @@ protected: bool HasVALUTransUseHazard = false; bool HasForceStoreSC0SC1 = false; bool HasRequiredExportPriority = false; + bool HasVmemWriteVgprInOrder = false; bool RequiresCOV6 = false; @@ -1285,10 +1286,18 @@ public: bool hasRequiredExportPriority() const { return HasRequiredExportPriority; } + bool hasVmemWriteVgprInOrder() const { return HasVmemWriteVgprInOrder; } + /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively. bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; } + /// \returns true if inline constants are not supported for F16 pseudo + /// scalar transcendentals. + bool hasNoF16PseudoScalarTransInlineConstants() const { + return getGeneration() == GFX12; + } + /// \returns The maximum number of instructions that can be enclosed in an /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that /// instruction. diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 1315aa085578..13130db884dc 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1778,11 +1778,12 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, if (IsVGPR) { // RAW always needs an s_waitcnt. WAW needs an s_waitcnt unless the // previous write and this write are the same type of VMEM - // instruction, in which case they're guaranteed to write their - // results in order anyway. + // instruction, in which case they are (in some architectures) + // guaranteed to write their results in order anyway. if (Op.isUse() || !updateVMCntOnly(MI) || ScoreBrackets.hasOtherPendingVmemTypes(RegNo, - getVmemType(MI))) { + getVmemType(MI)) || + !ST->hasVmemWriteVgprInOrder()) { ScoreBrackets.determineWait(LOAD_CNT, RegNo, Wait); ScoreBrackets.determineWait(SAMPLE_CNT, RegNo, Wait); ScoreBrackets.determineWait(BVH_CNT, RegNo, Wait); @@ -2389,7 +2390,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML, } if (!ST->hasVscnt() && HasVMemStore && !HasVMemLoad && UsesVgprLoadedOutside) return true; - return HasVMemLoad && UsesVgprLoadedOutside; + return HasVMemLoad && UsesVgprLoadedOutside && ST->hasVmemWriteVgprInOrder(); } bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 463737f645d4..27b8c1b17422 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5768,6 +5768,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, return false; } } + } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() && + isF16PseudoScalarTrans(MI.getOpcode()) && + isInlineConstant(*MO, OpInfo)) { + return false; } if (MO->isReg()) { diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 1712dfe8d406..91855fb14f6f 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -946,6 +946,14 @@ public: Opcode == AMDGPU::DS_GWS_BARRIER; } + static bool isF16PseudoScalarTrans(unsigned Opcode) { + return Opcode == AMDGPU::V_S_EXP_F16_e64 || + Opcode == AMDGPU::V_S_LOG_F16_e64 || + Opcode == AMDGPU::V_S_RCP_F16_e64 || + Opcode == AMDGPU::V_S_RSQ_F16_e64 || + Opcode == AMDGPU::V_S_SQRT_F16_e64; + } + static bool doesNotReadTiedSource(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead; } diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index 383dfcc31117..c016b2dd91dc 100644 --- a/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -72,7 +72,7 @@ class AVRAsmParser : public MCTargetAsmParser { int parseRegisterName(); int parseRegister(bool RestoreOnFailure = false); bool tryParseRegisterOperand(OperandVector &Operands); - bool tryParseExpression(OperandVector &Operands); + bool tryParseExpression(OperandVector &Operands, int64_t offset); bool tryParseRelocExpression(OperandVector &Operands); void eatComma(); @@ -418,7 +418,7 @@ bool AVRAsmParser::tryParseRegisterOperand(OperandVector &Operands) { return false; } -bool AVRAsmParser::tryParseExpression(OperandVector &Operands) { +bool AVRAsmParser::tryParseExpression(OperandVector &Operands, int64_t offset) { SMLoc S = Parser.getTok().getLoc(); if (!tryParseRelocExpression(Operands)) @@ -437,6 +437,11 @@ bool AVRAsmParser::tryParseExpression(OperandVector &Operands) { if (getParser().parseExpression(Expression)) return true; + if (offset) { + Expression = MCBinaryExpr::createAdd( + Expression, MCConstantExpr::create(offset, getContext()), getContext()); + } + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(AVROperand::CreateImm(Expression, S, E)); return false; @@ -529,8 +534,9 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands, bool maybeReg) { [[fallthrough]]; case AsmToken::LParen: case AsmToken::Integer: + return tryParseExpression(Operands, 0); case AsmToken::Dot: - return tryParseExpression(Operands); + return tryParseExpression(Operands, 2); case AsmToken::Plus: case AsmToken::Minus: { // If the sign preceeds a number, parse the number, @@ -540,7 +546,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands, bool maybeReg) { case AsmToken::BigNum: case AsmToken::Identifier: case AsmToken::Real: - if (!tryParseExpression(Operands)) + if (!tryParseExpression(Operands, 0)) return false; break; default: @@ -643,6 +649,7 @@ bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info, // These specific operands should be treated as addresses/symbols/labels, // other than registers. bool maybeReg = true; + if (OperandNum == 1) { std::array<StringRef, 8> Insts = {"lds", "adiw", "sbiw", "ldi"}; for (auto Inst : Insts) { diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index 0d29912bee26..388d58a82214 100644 --- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -94,6 +94,9 @@ static void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup, // Rightshifts the value by one. AVR::fixups::adjustBranchTarget(Value); + + // Jumps are relative to the current instruction. + Value -= 1; } /// 22-bit absolute fixup. @@ -513,15 +516,10 @@ bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm, switch ((unsigned)Fixup.getKind()) { default: return Fixup.getKind() >= FirstLiteralRelocationKind; - // Fixups which should always be recorded as relocations. case AVR::fixup_7_pcrel: case AVR::fixup_13_pcrel: - // Do not force relocation for PC relative branch like 'rjmp .', - // 'rcall . - off' and 'breq . + off'. - if (const auto *SymA = Target.getSymA()) - if (SymA->getSymbol().getName().size() == 0) - return false; - [[fallthrough]]; + // Always resolve relocations for PC-relative branches + return false; case AVR::fixup_call: return true; } diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index aaf0449a5538..21cf4d9eeac1 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9338,14 +9338,18 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Op0 = Op->getOperand(0); + SDValue Lo = Op0.getOperand(0); + SDValue Hi = Op0.getOperand(1); + if ((Op.getValueType() != MVT::f128) || - (Op0.getOpcode() != ISD::BUILD_PAIR) || - (Op0.getOperand(0).getValueType() != MVT::i64) || - (Op0.getOperand(1).getValueType() != MVT::i64) || !Subtarget.isPPC64()) + (Op0.getOpcode() != ISD::BUILD_PAIR) || (Lo.getValueType() != MVT::i64) || + (Hi.getValueType() != MVT::i64) || !Subtarget.isPPC64()) return SDValue(); - return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0), - Op0.getOperand(1)); + if (!Subtarget.isLittleEndian()) + std::swap(Lo, Hi); + + return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi); } static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) { diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 8f5afbae01de..ed39fc67a0a7 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -2014,9 +2014,9 @@ def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>; } // IsISA3_0 def : Pat<(int_ppc_stdcx ForceXForm:$dst, g8rc:$A), - (STDCX g8rc:$A, ForceXForm:$dst)>; + (RLWINM (STDCX g8rc:$A, ForceXForm:$dst), 31, 31, 31)>; def : Pat<(PPCStoreCond ForceXForm:$dst, g8rc:$A, 8), - (STDCX g8rc:$A, ForceXForm:$dst)>; + (RLWINM (STDCX g8rc:$A, ForceXForm:$dst), 31, 31, 31)>; def : Pat<(i64 (int_ppc_mfspr timm:$SPR)), (MFSPR8 $SPR)>; diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 1686249c0f89..69f8ddc0ea70 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -5286,13 +5286,13 @@ def : Pat<(i64 (bitreverse i64:$A)), (OR8 (RLDICR DWBytes7654.DWord, 32, 31), DWBytes3210.DWord)>; def : Pat<(int_ppc_stwcx ForceXForm:$dst, gprc:$A), - (STWCX gprc:$A, ForceXForm:$dst)>; + (RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>; def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 4), - (STWCX gprc:$A, ForceXForm:$dst)>; + (RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>; def : Pat<(int_ppc_stbcx ForceXForm:$dst, gprc:$A), - (STBCX gprc:$A, ForceXForm:$dst)>; + (RLWINM (STBCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>; def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 1), - (STBCX gprc:$A, ForceXForm:$dst)>; + (RLWINM (STBCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>; def : Pat<(int_ppc_fcfid f64:$A), (XSCVSXDDP $A)>; @@ -5322,9 +5322,9 @@ def : Pat<(int_ppc_mtmsr gprc:$RS), let Predicates = [IsISA2_07] in { def : Pat<(int_ppc_sthcx ForceXForm:$dst, gprc:$A), - (STHCX gprc:$A, ForceXForm:$dst)>; + (RLWINM (STHCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>; def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 2), - (STHCX gprc:$A, ForceXForm:$dst)>; + (RLWINM (STHCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>; } def : Pat<(int_ppc_dcbtstt ForceXForm:$dst), (DCBTST 16, ForceXForm:$dst)>; diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index 6855471840e9..71ec01aeb011 100644 --- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -314,57 +314,6 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, const MachineOperand &MO = MI->getOperand (opNum); SparcMCExpr::VariantKind TF = (SparcMCExpr::VariantKind) MO.getTargetFlags(); -#ifndef NDEBUG - // Verify the target flags. - if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) { - if (MI->getOpcode() == SP::CALL) - assert(TF == SparcMCExpr::VK_Sparc_None && - "Cannot handle target flags on call address"); - else if (MI->getOpcode() == SP::SETHIi) - assert((TF == SparcMCExpr::VK_Sparc_HI - || TF == SparcMCExpr::VK_Sparc_H44 - || TF == SparcMCExpr::VK_Sparc_HH - || TF == SparcMCExpr::VK_Sparc_LM - || TF == SparcMCExpr::VK_Sparc_TLS_GD_HI22 - || TF == SparcMCExpr::VK_Sparc_TLS_LDM_HI22 - || TF == SparcMCExpr::VK_Sparc_TLS_LDO_HIX22 - || TF == SparcMCExpr::VK_Sparc_TLS_IE_HI22 - || TF == SparcMCExpr::VK_Sparc_TLS_LE_HIX22) && - "Invalid target flags for address operand on sethi"); - else if (MI->getOpcode() == SP::TLS_CALL) - assert((TF == SparcMCExpr::VK_Sparc_None - || TF == SparcMCExpr::VK_Sparc_TLS_GD_CALL - || TF == SparcMCExpr::VK_Sparc_TLS_LDM_CALL) && - "Cannot handle target flags on tls call address"); - else if (MI->getOpcode() == SP::TLS_ADDrr) - assert((TF == SparcMCExpr::VK_Sparc_TLS_GD_ADD - || TF == SparcMCExpr::VK_Sparc_TLS_LDM_ADD - || TF == SparcMCExpr::VK_Sparc_TLS_LDO_ADD - || TF == SparcMCExpr::VK_Sparc_TLS_IE_ADD) && - "Cannot handle target flags on add for TLS"); - else if (MI->getOpcode() == SP::TLS_LDrr) - assert(TF == SparcMCExpr::VK_Sparc_TLS_IE_LD && - "Cannot handle target flags on ld for TLS"); - else if (MI->getOpcode() == SP::TLS_LDXrr) - assert(TF == SparcMCExpr::VK_Sparc_TLS_IE_LDX && - "Cannot handle target flags on ldx for TLS"); - else if (MI->getOpcode() == SP::XORri) - assert((TF == SparcMCExpr::VK_Sparc_TLS_LDO_LOX10 - || TF == SparcMCExpr::VK_Sparc_TLS_LE_LOX10) && - "Cannot handle target flags on xor for TLS"); - else - assert((TF == SparcMCExpr::VK_Sparc_LO - || TF == SparcMCExpr::VK_Sparc_M44 - || TF == SparcMCExpr::VK_Sparc_L44 - || TF == SparcMCExpr::VK_Sparc_HM - || TF == SparcMCExpr::VK_Sparc_TLS_GD_LO10 - || TF == SparcMCExpr::VK_Sparc_TLS_LDM_LO10 - || TF == SparcMCExpr::VK_Sparc_TLS_IE_LO10 ) && - "Invalid target flags for small address operand"); - } -#endif - - bool CloseParen = SparcMCExpr::printVariantKind(O, TF); switch (MO.getType()) { diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedIceLake.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedIceLake.td index 186d4d84c251..b68be9be6d47 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedIceLake.td @@ -1510,8 +1510,10 @@ def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort015 let ReleaseAtCycles = [1,8,8,2]; } def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr, + VPSCATTERQDZmr, VPSCATTERQQZmr, VSCATTERDPDZmr, + VSCATTERQPSZmr, VSCATTERQPDZmr)>; def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> { diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 4fded44085e8..2423602d06c4 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -1499,8 +1499,10 @@ def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort015 let ReleaseAtCycles = [1,8,8,2]; } def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr, + VPSCATTERQDZmr, VPSCATTERQQZmr, VSCATTERDPDZmr, + VSCATTERQPSZmr, VSCATTERQPDZmr)>; def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 1ce8f58c1aa1..4924d5a31747 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1625,11 +1625,17 @@ void PGOUseFunc::setBranchWeights() { continue; // We have a non-zero Branch BB. - unsigned Size = BBCountInfo.OutEdges.size(); - SmallVector<uint64_t, 2> EdgeCounts(Size, 0); + + // SuccessorCount can be greater than OutEdgesCount, because + // removed edges don't appear in OutEdges. + unsigned OutEdgesCount = BBCountInfo.OutEdges.size(); + unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors(); + assert(OutEdgesCount <= SuccessorCount); + + SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0); uint64_t MaxCount = 0; - for (unsigned s = 0; s < Size; s++) { - const PGOUseEdge *E = BBCountInfo.OutEdges[s]; + for (unsigned It = 0; It < OutEdgesCount; It++) { + const PGOUseEdge *E = BBCountInfo.OutEdges[It]; const BasicBlock *SrcBB = E->SrcBB; const BasicBlock *DestBB = E->DestBB; if (DestBB == nullptr) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index c31173879af1..37022104d0a9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -1464,7 +1464,7 @@ static bool checkAndReplaceCmp(CmpIntrinsic *I, ConstraintInfo &Info, ToRemove.push_back(I); return true; } - if (checkCondition(ICmpInst::ICMP_EQ, LHS, RHS, I, Info)) { + if (checkCondition(ICmpInst::ICMP_EQ, LHS, RHS, I, Info).value_or(false)) { I->replaceAllUsesWith(ConstantInt::get(I->getType(), 0)); ToRemove.push_back(I); return true; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 5bda7c50c62c..0b4a75e0bc52 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -1928,18 +1928,24 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, if (!WideAddRec.first) return nullptr; - // Reuse the IV increment that SCEVExpander created. Recompute flags, unless - // the flags for both increments agree and it is safe to use the ones from - // the original inc. In that case, the new use of the wide increment won't - // be more poisonous. - bool NeedToRecomputeFlags = - !SCEVExpander::canReuseFlagsFromOriginalIVInc(OrigPhi, WidePhi, - DU.NarrowUse, WideInc) || - DU.NarrowUse->hasNoUnsignedWrap() != WideInc->hasNoUnsignedWrap() || - DU.NarrowUse->hasNoSignedWrap() != WideInc->hasNoSignedWrap(); + auto CanUseWideInc = [&]() { + if (!WideInc) + return false; + // Reuse the IV increment that SCEVExpander created. Recompute flags, + // unless the flags for both increments agree and it is safe to use the + // ones from the original inc. In that case, the new use of the wide + // increment won't be more poisonous. + bool NeedToRecomputeFlags = + !SCEVExpander::canReuseFlagsFromOriginalIVInc( + OrigPhi, WidePhi, DU.NarrowUse, WideInc) || + DU.NarrowUse->hasNoUnsignedWrap() != WideInc->hasNoUnsignedWrap() || + DU.NarrowUse->hasNoSignedWrap() != WideInc->hasNoSignedWrap(); + return WideAddRec.first == WideIncExpr && + Rewriter.hoistIVInc(WideInc, DU.NarrowUse, NeedToRecomputeFlags); + }; + Instruction *WideUse = nullptr; - if (WideAddRec.first == WideIncExpr && - Rewriter.hoistIVInc(WideInc, DU.NarrowUse, NeedToRecomputeFlags)) + if (CanUseWideInc()) WideUse = WideInc; else { WideUse = cloneIVUser(DU, WideAddRec.first); |
