Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 311
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 71
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 61
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 72
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 52
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 77
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 48
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 84
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp | 2426
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp | 315
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp | 80
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp | 33
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp | 35
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp | 92
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp | 1186
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp | 889
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 62
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp | 139
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp | 664
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp | 50
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 45
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 845
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 78
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 386
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 100
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 79
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 263
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp | 39
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 1274
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 492
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 207
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp | 65
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp | 37
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp | 38
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 53
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 201
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 539
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h | 93
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp | 335
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp | 79
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCFGPrinter.cpp | 95
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp | 79
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp | 62
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 54
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp | 207
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp | 65
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 128
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp | 240
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 239
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp | 97
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp | 433
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp | 42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp | 46
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp | 30
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp | 223
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp | 186
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp | 119
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp | 42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 295
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp | 71
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp | 47
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp | 64
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp | 171
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp | 114
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h | 96
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp | 33
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp | 163
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp | 80
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 83
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp | 114
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2795
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 199
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 421
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 157
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 90
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 427
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 19
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 83
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 617
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 126
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 877
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 886
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 96
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 183
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 75
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1092
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp | 253
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp | 23
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp | 114
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp | 230
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp | 130
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 80
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp | 57
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp | 25
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 47
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp | 242
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp | 15
217 files changed, 19738 insertions, 6511 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index f5dbaccfcad5..b9579441a0ba 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -319,8 +319,9 @@ static const Value *getNoopInput(const Value *V,
NoopInput = Op;
} else if (isa<TruncInst>(I) &&
TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
- DataBits = std::min((uint64_t)DataBits,
- I->getType()->getPrimitiveSizeInBits().getFixedSize());
+ DataBits =
+ std::min((uint64_t)DataBits,
+ I->getType()->getPrimitiveSizeInBits().getFixedValue());
NoopInput = Op;
} else if (auto *CB = dyn_cast<CallBase>(I)) {
const Value *ReturnedOp = CB->getReturnedArgOperand();
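
A minimal sketch of the TypeSize accessor rename this hunk applies: getFixedValue() is the new name for getFixedSize(), and both assert that the quantity is not scalable before handing back a plain integer.

// Sketch only; mirrors the upstream llvm::TypeSize API used above.
#include "llvm/Support/TypeSize.h"
#include <cassert>

uint64_t fixedBits(llvm::TypeSize TS) {
  assert(!TS.isScalable() && "scalable sizes have no fixed value");
  return TS.getFixedValue(); // previously spelled TS.getFixedSize()
}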
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 1940f46232d3..82b5ccdc70ea 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -21,9 +21,7 @@
namespace llvm {
-AIXException::AIXException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
-
-void AIXException::markFunctionEnd() { endFragment(); }
+AIXException::AIXException(AsmPrinter *A) : EHStreamer(A) {}
void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
const MCSymbol *PerSym) {
@@ -62,7 +60,7 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
const unsigned PointerSize = DL.getPointerSize();
// Add necessary paddings in 64 bit mode.
- Asm->OutStreamer->emitValueToAlignment(PointerSize);
+ Asm->OutStreamer->emitValueToAlignment(Align(PointerSize));
// LSDA location.
Asm->OutStreamer->emitValue(MCSymbolRefExpr::create(LSDA, Asm->OutContext),
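
A minimal sketch of the Align-typed streamer API these hunks migrate to: callers now wrap the raw byte count in llvm::Align, whose constructor asserts the value is a power of two, so a bad alignment fails at the call site instead of deep inside MC.

// Sketch only; uses the post-change MCStreamer signature shown above.
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Alignment.h"

void padToPointerSize(llvm::MCStreamer &OS, unsigned PointerSize) {
  // was: OS.emitValueToAlignment(PointerSize);
  OS.emitValueToAlignment(llvm::Align(PointerSize));
}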
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index e04a29fbb42b..de6ebcf0c341 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -19,7 +19,7 @@
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
-ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
+ARMException::ARMException(AsmPrinter *A) : EHStreamer(A) {}
ARMException::~ARMException() = default;
@@ -48,6 +48,11 @@ void ARMException::beginFunction(const MachineFunction *MF) {
}
}
+void ARMException::markFunctionEnd() {
+ if (shouldEmitCFI)
+ Asm->OutStreamer->emitCFIEndProc();
+}
+
/// endFunction - Gather and emit post-function exception information.
///
void ARMException::endFunction(const MachineFunction *MF) {
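
A minimal sketch of the CFI bracketing the new markFunctionEnd performs: a function that emits CFI must close its FDE with .cfi_endproc, and the streamer API mirrors the assembler directives one-to-one. The helper below is illustrative, not part of ARMException.

// Sketch only; MCStreamer's CFI calls map directly to .cfi_* directives.
#include "llvm/MC/MCStreamer.h"

void emitCFIBracket(llvm::MCStreamer &OS, bool ShouldEmitCFI) {
  if (!ShouldEmitCFI)
    return;
  OS.emitCFIStartProc(/*IsSimple=*/false); // .cfi_startproc at function begin
  // ... function body and .cfi_* directives go here ...
  OS.emitCFIEndProc();                     // .cfi_endproc at function end
}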
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 9526bf7610b4..22ecc5199742 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -531,7 +531,7 @@ template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() {
emitOffsets(EntryPool);
emitAbbrevs();
emitData();
- Asm->OutStreamer->emitValueToAlignment(4, 0);
+ Asm->OutStreamer->emitValueToAlignment(Align(4), 0);
Asm->OutStreamer->emitLabel(ContributionEnd);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 32a10ad41d1f..8c126d20fc9a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -119,6 +119,7 @@
#include <cstdint>
#include <iterator>
#include <memory>
+#include <optional>
#include <string>
#include <utility>
#include <vector>
@@ -146,14 +147,6 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed");
char AsmPrinter::ID = 0;
-using gcp_map_type = DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>>;
-
-static gcp_map_type &getGCMap(void *&P) {
- if (!P)
- P = new gcp_map_type();
- return *(gcp_map_type*)P;
-}
-
namespace {
class AddrLabelMapCallbackPtr final : CallbackVH {
AddrLabelMap *Map = nullptr;
@@ -354,20 +347,16 @@ Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL,
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
- OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)) {
+ OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)),
+ SM(*this) {
VerboseAsm = OutStreamer->isVerboseAsm();
+ DwarfUsesRelocationsAcrossSections =
+ MAI->doesDwarfUseRelocationsAcrossSections();
}
AsmPrinter::~AsmPrinter() {
assert(!DD && Handlers.size() == NumUserHandlers &&
"Debug/EH info didn't get finalized");
-
- if (GCMetadataPrinters) {
- gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
-
- delete &GCMap;
- GCMetadataPrinters = nullptr;
- }
}
bool AsmPrinter::isPositionIndependent() const {
@@ -489,7 +478,7 @@ bool AsmPrinter::doInitialization(Module &M) {
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (const auto &I : *MI)
- if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ if (GCMetadataPrinter *MP = getOrCreateGCPrinter(*I))
MP->beginAssembly(M, *MI, *this);
// Emit module-level inline asm if it exists.
@@ -529,7 +518,7 @@ bool AsmPrinter::doInitialization(Module &M) {
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
// We may want to emit CFI for debug.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ExceptionHandling::SjLj:
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
@@ -553,7 +542,7 @@ bool AsmPrinter::doInitialization(Module &M) {
case ExceptionHandling::None:
if (!needsCFIForDebug())
break;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ExceptionHandling::SjLj:
case ExceptionHandling::DwarfCFI:
ES = new DwarfCFIException(this);
@@ -710,6 +699,16 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
// GV's or GVSym's attributes will be used for the EmittedSym.
emitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration());
+ if (GV->isTagged()) {
+ Triple T = TM.getTargetTriple();
+
+ if (T.getArch() != Triple::aarch64 || !T.isAndroid())
+ OutContext.reportError(SMLoc(),
+ "Tagged symbols (-fsanitize=memtag-globals) are "
+ "only supported on aarch64 + Android.");
+ OutStreamer->emitSymbolAttribute(EmittedSym, MAI->getMemtagAttr());
+ }
+
if (!GV->hasInitializer()) // External globals require no extra code.
return;
@@ -742,10 +741,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (GVKind.isCommon()) {
if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
// .comm _foo, 42, 4
- const bool SupportsAlignment =
- getObjFileLowering().getCommDirectiveSupportsAlignment();
- OutStreamer->emitCommonSymbol(GVSym, Size,
- SupportsAlignment ? Alignment.value() : 0);
+ OutStreamer->emitCommonSymbol(GVSym, Size, Alignment);
return;
}
@@ -760,7 +756,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
Size = 1; // zerofill of 0 bytes is undefined.
emitLinkage(GV, GVSym);
// .zerofill __DATA, __bss, _foo, 400, 5
- OutStreamer->emitZerofill(TheSection, GVSym, Size, Alignment.value());
+ OutStreamer->emitZerofill(TheSection, GVSym, Size, Alignment);
return;
}
@@ -779,17 +775,14 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
// Prefer to simply fall back to .local / .comm in this case.
if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
// .lcomm _foo, 42
- OutStreamer->emitLocalCommonSymbol(GVSym, Size, Alignment.value());
+ OutStreamer->emitLocalCommonSymbol(GVSym, Size, Alignment);
return;
}
// .local _foo
OutStreamer->emitSymbolAttribute(GVSym, MCSA_Local);
// .comm _foo, 42, 4
- const bool SupportsAlignment =
- getObjFileLowering().getCommDirectiveSupportsAlignment();
- OutStreamer->emitCommonSymbol(GVSym, Size,
- SupportsAlignment ? Alignment.value() : 0);
+ OutStreamer->emitCommonSymbol(GVSym, Size, Alignment);
return;
}
@@ -810,7 +803,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (GVKind.isThreadBSS()) {
TheSection = getObjFileLowering().getTLSBSSSection();
- OutStreamer->emitTBSSSymbol(TheSection, MangSym, Size, Alignment.value());
+ OutStreamer->emitTBSSSymbol(TheSection, MangSym, Size, Alignment);
} else if (GVKind.isThreadData()) {
OutStreamer->switchSection(TheSection);
@@ -941,6 +934,9 @@ void AsmPrinter::emitFunctionHeader() {
}
}
+ // Emit KCFI type information before patchable-function-prefix nops.
+ emitKCFITypeId(*MF);
+
// Emit M NOPs for -fpatchable-function-entry=N,M where M>0. We arbitrarily
// place prefix data before NOPs.
unsigned PatchableFunctionPrefix = 0;
@@ -1000,6 +996,11 @@ void AsmPrinter::emitFunctionHeader() {
HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->beginFunction(MF);
}
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->beginBasicBlockSection(MF->front());
+ }
// Emit the prologue data.
if (F.hasPrologueData())
@@ -1039,8 +1040,13 @@ void AsmPrinter::emitFunctionEntryLabel() {
if (TM.getTargetTriple().isOSBinFormatELF()) {
MCSymbol *Sym = getSymbolPreferLocal(MF->getFunction());
- if (Sym != CurrentFnSym)
+ if (Sym != CurrentFnSym) {
+ cast<MCSymbolELF>(Sym)->setType(ELF::STT_FUNC);
+ CurrentFnBeginLocal = Sym;
OutStreamer->emitLabel(Sym);
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
+ }
}
}
@@ -1053,7 +1059,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// We assume a single instruction only has a spill or reload, not
// both.
- Optional<unsigned> Size;
+ std::optional<unsigned> Size;
if ((Size = MI.getRestoreSize(TII))) {
CommentOS << *Size << "-byte Reload\n";
} else if ((Size = MI.getFoldedRestoreSize(TII))) {
@@ -1128,10 +1134,15 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << " <- ";
const DIExpression *Expr = MI->getDebugExpression();
+ // First convert this to a non-variadic expression if possible, to simplify
+ // the output.
+ if (auto NonVariadicExpr = DIExpression::convertToNonVariadicExpression(Expr))
+ Expr = *NonVariadicExpr;
+ // Then, output the possibly-simplified expression.
if (Expr->getNumElements()) {
OS << '[';
ListSeparator LS;
- for (auto Op : Expr->expr_ops()) {
+ for (auto &Op : Expr->expr_ops()) {
OS << LS << dwarf::OperationEncodingString(Op.getOp());
for (unsigned I = 0; I < Op.getNumArgs(); ++I)
OS << ' ' << Op.getArg(I);
@@ -1170,14 +1181,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
case MachineOperand::MO_TargetIndex: {
OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")";
- // NOTE: Want this comment at start of line, don't emit with AddComment.
- AP.OutStreamer->emitRawComment(OS.str());
break;
}
case MachineOperand::MO_Register:
case MachineOperand::MO_FrameIndex: {
Register Reg;
- Optional<StackOffset> Offset;
+ std::optional<StackOffset> Offset;
if (Op.isReg()) {
Reg = Op.getReg();
} else {
@@ -1328,7 +1337,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
OutStreamer->pushSection();
OutStreamer->switchSection(BBAddrMapSection);
OutStreamer->AddComment("version");
- OutStreamer->emitInt8(OutStreamer->getContext().getBBAddrMapVersion());
+ uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion();
+ OutStreamer->emitInt8(BBAddrMapVersion);
OutStreamer->AddComment("feature");
OutStreamer->emitInt8(0);
OutStreamer->AddComment("function address");
@@ -1340,18 +1350,49 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
for (const MachineBasicBlock &MBB : MF) {
const MCSymbol *MBBSymbol =
MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol();
+ // TODO: Remove this check when version 1 is deprecated.
+ if (BBAddrMapVersion > 1) {
+ OutStreamer->AddComment("BB id");
+ // Emit the BB ID for this basic block.
+ OutStreamer->emitULEB128IntValue(*MBB.getBBID());
+ }
// Emit the basic block offset relative to the end of the previous block.
// This is zero unless the block is padded due to alignment.
emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol);
// Emit the basic block size. When BBs have alignments, their size cannot
// always be computed from their offsets.
emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
+ // Emit the Metadata.
OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
PrevMBBEndSymbol = MBB.getEndSymbol();
}
OutStreamer->popSection();
}
+void AsmPrinter::emitKCFITrapEntry(const MachineFunction &MF,
+ const MCSymbol *Symbol) {
+ MCSection *Section =
+ getObjFileLowering().getKCFITrapSection(*MF.getSection());
+ if (!Section)
+ return;
+
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(Section);
+
+ MCSymbol *Loc = OutContext.createLinkerPrivateTempSymbol();
+ OutStreamer->emitLabel(Loc);
+ OutStreamer->emitAbsoluteSymbolDiff(Symbol, Loc, 4);
+
+ OutStreamer->popSection();
+}
+
+void AsmPrinter::emitKCFITypeId(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (const MDNode *MD = F.getMetadata(LLVMContext::MD_kcfi_type))
+ emitGlobalConstant(F.getParent()->getDataLayout(),
+ mdconst::extract<ConstantInt>(MD->getOperand(0)));
+}
+
void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) {
if (PP) {
auto GUID = MI.getOperand(0).getImm();
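
A hedged sketch of the KCFI scheme these two new hooks support: emitKCFITypeId stores the 32-bit hash taken from !kcfi_type metadata immediately before the function entry, and emitKCFITrapEntry records each check's trap location in a dedicated section. The function below illustrates the check an instrumented indirect call conceptually performs; it is not the code LLVM emits.

#include <cstdint>
#include <cstring>

using Callee = int (*)(int);

int callChecked(Callee F, int Arg, uint32_t ExpectedHash) {
  uint32_t FoundHash;
  // The type hash sits at a fixed negative offset from the entry point.
  std::memcpy(&FoundHash,
              reinterpret_cast<const char *>(F) - sizeof(FoundHash),
              sizeof(FoundHash));
  if (FoundHash != ExpectedHash)
    __builtin_trap(); // real codegen branches to a recorded trap entry
  return F(Arg);
}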
@@ -1421,9 +1462,87 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
*StackUsageStream << "static\n";
}
-static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF) {
+void AsmPrinter::emitPCSectionsLabel(const MachineFunction &MF,
+ const MDNode &MD) {
+ MCSymbol *S = MF.getContext().createTempSymbol("pcsection");
+ OutStreamer->emitLabel(S);
+ PCSectionsSymbols[&MD].emplace_back(S);
+}
+
+void AsmPrinter::emitPCSections(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (PCSectionsSymbols.empty() && !F.hasMetadata(LLVMContext::MD_pcsections))
+ return;
+
+ const CodeModel::Model CM = MF.getTarget().getCodeModel();
+ const unsigned RelativeRelocSize =
+ (CM == CodeModel::Medium || CM == CodeModel::Large) ? getPointerSize()
+ : 4;
+
+ // Switch to PCSection, short-circuiting the common case where the current
+ // section is still valid (assume most MD_pcsections contain just 1 section).
+ auto SwitchSection = [&, Prev = StringRef()](const StringRef &Sec) mutable {
+ if (Sec == Prev)
+ return;
+ MCSection *S = getObjFileLowering().getPCSection(Sec, MF.getSection());
+ assert(S && "PC section is not initialized");
+ OutStreamer->switchSection(S);
+ Prev = Sec;
+ };
+ // Emit symbols into sections and data as specified in the pcsections MDNode.
+ auto EmitForMD = [&](const MDNode &MD, ArrayRef<const MCSymbol *> Syms,
+ bool Deltas) {
+ // Expect the first operand to be a section name. After that, a tuple of
+ // constants may appear, which will simply be emitted into the current
+ // section (the user of MD_pcsections decides the format of encoded data).
+ assert(isa<MDString>(MD.getOperand(0)) && "first operand not a string");
+ for (const MDOperand &MDO : MD.operands()) {
+ if (auto *S = dyn_cast<MDString>(MDO)) {
+ SwitchSection(S->getString());
+ const MCSymbol *Prev = Syms.front();
+ for (const MCSymbol *Sym : Syms) {
+ if (Sym == Prev || !Deltas) {
+ // Use the entry itself as the base of the relative offset.
+ MCSymbol *Base = MF.getContext().createTempSymbol("pcsection_base");
+ OutStreamer->emitLabel(Base);
+ // Emit relative relocation `addr - base`, which avoids a dynamic
+ // relocation in the final binary. User will get the address with
+ // `base + addr`.
+ emitLabelDifference(Sym, Base, RelativeRelocSize);
+ } else {
+ emitLabelDifference(Sym, Prev, 4);
+ }
+ Prev = Sym;
+ }
+ } else {
+ assert(isa<MDNode>(MDO) && "expecting either string or tuple");
+ const auto *AuxMDs = cast<MDNode>(MDO);
+ for (const MDOperand &AuxMDO : AuxMDs->operands()) {
+ assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant");
+ const auto *C = cast<ConstantAsMetadata>(AuxMDO);
+ emitGlobalConstant(F.getParent()->getDataLayout(), C->getValue());
+ }
+ }
+ }
+ };
+
+ OutStreamer->pushSection();
+ // Emit PCs for function start and function size.
+ if (const MDNode *MD = F.getMetadata(LLVMContext::MD_pcsections))
+ EmitForMD(*MD, {getFunctionBegin(), getFunctionEnd()}, true);
+ // Emit PCs for instructions collected.
+ for (const auto &MS : PCSectionsSymbols)
+ EmitForMD(*MS.first, MS.second, false);
+ OutStreamer->popSection();
+ PCSectionsSymbols.clear();
+}
+
+/// Returns true if function begin and end labels should be emitted.
+static bool needFuncLabels(const MachineFunction &MF) {
MachineModuleInfo &MMI = MF.getMMI();
- if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI.hasDebugInfo())
+ if (!MF.getLandingPads().empty() || MF.hasEHFunclets() ||
+ MMI.hasDebugInfo() ||
+ MF.getFunction().hasMetadata(LLVMContext::MD_pcsections))
return true;
// We might emit an EH table that uses function begin and end labels even if
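
A hedged sketch (not an LLVM API) of the encoding emitPCSections produces above: the first entry is stored relative to its own position in the section, so entry address plus stored value recovers the PC without a dynamic relocation, and when Deltas is set each later PC is a 32-bit delta from the previous one.

#include <cstdint>
#include <vector>

std::vector<uint64_t> decodePCs(uint64_t FirstEntryVA, int64_t FirstVal,
                                const int32_t *Deltas, size_t N) {
  std::vector<uint64_t> PCs;
  PCs.push_back(FirstEntryVA + FirstVal); // base-relative first entry
  for (size_t I = 0; I != N; ++I)
    PCs.push_back(PCs.back() + Deltas[I]); // delta from the previous PC
  return PCs;
}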
@@ -1481,6 +1600,9 @@ void AsmPrinter::emitFunctionBody() {
if (MCSymbol *S = MI.getPreInstrSymbol())
OutStreamer->emitLabel(S);
+ if (MDNode *MD = MI.getPCSections())
+ emitPCSectionsLabel(*MF, *MD);
+
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
HI.TimerGroupDescription, TimePassesIsEnabled);
@@ -1541,6 +1663,9 @@ void AsmPrinter::emitFunctionBody() {
if (isVerbose())
OutStreamer->emitRawComment("ARITH_FENCE");
break;
+ case TargetOpcode::MEMBARRIER:
+ OutStreamer->emitRawComment("MEMBARRIER");
+ break;
default:
emitInstruction(&MI);
if (CanDoExtraAnalysis) {
@@ -1666,8 +1791,11 @@ void AsmPrinter::emitFunctionBody() {
// Emit target-specific gunk after the function body.
emitFunctionBodyEnd();
- if (needFuncLabelsForEHOrDebugInfo(*MF) ||
- MAI->hasDotTypeDotSizeDirective()) {
+ // Even though wasm supports .type and .size in general, function symbols
+ // are automatically sized.
+ bool EmitFunctionSize = MAI->hasDotTypeDotSizeDirective() && !TT.isWasm();
+
+ if (needFuncLabels(*MF) || EmitFunctionSize) {
// Create a symbol for the end of function.
CurrentFnEnd = createTempSymbol("func_end");
OutStreamer->emitLabel(CurrentFnEnd);
@@ -1675,15 +1803,26 @@ void AsmPrinter::emitFunctionBody() {
// If the target wants a .size directive for the size of the function, emit
// it.
- if (MAI->hasDotTypeDotSizeDirective()) {
+ if (EmitFunctionSize) {
// We can get the size as difference between the function label and the
// temp label.
const MCExpr *SizeExp = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(CurrentFnEnd, OutContext),
MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext);
OutStreamer->emitELFSize(CurrentFnSym, SizeExp);
+ if (CurrentFnBeginLocal)
+ OutStreamer->emitELFSize(CurrentFnBeginLocal, SizeExp);
}
+ // Call endBasicBlockSection on the last block now, if it wasn't already
+ // called.
+ if (!MF->back().isEndSection()) {
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->endBasicBlockSection(MF->back());
+ }
+ }
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
HI.TimerGroupDescription, TimePassesIsEnabled);
@@ -1708,6 +1847,9 @@ void AsmPrinter::emitFunctionBody() {
if (MF->hasBBLabels() && HasAnyRealCode)
emitBBAddrMapSection(*MF);
+ // Emit sections containing instruction and function PCs.
+ emitPCSections(*MF);
+
// Emit section containing stack size metadata.
emitStackSizeSection(*MF);
@@ -1909,8 +2051,8 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
remarks::RemarkSerializer &RemarkSerializer = RS.getSerializer();
- Optional<SmallString<128>> Filename;
- if (Optional<StringRef> FilenameRef = RS.getFilename()) {
+ std::optional<SmallString<128>> Filename;
+ if (std::optional<StringRef> FilenameRef = RS.getFilename()) {
Filename = *FilenameRef;
sys::fs::make_absolute(*Filename);
assert(!Filename->empty() && "The filename can't be empty.");
@@ -2041,6 +2183,12 @@ bool AsmPrinter::doFinalization(Module &M) {
if (auto *TS = OutStreamer->getTargetStreamer())
TS->emitConstantPools();
+ // Emit Stack maps before any debug info. Mach-O requires that no data or
+ // text sections come after debug info has been emitted. This matters for
+ // stack maps as they are arbitrary data, and may even have a custom format
+ // through user plugins.
+ emitStackMaps();
+
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
@@ -2103,7 +2251,7 @@ bool AsmPrinter::doFinalization(Module &M) {
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
- if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(**--I))
+ if (GCMetadataPrinter *MP = getOrCreateGCPrinter(**--I))
MP->finishAssembly(M, *MI, *this);
// Emit llvm.ident metadata in an '.ident' directive.
@@ -2133,9 +2281,9 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit address-significance attributes for all globals.
OutStreamer->emitAddrsig();
for (const GlobalValue &GV : M.global_values()) {
- if (!GV.use_empty() && !GV.isTransitiveUsedByMetadataOnly() &&
- !GV.isThreadLocal() && !GV.hasDLLImportStorageClass() &&
- !GV.getName().startswith("llvm.") && !GV.hasAtLeastLocalUnnamedAddr())
+ if (!GV.use_empty() && !GV.isThreadLocal() &&
+ !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") &&
+ !GV.hasAtLeastLocalUnnamedAddr())
OutStreamer->emitAddrsigSym(getSymbol(&GV));
}
}
@@ -2213,6 +2361,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnSymForSize = CurrentFnSym;
CurrentFnBegin = nullptr;
+ CurrentFnBeginLocal = nullptr;
CurrentSectionBeginSym = nullptr;
MBBSectionRanges.clear();
MBBSectionExceptionSyms.clear();
@@ -2220,7 +2369,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
if (F.hasFnAttribute("patchable-function-entry") ||
F.hasFnAttribute("function-instrument") ||
F.hasFnAttribute("xray-instruction-threshold") ||
- needFuncLabelsForEHOrDebugInfo(MF) || NeedsLocalForSize ||
+ needFuncLabels(MF) || NeedsLocalForSize ||
MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
@@ -2692,9 +2841,9 @@ void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV,
STI = &getSubtargetInfo();
else
STI = TM.getMCSubtargetInfo();
- OutStreamer->emitCodeAlignment(Alignment.value(), STI, MaxBytesToEmit);
+ OutStreamer->emitCodeAlignment(Alignment, STI, MaxBytesToEmit);
} else
- OutStreamer->emitValueToAlignment(Alignment.value(), 0, 1, MaxBytesToEmit);
+ OutStreamer->emitValueToAlignment(Alignment, 0, 1, MaxBytesToEmit);
}
//===----------------------------------------------------------------------===//
@@ -2761,7 +2910,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// expression properly. This is important for differences between
// blockaddress labels. Since the two labels are in the same function, it
// is reasonable to treat their delta as a 32-bit value.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Instruction::BitCast:
return lowerConstant(CE->getOperand(0));
@@ -2791,8 +2940,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
//
// If the pointer is larger than the resultant integer, then
// as with Trunc just depend on the assembler to truncate it.
- if (DL.getTypeAllocSize(Ty).getFixedSize() <=
- DL.getTypeAllocSize(Op->getType()).getFixedSize())
+ if (DL.getTypeAllocSize(Ty).getFixedValue() <=
+ DL.getTypeAllocSize(Op->getType()).getFixedValue())
return OpExpr;
break; // Error
@@ -3526,11 +3675,6 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
}
}
- // Emit an alignment directive for this block, if needed.
- const Align Alignment = MBB.getAlignment();
- if (Alignment != Align(1))
- emitAlignment(Alignment, nullptr, MBB.getMaxBytesForAlignment());
-
// Switch to a new section if this basic block must begin a section. The
// entry block is always placed in the function section and is handled
// separately.
@@ -3541,25 +3685,30 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
CurrentSectionBeginSym = MBB.getSymbol();
}
+ // Emit an alignment directive for this block, if needed.
+ const Align Alignment = MBB.getAlignment();
+ if (Alignment != Align(1))
+ emitAlignment(Alignment, nullptr, MBB.getMaxBytesForAlignment());
+
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
// here, because multiple LLVM BB's may have been RAUW'd to this block after
// the references were generated.
- const BasicBlock *BB = MBB.getBasicBlock();
- if (MBB.hasAddressTaken()) {
+ if (MBB.isIRBlockAddressTaken()) {
if (isVerbose())
OutStreamer->AddComment("Block address taken");
- // MBBs can have their address taken as part of CodeGen without having
- // their corresponding BB's address taken in IR
- if (BB && BB->hasAddressTaken())
- for (MCSymbol *Sym : getAddrLabelSymbolToEmit(BB))
- OutStreamer->emitLabel(Sym);
+ BasicBlock *BB = MBB.getAddressTakenIRBlock();
+ assert(BB && BB->hasAddressTaken() && "Missing BB");
+ for (MCSymbol *Sym : getAddrLabelSymbolToEmit(BB))
+ OutStreamer->emitLabel(Sym);
+ } else if (isVerbose() && MBB.isMachineBlockAddressTaken()) {
+ OutStreamer->AddComment("Block address taken");
}
// Print some verbose block comments.
if (isVerbose()) {
- if (BB) {
+ if (const BasicBlock *BB = MBB.getBasicBlock()) {
if (BB->hasName()) {
BB->printAsOperand(OutStreamer->getCommentOS(),
/*PrintType=*/false, BB->getModule());
@@ -3590,11 +3739,11 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
}
// With BB sections, each basic block must handle CFI information on its own
- // if it begins a section (Entry block is handled separately by
- // AsmPrinterHandler::beginFunction).
+ // if it begins a section (Entry block call is handled separately, next to
+ // beginFunction).
if (MBB.isBeginSection() && !MBB.isEntryBlock())
for (const HandlerInfo &HI : Handlers)
- HI.Handler->beginBasicBlock(MBB);
+ HI.Handler->beginBasicBlockSection(MBB);
}
void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
@@ -3602,7 +3751,7 @@ void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
// sections.
if (MBB.isEndSection())
for (const HandlerInfo &HI : Handlers)
- HI.Handler->endBasicBlock(MBB);
+ HI.Handler->endBasicBlockSection(MBB);
}
void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility,
@@ -3684,13 +3833,12 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
return true;
}
-GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
+GCMetadataPrinter *AsmPrinter::getOrCreateGCPrinter(GCStrategy &S) {
if (!S.usesMetadata())
return nullptr;
- gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
- gcp_map_type::iterator GCPI = GCMap.find(&S);
- if (GCPI != GCMap.end())
+ auto [GCPI, Inserted] = GCMetadataPrinters.insert({&S, nullptr});
+ if (!Inserted)
return GCPI->second.get();
auto Name = S.getName();
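
A minimal sketch of the DenseMap idiom the GC-printer cache adopts here: insert() returns {iterator, bool}, so a single hash lookup both finds an existing entry and reserves the slot when the key is new. Printer and getOrCreate below are placeholders, not LLVM names.

#include "llvm/ADT/DenseMap.h"
#include <memory>

struct Printer {};

Printer *getOrCreate(llvm::DenseMap<int, std::unique_ptr<Printer>> &Cache,
                     int Key) {
  auto [It, Inserted] = Cache.insert({Key, nullptr});
  if (!Inserted)
    return It->second.get();                // cache hit
  It->second = std::make_unique<Printer>(); // first use: create and store
  return It->second.get();
}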
@@ -3700,14 +3848,14 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
if (Name == GCMetaPrinter.getName()) {
std::unique_ptr<GCMetadataPrinter> GMP = GCMetaPrinter.instantiate();
GMP->S = &S;
- auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP)));
- return IterBool.first->second.get();
+ GCPI->second = std::move(GMP);
+ return GCPI->second.get();
}
report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
}
-void AsmPrinter::emitStackMaps(StackMaps &SM) {
+void AsmPrinter::emitStackMaps() {
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
bool NeedsDefault = false;
@@ -3716,7 +3864,7 @@ void AsmPrinter::emitStackMaps(StackMaps &SM) {
NeedsDefault = true;
else
for (const auto &I : *MI) {
- if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ if (GCMetadataPrinter *MP = getOrCreateGCPrinter(*I))
if (MP->emitStackMaps(SM, *this))
continue;
// The strategy doesn't have printer or doesn't emit custom stack maps.
@@ -3818,7 +3966,8 @@ void AsmPrinter::emitXRayTable() {
// pointers. This should work for both 32-bit and 64-bit platforms.
if (FnSledIndex) {
OutStreamer->switchSection(FnSledIndex);
- OutStreamer->emitCodeAlignment(2 * WordSizeBytes, &getSubtargetInfo());
+ OutStreamer->emitCodeAlignment(Align(2 * WordSizeBytes),
+ &getSubtargetInfo());
OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false);
OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false);
OutStreamer->switchSection(PrevSection);
@@ -3894,7 +4043,7 @@ unsigned int AsmPrinter::getDwarfOffsetByteSize() const {
dwarf::FormParams AsmPrinter::getDwarfFormParams() const {
return {getDwarfVersion(), uint8_t(getPointerSize()),
OutStreamer->getContext().getDwarfFormat(),
- MAI->doesDwarfUseRelocationsAcrossSections()};
+ doesDwarfUseRelocationsAcrossSections()};
}
unsigned int AsmPrinter::getUnitLengthFieldByteSize() const {
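
The BB address map hunks earlier in this file emit BB ids, offsets, and sizes with emitULEB128IntValue. A minimal sketch of ULEB128 itself, the variable-length encoding those calls produce:

#include <cstdint>
#include <vector>

void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f; // low seven bits per output byte
    Value >>= 7;
    if (Value)
      Byte |= 0x80; // high bit set means more bytes follow
    Out.push_back(Byte);
  } while (Value);
}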
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index bfa53f5b9374..ecaa64afab4d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -163,7 +163,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
}
// If the format uses relocations with dwarf, refer to the symbol directly.
- if (MAI->doesDwarfUseRelocationsAcrossSections()) {
+ if (doesDwarfUseRelocationsAcrossSections()) {
OutStreamer->emitSymbolValue(Label, getDwarfOffsetByteSize());
return;
}
@@ -175,7 +175,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
}
void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const {
- if (MAI->doesDwarfUseRelocationsAcrossSections()) {
+ if (doesDwarfUseRelocationsAcrossSections()) {
assert(S.Symbol && "No symbol available");
emitDwarfSymbolReference(S.Symbol);
return;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 88c82cbc958b..c1588aaea05e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -330,16 +330,8 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
- // Count the number of register definitions to find the asm string.
- unsigned NumDefs = 0;
- for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
- ++NumDefs)
- assert(NumDefs != MI->getNumOperands()-2 && "No asm string?");
-
- assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
-
// Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
- const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
// If this asmstr is empty, just print the #APP/#NOAPP markers.
// These are useful to see where empty asm's wound up.
@@ -411,6 +403,14 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
LocCookie, Msg, DiagnosticSeverity::DS_Warning));
MMI->getModule()->getContext().diagnose(
DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note));
+
+ for (const Register RR : RestrRegs) {
+ if (std::optional<std::string> reason =
+ TRI->explainReservedReg(*MF, RR)) {
+ MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm(
+ LocCookie, *reason, DiagnosticSeverity::DS_Note));
+ }
+ }
}
emitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD,
@@ -480,7 +480,7 @@ bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
PrintAsmMemoryOperand(MI, OpNo, nullptr, O);
return false;
}
- LLVM_FALLTHROUGH; // GCC allows '%a' to behave like '%c' with immediates.
+ [[fallthrough]]; // GCC allows '%a' to behave like '%c' with immediates.
case 'c': // Substitute immediate value without immediate syntax
if (MO.isImm()) {
O << MO.getImm();
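
A minimal sketch of the attribute swap made throughout this diff: the LLVM_FALLTHROUGH macro becomes the standard C++17 [[fallthrough]] attribute, which marks a deliberate fall-through for -Wimplicit-fallthrough.

int classify(char C) {
  switch (C) {
  case 'a':
    // deliberately falls into the next case, as the '%a' operand does above
    [[fallthrough]];
  case 'c':
    return 1;
  default:
    return 0;
  }
}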
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 701c0affdfa6..0a67c4b6beb6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -12,8 +12,6 @@
#include "CodeViewDebug.h"
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
@@ -560,7 +558,7 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
}
void CodeViewDebug::emitCodeViewMagicVersion() {
- OS.emitValueToAlignment(4);
+ OS.emitValueToAlignment(Align(4));
OS.AddComment("Debug section magic");
OS.emitInt32(COFF::DEBUG_SECTION_MAGIC);
}
@@ -730,7 +728,7 @@ void CodeViewDebug::emitTypeInformation() {
TypeRecordMapping typeMapping(CVMCOS);
Pipeline.addCallbackToPipeline(typeMapping);
- Optional<TypeIndex> B = Table.getFirst();
+ std::optional<TypeIndex> B = Table.getFirst();
while (B) {
// This will fail if the record data is invalid.
CVType Record = Table.getType(*B);
@@ -754,13 +752,13 @@ void CodeViewDebug::emitTypeGlobalHashes() {
// hardcoded to version 0, SHA1.
OS.switchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection());
- OS.emitValueToAlignment(4);
+ OS.emitValueToAlignment(Align(4));
OS.AddComment("Magic");
OS.emitInt32(COFF::DEBUG_HASHES_SECTION_MAGIC);
OS.AddComment("Section Version");
OS.emitInt16(0);
OS.AddComment("Hash Algorithm");
- OS.emitInt16(uint16_t(GlobalTypeHashAlg::SHA1_8));
+ OS.emitInt16(uint16_t(GlobalTypeHashAlg::BLAKE3));
TypeIndex TI(TypeIndex::FirstNonSimpleIndex);
for (const auto &GHR : TypeTable.hashes()) {
@@ -908,6 +906,9 @@ static std::string flattenCommandLine(ArrayRef<std::string> Args,
}
if (Arg.startswith("-object-file-name") || Arg == MainFilename)
continue;
+ // Skip -fmessage-length for reproducibility.
+ if (Arg.startswith("-fmessage-length"))
+ continue;
if (PrintedOneArg)
OS << " ";
llvm::sys::printArg(OS, Arg, /*Quote=*/true);
@@ -1337,10 +1338,20 @@ void CodeViewDebug::calculateRanges(
assert(DVInst->isDebugValue() && "Invalid History entry");
// FIXME: Find a way to represent constant variables, since they are
// relatively common.
- Optional<DbgVariableLocation> Location =
+ std::optional<DbgVariableLocation> Location =
DbgVariableLocation::extractFromMachineInstruction(*DVInst);
if (!Location)
+ {
+ // When we don't have a location this is usually because LLVM has
+ // transformed it into a constant and we only have an llvm.dbg.value. We
+ // can't represent these well in CodeView since S_LOCAL only works on
+ // registers and memory locations. Instead, we will pretend this to be a
+ // constant value to at least have it show up in the debugger.
+ auto Op = DVInst->getDebugOperand(0);
+ if (Op.isImm())
+ Var.ConstantValue = APSInt(APInt(64, Op.getImm()), false);
continue;
+ }
// CodeView can only express variables in register and variables in memory
// at a constant offset from a register. However, for variables passed
@@ -1498,8 +1509,16 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
FPO |= FrameProcedureOptions::MarkedInline;
if (GV.hasFnAttribute(Attribute::Naked))
FPO |= FrameProcedureOptions::Naked;
- if (MFI.hasStackProtectorIndex())
+ if (MFI.hasStackProtectorIndex()) {
FPO |= FrameProcedureOptions::SecurityChecks;
+ if (GV.hasFnAttribute(Attribute::StackProtectStrong) ||
+ GV.hasFnAttribute(Attribute::StackProtectReq)) {
+ FPO |= FrameProcedureOptions::StrictSecurityChecks;
+ }
+ } else if (!GV.hasStackProtectorFnAttr()) {
+ // __declspec(safebuffers) disables stack guards.
+ FPO |= FrameProcedureOptions::SafeBuffers;
+ }
FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U);
FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U);
if (Asm->TM.getOptLevel() != CodeGenOpt::None &&
@@ -1620,7 +1639,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
case dwarf::DW_TAG_pointer_type:
if (cast<DIDerivedType>(Ty)->getName() == "__vtbl_ptr_type")
return lowerTypeVFTableShape(cast<DIDerivedType>(Ty));
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case dwarf::DW_TAG_reference_type:
case dwarf::DW_TAG_rvalue_reference_type:
return lowerTypePointer(cast<DIDerivedType>(Ty));
@@ -2023,9 +2042,9 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
ReturnAndArgTypeIndices.back() = TypeIndex::None();
}
TypeIndex ReturnTypeIndex = TypeIndex::Void();
- ArrayRef<TypeIndex> ArgTypeIndices = None;
+ ArrayRef<TypeIndex> ArgTypeIndices = std::nullopt;
if (!ReturnAndArgTypeIndices.empty()) {
- auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices);
+ auto ReturnAndArgTypesRef = ArrayRef(ReturnAndArgTypeIndices);
ReturnTypeIndex = ReturnAndArgTypesRef.front();
ArgTypeIndices = ReturnAndArgTypesRef.drop_front();
}
@@ -2777,9 +2796,19 @@ void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI,
emitLocalVariable(FI, *L);
// Next emit all non-parameters in the order that we found them.
- for (const LocalVariable &L : Locals)
- if (!L.DIVar->isParameter())
- emitLocalVariable(FI, L);
+ for (const LocalVariable &L : Locals) {
+ if (!L.DIVar->isParameter()) {
+ if (L.ConstantValue) {
+ // If ConstantValue is set we will emit it as a S_CONSTANT instead of a
+ // S_LOCAL in order to be able to represent it at all.
+ const DIType *Ty = L.DIVar->getType();
+ APSInt Val(*L.ConstantValue);
+ emitConstantSymbolRecord(Ty, Val, std::string(L.DIVar->getName()));
+ } else {
+ emitLocalVariable(FI, L);
+ }
+ }
+ }
}
void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
@@ -3098,7 +3127,7 @@ MCSymbol *CodeViewDebug::beginCVSubsection(DebugSubsectionKind Kind) {
void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) {
OS.emitLabel(EndLabel);
// Every subsection must be aligned to a 4-byte boundary.
- OS.emitValueToAlignment(4);
+ OS.emitValueToAlignment(Align(4));
}
static StringRef getSymbolName(SymbolKind SymKind) {
@@ -3125,7 +3154,7 @@ void CodeViewDebug::endSymbolRecord(MCSymbol *SymEnd) {
// an extra copy of every symbol record in LLD. This increases object file
// size by less than 1% in the clang build, and is compatible with the Visual
// C++ linker.
- OS.emitValueToAlignment(4);
+ OS.emitValueToAlignment(Align(4));
OS.emitLabel(SymEnd);
}
@@ -3350,11 +3379,13 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
DIGV->getRawStaticDataMemberDeclaration()))
Scope = MemberDecl->getScope();
- // For Fortran, the scoping portion is elided in its name so that we can
- // reference the variable in the command line of the VS debugger.
+ // For static local variables and Fortran, the scoping portion is elided
+ // in its name so that we can reference the variable in the command line
+ // of the VS debugger.
std::string QualifiedName =
- (moduleIsInFortran()) ? std::string(DIGV->getName())
- : getFullyQualifiedName(Scope, DIGV->getName());
+ (moduleIsInFortran() || (Scope && isa<DILocalScope>(Scope)))
+ ? std::string(DIGV->getName())
+ : getFullyQualifiedName(Scope, DIGV->getName());
if (const GlobalVariable *GV =
CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
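
A hedged sketch of the constant-variable fallback added above: when a debug value has no register or memory location but carries an immediate, the immediate is wrapped in an APSInt so it can later be emitted as an S_CONSTANT record instead of being dropped. The helper name is illustrative.

#include "llvm/ADT/APSInt.h"
#include <optional>

std::optional<llvm::APSInt> constantFromImm(bool IsImm, int64_t Imm) {
  if (!IsImm)
    return std::nullopt; // no representable location at all
  // 64-bit signed wrapper, matching Var.ConstantValue above.
  return llvm::APSInt(llvm::APInt(64, Imm), /*isUnsigned=*/false);
}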
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 16f0082723ed..495822a6e653 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -82,7 +82,7 @@ public:
}
};
- static_assert(sizeof(uint64_t) == sizeof(LocalVarDef), "");
+ static_assert(sizeof(uint64_t) == sizeof(LocalVarDef));
private:
MCStreamer &OS;
@@ -104,6 +104,7 @@ private:
SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1>>
DefRanges;
bool UseReferenceType = false;
+ std::optional<APSInt> ConstantValue;
};
struct CVGlobalVariable {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 617ddbd66e4e..308d4b1b5d61 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -425,7 +425,7 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
///
unsigned DIEInteger::sizeOf(const dwarf::FormParams &FormParams,
dwarf::Form Form) const {
- if (Optional<uint8_t> FixedSize =
+ if (std::optional<uint8_t> FixedSize =
dwarf::getFixedFormByteSize(Form, FormParams))
return *FixedSize;
@@ -580,7 +580,7 @@ void DIEString::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
DIEInteger(S.getIndex()).emitValue(AP, Form);
return;
case dwarf::DW_FORM_strp:
- if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
+ if (AP->doesDwarfUseRelocationsAcrossSections())
DIELabel(S.getSymbol()).emitValue(AP, Form);
else
DIEInteger(S.getOffset()).emitValue(AP, Form);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 1d546e5fd72e..08ed78eb20a1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -42,7 +42,7 @@ static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
void DIEHash::addString(StringRef Str) {
LLVM_DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
Hash.update(Str);
- Hash.update(makeArrayRef((uint8_t)'\0'));
+ Hash.update(ArrayRef((uint8_t)'\0'));
}
// FIXME: The LEB128 routines are copied and only slightly modified out of
@@ -389,7 +389,7 @@ void DIEHash::computeHash(const DIE &Die) {
}
// Following the last (or if there are no children), append a zero byte.
- Hash.update(makeArrayRef((uint8_t)'\0'));
+ Hash.update(ArrayRef((uint8_t)'\0'));
}
/// This is based on the type signature computation given in section 7.27 of the
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index dabbfb45f687..0b40cdb0c3cc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -26,6 +25,7 @@
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <map>
+#include <optional>
#include <utility>
using namespace llvm;
@@ -76,7 +76,7 @@ bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var,
auto &Entries = VarEntries[Var];
if (!Entries.empty() && Entries.back().isDbgValue() &&
!Entries.back().isClosed() &&
- Entries.back().getInstr()->isIdenticalTo(MI)) {
+ Entries.back().getInstr()->isEquivalentDbgInstr(MI)) {
LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
<< "\t" << Entries.back().getInstr() << "\t" << MI
<< "\n");
@@ -110,20 +110,20 @@ void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) {
/// range in Ranges. EndMI can be nullptr to indicate that the range is
/// unbounded. Assumes Ranges is ordered and disjoint. Returns an iterator to
/// the first intersecting scope range if one exists.
-static Optional<ArrayRef<InsnRange>::iterator>
+static std::optional<ArrayRef<InsnRange>::iterator>
intersects(const MachineInstr *StartMI, const MachineInstr *EndMI,
const ArrayRef<InsnRange> &Ranges,
const InstructionOrdering &Ordering) {
for (auto RangesI = Ranges.begin(), RangesE = Ranges.end();
RangesI != RangesE; ++RangesI) {
if (EndMI && Ordering.isBefore(EndMI, RangesI->first))
- return None;
+ return std::nullopt;
if (EndMI && !Ordering.isBefore(RangesI->second, EndMI))
return RangesI;
if (Ordering.isBefore(StartMI, RangesI->second))
return RangesI;
}
- return None;
+ return std::nullopt;
}
void DbgValueHistoryMap::trimLocationRanges(
@@ -264,7 +264,7 @@ bool DbgValueHistoryMap::hasNonEmptyLocation(const Entries &Entries) const {
const MachineInstr *MI = Entry.getInstr();
assert(MI->isDebugValue());
// A DBG_VALUE $noreg is an empty variable location
- if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0)
+ if (MI->isUndefDebugValue())
continue;
return true;
@@ -495,7 +495,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
continue;
// If this is a virtual register, only clobber it since it doesn't
// have aliases.
- if (Register::isVirtualRegister(MO.getReg()))
+ if (MO.getReg().isVirtual())
clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries,
MI);
// If this is a register def operand, it may end a debug value
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 8ebbed974abb..858a3e75e515 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/DebugHandlerBase.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -30,15 +29,15 @@ using namespace llvm;
/// variable's lexical scope instruction ranges.
static cl::opt<bool> TrimVarLocs("trim-var-locs", cl::Hidden, cl::init(true));
-Optional<DbgVariableLocation>
+std::optional<DbgVariableLocation>
DbgVariableLocation::extractFromMachineInstruction(
const MachineInstr &Instruction) {
DbgVariableLocation Location;
// Variables calculated from multiple locations can't be represented here.
if (Instruction.getNumDebugOperands() != 1)
- return None;
+ return std::nullopt;
if (!Instruction.getDebugOperand(0).isReg())
- return None;
+ return std::nullopt;
Location.Register = Instruction.getDebugOperand(0).getReg();
Location.FragmentInfo.reset();
// We only handle expressions generated by DIExpression::appendOffset,
@@ -53,7 +52,7 @@ DbgVariableLocation::extractFromMachineInstruction(
Op->getOp() == dwarf::DW_OP_LLVM_arg)
++Op;
else
- return None;
+ return std::nullopt;
}
while (Op != DIExpr->expr_op_end()) {
switch (Op->getOp()) {
@@ -84,7 +83,7 @@ DbgVariableLocation::extractFromMachineInstruction(
Offset = 0;
break;
default:
- return None;
+ return std::nullopt;
}
++Op;
}
@@ -416,16 +415,12 @@ void DebugHandlerBase::endFunction(const MachineFunction *MF) {
InstOrdering.clear();
}
-void DebugHandlerBase::beginBasicBlock(const MachineBasicBlock &MBB) {
- if (!MBB.isBeginSection())
- return;
-
- PrevLabel = MBB.getSymbol();
+void DebugHandlerBase::beginBasicBlockSection(const MachineBasicBlock &MBB) {
+ EpilogBeginBlock = nullptr;
+ if (!MBB.isEntryBlock())
+ PrevLabel = MBB.getSymbol();
}
-void DebugHandlerBase::endBasicBlock(const MachineBasicBlock &MBB) {
- if (!MBB.isEndSection())
- return;
-
+void DebugHandlerBase::endBasicBlockSection(const MachineBasicBlock &MBB) {
PrevLabel = nullptr;
}
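
For context on the rename from the beginBasicBlock/endBasicBlock hooks to the section-oriented pair: a label difference such as Lend minus Lstart only folds to an assembly-time constant when both labels live in the same section, so PrevLabel must restart from each section's own symbol, and epilogue tracking is likewise per-section. A minimal sketch of the state the pair above manages (member types simplified):

    struct SectionLabelState {
      const void *PrevLabel = nullptr;        // last label usable as a range base
      const void *EpilogBeginBlock = nullptr; // epilogue_begin bookkeeping

      void beginBasicBlockSection(bool IsEntryBlock, const void *SectionSym) {
        EpilogBeginBlock = nullptr;
        if (!IsEntryBlock) // the entry block starts at the function label
          PrevLabel = SectionSym;
      }
      void endBasicBlockSection() { PrevLabel = nullptr; }
    };
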
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index d7ab2091967f..2008aa39ff87 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -76,6 +76,9 @@ public:
: EntryKind(E_TargetIndexLocation), TIL(Loc) {}
bool isLocation() const { return EntryKind == E_Location; }
+ bool isIndirectLocation() const {
+ return EntryKind == E_Location && Loc.isIndirect();
+ }
bool isTargetIndexLocation() const {
return EntryKind == E_TargetIndexLocation;
}
@@ -116,13 +119,7 @@ class DbgValueLoc {
public:
DbgValueLoc(const DIExpression *Expr, ArrayRef<DbgValueLocEntry> Locs)
: Expression(Expr), ValueLocEntries(Locs.begin(), Locs.end()),
- IsVariadic(true) {
-#ifndef NDEBUG
- // Currently, DBG_VALUE_VAR expressions must use stack_value.
- assert(Expr && Expr->isValid() &&
- is_contained(Locs, dwarf::DW_OP_stack_value));
-#endif
- }
+ IsVariadic(true) {}
DbgValueLoc(const DIExpression *Expr, ArrayRef<DbgValueLocEntry> Locs,
bool IsVariadic)
@@ -133,10 +130,6 @@ public:
!any_of(Locs, [](auto LE) { return LE.isLocation(); }));
if (!IsVariadic) {
assert(ValueLocEntries.size() == 1);
- } else {
- // Currently, DBG_VALUE_VAR expressions must use stack_value.
- assert(Expr && Expr->isValid() &&
- is_contained(Expr->getElements(), dwarf::DW_OP_stack_value));
}
#endif
}
@@ -150,10 +143,31 @@ public:
bool isFragment() const { return getExpression()->isFragment(); }
bool isEntryVal() const { return getExpression()->isEntryValue(); }
bool isVariadic() const { return IsVariadic; }
- const DIExpression *getExpression() const { return Expression; }
- const ArrayRef<DbgValueLocEntry> getLocEntries() const {
- return ValueLocEntries;
+ bool isEquivalent(const DbgValueLoc &Other) const {
+ // Cannot be equivalent with different numbers of entries.
+ if (ValueLocEntries.size() != Other.ValueLocEntries.size())
+ return false;
+ bool ThisIsIndirect =
+ !IsVariadic && ValueLocEntries[0].isIndirectLocation();
+ bool OtherIsIndirect =
+ !Other.IsVariadic && Other.ValueLocEntries[0].isIndirectLocation();
+ // Check equivalence of DIExpressions + Directness together.
+ if (!DIExpression::isEqualExpression(Expression, ThisIsIndirect,
+ Other.Expression, OtherIsIndirect))
+ return false;
+ // Indirectness should have been accounted for in the above check, so just
+ // compare register values directly here.
+ if (ThisIsIndirect || OtherIsIndirect) {
+ DbgValueLocEntry ThisOp = ValueLocEntries[0];
+ DbgValueLocEntry OtherOp = Other.ValueLocEntries[0];
+ return ThisOp.isLocation() && OtherOp.isLocation() &&
+ ThisOp.getLoc().getReg() == OtherOp.getLoc().getReg();
+ }
+ // If neither are indirect, then just compare the loc entries directly.
+ return ValueLocEntries == Other.ValueLocEntries;
}
+ const DIExpression *getExpression() const { return Expression; }
+ ArrayRef<DbgValueLocEntry> getLocEntries() const { return ValueLocEntries; }
friend bool operator==(const DbgValueLoc &, const DbgValueLoc &);
friend bool operator<(const DbgValueLoc &, const DbgValueLoc &);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -193,11 +207,15 @@ public:
/// Entry.
bool MergeRanges(const DebugLocEntry &Next) {
// If this and Next are describing the same variable, merge them.
- if ((End == Next.Begin && Values == Next.Values)) {
- End = Next.End;
- return true;
- }
- return false;
+ if (End != Next.Begin)
+ return false;
+ if (Values.size() != Next.Values.size())
+ return false;
+ for (unsigned EntryIdx = 0; EntryIdx < Values.size(); ++EntryIdx)
+ if (!Values[EntryIdx].isEquivalent(Next.Values[EntryIdx]))
+ return false;
+ End = Next.End;
+ return true;
}
const MCSymbol *getBeginSym() const { return Begin; }
@@ -214,6 +232,11 @@ public:
// Sort the pieces by offset.
// Remove any duplicate entries by dropping all but the first.
void sortUniqueValues() {
+ // Values is either a single item without a fragment, or many items that
+ // all have fragments. In the former case there is nothing to sort, and
+ // skipping the sort also prevents operator< from being called on a
+ // non-fragment item when _GLIBCXX_DEBUG is defined.
+ if (Values.size() == 1)
+ return;
llvm::sort(Values);
Values.erase(std::unique(Values.begin(), Values.end(),
[](const DbgValueLoc &A, const DbgValueLoc &B) {
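
The MergeRanges rewrite above relaxes merging from strict equality to pairwise equivalence of the values. A miniature model, where string equality stands in for isEquivalent() (which really compares DIExpressions together with directness; all names here are illustrative):

    #include <cstddef>
    #include <string>
    #include <vector>

    struct Entry {
      int Begin, End;
      std::vector<std::string> Values;

      bool mergeWith(const Entry &Next) {
        if (End != Next.Begin || Values.size() != Next.Values.size())
          return false;
        for (std::size_t I = 0; I < Values.size(); ++I)
          if (Values[I] != Next.Values[I]) // stands in for isEquivalent()
            return false;
        End = Next.End; // adjacent and equivalent: extend this entry
        return true;
      }
    };
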
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 10019a4720e6..0515173b4a24 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -109,19 +109,18 @@ public:
ArrayRef<Entry> getEntries(const List &L) const {
size_t LI = getIndex(L);
- return makeArrayRef(Entries)
- .slice(Lists[LI].EntryOffset, getNumEntries(LI));
+ return ArrayRef(Entries).slice(Lists[LI].EntryOffset, getNumEntries(LI));
}
ArrayRef<char> getBytes(const Entry &E) const {
size_t EI = getIndex(E);
- return makeArrayRef(DWARFBytes.begin(), DWARFBytes.end())
+ return ArrayRef(DWARFBytes.begin(), DWARFBytes.end())
.slice(Entries[EI].ByteOffset, getNumBytes(EI));
}
ArrayRef<std::string> getComments(const Entry &E) const {
size_t EI = getIndex(E);
- return makeArrayRef(Comments)
- .slice(Entries[EI].CommentOffset, getNumComments(EI));
+ return ArrayRef(Comments).slice(Entries[EI].CommentOffset,
+ getNumComments(EI));
}
private:
@@ -159,13 +158,13 @@ class DebugLocStream::ListBuilder {
DbgVariable &V;
const MachineInstr &MI;
size_t ListIndex;
- Optional<uint8_t> TagOffset;
+ std::optional<uint8_t> TagOffset;
public:
ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm,
DbgVariable &V, const MachineInstr &MI)
: Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)),
- TagOffset(None) {}
+ TagOffset(std::nullopt) {}
void setTagOffset(uint8_t TO) {
TagOffset = TO;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 5f187acf13dc..df4fe8d49806 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -23,28 +23,15 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) : EHStreamer(A) {}
+DwarfCFIException::DwarfCFIException(AsmPrinter *A) : EHStreamer(A) {}
-void DwarfCFIExceptionBase::markFunctionEnd() {
- endFragment();
-
- // Map all labels and get rid of any dead landing pads.
- if (!Asm->MF->getLandingPads().empty()) {
- MachineFunction *NonConstMF = const_cast<MachineFunction*>(Asm->MF);
- NonConstMF->tidyLandingPads();
- }
-}
+DwarfCFIException::~DwarfCFIException() = default;
-void DwarfCFIExceptionBase::endFragment() {
- if (shouldEmitCFI && !Asm->MF->hasBBSections())
- Asm->OutStreamer->emitCFIEndProc();
+void DwarfCFIException::addPersonality(const GlobalValue *Personality) {
+ if (!llvm::is_contained(Personalities, Personality))
+ Personalities.push_back(Personality);
}
-DwarfCFIException::DwarfCFIException(AsmPrinter *A)
- : DwarfCFIExceptionBase(A) {}
-
-DwarfCFIException::~DwarfCFIException() = default;
-
/// endModule - Emit all exception information that should come after the
/// content.
void DwarfCFIException::endModule() {
@@ -59,18 +46,12 @@ void DwarfCFIException::endModule() {
if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect)
return;
- // Emit references to all used personality functions
- for (const Function *Personality : MMI->getPersonalities()) {
- if (!Personality)
- continue;
+ // Emit indirect reference table for all used personality functions
+ for (const GlobalValue *Personality : Personalities) {
MCSymbol *Sym = Asm->getSymbol(Personality);
TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym);
}
-}
-
-static MCSymbol *getExceptionSym(AsmPrinter *Asm,
- const MachineBasicBlock *MBB) {
- return Asm->getMBBExceptionSym(*MBB);
+ Personalities.clear();
}
void DwarfCFIException::beginFunction(const MachineFunction *MF) {
@@ -86,9 +67,9 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const Function *Per = nullptr;
+ const GlobalValue *Per = nullptr;
if (F.hasPersonalityFn())
- Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ Per = dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
// Emit a personality function even when there are no landing pads
forceEmitPersonality =
@@ -114,12 +95,9 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
MAI.usesCFIForEH() && (shouldEmitPersonality || shouldEmitMoves);
else
shouldEmitCFI = Asm->needsCFIForDebug() && shouldEmitMoves;
-
- beginFragment(&*MF->begin(), getExceptionSym);
}
-void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
- ExceptionSymbolProvider ESP) {
+void DwarfCFIException::beginBasicBlockSection(const MachineBasicBlock &MBB) {
if (!shouldEmitCFI)
return;
@@ -141,14 +119,11 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
if (!shouldEmitPersonality)
return;
- auto &F = MBB->getParent()->getFunction();
- auto *P = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ auto &F = MBB.getParent()->getFunction();
+ auto *P = dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
assert(P && "Expected personality function");
-
- // If we are forced to emit this personality, make sure to record
- // it because it might not appear in any landingpad
- if (forceEmitPersonality)
- MMI->addPersonality(P);
+ // Record the personality function.
+ addPersonality(P);
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
@@ -157,7 +132,13 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
// Provide LSDA information.
if (shouldEmitLSDA)
- Asm->OutStreamer->emitCFILsda(ESP(Asm, MBB), TLOF.getLSDAEncoding());
+ Asm->OutStreamer->emitCFILsda(Asm->getMBBExceptionSym(MBB),
+ TLOF.getLSDAEncoding());
+}
+
+void DwarfCFIException::endBasicBlockSection(const MachineBasicBlock &MBB) {
+ if (shouldEmitCFI)
+ Asm->OutStreamer->emitCFIEndProc();
}
/// endFunction - Gather and emit post-function exception information.
@@ -168,12 +149,3 @@ void DwarfCFIException::endFunction(const MachineFunction *MF) {
emitExceptionTable();
}
-
-void DwarfCFIException::beginBasicBlock(const MachineBasicBlock &MBB) {
- beginFragment(&MBB, getExceptionSym);
-}
-
-void DwarfCFIException::endBasicBlock(const MachineBasicBlock &MBB) {
- if (shouldEmitCFI)
- Asm->OutStreamer->emitCFIEndProc();
-}
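
With the DwarfCFIExceptionBase layer gone, DwarfCFIException now keeps its own first-seen-order table of personality functions and emits it once at endModule(). The dedup in addPersonality() is just llvm::is_contained plus push_back; a self-contained equivalent:

    #include <algorithm>
    #include <vector>

    template <typename T>
    void addUnique(std::vector<const T *> &Personalities, const T *P) {
      // Record each personality once, preserving first-seen order.
      if (std::find(Personalities.begin(), Personalities.end(), P) ==
          Personalities.end())
        Personalities.push_back(P);
    }
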
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index b26960cdebb8..6dde50375a60 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -13,7 +13,6 @@
#include "DwarfCompileUnit.h"
#include "AddressPool.h"
#include "DwarfExpression.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -36,6 +35,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <iterator>
+#include <optional>
#include <string>
#include <utility>
@@ -121,8 +121,8 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
// extend .file to support this.
unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID();
if (!File)
- return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None,
- CUID);
+ return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", std::nullopt,
+ std::nullopt, CUID);
if (LastFile != File) {
LastFile = File;
@@ -203,7 +203,7 @@ void DwarfCompileUnit::addLocationAttribute(
DIE *VariableDIE, const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
bool addToAccelTable = false;
DIELoc *Loc = nullptr;
- Optional<unsigned> NVPTXAddressSpace;
+ std::optional<unsigned> NVPTXAddressSpace;
std::unique_ptr<DIEDwarfExpression> DwarfExpr;
for (const auto &GE : GlobalExprs) {
const GlobalVariable *Global = GE.Var;
@@ -340,7 +340,7 @@ void DwarfCompileUnit::addLocationAttribute(
// correctly interpret address space of the variable address.
const unsigned NVPTX_ADDR_global_space = 5;
addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
- NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_global_space);
+ NVPTXAddressSpace.value_or(NVPTX_ADDR_global_space));
}
if (Loc)
addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
@@ -445,7 +445,12 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
// scope then create and insert DIEs for these variables.
DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
+ auto *ContextCU = static_cast<DwarfCompileUnit *>(SPDie->getUnit());
+ return ContextCU->updateSubprogramScopeDIEImpl(SP, SPDie);
+}
+DIE &DwarfCompileUnit::updateSubprogramScopeDIEImpl(const DISubprogram *SP,
+ DIE *SPDie) {
SmallVector<RangeSpan, 2> BB_List;
// If basic block sections are on, ranges for each basic block section has
// to be emitted separately.
@@ -547,11 +552,8 @@ void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope,
// Emit inlined subprograms.
if (Scope->getParent() && isa<DISubprogram>(DS)) {
- DIE *ScopeDIE = constructInlinedScopeDIE(Scope);
- if (!ScopeDIE)
- return;
-
- ParentScopeDIE.addChild(ScopeDIE);
+ DIE *ScopeDIE = constructInlinedScopeDIE(Scope, ParentScopeDIE);
+ assert(ScopeDIE && "Scope DIE should not be null.");
createAndAddScopeChildren(Scope, *ScopeDIE);
return;
}
@@ -650,9 +652,8 @@ void DwarfCompileUnit::attachRangesOrLowHighPC(
attachRangesOrLowHighPC(Die, std::move(List));
}
-// This scope represents inlined body of a function. Construct DIE to
-// represent this concrete inlined copy of the function.
-DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
+DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope,
+ DIE &ParentScopeDIE) {
assert(Scope->getScopeNode());
auto *DS = Scope->getScopeNode();
auto *InlinedSP = getDISubprogram(DS);
@@ -662,19 +663,20 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine);
+ ParentScopeDIE.addChild(ScopeDIE);
addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
// Add the call site information to the DIE.
const DILocation *IA = Scope->getInlinedAt();
- addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_file, std::nullopt,
getOrCreateSourceID(IA->getFile()));
- addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_line, std::nullopt, IA->getLine());
if (IA->getColumn())
- addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn());
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_column, std::nullopt, IA->getColumn());
if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
- addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
+ addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, std::nullopt,
IA->getDiscriminator());
// Add name to the name table; we do this here because we're guaranteed
@@ -845,7 +847,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
if (!DV.hasFrameIndexExprs())
return VariableDie;
- Optional<unsigned> NVPTXAddressSpace;
+ std::optional<unsigned> NVPTXAddressSpace;
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
for (const auto &Fragment : DV.getFrameIndexExprs()) {
@@ -893,7 +895,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
// correctly interpret address space of the variable address.
const unsigned NVPTX_ADDR_local_space = 6;
addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
- NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_local_space);
+ NVPTXAddressSpace.value_or(NVPTX_ADDR_local_space));
}
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
@@ -1018,6 +1020,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
+ auto *ContextCU = static_cast<DwarfCompileUnit *>(ScopeDIE.getUnit());
if (Scope) {
assert(!Scope->getInlinedAt());
@@ -1025,8 +1028,10 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
// Collect lexical scope children first.
// ObjectPointer might be a local (non-argument) variable if it's a
// block's synthetic this pointer.
- if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
- addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
+ if (DIE *ObjectPointer =
+ ContextCU->createAndAddScopeChildren(Scope, ScopeDIE))
+ ContextCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer,
+ *ObjectPointer);
}
// If this is a variadic function, add an unspecified parameter.
@@ -1124,7 +1129,7 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);
ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef);
ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline,
- DD->getDwarfVersion() <= 4 ? Optional<dwarf::Form>()
+ DD->getDwarfVersion() <= 4 ? std::optional<dwarf::Form>()
: dwarf::DW_FORM_implicit_const,
dwarf::DW_INL_inlined);
if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef))
@@ -1588,7 +1593,8 @@ void DwarfCompileUnit::createBaseTypeDIEs() {
"_" + Twine(Btr.BitSize)).toStringRef(Str));
addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding);
// Round up to the smallest number of bytes that contains this number of bits.
- addUInt(Die, dwarf::DW_AT_byte_size, None, divideCeil(Btr.BitSize, 8));
+ addUInt(Die, dwarf::DW_AT_byte_size, std::nullopt,
+ divideCeil(Btr.BitSize, 8));
Btr.Die = &Die;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 61412cde34c8..7d87f35021bb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -192,6 +192,7 @@ public:
/// variables in this scope then create and insert DIEs for these
/// variables.
DIE &updateSubprogramScopeDIE(const DISubprogram *SP);
+ DIE &updateSubprogramScopeDIEImpl(const DISubprogram *SP, DIE *SPDie);
void constructScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE);
@@ -204,9 +205,9 @@ public:
void attachRangesOrLowHighPC(DIE &D,
const SmallVectorImpl<InsnRange> &Ranges);
- /// This scope represents inlined body of a function. Construct
+ /// This scope represents an inlined body of a function. Construct a
/// DIE to represent this concrete inlined copy of the function.
- DIE *constructInlinedScopeDIE(LexicalScope *Scope);
+ DIE *constructInlinedScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE);
/// Construct new DW_TAG_lexical_block for this scope and
/// attach DW_AT_low_pc/DW_AT_high_pc labels.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 54af14429907..cde790cc77fb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -56,6 +56,7 @@
#include <algorithm>
#include <cstddef>
#include <iterator>
+#include <optional>
#include <string>
using namespace llvm;
@@ -428,8 +429,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A)
// https://sourceware.org/bugzilla/show_bug.cgi?id=11616
UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;
- // GDB does not fully support the DWARF 4 representation for bitfields.
- UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB();
+ UseDWARF2Bitfields = DwarfVersion < 4;
// The DWARF v5 string offsets table has - possibly shared - contributions
// from each compile and type unit each preceded by a header. The string
@@ -597,6 +597,9 @@ struct FwdRegParamInfo {
/// Register worklist for finding call site values.
using FwdRegWorklist = MapVector<unsigned, SmallVector<FwdRegParamInfo, 2>>;
+/// Container for the set of registers known to be clobbered on the path to a
+/// call site.
+using ClobberedRegSet = SmallSet<Register, 16>;
/// Append the expression \p Addition to \p Original and return the result.
static const DIExpression *combineDIExpressions(const DIExpression *Original,
@@ -668,7 +671,8 @@ static void addToFwdRegWorklist(FwdRegWorklist &Worklist, unsigned Reg,
/// Interpret values loaded into registers by \p CurMI.
static void interpretValues(const MachineInstr *CurMI,
FwdRegWorklist &ForwardedRegWorklist,
- ParamSet &Params) {
+ ParamSet &Params,
+ ClobberedRegSet &ClobberedRegUnits) {
const MachineFunction *MF = CurMI->getMF();
const DIExpression *EmptyExpr =
@@ -700,17 +704,19 @@ static void interpretValues(const MachineInstr *CurMI,
// If the MI is an instruction defining one or more parameters' forwarding
// registers, add those defines.
+ ClobberedRegSet NewClobberedRegUnits;
auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI,
SmallSetVector<unsigned, 4> &Defs) {
if (MI.isDebugInstr())
return;
for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() &&
- Register::isPhysicalRegister(MO.getReg())) {
+ if (MO.isReg() && MO.isDef() && MO.getReg().isPhysical()) {
for (auto &FwdReg : ForwardedRegWorklist)
if (TRI.regsOverlap(FwdReg.first, MO.getReg()))
Defs.insert(FwdReg.first);
+ for (MCRegUnitIterator Units(MO.getReg(), &TRI); Units.isValid(); ++Units)
+ NewClobberedRegUnits.insert(*Units);
}
}
};
@@ -719,8 +725,22 @@ static void interpretValues(const MachineInstr *CurMI,
SmallSetVector<unsigned, 4> FwdRegDefs;
getForwardingRegsDefinedByMI(*CurMI, FwdRegDefs);
- if (FwdRegDefs.empty())
+ if (FwdRegDefs.empty()) {
+ // Any definitions by this instruction will clobber earlier reg movements.
+ ClobberedRegUnits.insert(NewClobberedRegUnits.begin(),
+ NewClobberedRegUnits.end());
return;
+ }
+
+ // It's possible that we find a copy from a non-volatile register to the
+ // param register, where the source register was clobbered in the meantime.
+ // Test for clobbered reg unit overlaps before completing.
+ auto IsRegClobberedInMeantime = [&](Register Reg) -> bool {
+ for (auto &RegUnit : ClobberedRegUnits)
+ if (TRI.hasRegUnit(Reg, RegUnit))
+ return true;
+ return false;
+ };
for (auto ParamFwdReg : FwdRegDefs) {
if (auto ParamValue = TII.describeLoadedValue(*CurMI, ParamFwdReg)) {
@@ -733,7 +753,8 @@ static void interpretValues(const MachineInstr *CurMI,
Register SP = TLI.getStackPointerRegisterToSaveRestore();
Register FP = TRI.getFrameRegister(*MF);
bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
- if (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
+ if (!IsRegClobberedInMeantime(RegLoc) &&
+ (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP)) {
MachineLocation MLoc(RegLoc, /*Indirect=*/IsSPorFP);
finishCallSiteParams(MLoc, ParamValue->second,
ForwardedRegWorklist[ParamFwdReg], Params);
@@ -755,6 +776,10 @@ static void interpretValues(const MachineInstr *CurMI,
for (auto ParamFwdReg : FwdRegDefs)
ForwardedRegWorklist.erase(ParamFwdReg);
+ // Any definitions by this instruction will clobber earlier reg movements.
+ ClobberedRegUnits.insert(NewClobberedRegUnits.begin(),
+ NewClobberedRegUnits.end());
+
// Now that we are done handling this instruction, add items from the
// temporary worklist to the real one.
for (auto &New : TmpWorklistItems)
@@ -764,7 +789,8 @@ static void interpretValues(const MachineInstr *CurMI,
static bool interpretNextInstr(const MachineInstr *CurMI,
FwdRegWorklist &ForwardedRegWorklist,
- ParamSet &Params) {
+ ParamSet &Params,
+ ClobberedRegSet &ClobberedRegUnits) {
// Skip bundle headers.
if (CurMI->isBundle())
return true;
@@ -782,7 +808,7 @@ static bool interpretNextInstr(const MachineInstr *CurMI,
if (CurMI->getNumOperands() == 0)
return true;
- interpretValues(CurMI, ForwardedRegWorklist, Params);
+ interpretValues(CurMI, ForwardedRegWorklist, Params, ClobberedRegUnits);
return true;
}
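
The hazard the new ClobberedRegSet parameter guards against: walking backwards from a call, an instruction may describe a parameter as a copy from, say, a callee-saved register, but that source register may itself have been redefined between the copy and the call. Every visited def is therefore recorded first, and a described source is only trusted if none of its register units appear in the set. A miniature with register units modeled as plain integers (an illustrative simplification):

    #include <cstdint>
    #include <set>

    struct ClobberTracker {
      std::set<uint32_t> ClobberedUnits; // units redefined on the path to the call

      void recordDefs(const std::set<uint32_t> &DefUnits) {
        ClobberedUnits.insert(DefUnits.begin(), DefUnits.end());
      }
      // True if any unit of the described source register was clobbered
      // between the describing copy and the call.
      bool clobberedInMeantime(const std::set<uint32_t> &RegUnits) const {
        for (uint32_t U : RegUnits)
          if (ClobberedUnits.count(U))
            return true;
        return false;
      }
    };
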
@@ -834,6 +860,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin();
// Search for a loading value in forwarding registers inside call delay slot.
+ ClobberedRegSet ClobberedRegUnits;
if (CallMI->hasDelaySlot()) {
auto Suc = std::next(CallMI->getIterator());
// Only one-instruction delay slot is supported.
@@ -842,14 +869,14 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
assert(std::next(Suc) == BundleEnd &&
"More than one instruction in call delay slot");
// Try to interpret value loaded by instruction.
- if (!interpretNextInstr(&*Suc, ForwardedRegWorklist, Params))
+ if (!interpretNextInstr(&*Suc, ForwardedRegWorklist, Params, ClobberedRegUnits))
return;
}
// Search for a loading value in forwarding registers.
for (; I != MBB->rend(); ++I) {
// Try to interpret values loaded by instruction.
- if (!interpretNextInstr(&*I, ForwardedRegWorklist, Params))
+ if (!interpretNextInstr(&*I, ForwardedRegWorklist, Params, ClobberedRegUnits))
return;
}
@@ -929,8 +956,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
// the callee.
const MachineOperand &CalleeOp = TII->getCalleeOperand(MI);
if (!CalleeOp.isGlobal() &&
- (!CalleeOp.isReg() ||
- !Register::isPhysicalRegister(CalleeOp.getReg())))
+ (!CalleeOp.isReg() || !CalleeOp.getReg().isPhysical()))
continue;
unsigned CallReg = 0;
@@ -2004,6 +2030,17 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
if (MI->isMetaInstruction() || MI->getFlag(MachineInstr::FrameSetup))
return;
const DebugLoc &DL = MI->getDebugLoc();
+ unsigned Flags = 0;
+
+ if (MI->getFlag(MachineInstr::FrameDestroy) && DL) {
+ const MachineBasicBlock *MBB = MI->getParent();
+ if (MBB && (MBB != EpilogBeginBlock)) {
+ // First time FrameDestroy has been seen in this basic block
+ EpilogBeginBlock = MBB;
+ Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
+ }
+ }
+
// When we emit a line-0 record, we don't update PrevInstLoc; so look at
// the last line number actually emitted, to see if it was line 0.
unsigned LastAsmLine =
@@ -2015,10 +2052,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
return;
// We have an explicit location, same as the previous location.
// But we might be coming back to it after a line 0 record.
- if (LastAsmLine == 0 && DL.getLine() != 0) {
+ if ((LastAsmLine == 0 && DL.getLine() != 0) || Flags) {
// Reinstate the source location but not marked as a statement.
const MDNode *Scope = DL.getScope();
- recordSourceLine(DL.getLine(), DL.getCol(), Scope, /*Flags=*/0);
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
}
return;
}
@@ -2059,7 +2096,6 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// (The new location might be an explicit line 0, which we do emit.)
if (DL.getLine() == 0 && LastAsmLine == 0)
return;
- unsigned Flags = 0;
if (DL == PrologEndLoc) {
Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT;
PrologEndLoc = DebugLoc();
@@ -3530,13 +3566,14 @@ void DwarfDebug::insertSectionLabel(const MCSymbol *S) {
AddrPool.getIndex(S);
}
-Optional<MD5::MD5Result> DwarfDebug::getMD5AsBytes(const DIFile *File) const {
+std::optional<MD5::MD5Result>
+DwarfDebug::getMD5AsBytes(const DIFile *File) const {
assert(File);
if (getDwarfVersion() < 5)
- return None;
- Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
+ return std::nullopt;
+ std::optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
- return None;
+ return std::nullopt;
// Convert the string checksum to an MD5Result for the streamer.
// The verifier validates the checksum so we assume it's okay.
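
The FrameDestroy handling added to beginInstruction() marks at most one epilogue_begin per basic block in the line table. Reduced to its control flow (types simplified; the flag value is a stand-in, not the real DWARF2_FLAG_EPILOGUE_BEGIN constant):

    const void *EpilogBeginBlock = nullptr; // block already marked, if any

    unsigned epilogueFlags(const void *MBB, bool IsFrameDestroy,
                           bool HasDebugLoc) {
      unsigned Flags = 0;
      if (IsFrameDestroy && HasDebugLoc && MBB && MBB != EpilogBeginBlock) {
        EpilogBeginBlock = MBB; // first FrameDestroy seen in this block
        Flags |= 1u << 3;       // stand-in for DWARF2_FLAG_EPILOGUE_BEGIN
      }
      return Flags;
    }
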
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 31e4081b7141..5d2ef8ee79a7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -116,7 +116,7 @@ class DbgVariable : public DbgEntity {
/// Index of the entry list in DebugLocs.
unsigned DebugLocListIndex = ~0u;
/// DW_OP_LLVM_tag_offset value from DebugLocs.
- Optional<uint8_t> DebugLocListTagOffset;
+ std::optional<uint8_t> DebugLocListTagOffset;
/// Single value location description.
std::unique_ptr<DbgValueLoc> ValueLoc = nullptr;
@@ -175,7 +175,9 @@ public:
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
void setDebugLocListTagOffset(uint8_t O) { DebugLocListTagOffset = O; }
- Optional<uint8_t> getDebugLocListTagOffset() const { return DebugLocListTagOffset; }
+ std::optional<uint8_t> getDebugLocListTagOffset() const {
+ return DebugLocListTagOffset;
+ }
StringRef getName() const { return getVariable()->getName(); }
const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
/// Get the FI entries, sorted by fragment offset.
@@ -839,7 +841,7 @@ public:
/// If the \p File has an MD5 checksum, return it as an MD5Result
/// allocated in the MCContext.
- Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
+ std::optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index e5cda4739fde..c2c11c7bc14d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -21,20 +21,7 @@ namespace llvm {
class MachineFunction;
class ARMTargetStreamer;
-class LLVM_LIBRARY_VISIBILITY DwarfCFIExceptionBase : public EHStreamer {
-protected:
- DwarfCFIExceptionBase(AsmPrinter *A);
-
- /// Per-function flag to indicate if frame CFI info should be emitted.
- bool shouldEmitCFI = false;
- /// Per-module flag to indicate if .cfi_section has been emitted.
- bool hasEmittedCFISections = false;
-
- void markFunctionEnd() override;
- void endFragment() override;
-};
-
-class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase {
+class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public EHStreamer {
/// Per-function flag to indicate if .cfi_personality should be emitted.
bool shouldEmitPersonality = false;
@@ -44,6 +31,17 @@ class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase {
/// Per-function flag to indicate if .cfi_lsda should be emitted.
bool shouldEmitLSDA = false;
+ /// Per-function flag to indicate if frame CFI info should be emitted.
+ bool shouldEmitCFI = false;
+
+ /// Per-module flag to indicate if .cfi_section has been emitted.
+ bool hasEmittedCFISections = false;
+
+ /// Vector of all personality functions seen so far in the module.
+ std::vector<const GlobalValue *> Personalities;
+
+ void addPersonality(const GlobalValue *Personality);
+
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -61,14 +59,17 @@ public:
/// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
- void beginFragment(const MachineBasicBlock *MBB,
- ExceptionSymbolProvider ESP) override;
-
- void beginBasicBlock(const MachineBasicBlock &MBB) override;
- void endBasicBlock(const MachineBasicBlock &MBB) override;
+ void beginBasicBlockSection(const MachineBasicBlock &MBB) override;
+ void endBasicBlockSection(const MachineBasicBlock &MBB) override;
};
-class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase {
+class LLVM_LIBRARY_VISIBILITY ARMException : public EHStreamer {
+ /// Per-function flag to indicate if frame CFI info should be emitted.
+ bool shouldEmitCFI = false;
+
+ /// Per-module flag to indicate if .cfi_section has been emitted.
+ bool hasEmittedCFISections = false;
+
void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) override;
ARMTargetStreamer &getTargetStreamer();
@@ -88,9 +89,11 @@ public:
/// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
+
+ void markFunctionEnd() override;
};
-class LLVM_LIBRARY_VISIBILITY AIXException : public DwarfCFIExceptionBase {
+class LLVM_LIBRARY_VISIBILITY AIXException : public EHStreamer {
/// This is AIX's compat unwind section, which the unwinder uses
/// to find the location of the LSDA area and the personality routine.
void emitExceptionInfoTable(const MCSymbol *LSDA, const MCSymbol *PerSym);
@@ -98,11 +101,8 @@ class LLVM_LIBRARY_VISIBILITY AIXException : public DwarfCFIExceptionBase {
public:
AIXException(AsmPrinter *A);
- void markFunctionEnd() override;
-
void endModule() override {}
void beginFunction(const MachineFunction *MF) override {}
-
void endFunction(const MachineFunction *MF) override;
};
} // End of namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 1c21d5ee8bb1..ab6967f50e30 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -99,7 +99,7 @@ void DwarfExpression::addAnd(unsigned Mask) {
bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
llvm::Register MachineReg,
unsigned MaxSize) {
- if (!llvm::Register::isPhysicalRegister(MachineReg)) {
+ if (!MachineReg.isPhysical()) {
if (isFrameRegister(TRI, MachineReg)) {
DwarfRegs.push_back(Register::createRegister(-1, nullptr));
return true;
@@ -494,7 +494,7 @@ bool DwarfExpression::addExpression(
// and not any other parts of the following DWARF expression.
assert(!IsEmittingEntryValue && "Can't emit entry value around expression");
- Optional<DIExpression::ExprOperand> PrevConvertOp = None;
+ std::optional<DIExpression::ExprOperand> PrevConvertOp;
while (ExprCursor) {
auto Op = ExprCursor.take();
@@ -604,7 +604,7 @@ bool DwarfExpression::addExpression(
emitLegacySExt(PrevConvertOp->getArg(0));
else if (Encoding == dwarf::DW_ATE_unsigned)
emitLegacyZExt(PrevConvertOp->getArg(0));
- PrevConvertOp = None;
+ PrevConvertOp = std::nullopt;
} else {
PrevConvertOp = Op;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index e605fe2f7d39..667a9efc6f6c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -15,13 +15,12 @@
#include "ByteStreamer.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include <cassert>
#include <cstdint>
#include <iterator>
+#include <optional>
namespace llvm {
@@ -53,9 +52,9 @@ public:
DIExpressionCursor(const DIExpressionCursor &) = default;
/// Consume one operation.
- Optional<DIExpression::ExprOperand> take() {
+ std::optional<DIExpression::ExprOperand> take() {
if (Start == End)
- return None;
+ return std::nullopt;
return *(Start++);
}
@@ -63,20 +62,20 @@ public:
void consume(unsigned N) { std::advance(Start, N); }
/// Return the current operation.
- Optional<DIExpression::ExprOperand> peek() const {
+ std::optional<DIExpression::ExprOperand> peek() const {
if (Start == End)
- return None;
+ return std::nullopt;
return *(Start);
}
/// Return the next operation.
- Optional<DIExpression::ExprOperand> peekNext() const {
+ std::optional<DIExpression::ExprOperand> peekNext() const {
if (Start == End)
- return None;
+ return std::nullopt;
auto Next = Start.getNext();
if (Next == End)
- return None;
+ return std::nullopt;
return *Next;
}
@@ -88,7 +87,7 @@ public:
DIExpression::expr_op_iterator end() const { return End; }
/// Retrieve the fragment information, if any.
- Optional<DIExpression::FragmentInfo> getFragmentInfo() const {
+ std::optional<DIExpression::FragmentInfo> getFragmentInfo() const {
return DIExpression::getFragmentInfo(Start, End);
}
};
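
The cursor above signals exhaustion through std::optional rather than a sentinel value, which lets consumption collapse into a single loop condition, as in the addExpression() walk later in this diff. A standard-C++ analogue:

    #include <optional>
    #include <vector>

    class IntCursor {
      std::vector<int>::const_iterator Start, End;

    public:
      explicit IntCursor(const std::vector<int> &V)
          : Start(V.begin()), End(V.end()) {}
      std::optional<int> take() { // consume one element
        if (Start == End)
          return std::nullopt;
        return *Start++;
      }
    };

    // Usage: while (auto Op = Cursor.take()) { ... use *Op ... }
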
@@ -170,7 +169,7 @@ public:
bool isParameterValue() { return LocationFlags & CallSiteParamValue; }
- Optional<uint8_t> TagOffset;
+ std::optional<uint8_t> TagOffset;
protected:
/// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index a497aa07284e..3fe437a07c92 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -42,7 +42,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
// Skip CUs that ended up not being needed (split CUs that were abandoned
// because they added no information beyond the non-split CU)
- if (llvm::empty(TheU->getUnitDie().values()))
+ if (TheU->getUnitDie().values().empty())
return;
Asm->OutStreamer->switchSection(S);
@@ -66,7 +66,7 @@ void DwarfFile::computeSizeAndOffsets() {
// Skip CUs that ended up not being needed (split CUs that were abandoned
// because they added no information beyond the non-split CU)
- if (llvm::empty(TheU->getUnitDie().values()))
+ if (TheU->getUnitDie().values().empty())
return;
TheU->setDebugSectionOffset(SecOffset);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index 67b72f0b455d..2292590b135e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm,
StringRef Prefix)
: Pool(A), Prefix(Prefix),
- ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {}
+ ShouldCreateSymbols(Asm.doesDwarfUseRelocationsAcrossSections()) {}
StringMapEntry<DwarfStringPool::EntryTy> &
DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 81238b0fe0d2..c2ff899c04ab 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -16,7 +16,6 @@
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
@@ -219,7 +218,7 @@ void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
}
void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
- Optional<dwarf::Form> Form, uint64_t Integer) {
+ std::optional<dwarf::Form> Form, uint64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(false, Integer);
assert(Form != dwarf::DW_FORM_implicit_const &&
@@ -233,13 +232,13 @@ void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form,
}
void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
- Optional<dwarf::Form> Form, int64_t Integer) {
+ std::optional<dwarf::Form> Form, int64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(true, Integer);
addAttribute(Die, Attribute, *Form, DIEInteger(Integer));
}
-void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
+void DwarfUnit::addSInt(DIELoc &Die, std::optional<dwarf::Form> Form,
int64_t Integer) {
addSInt(Die, (dwarf::Attribute)0, Form, Integer);
}
@@ -411,8 +410,8 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {
return;
unsigned FileID = getOrCreateSourceID(File);
- addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
- addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
+ addUInt(Die, dwarf::DW_AT_decl_file, std::nullopt, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, std::nullopt, Line);
}
void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) {
@@ -705,12 +704,12 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
BTy->getEncoding());
uint64_t Size = BTy->getSizeInBits() >> 3;
- addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size);
if (BTy->isBigEndian())
- addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_big);
+ addUInt(Buffer, dwarf::DW_AT_endianity, std::nullopt, dwarf::DW_END_big);
else if (BTy->isLittleEndian())
- addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little);
+ addUInt(Buffer, dwarf::DW_AT_endianity, std::nullopt, dwarf::DW_END_little);
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) {
@@ -734,7 +733,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) {
addBlock(Buffer, dwarf::DW_AT_string_length, DwarfExpr.finalize());
} else {
uint64_t Size = STy->getSizeInBits() >> 3;
- addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size);
}
if (DIExpression *Expr = STy->getStringLocationExp()) {
@@ -785,11 +784,14 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
&& Tag != dwarf::DW_TAG_ptr_to_member_type
&& Tag != dwarf::DW_TAG_reference_type
&& Tag != dwarf::DW_TAG_rvalue_reference_type)
- addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
*getOrCreateTypeDIE(cast<DIDerivedType>(DTy)->getClassType()));
+
+ addAccess(Buffer, DTy->getFlags());
+
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy->isForwardDecl())
addSourceLine(Buffer, DTy);
@@ -832,10 +834,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
// Add prototype flag if we're dealing with a C language and the function has
// been prototyped.
- uint16_t Language = getLanguage();
- if (isPrototyped &&
- (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
- Language == dwarf::DW_LANG_ObjC))
+ if (isPrototyped && dwarf::isC((dwarf::SourceLanguage)getLanguage()))
addFlag(Buffer, dwarf::DW_AT_prototyped);
// Add a DW_AT_calling_convention if this has an explicit convention.
@@ -929,9 +928,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) {
if (DD->isUnsignedDIType(Discriminator->getBaseType()))
- addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue());
+ addUInt(Variant, dwarf::DW_AT_discr_value, std::nullopt,
+ CI->getZExtValue());
else
- addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue());
+ addSInt(Variant, dwarf::DW_AT_discr_value, std::nullopt,
+ CI->getSExtValue());
}
constructMemberDIE(Variant, DDTy);
} else {
@@ -951,7 +952,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (!SetterName.empty())
addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
if (unsigned PropertyAttributes = Property->getAttributes())
- addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None,
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, std::nullopt,
PropertyAttributes);
} else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
if (Composite->getTag() == dwarf::DW_TAG_variant_part) {
@@ -1017,10 +1018,10 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
// TODO: Do we care about size for enum forward declarations?
if (Size &&
(!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type))
- addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size);
else if (!CTy->isForwardDecl())
// Add zero size if it is not a forward declaration.
- addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0);
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, 0);
// If we're a forward decl, say so.
if (CTy->isForwardDecl())
@@ -1055,7 +1056,7 @@ void DwarfUnit::constructTemplateTypeParameterDIE(
addType(ParamDIE, TP->getType());
if (!TP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, TP->getName());
- if (TP->isDefault() && (DD->getDwarfVersion() >= 5))
+ if (TP->isDefault() && isCompatibleWithVersion(5))
addFlag(ParamDIE, dwarf::DW_AT_default_value);
}
@@ -1069,7 +1070,7 @@ void DwarfUnit::constructTemplateValueParameterDIE(
addType(ParamDIE, VP->getType());
if (!VP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, VP->getName());
- if (VP->isDefault() && (DD->getDwarfVersion() >= 5))
+ if (VP->isDefault() && isCompatibleWithVersion(5))
addFlag(ParamDIE, dwarf::DW_AT_default_value);
if (Metadata *Val = VP->getValue()) {
if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
@@ -1139,10 +1140,10 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
if (!M->getAPINotesFile().empty())
addString(MDie, dwarf::DW_AT_LLVM_apinotes, M->getAPINotesFile());
if (M->getFile())
- addUInt(MDie, dwarf::DW_AT_decl_file, None,
+ addUInt(MDie, dwarf::DW_AT_decl_file, std::nullopt,
getOrCreateSourceID(M->getFile()));
if (M->getLineNo())
- addUInt(MDie, dwarf::DW_AT_decl_line, None, M->getLineNo());
+ addUInt(MDie, dwarf::DW_AT_decl_line, std::nullopt, M->getLineNo());
if (M->getIsDecl())
addFlag(MDie, dwarf::DW_AT_declaration);
@@ -1205,10 +1206,10 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
unsigned DeclID = getOrCreateSourceID(SPDecl->getFile());
unsigned DefID = getOrCreateSourceID(SP->getFile());
if (DeclID != DefID)
- addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
+ addUInt(SPDie, dwarf::DW_AT_decl_file, std::nullopt, DefID);
if (SP->getLine() != SPDecl->getLine())
- addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine());
+ addUInt(SPDie, dwarf::DW_AT_decl_line, std::nullopt, SP->getLine());
}
}
@@ -1259,10 +1260,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
// Add the prototype if we have a prototype and we have a C like
// language.
- uint16_t Language = getLanguage();
- if (SP->isPrototyped() &&
- (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
- Language == dwarf::DW_LANG_ObjC))
+ if (SP->isPrototyped() && dwarf::isC((dwarf::SourceLanguage)getLanguage()))
addFlag(SPDie, dwarf::DW_AT_prototyped);
if (SP->isObjCDirect())
@@ -1376,7 +1374,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
} else if (auto *BI = Bound.dyn_cast<ConstantInt *>()) {
if (Attr == dwarf::DW_AT_count) {
if (BI->getSExtValue() != -1)
- addUInt(DW_Subrange, Attr, None, BI->getSExtValue());
+ addUInt(DW_Subrange, Attr, std::nullopt, BI->getSExtValue());
} else if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
BI->getSExtValue() != DefaultLowerBound)
addSInt(DW_Subrange, Attr, dwarf::DW_FORM_sdata, BI->getSExtValue());
@@ -1437,7 +1435,7 @@ DIE *DwarfUnit::getIndexTyDie() {
IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie());
StringRef Name = "__ARRAY_SIZE_TYPE__";
addString(*IndexTyDie, dwarf::DW_AT_name, Name);
- addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
+ addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, std::nullopt, sizeof(int64_t));
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::getArrayIndexTypeEncoding(
(dwarf::SourceLanguage)getLanguage()));
@@ -1478,7 +1476,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (CTy->isVector()) {
addFlag(Buffer, dwarf::DW_AT_GNU_vector);
if (hasVectorBeenPadded(CTy))
- addUInt(Buffer, dwarf::DW_AT_byte_size, None,
+ addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt,
CTy->getSizeInBits() / CHAR_BIT);
}
@@ -1625,12 +1623,12 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
uint32_t AlignInBytes = DT->getAlignInBytes();
uint64_t OffsetInBytes;
- bool IsBitfield = FieldSize && Size != FieldSize;
+ bool IsBitfield = DT->isBitField();
if (IsBitfield) {
// Handle bitfield; assume bytes are 8 bits.
if (DD->useDWARF2Bitfields())
- addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
- addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, std::nullopt, FieldSize / 8);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, std::nullopt, Size);
uint64_t Offset = DT->getOffsetInBits();
// We can't use DT->getAlignInBits() here: AlignInBits for member type
@@ -1652,10 +1650,10 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
if (Asm->getDataLayout().isLittleEndian())
Offset = FieldSize - (Offset + Size);
- addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, std::nullopt, Offset);
OffsetInBytes = FieldOffset >> 3;
} else {
- addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, None, Offset);
+ addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, std::nullopt, Offset);
}
} else {
// This is not a bitfield.
@@ -1679,7 +1677,7 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
addUInt(MemberDie, dwarf::DW_AT_data_member_location,
dwarf::DW_FORM_udata, OffsetInBytes);
else
- addUInt(MemberDie, dwarf::DW_AT_data_member_location, None,
+ addUInt(MemberDie, dwarf::DW_AT_data_member_location, std::nullopt,
OffsetInBytes);
}
}
@@ -1798,7 +1796,7 @@ void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label, const MCSymbol *Sec) {
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ if (Asm->doesDwarfUseRelocationsAcrossSections())
addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label);
else
addSectionDelta(Die, Attribute, Label, Sec);
@@ -1821,7 +1819,7 @@ void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die,
}
const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const {
- if (!Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ if (!Asm->doesDwarfUseRelocationsAcrossSections())
return nullptr;
if (isDwoUnit())
return nullptr;
@@ -1847,3 +1845,7 @@ void DwarfUnit::addRnglistsBase() {
void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
DD->getAddressPool().resetUsedFlag(true);
}
+
+bool DwarfUnit::isCompatibleWithVersion(uint16_t Version) const {
+ return !Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= Version;
+}
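+
+// Usage sketch (illustrative only; the attribute is a hypothetical example):
+// callers can use this guard to keep version-gated attributes out of
+// strict-DWARF output, e.g.
+//   if (isCompatibleWithVersion(5))
+//     addFlag(Die, dwarf::DW_AT_export_symbols); // DWARF v5 attribute.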
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 48d63d126701..0caa6adbfa62 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -15,10 +15,10 @@
#include "DwarfDebug.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/Target/TargetMachine.h"
+#include <optional>
#include <string>
namespace llvm {
@@ -143,15 +143,15 @@ public:
/// Add an unsigned integer attribute data and value.
void addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
- Optional<dwarf::Form> Form, uint64_t Integer);
+ std::optional<dwarf::Form> Form, uint64_t Integer);
void addUInt(DIEValueList &Block, dwarf::Form Form, uint64_t Integer);
/// Add an signed integer attribute data and value.
void addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
- Optional<dwarf::Form> Form, int64_t Integer);
+ std::optional<dwarf::Form> Form, int64_t Integer);
- void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer);
+ void addSInt(DIELoc &Die, std::optional<dwarf::Form> Form, int64_t Integer);
/// Add a string attribute data and value.
///
@@ -350,6 +350,10 @@ private:
virtual bool isDwoUnit() const = 0;
const MCSymbol *getCrossSectionRelativeBaseAddress() const override;
+
+ /// Returns 'true' if the current DwarfVersion is compatible
+ /// with the specified \p Version.
+ bool isCompatibleWithVersion(uint16_t Version) const;
};
class DwarfTypeUnit final : public DwarfUnit {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 31644959bdca..67e2c0e07095 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -195,6 +195,12 @@ void EHStreamer::computePadMap(
const LandingPadInfo *LandingPad = LandingPads[i];
for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad->EndLabels[j];
+ // If we have deleted the code for a given invoke after registering it in
+ // the LandingPad label list, the associated symbols will not have been
+ // emitted. In that case, ignore this callsite entry.
+ if (!BeginLabel->isDefined() || !EndLabel->isDefined())
+ continue;
assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
PadRange P = { i, j };
PadMap[BeginLabel] = P;
@@ -383,8 +389,14 @@ MCSymbol *EHStreamer::emitExceptionTable() {
SmallVector<const LandingPadInfo *, 64> LandingPads;
LandingPads.reserve(PadInfos.size());
- for (const LandingPadInfo &LPI : PadInfos)
+ for (const LandingPadInfo &LPI : PadInfos) {
+ // If a landing-pad has an associated label, but the label wasn't ever
+ // emitted, then skip it. (This can occur if the landingpad's MBB was
+ // deleted).
+ if (LPI.LandingPadLabel && !LPI.LandingPadLabel->isDefined())
+ continue;
LandingPads.push_back(&LPI);
+ }
// Order landing pads lexicographically by type id.
llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) {
@@ -663,9 +675,10 @@ MCSymbol *EHStreamer::emitExceptionTable() {
Asm->OutStreamer->emitLabel(CSRange.ExceptionLabel);
// Emit the LSDA header.
- // If only one call-site range exists, LPStart is omitted as it is the
- // same as the function entry.
- if (CallSiteRanges.size() == 1) {
+ // LPStart is omitted if we have a single call-site range (in which case
+ // the function entry is treated as @LPStart) or if this function has no
+ // landing pads (in which case @LPStart is undefined).
+ if (CallSiteRanges.size() == 1 || LandingPadRange == nullptr) {
Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
} else if (!Asm->isPositionIndependent()) {
// For more than one call-site ranges, LPStart must be explicitly
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index 135eabc34838..3e75b4371033 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -48,5 +48,6 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
}
SmallVector<InlineSite, 8> InlineStack(llvm::reverse(ReversedInlineStack));
- Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack);
+ Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack,
+ Asm->CurrentFnSym);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
index a514ff161cee..bf65e525dde1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -42,16 +42,6 @@ void WasmException::endModule() {
}
}
-void WasmException::markFunctionEnd() {
- // Get rid of any dead landing pads.
- if (!Asm->MF->getLandingPads().empty()) {
- auto *NonConstMF = const_cast<MachineFunction *>(Asm->MF);
- // Wasm does not set BeginLabel and EndLabel information for landing pads,
- // so we should set the second argument false.
- NonConstMF->tidyLandingPads(nullptr, /* TidyIfNoBeginLabels */ false);
- }
-}
-
void WasmException::endFunction(const MachineFunction *MF) {
bool ShouldEmitExceptionTable = false;
for (const LandingPadInfo &Info : MF->getLandingPads()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
index 419b569d123c..86cc37dfde07 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
@@ -28,7 +28,6 @@ public:
void endModule() override;
void beginFunction(const MachineFunction *MF) override {}
- void markFunctionEnd() override;
void endFunction(const MachineFunction *MF) override;
protected:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index c3ca9c92bf71..7a800438592c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -130,14 +130,6 @@ void WinException::endFunction(const MachineFunction *MF) {
if (F.hasPersonalityFn())
Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
- // Get rid of any dead landing pads if we're not using funclets. In funclet
- // schemes, the landing pad is not actually reachable. It only exists so
- // that we can emit the right table data.
- if (!isFuncletEHPersonality(Per)) {
- MachineFunction *NonConstMF = const_cast<MachineFunction*>(MF);
- NonConstMF->tidyLandingPads();
- }
-
endFuncletImpl();
// endFunclet will emit the necessary .xdata tables for table-based SEH.
@@ -736,7 +728,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// EHFlags & 1 -> Synchronous exceptions only, no async exceptions.
// EHFlags & 2 -> ???
// EHFlags & 4 -> The function is noexcept(true), unwinding can't continue.
- OS.emitValueToAlignment(4);
+ OS.emitValueToAlignment(Align(4));
OS.emitLabel(FuncInfoXData);
AddComment("MagicNumber");
@@ -1010,7 +1002,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
// Emit the __ehtable label that we use for llvm.x86.seh.lsda.
MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName);
- OS.emitValueToAlignment(4);
+ OS.emitValueToAlignment(Align(4));
OS.emitLabel(LSDALabel);
const auto *Per = cast<Function>(F.getPersonalityFn()->stripPointerCasts());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
new file mode 100644
index 000000000000..7098824dbe4b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -0,0 +1,2426 @@
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Analysis/Interval.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PrintPasses.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <assert.h>
+#include <cstdint>
+#include <optional>
+#include <sstream>
+#include <unordered_map>
+
+using namespace llvm;
+#define DEBUG_TYPE "debug-ata"
+
+STATISTIC(NumDefsScanned, "Number of dbg locs that get scanned for removal");
+STATISTIC(NumDefsRemoved, "Number of dbg locs removed");
+STATISTIC(NumWedgesScanned, "Number of dbg wedges scanned");
+STATISTIC(NumWedgesChanged, "Number of dbg wedges changed");
+
+static cl::opt<unsigned>
+ MaxNumBlocks("debug-ata-max-blocks", cl::init(10000),
+ cl::desc("Maximum num basic blocks before debug info dropped"),
+ cl::Hidden);
+/// Option for debugging the pass: determines whether the memory location
+/// fragment filling happens after generating the variable locations.
+static cl::opt<bool> EnableMemLocFragFill("mem-loc-frag-fill", cl::init(true),
+ cl::Hidden);
+/// Print the results of the analysis. Respects -filter-print-funcs.
+static cl::opt<bool> PrintResults("print-debug-ata", cl::init(false),
+ cl::Hidden);
+
+// Implicit conversions are disabled for enum class types, so unfortunately we
+// need to create a DenseMapInfo wrapper around the specified underlying type.
+template <> struct llvm::DenseMapInfo<VariableID> {
+ using Wrapped = DenseMapInfo<unsigned>;
+ static inline VariableID getEmptyKey() {
+ return static_cast<VariableID>(Wrapped::getEmptyKey());
+ }
+ static inline VariableID getTombstoneKey() {
+ return static_cast<VariableID>(Wrapped::getTombstoneKey());
+ }
+ static unsigned getHashValue(const VariableID &Val) {
+ return Wrapped::getHashValue(static_cast<unsigned>(Val));
+ }
+ static bool isEqual(const VariableID &LHS, const VariableID &RHS) {
+ return LHS == RHS;
+ }
+};
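+
+// Usage sketch (hypothetical map, not part of the pass): the wrapper above is
+// what allows VariableID to serve directly as a DenseMap key, e.g.
+//   DenseMap<VariableID, unsigned> UseCounts;
+//   ++UseCounts[static_cast<VariableID>(1)];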
+
+/// Helper class to build FunctionVarLocs, since that class isn't easy to
+/// modify. TODO: There's not a great deal of value in the split; it could be
+/// worth merging the two classes.
+class FunctionVarLocsBuilder {
+ friend FunctionVarLocs;
+ UniqueVector<DebugVariable> Variables;
+ // Use an unordered_map so we don't invalidate iterators after
+ // insert/modifications.
+ std::unordered_map<const Instruction *, SmallVector<VarLocInfo>>
+ VarLocsBeforeInst;
+
+ SmallVector<VarLocInfo> SingleLocVars;
+
+public:
+ /// Find or insert \p V and return the ID.
+ VariableID insertVariable(DebugVariable V) {
+ return static_cast<VariableID>(Variables.insert(V));
+ }
+
+ /// Get a variable from its \p ID.
+ const DebugVariable &getVariable(VariableID ID) const {
+ return Variables[static_cast<unsigned>(ID)];
+ }
+
+ /// Return ptr to wedge of defs or nullptr if no defs come just before
+ /// \p Before.
+ const SmallVectorImpl<VarLocInfo> *getWedge(const Instruction *Before) const {
+ auto R = VarLocsBeforeInst.find(Before);
+ if (R == VarLocsBeforeInst.end())
+ return nullptr;
+ return &R->second;
+ }
+
+ /// Replace the defs that come just before \p Before with \p Wedge.
+ void setWedge(const Instruction *Before, SmallVector<VarLocInfo> &&Wedge) {
+ VarLocsBeforeInst[Before] = std::move(Wedge);
+ }
+
+ /// Add a def for a variable that is valid for its lifetime.
+ void addSingleLocVar(DebugVariable Var, DIExpression *Expr, DebugLoc DL,
+ Value *V) {
+ VarLocInfo VarLoc;
+ VarLoc.VariableID = insertVariable(Var);
+ VarLoc.Expr = Expr;
+ VarLoc.DL = DL;
+ VarLoc.V = V;
+ SingleLocVars.emplace_back(VarLoc);
+ }
+
+ /// Add a def to the wedge of defs just before \p Before.
+ void addVarLoc(Instruction *Before, DebugVariable Var, DIExpression *Expr,
+ DebugLoc DL, Value *V) {
+ VarLocInfo VarLoc;
+ VarLoc.VariableID = insertVariable(Var);
+ VarLoc.Expr = Expr;
+ VarLoc.DL = DL;
+ VarLoc.V = V;
+ VarLocsBeforeInst[Before].emplace_back(VarLoc);
+ }
+};
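+
+// Builder usage sketch (variable names are illustrative): the lowering stages
+// below fill a builder that FunctionVarLocs::init later converts into the
+// packed analysis result, e.g.
+//   FunctionVarLocsBuilder B;
+//   B.addVarLoc(&InsertPt, DebugVariable(&DVI), Expr, DL, Val);
+//   Result.init(B);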
+
+void FunctionVarLocs::print(raw_ostream &OS, const Function &Fn) const {
+ // Print the variable table first. TODO: Sorting by variable could make the
+ // output more stable?
+ unsigned Counter = -1;
+ OS << "=== Variables ===\n";
+ for (const DebugVariable &V : Variables) {
+ ++Counter;
+ // Skip first entry because it is a dummy entry.
+ if (Counter == 0) {
+ continue;
+ }
+ OS << "[" << Counter << "] " << V.getVariable()->getName();
+ if (auto F = V.getFragment())
+ OS << " bits [" << F->OffsetInBits << ", "
+ << F->OffsetInBits + F->SizeInBits << ")";
+ if (const auto *IA = V.getInlinedAt())
+ OS << " inlined-at " << *IA;
+ OS << "\n";
+ }
+
+ auto PrintLoc = [&OS](const VarLocInfo &Loc) {
+ OS << "DEF Var=[" << (unsigned)Loc.VariableID << "]"
+ << " Expr=" << *Loc.Expr << " V=" << *Loc.V << "\n";
+ };
+
+ // Print the single location variables.
+ OS << "=== Single location vars ===\n";
+ for (auto It = single_locs_begin(), End = single_locs_end(); It != End;
+ ++It) {
+ PrintLoc(*It);
+ }
+
+ // Print the non-single-location defs in line with IR.
+ OS << "=== In-line variable defs ===";
+ for (const BasicBlock &BB : Fn) {
+ OS << "\n" << BB.getName() << ":\n";
+ for (const Instruction &I : BB) {
+ for (auto It = locs_begin(&I), End = locs_end(&I); It != End; ++It) {
+ PrintLoc(*It);
+ }
+ OS << I << "\n";
+ }
+ }
+}
+
+void FunctionVarLocs::init(FunctionVarLocsBuilder &Builder) {
+ // Add the single-location variables first.
+ for (const auto &VarLoc : Builder.SingleLocVars)
+ VarLocRecords.emplace_back(VarLoc);
+ // Mark the end of the section.
+ SingleVarLocEnd = VarLocRecords.size();
+
+ // Insert a contiguous block of VarLocInfos for each instruction, mapping it
+ // to the start and end position in the vector with VarLocsBeforeInst.
+ for (auto &P : Builder.VarLocsBeforeInst) {
+ unsigned BlockStart = VarLocRecords.size();
+ for (const VarLocInfo &VarLoc : P.second)
+ VarLocRecords.emplace_back(VarLoc);
+ unsigned BlockEnd = VarLocRecords.size();
+ // Record the start and end indices.
+ if (BlockEnd != BlockStart)
+ VarLocsBeforeInst[P.first] = {BlockStart, BlockEnd};
+ }
+
+ // Copy the Variables vector from the builder's UniqueVector.
+ assert(Variables.empty() && "Expect clear before init");
+ // UniqueVector's IDs are one-based (which means the VarLocInfo VarID values
+ // are one-based) so reserve an extra and insert a dummy.
+ Variables.reserve(Builder.Variables.size() + 1);
+ Variables.push_back(DebugVariable(nullptr, std::nullopt, nullptr));
+ Variables.append(Builder.Variables.begin(), Builder.Variables.end());
+}
+
+void FunctionVarLocs::clear() {
+ Variables.clear();
+ VarLocRecords.clear();
+ VarLocsBeforeInst.clear();
+ SingleVarLocEnd = 0;
+}
+
+/// Walk backwards along constant GEPs and bitcasts to the base storage from
+/// \p Start as far as possible. Prepend \p Expression with the offset, and
+/// append a DW_OP_deref that has been implicit until now. Returns the walked-to
+/// value and modified expression.
+static std::pair<Value *, DIExpression *>
+walkToAllocaAndPrependOffsetDeref(const DataLayout &DL, Value *Start,
+ DIExpression *Expression) {
+ APInt OffsetInBytes(DL.getTypeSizeInBits(Start->getType()), false);
+ Value *End =
+ Start->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetInBytes);
+ SmallVector<uint64_t, 3> Ops;
+ if (OffsetInBytes.getBoolValue()) {
+ Ops = {dwarf::DW_OP_plus_uconst, OffsetInBytes.getZExtValue()};
+ Expression = DIExpression::prependOpcodes(
+ Expression, Ops, /*StackValue=*/false, /*EntryValue=*/false);
+ }
+ Expression = DIExpression::append(Expression, {dwarf::DW_OP_deref});
+ return {End, Expression};
+}
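+
+// Worked example (hypothetical IR): with Start = `getelementptr inbounds
+// ({i32, i32}, ptr %a, i32 0, i32 1)` and an empty Expression, the walk
+// returns {%a, !DIExpression(DW_OP_plus_uconst, 4, DW_OP_deref)}, assuming
+// the second struct field sits at byte offset 4.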
+
+/// Extract the offset used in \p DIExpr. Returns std::nullopt if the expression
+/// doesn't explicitly describe a memory location with DW_OP_deref or if the
+/// expression is too complex to interpret.
+static std::optional<int64_t>
+getDerefOffsetInBytes(const DIExpression *DIExpr) {
+ int64_t Offset = 0;
+ const unsigned NumElements = DIExpr->getNumElements();
+ const auto Elements = DIExpr->getElements();
+ unsigned NextElement = 0;
+ // Extract the offset.
+ if (NumElements > 2 && Elements[0] == dwarf::DW_OP_plus_uconst) {
+ Offset = Elements[1];
+ NextElement = 2;
+ } else if (NumElements > 3 && Elements[0] == dwarf::DW_OP_constu) {
+ NextElement = 3;
+ if (Elements[2] == dwarf::DW_OP_plus)
+ Offset = Elements[1];
+ else if (Elements[2] == dwarf::DW_OP_minus)
+ Offset = -Elements[1];
+ else
+ return std::nullopt;
+ }
+
+ // If that's all there is it means there's no deref.
+ if (NextElement >= NumElements)
+ return std::nullopt;
+
+ // Check the next element is DW_OP_deref - otherwise this is too complex or
+ // isn't a deref expression.
+ if (Elements[NextElement] != dwarf::DW_OP_deref)
+ return std::nullopt;
+
+ // Check the expression ends with the deref alone, or with the deref
+ // followed by a fragment (DW_OP_LLVM_fragment plus its two operands).
+ if (NumElements == NextElement + 1)
+ return Offset; // Ends with deref.
+ else if (NumElements == NextElement + 4 &&
+ Elements[NextElement + 1] == dwarf::DW_OP_LLVM_fragment)
+ return Offset; // Ends with deref + fragment.
+
+ // Don't bother trying to interpret anything more complex.
+ return std::nullopt;
+}
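+
+// Worked examples for the extraction above (expressions are illustrative):
+//   (DW_OP_plus_uconst, 16, DW_OP_deref)        -> 16
+//   (DW_OP_constu, 4, DW_OP_minus, DW_OP_deref) -> -4
+//   (DW_OP_deref, DW_OP_LLVM_fragment, 0, 32)   -> 0
+//   (DW_OP_plus_uconst, 16, DW_OP_stack_value)  -> std::nullopt (no deref)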
+
+/// A whole (unfragmented) source variable.
+using DebugAggregate = std::pair<const DILocalVariable *, const DILocation *>;
+static DebugAggregate getAggregate(const DbgVariableIntrinsic *DII) {
+ return DebugAggregate(DII->getVariable(), DII->getDebugLoc().getInlinedAt());
+}
+static DebugAggregate getAggregate(const DebugVariable &Var) {
+ return DebugAggregate(Var.getVariable(), Var.getInlinedAt());
+}
+
+namespace {
+/// In dwarf emission, the following sequence
+/// 1. dbg.value ... Fragment(0, 64)
+/// 2. dbg.value ... Fragment(0, 32)
+/// effectively sets Fragment(32, 32) to undef (each def sets all bits not in
+/// the intersection of the fragments to having "no location"). This makes
+/// sense for implicit location values because splitting the computed values
+/// could be troublesome, and is probably quite uncommon. When we convert
+/// dbg.assigns to dbg.value+deref this kind of thing is common, and describing
+/// a location (memory) rather than a value means we don't need to worry about
+/// splitting any values, so we try to recover the rest of the fragment
+/// location here.
+/// This class performs a(nother) dataflow analysis over the function, adding
+/// variable locations so that any bits of a variable with a memory location
+/// have that location explicitly reinstated at each subsequent variable
+/// location definition that doesn't overwrite those bits. i.e. after a
+/// variable location def, insert new defs for the memory location with
+/// fragments for the difference of "all bits currently in memory" and "the
+/// fragment of the second def".
+class MemLocFragmentFill {
+ Function &Fn;
+ FunctionVarLocsBuilder *FnVarLocs;
+ const DenseSet<DebugAggregate> *VarsWithStackSlot;
+
+ // 0 = no memory location.
+ using BaseAddress = unsigned;
+ using OffsetInBitsTy = unsigned;
+ using FragTraits = IntervalMapHalfOpenInfo<OffsetInBitsTy>;
+ using FragsInMemMap = IntervalMap<
+ OffsetInBitsTy, BaseAddress,
+ IntervalMapImpl::NodeSizer<OffsetInBitsTy, BaseAddress>::LeafSize,
+ FragTraits>;
+ FragsInMemMap::Allocator IntervalMapAlloc;
+ using VarFragMap = DenseMap<unsigned, FragsInMemMap>;
+
+ /// IDs for memory location base addresses in maps. Use 0 to indicate that
+ /// there's no memory location.
+ UniqueVector<Value *> Bases;
+ UniqueVector<DebugAggregate> Aggregates;
+ DenseMap<const BasicBlock *, VarFragMap> LiveIn;
+ DenseMap<const BasicBlock *, VarFragMap> LiveOut;
+
+ struct FragMemLoc {
+ unsigned Var;
+ unsigned Base;
+ unsigned OffsetInBits;
+ unsigned SizeInBits;
+ DebugLoc DL;
+ };
+ using InsertMap = MapVector<Instruction *, SmallVector<FragMemLoc>>;
+
+ /// BBInsertBeforeMap holds a description for the set of location defs to be
+ /// inserted after the analysis is complete. It is updated during the dataflow
+ /// and the entry for a block is CLEARED each time it is (re-)visited. After
+ /// the dataflow is complete, each block entry will contain the set of defs
+ /// calculated during the final (fixed-point) iteration.
+ DenseMap<const BasicBlock *, InsertMap> BBInsertBeforeMap;
+
+ static bool intervalMapsAreEqual(const FragsInMemMap &A,
+ const FragsInMemMap &B) {
+ auto AIt = A.begin(), AEnd = A.end();
+ auto BIt = B.begin(), BEnd = B.end();
+ for (; AIt != AEnd; ++AIt, ++BIt) {
+ if (BIt == BEnd)
+ return false; // B has fewer elements than A.
+ if (AIt.start() != BIt.start() || AIt.stop() != BIt.stop())
+ return false; // Interval is different.
+ if (*AIt != *BIt)
+ return false; // Value at interval is different.
+ }
+ // AIt == AEnd. Check BIt is also now at end.
+ return BIt == BEnd;
+ }
+
+ static bool varFragMapsAreEqual(const VarFragMap &A, const VarFragMap &B) {
+ if (A.size() != B.size())
+ return false;
+ for (const auto &APair : A) {
+ auto BIt = B.find(APair.first);
+ if (BIt == B.end())
+ return false;
+ if (!intervalMapsAreEqual(APair.second, BIt->second))
+ return false;
+ }
+ return true;
+ }
+
+ /// Return a string for the value that \p BaseID represents.
+ std::string toString(unsigned BaseID) {
+ if (BaseID)
+ return Bases[BaseID]->getName().str();
+ else
+ return "None";
+ }
+
+ /// Return a string describing a FragsInMemMap (IntervalMap) interval.
+ std::string toString(FragsInMemMap::const_iterator It, bool Newline = true) {
+ std::string String;
+ std::stringstream S(String);
+ if (It.valid()) {
+ S << "[" << It.start() << ", " << It.stop()
+ << "): " << toString(It.value());
+ } else {
+ S << "invalid iterator (end)";
+ }
+ if (Newline)
+ S << "\n";
+ return S.str();
+ };
+
+ FragsInMemMap meetFragments(const FragsInMemMap &A, const FragsInMemMap &B) {
+ FragsInMemMap Result(IntervalMapAlloc);
+ for (auto AIt = A.begin(), AEnd = A.end(); AIt != AEnd; ++AIt) {
+ LLVM_DEBUG(dbgs() << "a " << toString(AIt));
+ // This is basically copied from process() and inverted (process is
+ // performing something like a union whereas this is more of an
+ // intersect).
+
+ // There's no work to do if interval `a` overlaps no fragments in map `B`.
+ if (!B.overlaps(AIt.start(), AIt.stop()))
+ continue;
+
+ // Does StartBit intersect an existing fragment?
+ auto FirstOverlap = B.find(AIt.start());
+ assert(FirstOverlap != B.end());
+ bool IntersectStart = FirstOverlap.start() < AIt.start();
+ LLVM_DEBUG(dbgs() << "- FirstOverlap " << toString(FirstOverlap, false)
+ << ", IntersectStart: " << IntersectStart << "\n");
+
+ // Does EndBit intersect an existing fragment?
+ auto LastOverlap = B.find(AIt.stop());
+ bool IntersectEnd =
+ LastOverlap != B.end() && LastOverlap.start() < AIt.stop();
+ LLVM_DEBUG(dbgs() << "- LastOverlap " << toString(LastOverlap, false)
+ << ", IntersectEnd: " << IntersectEnd << "\n");
+
+ // Check if both ends of `a` intersect the same interval `b`.
+ if (IntersectStart && IntersectEnd && FirstOverlap == LastOverlap) {
+ // Insert `a` (`a` is contained in `b`) if the values match.
+ // [ a ]
+ // [ - b - ]
+ // -
+ // [ r ]
+ LLVM_DEBUG(dbgs() << "- a is contained within "
+ << toString(FirstOverlap));
+ if (*AIt && *AIt == *FirstOverlap)
+ Result.insert(AIt.start(), AIt.stop(), *AIt);
+ } else {
+ // There's an overlap but `a` is not fully contained within
+ // `b`. Shorten any end-point intersections.
+ // [ - a - ]
+ // [ - b - ]
+ // -
+ // [ r ]
+ auto Next = FirstOverlap;
+ if (IntersectStart) {
+ LLVM_DEBUG(dbgs() << "- insert intersection of a and "
+ << toString(FirstOverlap));
+ if (*AIt && *AIt == *FirstOverlap)
+ Result.insert(AIt.start(), FirstOverlap.stop(), *AIt);
+ ++Next;
+ }
+ // [ - a - ]
+ // [ - b - ]
+ // -
+ // [ r ]
+ if (IntersectEnd) {
+ LLVM_DEBUG(dbgs() << "- insert intersection of a and "
+ << toString(LastOverlap));
+ if (*AIt && *AIt == *LastOverlap)
+ Result.insert(LastOverlap.start(), AIt.stop(), *AIt);
+ }
+
+ // Insert all intervals in map `B` that are contained within interval
+ // `a` where the values match.
+ // [ - - a - - ]
+ // [ b1 ] [ b2 ]
+ // -
+ // [ r1 ] [ r2 ]
+ while (Next != B.end() && Next.start() < AIt.stop() &&
+ Next.stop() <= AIt.stop()) {
+ LLVM_DEBUG(dbgs()
+ << "- insert intersection of a and " << toString(Next));
+ if (*AIt && *AIt == *Next)
+ Result.insert(Next.start(), Next.stop(), *Next);
+ ++Next;
+ }
+ }
+ }
+ return Result;
+ }
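+
+ // Illustrative meet of two fragment maps (interval bounds are bit offsets,
+ // values are base-address IDs; all numbers are hypothetical):
+ //   meetFragments({[0,32): 1, [32,64): 2}, {[0,64): 1}) == {[0,32): 1}
+ // Only [0,32) agrees on its base address in both maps, so only it survives.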
+
+ /// Meet \p A and \p B, storing the result in \p A.
+ void meetVars(VarFragMap &A, const VarFragMap &B) {
+ // Meet A and B.
+ //
+ // Result = meet(a, b) for a in A, b in B where Var(a) == Var(b)
+ for (auto It = A.begin(), End = A.end(); It != End; ++It) {
+ unsigned AVar = It->first;
+ FragsInMemMap &AFrags = It->second;
+ auto BIt = B.find(AVar);
+ if (BIt == B.end()) {
+ A.erase(It);
+ continue; // Var has no bits defined in B.
+ }
+ LLVM_DEBUG(dbgs() << "meet fragment maps for "
+ << Aggregates[AVar].first->getName() << "\n");
+ AFrags = meetFragments(AFrags, BIt->second);
+ }
+ }
+
+ bool meet(const BasicBlock &BB,
+ const SmallPtrSet<BasicBlock *, 16> &Visited) {
+ LLVM_DEBUG(dbgs() << "meet block info from preds of " << BB.getName()
+ << "\n");
+
+ VarFragMap BBLiveIn;
+ bool FirstMeet = true;
+ // The LiveIn locs for BB are the meet of the already-processed preds'
+ // LiveOut locs.
+ for (auto I = pred_begin(&BB), E = pred_end(&BB); I != E; I++) {
+ // Ignore preds that haven't been processed yet. This is essentially the
+ // same as initialising all variables to implicit top value (⊤) which is
+ // the identity value for the meet operation.
+ const BasicBlock *Pred = *I;
+ if (!Visited.count(Pred))
+ continue;
+
+ auto PredLiveOut = LiveOut.find(Pred);
+ assert(PredLiveOut != LiveOut.end());
+
+ if (FirstMeet) {
+ LLVM_DEBUG(dbgs() << "BBLiveIn = " << Pred->getName() << "\n");
+ BBLiveIn = PredLiveOut->second;
+ FirstMeet = false;
+ } else {
+ LLVM_DEBUG(dbgs() << "BBLiveIn = meet BBLiveIn, " << Pred->getName()
+ << "\n");
+ meetVars(BBLiveIn, PredLiveOut->second);
+ }
+
+ // An empty set is ⊥ for the intersect-like meet operation. If we've
+ // already got ⊥ there's no need to run the code - we know the result is
+ // ⊥ since `meet(a, ⊥) = ⊥`.
+ if (BBLiveIn.size() == 0)
+ break;
+ }
+
+ auto CurrentLiveInEntry = LiveIn.find(&BB);
+ // If there's no LiveIn entry for the block yet, add it.
+ if (CurrentLiveInEntry == LiveIn.end()) {
+ LLVM_DEBUG(dbgs() << "change=true (first) on meet on " << BB.getName()
+ << "\n");
+ LiveIn[&BB] = std::move(BBLiveIn);
+ return /*Changed=*/true;
+ }
+
+ // If the LiveIn set has changed (expensive check) update it and return
+ // true.
+ if (!varFragMapsAreEqual(BBLiveIn, CurrentLiveInEntry->second)) {
+ LLVM_DEBUG(dbgs() << "change=true on meet on " << BB.getName() << "\n");
+ CurrentLiveInEntry->second = std::move(BBLiveIn);
+ return /*Changed=*/true;
+ }
+
+ LLVM_DEBUG(dbgs() << "change=false on meet on " << BB.getName() << "\n");
+ return /*Changed=*/false;
+ }
+
+ void insertMemLoc(BasicBlock &BB, Instruction &Before, unsigned Var,
+ unsigned StartBit, unsigned EndBit, unsigned Base,
+ DebugLoc DL) {
+ assert(StartBit < EndBit && "Cannot create fragment of size <= 0");
+ if (!Base)
+ return;
+ FragMemLoc Loc;
+ Loc.Var = Var;
+ Loc.OffsetInBits = StartBit;
+ Loc.SizeInBits = EndBit - StartBit;
+ assert(Base && "Expected a non-zero ID for Base address");
+ Loc.Base = Base;
+ Loc.DL = DL;
+ BBInsertBeforeMap[&BB][&Before].push_back(Loc);
+ LLVM_DEBUG(dbgs() << "Add mem def for " << Aggregates[Var].first->getName()
+ << " bits [" << StartBit << ", " << EndBit << ")\n");
+ }
+
+ void addDef(const VarLocInfo &VarLoc, Instruction &Before, BasicBlock &BB,
+ VarFragMap &LiveSet) {
+ DebugVariable DbgVar = FnVarLocs->getVariable(VarLoc.VariableID);
+ if (skipVariable(DbgVar.getVariable()))
+ return;
+ // Don't bother doing anything for this variable if we know it's fully
+ // promoted. We're only interested in variables that (sometimes) live on
+ // the stack here.
+ if (!VarsWithStackSlot->count(getAggregate(DbgVar)))
+ return;
+ unsigned Var = Aggregates.insert(
+ DebugAggregate(DbgVar.getVariable(), VarLoc.DL.getInlinedAt()));
+
+ // [StartBit: EndBit) are the bits affected by this def.
+ const DIExpression *DIExpr = VarLoc.Expr;
+ unsigned StartBit;
+ unsigned EndBit;
+ if (auto Frag = DIExpr->getFragmentInfo()) {
+ StartBit = Frag->OffsetInBits;
+ EndBit = StartBit + Frag->SizeInBits;
+ } else {
+ assert(static_cast<bool>(DbgVar.getVariable()->getSizeInBits()));
+ StartBit = 0;
+ EndBit = *DbgVar.getVariable()->getSizeInBits();
+ }
+
+ // We will only fill fragments for simple memory-describing dbg.value
+ // intrinsics. If the fragment offset is the same as the offset from the
+ // base pointer, fill the fragments; otherwise fall back to normal dbg.value
+ // behaviour. AssignmentTrackingLowering has generated DIExpressions
+ // written in terms of the base pointer.
+ // TODO: Remove this condition since the fragment offset doesn't always
+ // equal the offset from base pointer (e.g. for a SROA-split variable).
+ const auto DerefOffsetInBytes = getDerefOffsetInBytes(DIExpr);
+ const unsigned Base =
+ DerefOffsetInBytes && *DerefOffsetInBytes * 8 == StartBit
+ ? Bases.insert(VarLoc.V)
+ : 0;
+ LLVM_DEBUG(dbgs() << "DEF " << DbgVar.getVariable()->getName() << " ["
+ << StartBit << ", " << EndBit << "): " << toString(Base)
+ << "\n");
+
+ // First of all, any locs that use mem that are disrupted need reinstating.
+ // Unfortunately, IntervalMap doesn't let us insert intervals that overlap
+ // with existing intervals so this code involves a lot of fiddling around
+ // with intervals to do that manually.
+ auto FragIt = LiveSet.find(Var);
+
+ // Check if the variable does not exist in the map.
+ if (FragIt == LiveSet.end()) {
+ // Add this variable to the BB map.
+ auto P = LiveSet.try_emplace(Var, FragsInMemMap(IntervalMapAlloc));
+ assert(P.second && "Var already in map?");
+ // Add the interval to the fragment map.
+ P.first->second.insert(StartBit, EndBit, Base);
+ return;
+ }
+ // The variable has an entry in the map.
+
+ FragsInMemMap &FragMap = FragIt->second;
+ // First check the easy case: the new fragment `f` doesn't overlap with any
+ // intervals.
+ if (!FragMap.overlaps(StartBit, EndBit)) {
+ LLVM_DEBUG(dbgs() << "- No overlaps\n");
+ FragMap.insert(StartBit, EndBit, Base);
+ return;
+ }
+ // There is at least one overlap.
+
+ // Does StartBit intersect an existing fragment?
+ auto FirstOverlap = FragMap.find(StartBit);
+ assert(FirstOverlap != FragMap.end());
+ bool IntersectStart = FirstOverlap.start() < StartBit;
+
+ // Does EndBit intersect an existing fragment?
+ auto LastOverlap = FragMap.find(EndBit);
+ bool IntersectEnd = LastOverlap.valid() && LastOverlap.start() < EndBit;
+
+ // Check if both ends of `f` intersect the same interval `i`.
+ if (IntersectStart && IntersectEnd && FirstOverlap == LastOverlap) {
+ LLVM_DEBUG(dbgs() << "- Intersect single interval @ both ends\n");
+ // Shorten `i` so that there's space to insert `f`.
+ // [ f ]
+ // [ - i - ]
+ // +
+ // [ i ][ f ][ i ]
+
+ // Save values for use after inserting a new interval.
+ auto EndBitOfOverlap = FirstOverlap.stop();
+ unsigned OverlapValue = FirstOverlap.value();
+
+ // Shorten the overlapping interval.
+ FirstOverlap.setStop(StartBit);
+ insertMemLoc(BB, Before, Var, FirstOverlap.start(), StartBit,
+ OverlapValue, VarLoc.DL);
+
+ // Insert a new interval to represent the end part.
+ FragMap.insert(EndBit, EndBitOfOverlap, OverlapValue);
+ insertMemLoc(BB, Before, Var, EndBit, EndBitOfOverlap, OverlapValue,
+ VarLoc.DL);
+
+ // Insert the new (middle) fragment now there is space.
+ FragMap.insert(StartBit, EndBit, Base);
+ } else {
+ // There's an overlap but `f` may not be fully contained within
+ // `i`. Shorten any end-point intersections so that we can then
+ // insert `f`.
+ // [ - f - ]
+ // [ - i - ]
+ // | |
+ // [ i ]
+ // Shorten any end-point intersections.
+ if (IntersectStart) {
+ LLVM_DEBUG(dbgs() << "- Intersect interval at start\n");
+ // Split off at the intersection.
+ FirstOverlap.setStop(StartBit);
+ insertMemLoc(BB, Before, Var, FirstOverlap.start(), StartBit,
+ *FirstOverlap, VarLoc.DL);
+ }
+ // [ - f - ]
+ // [ - i - ]
+ // | |
+ // [ i ]
+ if (IntersectEnd) {
+ LLVM_DEBUG(dbgs() << "- Intersect interval at end\n");
+ // Split off at the intersection.
+ LastOverlap.setStart(EndBit);
+ insertMemLoc(BB, Before, Var, EndBit, LastOverlap.stop(), *LastOverlap,
+ VarLoc.DL);
+ }
+
+ LLVM_DEBUG(dbgs() << "- Erase intervals contained within\n");
+ // FirstOverlap and LastOverlap have been shortened such that they're
+ // no longer overlapping with [StartBit, EndBit). Delete any overlaps
+ // that remain (these will be fully contained within `f`).
+ // [ - f - ] }
+ // [ - i - ] } Intersection shortening that has happened above.
+ // | | }
+ // [ i ] }
+ // -----------------
+ // [i2 ] } Intervals fully contained within `f` get erased.
+ // -----------------
+ // [ - f - ][ i ] } Completed insertion.
+ auto It = FirstOverlap;
+ if (IntersectStart)
+ ++It; // IntersectStart: first overlap has been shortened.
+ while (It.valid() && It.start() >= StartBit && It.stop() <= EndBit) {
+ LLVM_DEBUG(dbgs() << "- Erase " << toString(It));
+ It.erase(); // This increments It after removing the interval.
+ }
+ // We've dealt with all the overlaps now!
+ assert(!FragMap.overlaps(StartBit, EndBit));
+ LLVM_DEBUG(dbgs() << "- Insert DEF into now-empty space\n");
+ FragMap.insert(StartBit, EndBit, Base);
+ }
+ }
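+
+ // Illustrative trace (bit ranges and bases are hypothetical): with
+ // LiveSet[Var] == {[0,64): base1}, a def of bits [16,32) with base2 takes
+ // the first branch above: [0,64) is shortened to [0,16), [32,64) is
+ // re-inserted (with mem-loc defs emitted for both via insertMemLoc), and
+ // [16,32): base2 fills the gap, leaving
+ //   {[0,16): base1, [16,32): base2, [32,64): base1}.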
+
+ bool skipVariable(const DILocalVariable *V) { return !V->getSizeInBits(); }
+
+ void process(BasicBlock &BB, VarFragMap &LiveSet) {
+ BBInsertBeforeMap[&BB].clear();
+ for (auto &I : BB) {
+ if (const auto *Locs = FnVarLocs->getWedge(&I)) {
+ for (const VarLocInfo &Loc : *Locs) {
+ addDef(Loc, I, *I.getParent(), LiveSet);
+ }
+ }
+ }
+ }
+
+public:
+ MemLocFragmentFill(Function &Fn,
+ const DenseSet<DebugAggregate> *VarsWithStackSlot)
+ : Fn(Fn), VarsWithStackSlot(VarsWithStackSlot) {}
+
+ /// Add variable locations to \p FnVarLocs so that any bits of a variable
+ /// with a memory location have that location explicitly reinstated at each
+ /// subsequent variable location definition that doesn't overwrite those
+ /// bits. i.e. after a variable location def, insert new defs for the memory
+ /// location with fragments for the difference of "all bits currently in
+ /// memory" and "the fragment of the second def". e.g.
+ ///
+ /// Before:
+ ///
+ /// var x bits 0 to 63: value in memory
+ /// more instructions
+ /// var x bits 0 to 31: value is %0
+ ///
+ /// After:
+ ///
+ /// var x bits 0 to 63: value in memory
+ /// more instructions
+ /// var x bits 0 to 31: value is %0
+ /// var x bits 32 to 63: value in memory ; <-- new loc def
+ ///
+ void run(FunctionVarLocsBuilder *FnVarLocs) {
+ if (!EnableMemLocFragFill)
+ return;
+
+ this->FnVarLocs = FnVarLocs;
+
+ // Prepare for traversal.
+ //
+ ReversePostOrderTraversal<Function *> RPOT(&Fn);
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Pending;
+ DenseMap<unsigned int, BasicBlock *> OrderToBB;
+ DenseMap<BasicBlock *, unsigned int> BBToOrder;
+ { // Init OrderToBB and BBToOrder.
+ unsigned int RPONumber = 0;
+ for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
+ OrderToBB[RPONumber] = *RI;
+ BBToOrder[*RI] = RPONumber;
+ Worklist.push(RPONumber);
+ ++RPONumber;
+ }
+ LiveIn.init(RPONumber);
+ LiveOut.init(RPONumber);
+ }
+
+ // Perform the traversal.
+ //
+ // This is a standard "intersect of predecessor outs" dataflow problem. To
+ // solve it, we perform meet() and process() using the two worklist method
+ // until the LiveIn data for each block becomes unchanging.
+ //
+ // This dataflow is essentially working on maps of sets and at each meet we
+ // intersect the maps and the mapped sets. So, initialized live-in maps
+ // monotonically decrease in value throughout the dataflow.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ while (!Worklist.empty() || !Pending.empty()) {
+ // We track what is on the pending worklist to avoid inserting the same
+ // thing twice. We could avoid this with a custom priority queue, but
+ // this is probably not worth it.
+ SmallPtrSet<BasicBlock *, 16> OnPending;
+ LLVM_DEBUG(dbgs() << "Processing Worklist\n");
+ while (!Worklist.empty()) {
+ BasicBlock *BB = OrderToBB[Worklist.top()];
+ LLVM_DEBUG(dbgs() << "\nPop BB " << BB->getName() << "\n");
+ Worklist.pop();
+ bool InChanged = meet(*BB, Visited);
+ // Always consider LiveIn changed on the first visit.
+ InChanged |= Visited.insert(BB).second;
+ if (InChanged) {
+ LLVM_DEBUG(dbgs()
+ << BB->getName() << " has new InLocs, process it\n");
+ // Mutate a copy of LiveIn while processing BB. Once we've processed
+ // the terminator LiveSet is the LiveOut set for BB.
+ // This is an expensive copy!
+ VarFragMap LiveSet = LiveIn[BB];
+
+ // Process the instructions in the block.
+ process(*BB, LiveSet);
+
+ // Relatively expensive check: has anything changed in LiveOut for BB?
+ if (!varFragMapsAreEqual(LiveOut[BB], LiveSet)) {
+ LLVM_DEBUG(dbgs() << BB->getName()
+ << " has new OutLocs, add succs to worklist: [ ");
+ LiveOut[BB] = std::move(LiveSet);
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; I++) {
+ if (OnPending.insert(*I).second) {
+ LLVM_DEBUG(dbgs() << I->getName() << " ");
+ Pending.push(BBToOrder[*I]);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "]\n");
+ }
+ }
+ }
+ Worklist.swap(Pending);
+ // At this point, Pending must be empty, since it was just swapped with
+ // the (drained) worklist.
+ assert(Pending.empty() && "Pending should be empty");
+ }
+
+ // Insert new location defs.
+ for (auto Pair : BBInsertBeforeMap) {
+ InsertMap &Map = Pair.second;
+ for (auto Pair : Map) {
+ Instruction *InsertBefore = Pair.first;
+ assert(InsertBefore && "should never be null");
+ auto FragMemLocs = Pair.second;
+ auto &Ctx = Fn.getContext();
+
+ for (auto FragMemLoc : FragMemLocs) {
+ DIExpression *Expr = DIExpression::get(Ctx, std::nullopt);
+ Expr = *DIExpression::createFragmentExpression(
+ Expr, FragMemLoc.OffsetInBits, FragMemLoc.SizeInBits);
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefAfter,
+ FragMemLoc.OffsetInBits / 8);
+ DebugVariable Var(Aggregates[FragMemLoc.Var].first, Expr,
+ FragMemLoc.DL.getInlinedAt());
+ FnVarLocs->addVarLoc(InsertBefore, Var, Expr, FragMemLoc.DL,
+ Bases[FragMemLoc.Base]);
+ }
+ }
+ }
+ }
+};
+
+/// AssignmentTrackingLowering encapsulates a dataflow analysis over a function
+/// that interprets assignment tracking debug info metadata and stores in IR to
+/// create a map of variable locations.
+class AssignmentTrackingLowering {
+public:
+ /// The kind of location in use for a variable, where Mem is the stack home,
+ /// Val is an SSA value or const, and None means that there is not one single
+ /// kind (either because there are multiple or because there is none; it may
+ /// prove useful to split this into two values in the future).
+ ///
+ /// LocKind is a join-semilattice with the partial order:
+ /// None > Mem, Val
+ ///
+ /// i.e.
+ /// join(Mem, Mem) = Mem
+ /// join(Val, Val) = Val
+ /// join(Mem, Val) = None
+ /// join(None, Mem) = None
+ /// join(None, Val) = None
+ /// join(None, None) = None
+ ///
+ /// Note: the order is not `None > Val > Mem` because we're using DIAssignID
+ /// to name assignments and are not tracking the actual stored values.
+ /// Therefore currently there's no way to ensure that Mem values and Val
+ /// values are the same. This could be a future extension, though it's not
+ /// clear that many additional locations would be recovered that way in
+ /// practice as the likelihood of this situation arising naturally seems
+ /// incredibly low.
+ enum class LocKind { Mem, Val, None };
+
+ /// An abstraction of the assignment of a value to a variable or memory
+ /// location.
+ ///
+ /// An Assignment is Known or NoneOrPhi. A Known Assignment means we have a
+ /// DIAssignID ptr that represents it. NoneOrPhi means that we don't (or
+ /// can't) know the ID of the last assignment that took place.
+ ///
+ /// The Status of the Assignment (Known or NoneOrPhi) is another
+ /// join-semilattice. The partial order is:
+ /// NoneOrPhi > Known {id_0, id_1, ...id_N}
+ ///
+ /// i.e. for all values x and y where x != y:
+ /// join(x, x) = x
+ /// join(x, y) = NoneOrPhi
+ struct Assignment {
+ enum S { Known, NoneOrPhi } Status;
+ /// ID of the assignment. nullptr if Status is not Known.
+ DIAssignID *ID;
+ /// The dbg.assign that marks this dbg-def. Mem-defs don't use this field.
+ /// May be nullptr.
+ DbgAssignIntrinsic *Source;
+
+ bool isSameSourceAssignment(const Assignment &Other) const {
+ // Don't include Source in the equality check. Assignments are
+ // defined by their ID, not debug intrinsic(s).
+ return std::tie(Status, ID) == std::tie(Other.Status, Other.ID);
+ }
+ void dump(raw_ostream &OS) {
+ static const char *LUT[] = {"Known", "NoneOrPhi"};
+ OS << LUT[Status] << "(id=";
+ if (ID)
+ OS << ID;
+ else
+ OS << "null";
+ OS << ", s=";
+ if (Source)
+ OS << *Source;
+ else
+ OS << "null";
+ OS << ")";
+ }
+
+ static Assignment make(DIAssignID *ID, DbgAssignIntrinsic *Source) {
+ return Assignment(Known, ID, Source);
+ }
+ static Assignment makeFromMemDef(DIAssignID *ID) {
+ return Assignment(Known, ID, nullptr);
+ }
+ static Assignment makeNoneOrPhi() {
+ return Assignment(NoneOrPhi, nullptr, nullptr);
+ }
+ // Again, need a Top value?
+ Assignment()
+ : Status(NoneOrPhi), ID(nullptr), Source(nullptr) {
+ } // Can we delete this?
+ Assignment(S Status, DIAssignID *ID, DbgAssignIntrinsic *Source)
+ : Status(Status), ID(ID), Source(Source) {
+ // If the Status is Known then we expect there to be an assignment ID.
+ assert(Status == NoneOrPhi || ID);
+ }
+ };
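+
+ // Illustrative joins under the Status semilattice (IDs are hypothetical):
+ //   join(Known(!7), Known(!7)) == Known(!7)
+ //   join(Known(!7), Known(!8)) == NoneOrPhi
+ //   join(NoneOrPhi, anything)  == NoneOrPhi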
+
+ using AssignmentMap = DenseMap<VariableID, Assignment>;
+ using LocMap = DenseMap<VariableID, LocKind>;
+ using OverlapMap = DenseMap<VariableID, SmallVector<VariableID, 4>>;
+ using UntaggedStoreAssignmentMap =
+ DenseMap<const Instruction *,
+ SmallVector<std::pair<VariableID, at::AssignmentInfo>>>;
+
+private:
+ /// Map a variable to the set of variables that it fully contains.
+ OverlapMap VarContains;
+ /// Map untagged stores to the variable fragments they assign to. Used by
+ /// processUntaggedInstruction.
+ UntaggedStoreAssignmentMap UntaggedStoreVars;
+
+ // Machinery to defer inserting dbg.values.
+ using InsertMap = MapVector<Instruction *, SmallVector<VarLocInfo>>;
+ InsertMap InsertBeforeMap;
+ /// Clear the location definitions currently cached for insertion after
+ /// \p After.
+ void resetInsertionPoint(Instruction &After);
+ void emitDbgValue(LocKind Kind, const DbgVariableIntrinsic *Source,
+ Instruction *After);
+
+ static bool mapsAreEqual(const AssignmentMap &A, const AssignmentMap &B) {
+ if (A.size() != B.size())
+ return false;
+ for (const auto &Pair : A) {
+ VariableID Var = Pair.first;
+ const Assignment &AV = Pair.second;
+ auto R = B.find(Var);
+ // Check if this entry exists in B, otherwise return false.
+ if (R == B.end())
+ return false;
+ // Check that the assignment value is the same.
+ if (!AV.isSameSourceAssignment(R->second))
+ return false;
+ }
+ return true;
+ }
+
+ /// Represents the stack and debug assignments in a block. Used to describe
+ /// the live-in and live-out values for blocks, as well as the "current"
+ /// value as we process each instruction in a block.
+ struct BlockInfo {
+ /// Dominating assignment to memory for each variable.
+ AssignmentMap StackHomeValue;
+ /// Dominating assignment to each variable.
+ AssignmentMap DebugValue;
+ /// Location kind for each variable. LiveLoc indicates whether the
+ /// dominating assignment in StackHomeValue (LocKind::Mem), DebugValue
+ /// (LocKind::Val), or neither (LocKind::None) is valid, in that order of
+ /// preference. This cannot be derived by inspecting DebugValue and
+ /// StackHomeValue because there is no distinction in Assignment (the
+ /// class) between an unknown assignment and a merge of multiple
+ /// assignments (both are Status::NoneOrPhi). In other
+ /// words, the memory location may well be valid while both DebugValue and
+ /// StackHomeValue contain Assignments that have a Status of NoneOrPhi.
+ LocMap LiveLoc;
+
+ /// Compare every element in each map to determine structural equality
+ /// (slow).
+ bool operator==(const BlockInfo &Other) const {
+ return LiveLoc == Other.LiveLoc &&
+ mapsAreEqual(StackHomeValue, Other.StackHomeValue) &&
+ mapsAreEqual(DebugValue, Other.DebugValue);
+ }
+ bool operator!=(const BlockInfo &Other) const { return !(*this == Other); }
+ bool isValid() {
+ return LiveLoc.size() == DebugValue.size() &&
+ LiveLoc.size() == StackHomeValue.size();
+ }
+ };
+
+ Function &Fn;
+ const DataLayout &Layout;
+ const DenseSet<DebugAggregate> *VarsWithStackSlot;
+ FunctionVarLocsBuilder *FnVarLocs;
+ DenseMap<const BasicBlock *, BlockInfo> LiveIn;
+ DenseMap<const BasicBlock *, BlockInfo> LiveOut;
+
+ /// Helper for process methods to track variables touched each frame.
+ DenseSet<VariableID> VarsTouchedThisFrame;
+
+ /// The set of variables that sometimes are not located in their stack home.
+ DenseSet<DebugAggregate> NotAlwaysStackHomed;
+
+ VariableID getVariableID(const DebugVariable &Var) {
+ return static_cast<VariableID>(FnVarLocs->insertVariable(Var));
+ }
+
+ /// Join the LiveOut values of preds that are contained in \p Visited into
+ /// LiveIn[BB]. Return True if LiveIn[BB] has changed as a result. LiveIn[BB]
+ /// values monotonically increase. See the @link joinMethods join methods
+ /// @endlink documentation for more info.
+ bool join(const BasicBlock &BB, const SmallPtrSet<BasicBlock *, 16> &Visited);
+ ///@name joinMethods
+ /// Functions that implement `join` (the least upper bound) for the
+ /// join-semilattice types used in the dataflow. There is an explicit bottom
+ /// value (⊥) for some types and an explicit top value (⊤) for all types.
+ /// By definition:
+ ///
+ /// Join(A, B) >= A && Join(A, B) >= B
+ /// Join(A, ⊥) = A
+ /// Join(A, ⊤) = ⊤
+ ///
+ /// These invariants are important for monotonicity.
+ ///
+ /// For the map-type functions, all unmapped keys in an empty map are
+ /// associated with a bottom value (⊥). This represents their values being
+ /// unknown. Unmapped keys in non-empty maps (joining two maps with a key
+ /// only present in one) represent either a variable going out of scope or
+ /// dropped debug info. It is assumed the key is associated with a top value
+ /// (⊤) in this case (unknown location / assignment).
+ ///@{
+ static LocKind joinKind(LocKind A, LocKind B);
+ static LocMap joinLocMap(const LocMap &A, const LocMap &B);
+ static Assignment joinAssignment(const Assignment &A, const Assignment &B);
+ static AssignmentMap joinAssignmentMap(const AssignmentMap &A,
+ const AssignmentMap &B);
+ static BlockInfo joinBlockInfo(const BlockInfo &A, const BlockInfo &B);
+ ///@}
+
+ /// Process the instructions in \p BB updating \p LiveSet along the way. \p
+ /// LiveSet must be initialized with the current live-in locations before
+ /// calling this.
+ void process(BasicBlock &BB, BlockInfo *LiveSet);
+ ///@name processMethods
+ /// Methods to process instructions in order to update the LiveSet (current
+ /// location information).
+ ///@{
+ void processNonDbgInstruction(Instruction &I, BlockInfo *LiveSet);
+ void processDbgInstruction(Instruction &I, BlockInfo *LiveSet);
+ /// Update \p LiveSet after encountering an instruction with a DIAssignID
+ /// attachment, \p I.
+ void processTaggedInstruction(Instruction &I, BlockInfo *LiveSet);
+ /// Update \p LiveSet after encountering an instruction without a DIAssignID
+ /// attachment, \p I.
+ void processUntaggedInstruction(Instruction &I, BlockInfo *LiveSet);
+ void processDbgAssign(DbgAssignIntrinsic &DAI, BlockInfo *LiveSet);
+ void processDbgValue(DbgValueInst &DVI, BlockInfo *LiveSet);
+ /// Add an assignment to memory for the variable \p Var.
+ void addMemDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV);
+ /// Add an assignment to the variable \p Var.
+ void addDbgDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV);
+ ///@}
+
+ /// Set the LocKind for \p Var.
+ void setLocKind(BlockInfo *LiveSet, VariableID Var, LocKind K);
+ /// Get the live LocKind for a \p Var. Requires addMemDef or addDbgDef to
+ /// have been called for \p Var first.
+ LocKind getLocKind(BlockInfo *LiveSet, VariableID Var);
+ /// Return true if \p Var has an assignment in \p M matching \p AV.
+ bool hasVarWithAssignment(VariableID Var, const Assignment &AV,
+ const AssignmentMap &M);
+
+ /// Emit info for variables that are fully promoted.
+ bool emitPromotedVarLocs(FunctionVarLocsBuilder *FnVarLocs);
+
+public:
+ AssignmentTrackingLowering(Function &Fn, const DataLayout &Layout,
+ const DenseSet<DebugAggregate> *VarsWithStackSlot)
+ : Fn(Fn), Layout(Layout), VarsWithStackSlot(VarsWithStackSlot) {}
+ /// Run the analysis, adding variable location info to \p FnVarLocs. Returns
+ /// true if any variable locations have been added to FnVarLocs.
+ bool run(FunctionVarLocsBuilder *FnVarLocs);
+};
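+
+// Driver sketch (assumed wiring; names are illustrative):
+//   FunctionVarLocsBuilder Builder;
+//   AssignmentTrackingLowering Pass(F, F.getParent()->getDataLayout(),
+//                                   &VarsWithStackSlot);
+//   bool AddedLocs = Pass.run(&Builder);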
+} // namespace
+
+void AssignmentTrackingLowering::setLocKind(BlockInfo *LiveSet, VariableID Var,
+ LocKind K) {
+ auto SetKind = [this](BlockInfo *LiveSet, VariableID Var, LocKind K) {
+ VarsTouchedThisFrame.insert(Var);
+ LiveSet->LiveLoc[Var] = K;
+ };
+ SetKind(LiveSet, Var, K);
+
+ // Update the LocKind for all fragments contained within Var.
+ for (VariableID Frag : VarContains[Var])
+ SetKind(LiveSet, Frag, K);
+}
+
+AssignmentTrackingLowering::LocKind
+AssignmentTrackingLowering::getLocKind(BlockInfo *LiveSet, VariableID Var) {
+ auto Pair = LiveSet->LiveLoc.find(Var);
+ assert(Pair != LiveSet->LiveLoc.end());
+ return Pair->second;
+}
+
+void AssignmentTrackingLowering::addMemDef(BlockInfo *LiveSet, VariableID Var,
+ const Assignment &AV) {
+ auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) {
+ LiveSet->StackHomeValue[Var] = AV;
+ // Add default (Var -> ⊤) to DebugValue if Var isn't in DebugValue yet.
+ LiveSet->DebugValue.insert({Var, Assignment::makeNoneOrPhi()});
+ // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers
+ // of addMemDef will call setLocKind to override.
+ LiveSet->LiveLoc.insert({Var, LocKind::None});
+ };
+ AddDef(LiveSet, Var, AV);
+
+ // Use this assignment for all fragments contained within Var, but do not
+ // provide a Source because we cannot convert Var's value to a value for the
+ // fragment.
+ Assignment FragAV = AV;
+ FragAV.Source = nullptr;
+ for (VariableID Frag : VarContains[Var])
+ AddDef(LiveSet, Frag, FragAV);
+}
+
+void AssignmentTrackingLowering::addDbgDef(BlockInfo *LiveSet, VariableID Var,
+ const Assignment &AV) {
+ auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) {
+ LiveSet->DebugValue[Var] = AV;
+ // Add default (Var -> ⊤) to StackHome if Var isn't in StackHome yet.
+ LiveSet->StackHomeValue.insert({Var, Assignment::makeNoneOrPhi()});
+ // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers
+ // of addDbgDef will call setLocKind to override.
+ LiveSet->LiveLoc.insert({Var, LocKind::None});
+ };
+ AddDef(LiveSet, Var, AV);
+
+ // Use this assignment for all fragments contained within Var, but do not
+ // provide a Source because we cannot convert Var's value to a value for the
+ // fragment.
+ Assignment FragAV = AV;
+ FragAV.Source = nullptr;
+ for (VariableID Frag : VarContains[Var])
+ AddDef(LiveSet, Frag, FragAV);
+}
+
+static DIAssignID *getIDFromInst(const Instruction &I) {
+ return cast<DIAssignID>(I.getMetadata(LLVMContext::MD_DIAssignID));
+}
+
+static DIAssignID *getIDFromMarker(const DbgAssignIntrinsic &DAI) {
+ return cast<DIAssignID>(DAI.getAssignID());
+}
+
+/// Return true if \p Var has an assignment in \p M matching \p AV.
+bool AssignmentTrackingLowering::hasVarWithAssignment(VariableID Var,
+ const Assignment &AV,
+ const AssignmentMap &M) {
+ auto AssignmentIsMapped = [](VariableID Var, const Assignment &AV,
+ const AssignmentMap &M) {
+ auto R = M.find(Var);
+ if (R == M.end())
+ return false;
+ return AV.isSameSourceAssignment(R->second);
+ };
+
+ if (!AssignmentIsMapped(Var, AV, M))
+ return false;
+
+ // Check all the frags contained within Var as these will have all been
+ // mapped to AV at the last store to Var.
+ for (VariableID Frag : VarContains[Var])
+ if (!AssignmentIsMapped(Frag, AV, M))
+ return false;
+ return true;
+}
+
+#ifndef NDEBUG
+const char *locStr(AssignmentTrackingLowering::LocKind Loc) {
+ using LocKind = AssignmentTrackingLowering::LocKind;
+ switch (Loc) {
+ case LocKind::Val:
+ return "Val";
+ case LocKind::Mem:
+ return "Mem";
+ case LocKind::None:
+ return "None";
+ };
+ llvm_unreachable("unknown LocKind");
+}
+#endif
+
+void AssignmentTrackingLowering::emitDbgValue(
+ AssignmentTrackingLowering::LocKind Kind,
+ const DbgVariableIntrinsic *Source, Instruction *After) {
+
+ DILocation *DL = Source->getDebugLoc();
+ auto Emit = [this, Source, After, DL](Value *Val, DIExpression *Expr) {
+ assert(Expr);
+ if (!Val)
+ Val = PoisonValue::get(Type::getInt1Ty(Source->getContext()));
+
+ // Find a suitable insert point.
+ Instruction *InsertBefore = After->getNextNode();
+ assert(InsertBefore && "Shouldn't be inserting after a terminator");
+
+ VariableID Var = getVariableID(DebugVariable(Source));
+ VarLocInfo VarLoc;
+ VarLoc.VariableID = static_cast<VariableID>(Var);
+ VarLoc.Expr = Expr;
+ VarLoc.V = Val;
+ VarLoc.DL = DL;
+ // Insert it into the map for later.
+ InsertBeforeMap[InsertBefore].push_back(VarLoc);
+ };
+
+ // NOTE: This block can mutate Kind.
+ if (Kind == LocKind::Mem) {
+ const auto *DAI = cast<DbgAssignIntrinsic>(Source);
+ // Check the address hasn't been dropped (e.g. the debug uses may not have
+ // been replaced before deleting a Value).
+ if (DAI->isKillAddress()) {
+ // The address isn't valid so treat this as a non-memory def.
+ Kind = LocKind::Val;
+ } else {
+ Value *Val = DAI->getAddress();
+ DIExpression *Expr = DAI->getAddressExpression();
+ assert(!Expr->getFragmentInfo() &&
+ "fragment info should be stored in value-expression only");
+ // Copy the fragment info over from the value-expression to the new
+ // DIExpression.
+ if (auto OptFragInfo = Source->getExpression()->getFragmentInfo()) {
+ auto FragInfo = *OptFragInfo;
+ Expr = *DIExpression::createFragmentExpression(
+ Expr, FragInfo.OffsetInBits, FragInfo.SizeInBits);
+ }
+ // The address-expression has an implicit deref, add it now.
+ std::tie(Val, Expr) =
+ walkToAllocaAndPrependOffsetDeref(Layout, Val, Expr);
+ Emit(Val, Expr);
+ return;
+ }
+ }
+
+ if (Kind == LocKind::Val) {
+    // Get the value component, converting to poison if it is variadic.
+ Value *Val =
+ Source->hasArgList() ? nullptr : Source->getVariableLocationOp(0);
+ Emit(Val, Source->getExpression());
+ return;
+ }
+
+ if (Kind == LocKind::None) {
+ Emit(nullptr, Source->getExpression());
+ return;
+ }
+}
+
+void AssignmentTrackingLowering::processNonDbgInstruction(
+ Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+ if (I.hasMetadata(LLVMContext::MD_DIAssignID))
+ processTaggedInstruction(I, LiveSet);
+ else
+ processUntaggedInstruction(I, LiveSet);
+}
+
+void AssignmentTrackingLowering::processUntaggedInstruction(
+ Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+ // Interpret stack stores that are not tagged as an assignment in memory for
+ // the variables associated with that address. These stores may not be tagged
+ // because a) the store cannot be represented using dbg.assigns (non-const
+ // length or offset) or b) the tag was accidentally dropped during
+ // optimisations. For these stores we fall back to assuming that the stack
+ // home is a valid location for the variables. The benefit is that this
+ // prevents us missing an assignment and therefore incorrectly maintaining
+ // earlier location definitions, and in many cases it should be a reasonable
+ // assumption. However, this will occasionally lead to slight
+ // inaccuracies. The value of a hoisted untagged store will be visible
+ // "early", for example.
+ assert(!I.hasMetadata(LLVMContext::MD_DIAssignID));
+ auto It = UntaggedStoreVars.find(&I);
+ if (It == UntaggedStoreVars.end())
+ return; // No variables associated with the store destination.
+
+ LLVM_DEBUG(dbgs() << "processUntaggedInstruction on UNTAGGED INST " << I
+ << "\n");
+ // Iterate over the variables that this store affects, add a NoneOrPhi dbg
+  // and mem def, set the LocKind to Mem, and emit a location def for each.
+ for (auto [Var, Info] : It->second) {
+ // This instruction is treated as both a debug and memory assignment,
+ // meaning the memory location should be used. We don't have an assignment
+ // ID though so use Assignment::makeNoneOrPhi() to create an imaginary one.
+ addMemDef(LiveSet, Var, Assignment::makeNoneOrPhi());
+ addDbgDef(LiveSet, Var, Assignment::makeNoneOrPhi());
+ setLocKind(LiveSet, Var, LocKind::Mem);
+ LLVM_DEBUG(dbgs() << " setting Stack LocKind to: " << locStr(LocKind::Mem)
+ << "\n");
+ // Build the dbg location def to insert.
+ //
+ // DIExpression: Add fragment and offset.
+ DebugVariable V = FnVarLocs->getVariable(Var);
+ DIExpression *DIE = DIExpression::get(I.getContext(), std::nullopt);
+ if (auto Frag = V.getFragment()) {
+ auto R = DIExpression::createFragmentExpression(DIE, Frag->OffsetInBits,
+ Frag->SizeInBits);
+ assert(R && "unexpected createFragmentExpression failure");
+ DIE = *R;
+ }
+ SmallVector<uint64_t, 3> Ops;
+ if (Info.OffsetInBits)
+ Ops = {dwarf::DW_OP_plus_uconst, Info.OffsetInBits / 8};
+ Ops.push_back(dwarf::DW_OP_deref);
+ DIE = DIExpression::prependOpcodes(DIE, Ops, /*StackValue=*/false,
+ /*EntryValue=*/false);
+ // Find a suitable insert point.
+ Instruction *InsertBefore = I.getNextNode();
+ assert(InsertBefore && "Shouldn't be inserting after a terminator");
+
+ // Get DILocation for this unrecorded assignment.
+ DILocation *InlinedAt = const_cast<DILocation *>(V.getInlinedAt());
+ const DILocation *DILoc = DILocation::get(
+ Fn.getContext(), 0, 0, V.getVariable()->getScope(), InlinedAt);
+
+ VarLocInfo VarLoc;
+ VarLoc.VariableID = static_cast<VariableID>(Var);
+ VarLoc.Expr = DIE;
+ VarLoc.V = const_cast<AllocaInst *>(Info.Base);
+ VarLoc.DL = DILoc;
+    // Insert it into the map for later.
+ InsertBeforeMap[InsertBefore].push_back(VarLoc);
+ }
+}
+
+void AssignmentTrackingLowering::processTaggedInstruction(
+ Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+ auto Linked = at::getAssignmentMarkers(&I);
+ // No dbg.assign intrinsics linked.
+  // FIXME: All vars that have a stack slot this store modifies but don't
+  // have a dbg.assign linked to it should probably be treated as if this
+  // were an untagged store.
+ if (Linked.empty())
+ return;
+
+ LLVM_DEBUG(dbgs() << "processTaggedInstruction on " << I << "\n");
+ for (DbgAssignIntrinsic *DAI : Linked) {
+ VariableID Var = getVariableID(DebugVariable(DAI));
+ // Something has gone wrong if VarsWithStackSlot doesn't contain a variable
+ // that is linked to a store.
+ assert(VarsWithStackSlot->count(getAggregate(DAI)) &&
+ "expected DAI's variable to have stack slot");
+
+ Assignment AV = Assignment::makeFromMemDef(getIDFromInst(I));
+ addMemDef(LiveSet, Var, AV);
+
+ LLVM_DEBUG(dbgs() << " linked to " << *DAI << "\n");
+ LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var))
+ << " -> ");
+
+ // The last assignment to the stack is now AV. Check if the last debug
+ // assignment has a matching Assignment.
+ if (hasVarWithAssignment(Var, AV, LiveSet->DebugValue)) {
+ // The StackHomeValue and DebugValue for this variable match so we can
+ // emit a stack home location here.
+ LLVM_DEBUG(dbgs() << "Mem, Stack matches Debug program\n";);
+ LLVM_DEBUG(dbgs() << " Stack val: "; AV.dump(dbgs()); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << " Debug val: ";
+ LiveSet->DebugValue[Var].dump(dbgs()); dbgs() << "\n");
+ setLocKind(LiveSet, Var, LocKind::Mem);
+ emitDbgValue(LocKind::Mem, DAI, &I);
+ continue;
+ }
+
+    // The StackHomeValue and DebugValue for this variable do not match, i.e.
+    // the value currently stored in the stack is not what we'd expect to
+    // see, so we cannot emit a stack home location here. Now we will look
+    // at the live LocKind for the variable and determine an appropriate
+    // dbg.value to emit.
+ LocKind PrevLoc = getLocKind(LiveSet, Var);
+ switch (PrevLoc) {
+ case LocKind::Val: {
+      // The value in memory has changed but we're not currently
+ // using the memory location. Do nothing.
+ LLVM_DEBUG(dbgs() << "Val, (unchanged)\n";);
+ setLocKind(LiveSet, Var, LocKind::Val);
+ } break;
+ case LocKind::Mem: {
+ // There's been an assignment to memory that we were using as a
+ // location for this variable, and the Assignment doesn't match what
+ // we'd expect to see in memory.
+ if (LiveSet->DebugValue[Var].Status == Assignment::NoneOrPhi) {
+ // We need to terminate any previously open location now.
+ LLVM_DEBUG(dbgs() << "None, No Debug value available\n";);
+ setLocKind(LiveSet, Var, LocKind::None);
+ emitDbgValue(LocKind::None, DAI, &I);
+ } else {
+ // The previous DebugValue Value can be used here.
+ LLVM_DEBUG(dbgs() << "Val, Debug value is Known\n";);
+ setLocKind(LiveSet, Var, LocKind::Val);
+ Assignment PrevAV = LiveSet->DebugValue.lookup(Var);
+ if (PrevAV.Source) {
+ emitDbgValue(LocKind::Val, PrevAV.Source, &I);
+ } else {
+ // PrevAV.Source is nullptr so we must emit undef here.
+ emitDbgValue(LocKind::None, DAI, &I);
+ }
+ }
+ } break;
+ case LocKind::None: {
+ // There's been an assignment to memory and we currently are
+ // not tracking a location for the variable. Do not emit anything.
+ LLVM_DEBUG(dbgs() << "None, (unchanged)\n";);
+ setLocKind(LiveSet, Var, LocKind::None);
+ } break;
+ }
+ }
+}
+
+void AssignmentTrackingLowering::processDbgAssign(DbgAssignIntrinsic &DAI,
+ BlockInfo *LiveSet) {
+ // Only bother tracking variables that are at some point stack homed. Other
+ // variables can be dealt with trivially later.
+ if (!VarsWithStackSlot->count(getAggregate(&DAI)))
+ return;
+
+ VariableID Var = getVariableID(DebugVariable(&DAI));
+ Assignment AV = Assignment::make(getIDFromMarker(DAI), &DAI);
+ addDbgDef(LiveSet, Var, AV);
+
+ LLVM_DEBUG(dbgs() << "processDbgAssign on " << DAI << "\n";);
+ LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var))
+ << " -> ");
+
+ // Check if the DebugValue and StackHomeValue both hold the same
+ // Assignment.
+ if (hasVarWithAssignment(Var, AV, LiveSet->StackHomeValue)) {
+ // They match. We can use the stack home because the debug intrinsics state
+ // that an assignment happened here, and we know that specific assignment
+ // was the last one to take place in memory for this variable.
+ LocKind Kind;
+ if (DAI.isKillAddress()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Val, Stack matches Debug program but address is killed\n";);
+ Kind = LocKind::Val;
+ } else {
+ LLVM_DEBUG(dbgs() << "Mem, Stack matches Debug program\n";);
+ Kind = LocKind::Mem;
+  }
+ setLocKind(LiveSet, Var, Kind);
+ emitDbgValue(Kind, &DAI, &DAI);
+ } else {
+ // The last assignment to the memory location isn't the one that we want to
+ // show to the user so emit a dbg.value(Value). Value may be undef.
+ LLVM_DEBUG(dbgs() << "Val, Stack contents is unknown\n";);
+ setLocKind(LiveSet, Var, LocKind::Val);
+ emitDbgValue(LocKind::Val, &DAI, &DAI);
+ }
+}
+
+void AssignmentTrackingLowering::processDbgValue(DbgValueInst &DVI,
+ BlockInfo *LiveSet) {
+  // Only bother tracking variables that are at some point stack homed.
+  // Other variables can be dealt with trivially later.
+ if (!VarsWithStackSlot->count(getAggregate(&DVI)))
+ return;
+
+ VariableID Var = getVariableID(DebugVariable(&DVI));
+ // We have no ID to create an Assignment with so we mark this assignment as
+ // NoneOrPhi. Note that the dbg.value still exists, we just cannot determine
+ // the assignment responsible for setting this value.
+  // This is fine; dbg.values are essentially interchangeable with unlinked
+ // dbg.assigns, and some passes such as mem2reg and instcombine add them to
+ // PHIs for promoted variables.
+ Assignment AV = Assignment::makeNoneOrPhi();
+ addDbgDef(LiveSet, Var, AV);
+
+ LLVM_DEBUG(dbgs() << "processDbgValue on " << DVI << "\n";);
+ LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var))
+ << " -> Val, dbg.value override");
+
+ setLocKind(LiveSet, Var, LocKind::Val);
+ emitDbgValue(LocKind::Val, &DVI, &DVI);
+}
+
+void AssignmentTrackingLowering::processDbgInstruction(
+ Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+ assert(!isa<DbgAddrIntrinsic>(&I) && "unexpected dbg.addr");
+ if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I))
+ processDbgAssign(*DAI, LiveSet);
+ else if (auto *DVI = dyn_cast<DbgValueInst>(&I))
+ processDbgValue(*DVI, LiveSet);
+}
+
+void AssignmentTrackingLowering::resetInsertionPoint(Instruction &After) {
+ assert(!After.isTerminator() && "Can't insert after a terminator");
+ auto R = InsertBeforeMap.find(After.getNextNode());
+ if (R == InsertBeforeMap.end())
+ return;
+ R->second.clear();
+}
+
+void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
+ for (auto II = BB.begin(), EI = BB.end(); II != EI;) {
+ assert(VarsTouchedThisFrame.empty());
+ // Process the instructions in "frames". A "frame" includes a single
+    // non-debug instruction followed by any debug instructions before the
+ // next non-debug instruction.
+ if (!isa<DbgInfoIntrinsic>(&*II)) {
+ if (II->isTerminator())
+ break;
+ resetInsertionPoint(*II);
+ processNonDbgInstruction(*II, LiveSet);
+ assert(LiveSet->isValid());
+ ++II;
+ }
+ while (II != EI) {
+ if (!isa<DbgInfoIntrinsic>(&*II))
+ break;
+ resetInsertionPoint(*II);
+ processDbgInstruction(*II, LiveSet);
+ assert(LiveSet->isValid());
+ ++II;
+ }
+
+ // We've processed everything in the "frame". Now determine which variables
+ // cannot be represented by a dbg.declare.
+ for (auto Var : VarsTouchedThisFrame) {
+ LocKind Loc = getLocKind(LiveSet, Var);
+ // If a variable's LocKind is anything other than LocKind::Mem then we
+ // must note that it cannot be represented with a dbg.declare.
+      // Note that this check is enough without having to check the result of
+      // joins() because for a join to produce anything other than Mem after
+      // we've already seen a Mem, we'd have to be joining None or Val with
+      // Mem. In that case we've already hit this codepath when we set the
+      // LocKind to Val or None in that block.
+ if (Loc != LocKind::Mem) {
+ DebugVariable DbgVar = FnVarLocs->getVariable(Var);
+ DebugAggregate Aggr{DbgVar.getVariable(), DbgVar.getInlinedAt()};
+ NotAlwaysStackHomed.insert(Aggr);
+ }
+ }
+ VarsTouchedThisFrame.clear();
+ }
+}
+
+AssignmentTrackingLowering::LocKind
+AssignmentTrackingLowering::joinKind(LocKind A, LocKind B) {
+ // Partial order:
+ // None > Mem, Val
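+  // e.g. joinKind(Mem, Mem) == Mem and joinKind(Val, Val) == Val, but any
+  // mismatch (including Mem vs Val) joins to None.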
+ return A == B ? A : LocKind::None;
+}
+
+AssignmentTrackingLowering::LocMap
+AssignmentTrackingLowering::joinLocMap(const LocMap &A, const LocMap &B) {
+ // Join A and B.
+ //
+ // U = join(a, b) for a in A, b in B where Var(a) == Var(b)
+ // D = join(x, ⊤) for x where Var(x) is in A xor B
+ // Join = U ∪ D
+ //
+ // This is achieved by performing a join on elements from A and B with
+ // variables common to both A and B (join elements indexed by var intersect),
+ // then adding LocKind::None elements for vars in A xor B. The latter part is
+ // equivalent to performing join on elements with variables in A xor B with
+ // LocKind::None (⊤) since join(x, ⊤) = ⊤.
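+  //
+  // Worked example (illustrative): joining
+  //   A = {x: Mem, y: Val} with B = {x: Mem, z: Mem}
+  // gives {x: Mem, y: None, z: None}; x is common to both sides so
+  // join(Mem, Mem) = Mem, while y and z each appear on only one side and so
+  // join with ⊤, which is LocKind::None here.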
+ LocMap Join;
+ SmallVector<VariableID, 16> SymmetricDifference;
+ // Insert the join of the elements with common vars into Join. Add the
+  // remaining elements into SymmetricDifference.
+ for (const auto &[Var, Loc] : A) {
+ // If this Var doesn't exist in B then add it to the symmetric difference
+ // set.
+ auto R = B.find(Var);
+ if (R == B.end()) {
+ SymmetricDifference.push_back(Var);
+ continue;
+ }
+ // There is an entry for Var in both, join it.
+ Join[Var] = joinKind(Loc, R->second);
+ }
+ unsigned IntersectSize = Join.size();
+ (void)IntersectSize;
+
+ // Add the elements in B with variables that are not in A into
+ // SymmetricDifference.
+ for (const auto &Pair : B) {
+ VariableID Var = Pair.first;
+ if (A.count(Var) == 0)
+ SymmetricDifference.push_back(Var);
+ }
+
+ // Add SymmetricDifference elements to Join and return the result.
+ for (const auto &Var : SymmetricDifference)
+ Join.insert({Var, LocKind::None});
+
+ assert(Join.size() == (IntersectSize + SymmetricDifference.size()));
+ assert(Join.size() >= A.size() && Join.size() >= B.size());
+ return Join;
+}
+
+AssignmentTrackingLowering::Assignment
+AssignmentTrackingLowering::joinAssignment(const Assignment &A,
+ const Assignment &B) {
+ // Partial order:
+ // NoneOrPhi(null, null) > Known(v, ?s)
+
+ // If either are NoneOrPhi the join is NoneOrPhi.
+ // If either value is different then the result is
+ // NoneOrPhi (joining two values is a Phi).
+ if (!A.isSameSourceAssignment(B))
+ return Assignment::makeNoneOrPhi();
+ if (A.Status == Assignment::NoneOrPhi)
+ return Assignment::makeNoneOrPhi();
+
+ // Source is used to lookup the value + expression in the debug program if
+ // the stack slot gets assigned a value earlier than expected. Because
+ // we're only tracking the one dbg.assign, we can't capture debug PHIs.
+ // It's unlikely that we're losing out on much coverage by avoiding that
+ // extra work.
+ // The Source may differ in this situation:
+ // Pred.1:
+ // dbg.assign i32 0, ..., !1, ...
+ // Pred.2:
+ // dbg.assign i32 1, ..., !1, ...
+ // Here the same assignment (!1) was performed in both preds in the source,
+  // but we can't use either one unless they are identical (e.g. we don't
+ // want to arbitrarily pick between constant values).
+ auto JoinSource = [&]() -> DbgAssignIntrinsic * {
+ if (A.Source == B.Source)
+ return A.Source;
+ if (A.Source == nullptr || B.Source == nullptr)
+ return nullptr;
+ if (A.Source->isIdenticalTo(B.Source))
+ return A.Source;
+ return nullptr;
+ };
+ DbgAssignIntrinsic *Source = JoinSource();
+ assert(A.Status == B.Status && A.Status == Assignment::Known);
+ assert(A.ID == B.ID);
+ return Assignment::make(A.ID, Source);
+}
+
+AssignmentTrackingLowering::AssignmentMap
+AssignmentTrackingLowering::joinAssignmentMap(const AssignmentMap &A,
+ const AssignmentMap &B) {
+ // Join A and B.
+ //
+ // U = join(a, b) for a in A, b in B where Var(a) == Var(b)
+ // D = join(x, ⊤) for x where Var(x) is in A xor B
+ // Join = U ∪ D
+ //
+ // This is achieved by performing a join on elements from A and B with
+ // variables common to both A and B (join elements indexed by var intersect),
+ // then adding LocKind::None elements for vars in A xor B. The latter part is
+ // equivalent to performing join on elements with variables in A xor B with
+ // Status::NoneOrPhi (⊤) since join(x, ⊤) = ⊤.
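+  //
+  // This mirrors the worked example in joinLocMap, except that here ⊤ is
+  // Assignment::makeNoneOrPhi() rather than LocKind::None.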
+ AssignmentMap Join;
+ SmallVector<VariableID, 16> SymmetricDifference;
+ // Insert the join of the elements with common vars into Join. Add the
+  // remaining elements into SymmetricDifference.
+ for (const auto &[Var, AV] : A) {
+ // If this Var doesn't exist in B then add it to the symmetric difference
+ // set.
+ auto R = B.find(Var);
+ if (R == B.end()) {
+ SymmetricDifference.push_back(Var);
+ continue;
+ }
+ // There is an entry for Var in both, join it.
+ Join[Var] = joinAssignment(AV, R->second);
+ }
+ unsigned IntersectSize = Join.size();
+ (void)IntersectSize;
+
+ // Add the elements in B with variables that are not in A into
+ // SymmetricDifference.
+ for (const auto &Pair : B) {
+ VariableID Var = Pair.first;
+ if (A.count(Var) == 0)
+ SymmetricDifference.push_back(Var);
+ }
+
+ // Add SymmetricDifference elements to Join and return the result.
+ for (auto Var : SymmetricDifference)
+ Join.insert({Var, Assignment::makeNoneOrPhi()});
+
+ assert(Join.size() == (IntersectSize + SymmetricDifference.size()));
+ assert(Join.size() >= A.size() && Join.size() >= B.size());
+ return Join;
+}
+
+AssignmentTrackingLowering::BlockInfo
+AssignmentTrackingLowering::joinBlockInfo(const BlockInfo &A,
+ const BlockInfo &B) {
+ BlockInfo Join;
+ Join.LiveLoc = joinLocMap(A.LiveLoc, B.LiveLoc);
+ Join.StackHomeValue = joinAssignmentMap(A.StackHomeValue, B.StackHomeValue);
+ Join.DebugValue = joinAssignmentMap(A.DebugValue, B.DebugValue);
+ assert(Join.isValid());
+ return Join;
+}
+
+bool AssignmentTrackingLowering::join(
+ const BasicBlock &BB, const SmallPtrSet<BasicBlock *, 16> &Visited) {
+ BlockInfo BBLiveIn;
+ bool FirstJoin = true;
+  // The LiveIn locs for BB are the join of the already-processed preds'
+  // LiveOut locs.
+ for (auto I = pred_begin(&BB), E = pred_end(&BB); I != E; I++) {
+ // Ignore backedges if we have not visited the predecessor yet. As the
+ // predecessor hasn't yet had locations propagated into it, most locations
+ // will not yet be valid, so treat them as all being uninitialized and
+ // potentially valid. If a location guessed to be correct here is
+ // invalidated later, we will remove it when we revisit this block. This
+ // is essentially the same as initialising all LocKinds and Assignments to
+ // an implicit ⊥ value which is the identity value for the join operation.
+ const BasicBlock *Pred = *I;
+ if (!Visited.count(Pred))
+ continue;
+
+ auto PredLiveOut = LiveOut.find(Pred);
+ // Pred must have been processed already. See comment at start of this loop.
+ assert(PredLiveOut != LiveOut.end());
+
+    // Perform the join of BBLiveIn (current live-in info) and PredLiveOut.
+ if (FirstJoin)
+ BBLiveIn = PredLiveOut->second;
+ else
+ BBLiveIn = joinBlockInfo(std::move(BBLiveIn), PredLiveOut->second);
+ FirstJoin = false;
+ }
+
+ auto CurrentLiveInEntry = LiveIn.find(&BB);
+ // Check if there isn't an entry, or there is but the LiveIn set has changed
+ // (expensive check).
+ if (CurrentLiveInEntry == LiveIn.end() ||
+ BBLiveIn != CurrentLiveInEntry->second) {
+ LiveIn[&BB] = std::move(BBLiveIn);
+    // A change has occurred.
+ return true;
+ }
+ // No change.
+ return false;
+}
+
+/// Return true if A fully contains B.
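+/// For example, fragment A = bits [0, 32) fully contains B = bits [8, 24),
+/// but not B = bits [16, 48) (partial overlap) or B = bits [32, 40).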
+static bool fullyContains(DIExpression::FragmentInfo A,
+ DIExpression::FragmentInfo B) {
+ auto ALeft = A.OffsetInBits;
+ auto BLeft = B.OffsetInBits;
+ if (BLeft < ALeft)
+ return false;
+
+ auto ARight = ALeft + A.SizeInBits;
+ auto BRight = BLeft + B.SizeInBits;
+ if (BRight > ARight)
+ return false;
+ return true;
+}
+
+static std::optional<at::AssignmentInfo>
+getUntaggedStoreAssignmentInfo(const Instruction &I, const DataLayout &Layout) {
+ // Don't bother checking if this is an AllocaInst. We know this
+ // instruction has no tag which means there are no variables associated
+ // with it.
+ if (const auto *SI = dyn_cast<StoreInst>(&I))
+ return at::getAssignmentInfo(Layout, SI);
+ if (const auto *MI = dyn_cast<MemIntrinsic>(&I))
+ return at::getAssignmentInfo(Layout, MI);
+ // Alloca or non-store-like inst.
+ return std::nullopt;
+}
+
+/// Build a map of {Variable x: Variables y} where all variable fragments
+/// contained within the variable fragment x are in set y. This means that
+/// y does not contain all overlaps because partial overlaps are excluded.
+///
+/// While we're iterating over the function, add single location defs for
+/// dbg.declares to \p FnVarLocs
+///
+/// Finally, populate UntaggedStoreVars with a mapping of untagged stores to
+/// the stored-to variable fragments.
+///
+/// These tasks are bundled together to reduce the number of times we need
+/// to iterate over the function as they can be achieved together in one pass.
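+///
+/// For example (illustrative), given fragments x[0, 64), x[0, 32), and
+/// x[16, 48), the resulting map is {x[0, 64): [x[0, 32), x[16, 48)]}; the
+/// partial overlap between x[0, 32) and x[16, 48) is not recorded.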
+static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
+ Function &Fn, FunctionVarLocsBuilder *FnVarLocs,
+ AssignmentTrackingLowering::UntaggedStoreAssignmentMap &UntaggedStoreVars) {
+ DenseSet<DebugVariable> Seen;
+ // Map of Variable: [Fragments].
+ DenseMap<DebugAggregate, SmallVector<DebugVariable, 8>> FragmentMap;
+ // Iterate over all instructions:
+ // - dbg.declare -> add single location variable record
+ // - dbg.* -> Add fragments to FragmentMap
+ // - untagged store -> Add fragments to FragmentMap and update
+ // UntaggedStoreVars.
+ // We need to add fragments for untagged stores too so that we can correctly
+ // clobber overlapped fragment locations later.
+ for (auto &BB : Fn) {
+ for (auto &I : BB) {
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(&I)) {
+ FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(),
+ DDI->getDebugLoc(), DDI->getAddress());
+ } else if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ DebugVariable DV = DebugVariable(DII);
+ DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()};
+ if (Seen.insert(DV).second)
+ FragmentMap[DA].push_back(DV);
+ } else if (auto Info = getUntaggedStoreAssignmentInfo(
+ I, Fn.getParent()->getDataLayout())) {
+ // Find markers linked to this alloca.
+ for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(Info->Base)) {
+ // Discard the fragment if it covers the entire variable.
+ std::optional<DIExpression::FragmentInfo> FragInfo =
+ [&Info, DAI]() -> std::optional<DIExpression::FragmentInfo> {
+ DIExpression::FragmentInfo F;
+ F.OffsetInBits = Info->OffsetInBits;
+ F.SizeInBits = Info->SizeInBits;
+ if (auto ExistingFrag = DAI->getExpression()->getFragmentInfo())
+ F.OffsetInBits += ExistingFrag->OffsetInBits;
+ if (auto Sz = DAI->getVariable()->getSizeInBits()) {
+ if (F.OffsetInBits == 0 && F.SizeInBits == *Sz)
+ return std::nullopt;
+ }
+ return F;
+ }();
+
+ DebugVariable DV = DebugVariable(DAI->getVariable(), FragInfo,
+ DAI->getDebugLoc().getInlinedAt());
+ DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()};
+
+ // Cache this info for later.
+ UntaggedStoreVars[&I].push_back(
+ {FnVarLocs->insertVariable(DV), *Info});
+
+ if (Seen.insert(DV).second)
+ FragmentMap[DA].push_back(DV);
+ }
+ }
+ }
+ }
+
+ // Sort the fragment map for each DebugAggregate in non-descending
+ // order of fragment size. Assert no entries are duplicates.
+ for (auto &Pair : FragmentMap) {
+ SmallVector<DebugVariable, 8> &Frags = Pair.second;
+ std::sort(
+ Frags.begin(), Frags.end(), [](DebugVariable Next, DebugVariable Elmt) {
+ assert(!(Elmt.getFragmentOrDefault() == Next.getFragmentOrDefault()));
+ return Elmt.getFragmentOrDefault().SizeInBits >
+ Next.getFragmentOrDefault().SizeInBits;
+ });
+ }
+
+ // Build the map.
+ AssignmentTrackingLowering::OverlapMap Map;
+ for (auto Pair : FragmentMap) {
+ auto &Frags = Pair.second;
+ for (auto It = Frags.begin(), IEnd = Frags.end(); It != IEnd; ++It) {
+ DIExpression::FragmentInfo Frag = It->getFragmentOrDefault();
+ // Find the frags that this is contained within.
+ //
+ // Because Frags is sorted by size and none have the same offset and
+ // size, we know that this frag can only be contained by subsequent
+ // elements.
+ SmallVector<DebugVariable, 8>::iterator OtherIt = It;
+ ++OtherIt;
+ VariableID ThisVar = FnVarLocs->insertVariable(*It);
+ for (; OtherIt != IEnd; ++OtherIt) {
+ DIExpression::FragmentInfo OtherFrag = OtherIt->getFragmentOrDefault();
+ VariableID OtherVar = FnVarLocs->insertVariable(*OtherIt);
+ if (fullyContains(OtherFrag, Frag))
+ Map[OtherVar].push_back(ThisVar);
+ }
+ }
+ }
+
+ return Map;
+}
+
+bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) {
+ if (Fn.size() > MaxNumBlocks) {
+ LLVM_DEBUG(dbgs() << "[AT] Dropping var locs in: " << Fn.getName()
+ << ": too many blocks (" << Fn.size() << ")\n");
+ at::deleteAll(&Fn);
+ return false;
+ }
+
+ FnVarLocs = FnVarLocsBuilder;
+
+ // The general structure here is inspired by VarLocBasedImpl.cpp
+ // (LiveDebugValues).
+
+ // Build the variable fragment overlap map.
+ // Note that this pass doesn't handle partial overlaps correctly (FWIW
+ // neither does LiveDebugVariables) because that is difficult to do and
+  // appears to be a rare occurrence.
+ VarContains =
+ buildOverlapMapAndRecordDeclares(Fn, FnVarLocs, UntaggedStoreVars);
+
+ // Prepare for traversal.
+ ReversePostOrderTraversal<Function *> RPOT(&Fn);
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Pending;
+ DenseMap<unsigned int, BasicBlock *> OrderToBB;
+ DenseMap<BasicBlock *, unsigned int> BBToOrder;
+ { // Init OrderToBB and BBToOrder.
+ unsigned int RPONumber = 0;
+ for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
+ OrderToBB[RPONumber] = *RI;
+ BBToOrder[*RI] = RPONumber;
+ Worklist.push(RPONumber);
+ ++RPONumber;
+ }
+ LiveIn.init(RPONumber);
+ LiveOut.init(RPONumber);
+ }
+
+ // Perform the traversal.
+ //
+ // This is a standard "union of predecessor outs" dataflow problem. To solve
+ // it, we perform join() and process() using the two worklist method until
+ // the LiveIn data for each block becomes unchanging. The "proof" that this
+ // terminates can be put together by looking at the comments around LocKind,
+ // Assignment, and the various join methods, which show that all the elements
+ // involved are made up of join-semilattices; LiveIn(n) can only
+ // monotonically increase in value throughout the dataflow.
+ //
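+  // Sketch of the scheme (illustrative):
+  //   while Worklist is not empty:
+  //     pop blocks in RPO order; join the visited preds' LiveOut into
+  //     LiveIn[BB]; if LiveIn changed, process(BB) and, if LiveOut also
+  //     changed, push BB's successors onto Pending
+  //   swap(Worklist, Pending) and repeat
+  //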
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ while (!Worklist.empty()) {
+ // We track what is on the pending worklist to avoid inserting the same
+ // thing twice.
+ SmallPtrSet<BasicBlock *, 16> OnPending;
+ LLVM_DEBUG(dbgs() << "Processing Worklist\n");
+ while (!Worklist.empty()) {
+ BasicBlock *BB = OrderToBB[Worklist.top()];
+ LLVM_DEBUG(dbgs() << "\nPop BB " << BB->getName() << "\n");
+ Worklist.pop();
+ bool InChanged = join(*BB, Visited);
+ // Always consider LiveIn changed on the first visit.
+ InChanged |= Visited.insert(BB).second;
+ if (InChanged) {
+ LLVM_DEBUG(dbgs() << BB->getName() << " has new InLocs, process it\n");
+ // Mutate a copy of LiveIn while processing BB. After calling process
+ // LiveSet is the LiveOut set for BB.
+ BlockInfo LiveSet = LiveIn[BB];
+
+ // Process the instructions in the block.
+ process(*BB, &LiveSet);
+
+ // Relatively expensive check: has anything changed in LiveOut for BB?
+ if (LiveOut[BB] != LiveSet) {
+ LLVM_DEBUG(dbgs() << BB->getName()
+ << " has new OutLocs, add succs to worklist: [ ");
+ LiveOut[BB] = std::move(LiveSet);
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; I++) {
+ if (OnPending.insert(*I).second) {
+ LLVM_DEBUG(dbgs() << I->getName() << " ");
+ Pending.push(BBToOrder[*I]);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "]\n");
+ }
+ }
+ }
+ Worklist.swap(Pending);
+    // At this point Pending must be empty, since it was just swapped with
+    // the drained Worklist.
+ assert(Pending.empty() && "Pending should be empty");
+ }
+
+ // That's the hard part over. Now we just have some admin to do.
+
+ // Record whether we inserted any intrinsics.
+ bool InsertedAnyIntrinsics = false;
+
+ // Identify and add defs for single location variables.
+ //
+ // Go through all of the defs that we plan to add. If the aggregate variable
+ // it's a part of is not in the NotAlwaysStackHomed set we can emit a single
+ // location def and omit the rest. Add an entry to AlwaysStackHomed so that
+  // we can identify those unneeded defs later.
+ DenseSet<DebugAggregate> AlwaysStackHomed;
+ for (const auto &Pair : InsertBeforeMap) {
+ const auto &Vec = Pair.second;
+ for (VarLocInfo VarLoc : Vec) {
+ DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID);
+ DebugAggregate Aggr{Var.getVariable(), Var.getInlinedAt()};
+
+ // Skip this Var if it's not always stack homed.
+ if (NotAlwaysStackHomed.contains(Aggr))
+ continue;
+
+ // Skip complex cases such as when different fragments of a variable have
+ // been split into different allocas. Skipping in this case means falling
+ // back to using a list of defs (which could reduce coverage, but is no
+ // less correct).
+ bool Simple =
+ VarLoc.Expr->getNumElements() == 1 && VarLoc.Expr->startsWithDeref();
+ if (!Simple) {
+ NotAlwaysStackHomed.insert(Aggr);
+ continue;
+ }
+
+ // All source assignments to this variable remain and all stores to any
+ // part of the variable store to the same address (with varying
+ // offsets). We can just emit a single location for the whole variable.
+ //
+ // Unless we've already done so, create the single location def now.
+ if (AlwaysStackHomed.insert(Aggr).second) {
+ assert(isa<AllocaInst>(VarLoc.V));
+ // TODO: When more complex cases are handled VarLoc.Expr should be
+ // built appropriately rather than always using an empty DIExpression.
+ // The assert below is a reminder.
+ assert(Simple);
+ VarLoc.Expr = DIExpression::get(Fn.getContext(), std::nullopt);
+ DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID);
+ FnVarLocs->addSingleLocVar(Var, VarLoc.Expr, VarLoc.DL, VarLoc.V);
+ InsertedAnyIntrinsics = true;
+ }
+ }
+ }
+
+ // Insert the other DEFs.
+ for (const auto &[InsertBefore, Vec] : InsertBeforeMap) {
+ SmallVector<VarLocInfo> NewDefs;
+ for (const VarLocInfo &VarLoc : Vec) {
+ DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID);
+ DebugAggregate Aggr{Var.getVariable(), Var.getInlinedAt()};
+      // If this variable is always stack homed then we have already emitted a
+      // single location def for it and can drop this one.
+ if (AlwaysStackHomed.contains(Aggr))
+ continue;
+ NewDefs.push_back(VarLoc);
+ InsertedAnyIntrinsics = true;
+ }
+
+ FnVarLocs->setWedge(InsertBefore, std::move(NewDefs));
+ }
+
+ InsertedAnyIntrinsics |= emitPromotedVarLocs(FnVarLocs);
+
+ return InsertedAnyIntrinsics;
+}
+
+bool AssignmentTrackingLowering::emitPromotedVarLocs(
+ FunctionVarLocsBuilder *FnVarLocs) {
+ bool InsertedAnyIntrinsics = false;
+ // Go through every block, translating debug intrinsics for fully promoted
+ // variables into FnVarLocs location defs. No analysis required for these.
+ for (auto &BB : Fn) {
+ for (auto &I : BB) {
+ // Skip instructions other than dbg.values and dbg.assigns.
+ auto *DVI = dyn_cast<DbgValueInst>(&I);
+ if (!DVI)
+ continue;
+ // Skip variables that haven't been promoted - we've dealt with those
+ // already.
+ if (VarsWithStackSlot->contains(getAggregate(DVI)))
+ continue;
+      // Wrapper to get a single value (or poison) from DVI.
+ auto GetValue = [DVI]() -> Value * {
+ // We can't handle variadic DIExpressions yet so treat those as
+ // kill locations.
+ if (DVI->isKillLocation() || DVI->getValue() == nullptr ||
+ DVI->hasArgList())
+ return PoisonValue::get(Type::getInt32Ty(DVI->getContext()));
+ return DVI->getValue();
+ };
+ Instruction *InsertBefore = I.getNextNode();
+ assert(InsertBefore && "Unexpected: debug intrinsics after a terminator");
+ FnVarLocs->addVarLoc(InsertBefore, DebugVariable(DVI),
+ DVI->getExpression(), DVI->getDebugLoc(),
+ GetValue());
+ InsertedAnyIntrinsics = true;
+ }
+ }
+ return InsertedAnyIntrinsics;
+}
+
+/// Remove redundant definitions within sequences of consecutive location defs.
+/// This is done using a backward scan to keep the last def describing a
+/// specific variable/fragment.
+///
+/// This implements removeRedundantDbgInstrsUsingBackwardScan from
+/// lib/Transforms/Utils/BasicBlockUtils.cpp for locations described with
+/// FunctionVarLocsBuilder instead of with intrinsics.
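+///
+/// For example (illustrative), if a wedge contains two defs of the same
+/// fragment of x, only the later one survives the backward scan, while a def
+/// of a different (even overlapping) fragment is kept.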
+static bool
+removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
+ FunctionVarLocsBuilder &FnVarLocs) {
+ bool Changed = false;
+ SmallDenseSet<DebugVariable> VariableSet;
+
+ // Scan over the entire block, not just over the instructions mapped by
+  // FnVarLocs, because wedges in FnVarLocs may only be separated by debug
+ // instructions.
+ for (const Instruction &I : reverse(*BB)) {
+ if (!isa<DbgVariableIntrinsic>(I)) {
+ // Sequence of consecutive defs ended. Clear map for the next one.
+ VariableSet.clear();
+ }
+
+ // Get the location defs that start just before this instruction.
+ const auto *Locs = FnVarLocs.getWedge(&I);
+ if (!Locs)
+ continue;
+
+ NumWedgesScanned++;
+ bool ChangedThisWedge = false;
+ // The new pruned set of defs, reversed because we're scanning backwards.
+ SmallVector<VarLocInfo> NewDefsReversed;
+
+ // Iterate over the existing defs in reverse.
+ for (auto RIt = Locs->rbegin(), REnd = Locs->rend(); RIt != REnd; ++RIt) {
+ NumDefsScanned++;
+ const DebugVariable &Key = FnVarLocs.getVariable(RIt->VariableID);
+ bool FirstDefOfFragment = VariableSet.insert(Key).second;
+
+ // If the same variable fragment is described more than once it is enough
+ // to keep the last one (i.e. the first found in this reverse iteration).
+ if (FirstDefOfFragment) {
+ // New def found: keep it.
+ NewDefsReversed.push_back(*RIt);
+ } else {
+ // Redundant def found: throw it away. Since the wedge of defs is being
+ // rebuilt, doing nothing is the same as deleting an entry.
+ ChangedThisWedge = true;
+ NumDefsRemoved++;
+ }
+ }
+
+ // Un-reverse the defs and replace the wedge with the pruned version.
+ if (ChangedThisWedge) {
+ std::reverse(NewDefsReversed.begin(), NewDefsReversed.end());
+ FnVarLocs.setWedge(&I, std::move(NewDefsReversed));
+ NumWedgesChanged++;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// Remove redundant location defs using a forward scan. This can remove a
+/// location definition that is redundant due to indicating that a variable has
+/// the same value as is already being indicated by an earlier def.
+///
+/// This implements removeRedundantDbgInstrsUsingForwardScan from
+/// lib/Transforms/Utils/BasicBlockUtils.cpp for locations described with
+/// FunctionVarLocsBuilder instead of with intrinsics.
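+///
+/// For example (illustrative), two successive defs of x with the same value
+/// and DIExpression collapse to one, whereas a def that changes either the
+/// value or the expression is kept.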
+static bool
+removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB,
+ FunctionVarLocsBuilder &FnVarLocs) {
+ bool Changed = false;
+ DenseMap<DebugVariable, std::pair<Value *, DIExpression *>> VariableMap;
+
+ // Scan over the entire block, not just over the instructions mapped by
+  // FnVarLocs, because wedges in FnVarLocs may only be separated by debug
+ // instructions.
+ for (const Instruction &I : *BB) {
+ // Get the defs that come just before this instruction.
+ const auto *Locs = FnVarLocs.getWedge(&I);
+ if (!Locs)
+ continue;
+
+ NumWedgesScanned++;
+ bool ChangedThisWedge = false;
+ // The new pruned set of defs.
+ SmallVector<VarLocInfo> NewDefs;
+
+ // Iterate over the existing defs.
+ for (const VarLocInfo &Loc : *Locs) {
+ NumDefsScanned++;
+ DebugVariable Key(FnVarLocs.getVariable(Loc.VariableID).getVariable(),
+ std::nullopt, Loc.DL.getInlinedAt());
+ auto VMI = VariableMap.find(Key);
+
+ // Update the map if we found a new value/expression describing the
+ // variable, or if the variable wasn't mapped already.
+ if (VMI == VariableMap.end() || VMI->second.first != Loc.V ||
+ VMI->second.second != Loc.Expr) {
+ VariableMap[Key] = {Loc.V, Loc.Expr};
+ NewDefs.push_back(Loc);
+ continue;
+ }
+
+ // Did not insert this Loc, which is the same as removing it.
+ ChangedThisWedge = true;
+ NumDefsRemoved++;
+ }
+
+ // Replace the existing wedge with the pruned version.
+ if (ChangedThisWedge) {
+ FnVarLocs.setWedge(&I, std::move(NewDefs));
+ NumWedgesChanged++;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+static bool
+removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB,
+ FunctionVarLocsBuilder &FnVarLocs) {
+ assert(BB->isEntryBlock());
+ // Do extra work to ensure that we remove semantically unimportant undefs.
+ //
+ // This is to work around the fact that SelectionDAG will hoist dbg.values
+ // using argument values to the top of the entry block. That can move arg
+ // dbg.values before undef and constant dbg.values which they previously
+ // followed. The easiest thing to do is to just try to feed SelectionDAG
+  // input that it's happy with.
+ //
+  // Map of {Variable x: Fragments y} where the fragments y of variable x
+  // have at least one non-undef location defined already. Don't use directly,
+ // instead call DefineBits and HasDefinedBits.
+ SmallDenseMap<DebugAggregate, SmallDenseSet<DIExpression::FragmentInfo>>
+ VarsWithDef;
+ // Specify that V (a fragment of A) has a non-undef location.
+ auto DefineBits = [&VarsWithDef](DebugAggregate A, DebugVariable V) {
+ VarsWithDef[A].insert(V.getFragmentOrDefault());
+ };
+ // Return true if a non-undef location has been defined for V (a fragment of
+ // A). Doesn't imply that the location is currently non-undef, just that a
+ // non-undef location has been seen previously.
+ auto HasDefinedBits = [&VarsWithDef](DebugAggregate A, DebugVariable V) {
+ auto FragsIt = VarsWithDef.find(A);
+ if (FragsIt == VarsWithDef.end())
+ return false;
+ return llvm::any_of(FragsIt->second, [V](auto Frag) {
+ return DIExpression::fragmentsOverlap(Frag, V.getFragmentOrDefault());
+ });
+ };
+
+ bool Changed = false;
+ DenseMap<DebugVariable, std::pair<Value *, DIExpression *>> VariableMap;
+
+ // Scan over the entire block, not just over the instructions mapped by
+  // FnVarLocs, because wedges in FnVarLocs may only be separated by debug
+ // instructions.
+ for (const Instruction &I : *BB) {
+ // Get the defs that come just before this instruction.
+ const auto *Locs = FnVarLocs.getWedge(&I);
+ if (!Locs)
+ continue;
+
+ NumWedgesScanned++;
+ bool ChangedThisWedge = false;
+ // The new pruned set of defs.
+ SmallVector<VarLocInfo> NewDefs;
+
+ // Iterate over the existing defs.
+ for (const VarLocInfo &Loc : *Locs) {
+ NumDefsScanned++;
+ DebugAggregate Aggr{FnVarLocs.getVariable(Loc.VariableID).getVariable(),
+ Loc.DL.getInlinedAt()};
+ DebugVariable Var = FnVarLocs.getVariable(Loc.VariableID);
+
+ // Remove undef entries that are encountered before any non-undef
+ // intrinsics from the entry block.
+ if (isa<UndefValue>(Loc.V) && !HasDefinedBits(Aggr, Var)) {
+ // Did not insert this Loc, which is the same as removing it.
+ NumDefsRemoved++;
+ ChangedThisWedge = true;
+ continue;
+ }
+
+ DefineBits(Aggr, Var);
+ NewDefs.push_back(Loc);
+ }
+
+ // Replace the existing wedge with the pruned version.
+ if (ChangedThisWedge) {
+ FnVarLocs.setWedge(&I, std::move(NewDefs));
+ NumWedgesChanged++;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+static bool removeRedundantDbgLocs(const BasicBlock *BB,
+ FunctionVarLocsBuilder &FnVarLocs) {
+ bool MadeChanges = false;
+ MadeChanges |= removeRedundantDbgLocsUsingBackwardScan(BB, FnVarLocs);
+ if (BB->isEntryBlock())
+ MadeChanges |= removeUndefDbgLocsFromEntryBlock(BB, FnVarLocs);
+ MadeChanges |= removeRedundantDbgLocsUsingForwardScan(BB, FnVarLocs);
+
+ if (MadeChanges)
+ LLVM_DEBUG(dbgs() << "Removed redundant dbg locs from: " << BB->getName()
+ << "\n");
+ return MadeChanges;
+}
+
+static DenseSet<DebugAggregate> findVarsWithStackSlot(Function &Fn) {
+ DenseSet<DebugAggregate> Result;
+ for (auto &BB : Fn) {
+ for (auto &I : BB) {
+ // Any variable linked to an instruction is considered
+      // interesting. Ideally we would only need to check allocas; however, a
+      // DIAssignID might get dropped from an alloca but not its stores. In
+      // that
+ // case, we need to consider the variable interesting for NFC behaviour
+ // with this change. TODO: Consider only looking at allocas.
+ for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(&I)) {
+ Result.insert({DAI->getVariable(), DAI->getDebugLoc().getInlinedAt()});
+ }
+ }
+ }
+ return Result;
+}
+
+static void analyzeFunction(Function &Fn, const DataLayout &Layout,
+ FunctionVarLocsBuilder *FnVarLocs) {
+ // The analysis will generate location definitions for all variables, but we
+ // only need to perform a dataflow on the set of variables which have a stack
+ // slot. Find those now.
+ DenseSet<DebugAggregate> VarsWithStackSlot = findVarsWithStackSlot(Fn);
+
+ bool Changed = false;
+
+ // Use a scope block to clean up AssignmentTrackingLowering before running
+ // MemLocFragmentFill to reduce peak memory consumption.
+ {
+ AssignmentTrackingLowering Pass(Fn, Layout, &VarsWithStackSlot);
+ Changed = Pass.run(FnVarLocs);
+ }
+
+ if (Changed) {
+ MemLocFragmentFill Pass(Fn, &VarsWithStackSlot);
+ Pass.run(FnVarLocs);
+
+ // Remove redundant entries. As well as reducing memory consumption and
+    // avoiding wasted cycles later by burning some now, this has another
+ // important job. That is to work around some SelectionDAG quirks. See
+ // removeRedundantDbgLocsUsingForwardScan comments for more info on that.
+ for (auto &BB : Fn)
+ removeRedundantDbgLocs(&BB, *FnVarLocs);
+ }
+}
+
+bool AssignmentTrackingAnalysis::runOnFunction(Function &F) {
+ if (!isAssignmentTrackingEnabled(*F.getParent()))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "AssignmentTrackingAnalysis run on " << F.getName()
+ << "\n");
+ auto DL = std::make_unique<DataLayout>(F.getParent());
+
+ // Clear previous results.
+ Results->clear();
+
+ FunctionVarLocsBuilder Builder;
+ analyzeFunction(F, *DL.get(), &Builder);
+
+ // Save these results.
+ Results->init(Builder);
+
+ if (PrintResults && isFunctionInPrintList(F.getName()))
+ Results->print(errs(), F);
+
+ // Return false because this pass does not modify the function.
+ return false;
+}
+
+AssignmentTrackingAnalysis::AssignmentTrackingAnalysis()
+ : FunctionPass(ID), Results(std::make_unique<FunctionVarLocs>()) {}
+
+char AssignmentTrackingAnalysis::ID = 0;
+
+INITIALIZE_PASS(AssignmentTrackingAnalysis, DEBUG_TYPE,
+ "Assignment Tracking Analysis", false, true)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
index ad51bab8f30b..8f71ec2b490c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -60,6 +61,7 @@ namespace {
class AtomicExpand : public FunctionPass {
const TargetLowering *TLI = nullptr;
+ const DataLayout *DL = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
@@ -83,13 +85,13 @@ private:
bool tryExpandAtomicRMW(AtomicRMWInst *AI);
AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
Value *
- insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+ insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
Align AddrAlign, AtomicOrdering MemOpOrder,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
- void
- expandAtomicOpToLLSC(Instruction *I, Type *ResultTy, Value *Addr,
- Align AddrAlign, AtomicOrdering MemOpOrder,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
+ void expandAtomicOpToLLSC(
+ Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
void expandPartwordAtomicRMW(
AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
@@ -98,12 +100,11 @@ private:
void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
- static Value *
- insertRMWCmpXchgLoop(IRBuilder<> &Builder, Type *ResultType, Value *Addr,
- Align AddrAlign, AtomicOrdering MemOpOrder,
- SyncScope::ID SSID,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
- CreateCmpXchgInstFun CreateCmpXchg);
+ static Value *insertRMWCmpXchgLoop(
+ IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
+ AtomicOrdering MemOpOrder, SyncScope::ID SSID,
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
+ CreateCmpXchgInstFun CreateCmpXchg);
bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
@@ -125,6 +126,16 @@ private:
CreateCmpXchgInstFun CreateCmpXchg);
};
+// IRBuilder to be used for replacement atomic instructions.
+struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
+  // Preserves the DebugLoc from I, and copies over metadata that is still
+  // valid.
+ explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
+ : IRBuilder(I->getContext(), DL) {
+ SetInsertPoint(I);
+ this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
+ }
+};
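+
+// Illustrative use (not from this patch): `ReplacementIRBuilder Builder(I,
+// DL);` positions the builder at I, inherits I's debug location via
+// SetInsertPoint, and copies !pcsections metadata onto each instruction the
+// builder creates.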
+
} // end anonymous namespace
char AtomicExpand::ID = 0;
@@ -174,9 +185,11 @@ bool AtomicExpand::runOnFunction(Function &F) {
return false;
auto &TM = TPC->getTM<TargetMachine>();
- if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
+ const auto *Subtarget = TM.getSubtargetImpl(F);
+ if (!Subtarget->enableAtomicExpand())
return false;
- TLI = TM.getSubtargetImpl(F)->getTargetLowering();
+ TLI = Subtarget->getTargetLowering();
+ DL = &F.getParent()->getDataLayout();
SmallVector<Instruction *, 1> AtomicInsts;
@@ -221,6 +234,31 @@ bool AtomicExpand::runOnFunction(Function &F) {
}
}
+ if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+ I = LI = convertAtomicLoadToIntegerType(LI);
+ MadeChange = true;
+ } else if (SI &&
+ TLI->shouldCastAtomicStoreInIR(SI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+ I = SI = convertAtomicStoreToIntegerType(SI);
+ MadeChange = true;
+ } else if (RMWI &&
+ TLI->shouldCastAtomicRMWIInIR(RMWI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+ I = RMWI = convertAtomicXchgToIntegerType(RMWI);
+ MadeChange = true;
+ } else if (CASI) {
+ // TODO: when we're ready to make the change at the IR level, we can
+ // extend convertCmpXchgToInteger for floating point too.
+ if (CASI->getCompareOperand()->getType()->isPointerTy()) {
+ // TODO: add a TLI hook to control this so that each target can
+ // convert to lowering the original type one at a time.
+ I = CASI = convertCmpXchgToIntegerType(CASI);
+ MadeChange = true;
+ }
+ }
+
if (TLI->shouldInsertFencesForAtomic(I)) {
auto FenceOrdering = AtomicOrdering::Monotonic;
if (LI && isAcquireOrStronger(LI->getOrdering())) {
@@ -251,33 +289,31 @@ bool AtomicExpand::runOnFunction(Function &F) {
if (FenceOrdering != AtomicOrdering::Monotonic) {
MadeChange |= bracketInstWithFences(I, FenceOrdering);
}
- }
-
- if (LI) {
- if (TLI->shouldCastAtomicLoadInIR(LI) ==
- TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
- // TODO: add a TLI hook to control this so that each target can
- // convert to lowering the original type one at a time.
- LI = convertAtomicLoadToIntegerType(LI);
- assert(LI->getType()->isIntegerTy() && "invariant broken");
+ } else if (I->hasAtomicStore() &&
+ TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
+ auto FenceOrdering = AtomicOrdering::Monotonic;
+ if (SI)
+ FenceOrdering = SI->getOrdering();
+ else if (RMWI)
+ FenceOrdering = RMWI->getOrdering();
+ else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
+ TargetLoweringBase::AtomicExpansionKind::LLSC)
+ // LLSC is handled in expandAtomicCmpXchg().
+ FenceOrdering = CASI->getSuccessOrdering();
+
+ IRBuilder Builder(I);
+ if (auto TrailingFence =
+ TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
+ TrailingFence->moveAfter(I);
MadeChange = true;
}
+ }
+ if (LI)
MadeChange |= tryExpandAtomicLoad(LI);
- } else if (SI) {
- if (TLI->shouldCastAtomicStoreInIR(SI) ==
- TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
- // TODO: add a TLI hook to control this so that each target can
- // convert to lowering the original type one at a time.
- SI = convertAtomicStoreToIntegerType(SI);
- assert(SI->getValueOperand()->getType()->isIntegerTy() &&
- "invariant broken");
- MadeChange = true;
- }
-
- if (tryExpandAtomicStore(SI))
- MadeChange = true;
- } else if (RMWI) {
+ else if (SI)
+ MadeChange |= tryExpandAtomicStore(SI);
+ else if (RMWI) {
// There are two different ways of expanding RMW instructions:
// - into a load if it is idempotent
// - into a Cmpxchg/LL-SC loop otherwise
@@ -287,15 +323,6 @@ bool AtomicExpand::runOnFunction(Function &F) {
MadeChange = true;
} else {
AtomicRMWInst::BinOp Op = RMWI->getOperation();
- if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
- TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
- // TODO: add a TLI hook to control this so that each target can
- // convert to lowering the original type one at a time.
- RMWI = convertAtomicXchgToIntegerType(RMWI);
- assert(RMWI->getValOperand()->getType()->isIntegerTy() &&
- "invariant broken");
- MadeChange = true;
- }
unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
unsigned ValueSize = getAtomicOpSize(RMWI);
if (ValueSize < MinCASSize &&
@@ -307,28 +334,14 @@ bool AtomicExpand::runOnFunction(Function &F) {
MadeChange |= tryExpandAtomicRMW(RMWI);
}
- } else if (CASI) {
- // TODO: when we're ready to make the change at the IR level, we can
- // extend convertCmpXchgToInteger for floating point too.
- assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
- "unimplemented - floating point not legal at IR level");
- if (CASI->getCompareOperand()->getType()->isPointerTy()) {
- // TODO: add a TLI hook to control this so that each target can
- // convert to lowering the original type one at a time.
- CASI = convertCmpXchgToIntegerType(CASI);
- assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
- "invariant broken");
- MadeChange = true;
- }
-
+ } else if (CASI)
MadeChange |= tryExpandAtomicCmpXchg(CASI);
- }
}
return MadeChange;
}
bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
- IRBuilder<> Builder(I);
+ ReplacementIRBuilder Builder(I, *DL);
auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
@@ -357,7 +370,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
auto *M = LI->getModule();
Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
- IRBuilder<> Builder(LI);
+ ReplacementIRBuilder Builder(LI, *DL);
Value *Addr = LI->getPointerOperand();
Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
@@ -381,7 +394,7 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
Type *NewTy =
getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
- IRBuilder<> Builder(RMWI);
+ ReplacementIRBuilder Builder(RMWI, *DL);
Value *Addr = RMWI->getPointerOperand();
Value *Val = RMWI->getValOperand();
@@ -413,7 +426,7 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
expandAtomicOpToLLSC(
LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
LI->getOrdering(),
- [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+ [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
return true;
case TargetLoweringBase::AtomicExpansionKind::LLOnly:
return expandAtomicLoadToLL(LI);
@@ -443,7 +456,7 @@ bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
- IRBuilder<> Builder(LI);
+ ReplacementIRBuilder Builder(LI, *DL);
// On some architectures, load-linked instructions are atomic for larger
// sizes than normal loads. For example, the only 64-bit load guaranteed
@@ -459,7 +472,7 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
}
bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
- IRBuilder<> Builder(LI);
+ ReplacementIRBuilder Builder(LI, *DL);
AtomicOrdering Order = LI->getOrdering();
if (Order == AtomicOrdering::Unordered)
Order = AtomicOrdering::Monotonic;
@@ -488,7 +501,7 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
- IRBuilder<> Builder(SI);
+ ReplacementIRBuilder Builder(SI, *DL);
auto *M = SI->getModule();
Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
M->getDataLayout());
@@ -514,7 +527,7 @@ void AtomicExpand::expandAtomicStore(StoreInst *SI) {
// or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
// It is the responsibility of the target to only signal expansion via
// shouldExpandAtomicRMW in cases where this is required and possible.
- IRBuilder<> Builder(SI);
+ ReplacementIRBuilder Builder(SI, *DL);
AtomicOrdering Ordering = SI->getOrdering();
assert(Ordering != AtomicOrdering::NotAtomic);
AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
@@ -529,7 +542,7 @@ void AtomicExpand::expandAtomicStore(StoreInst *SI) {
tryExpandAtomicRMW(AI);
}
-static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
+static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
Value *Loaded, Value *NewVal, Align AddrAlign,
AtomicOrdering MemOpOrder, SyncScope::ID SSID,
Value *&Success, Value *&NewLoaded) {
@@ -569,7 +582,7 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
expandPartwordAtomicRMW(AI,
TargetLoweringBase::AtomicExpansionKind::LLSC);
} else {
- auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
+ auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
AI->getValOperand());
};
@@ -582,10 +595,6 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
unsigned ValueSize = getAtomicOpSize(AI);
if (ValueSize < MinCASSize) {
- // TODO: Handle atomicrmw fadd/fsub
- if (AI->getType()->isFloatingPointTy())
- return false;
-
expandPartwordAtomicRMW(AI,
TargetLoweringBase::AtomicExpansionKind::CmpXChg);
} else {
@@ -613,8 +622,15 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
TLI->emitBitTestAtomicRMWIntrinsic(AI);
return true;
}
+ case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
+ TLI->emitCmpArithAtomicRMWIntrinsic(AI);
+ return true;
+ }
case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
return lowerAtomicRMWInst(AI);
+ case TargetLoweringBase::AtomicExpansionKind::Expand:
+ TLI->emitExpandAtomicRMW(AI);
+ return true;
default:
llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
@@ -626,6 +642,7 @@ struct PartwordMaskValues {
// These three fields are guaranteed to be set by createMaskInstrs.
Type *WordType = nullptr;
Type *ValueType = nullptr;
+ Type *IntValueType = nullptr;
Value *AlignedAddr = nullptr;
Align AlignedAddrAlignment;
// The remaining fields can be null.
@@ -679,9 +696,9 @@ raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
/// include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
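+///
+/// For example (illustrative), for a 1-byte value at byte offset 1 within a
+/// 4-byte word on a little-endian target: ShiftAmt = 8, Mask = 0x0000ff00,
+/// and Inv_Mask = 0xffff00ff.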
-static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
- Type *ValueType, Value *Addr,
- Align AddrAlign,
+static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
+ Instruction *I, Type *ValueType,
+ Value *Addr, Align AddrAlign,
unsigned MinWordSize) {
PartwordMaskValues PMV;
@@ -690,7 +707,11 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
const DataLayout &DL = M->getDataLayout();
unsigned ValueSize = DL.getTypeStoreSize(ValueType);
- PMV.ValueType = ValueType;
+ PMV.ValueType = PMV.IntValueType = ValueType;
+ if (PMV.ValueType->isFloatingPointTy())
+ PMV.IntValueType =
+ Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
+
PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
: ValueType;
if (PMV.ValueType == PMV.WordType) {
@@ -701,19 +722,29 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
return PMV;
}
+ PMV.AlignedAddrAlignment = Align(MinWordSize);
+
assert(ValueSize < MinWordSize);
- Type *WordPtrType =
- PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
+ PointerType *PtrTy = cast<PointerType>(Addr->getType());
+ Type *WordPtrType = PMV.WordType->getPointerTo(PtrTy->getAddressSpace());
+ IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
+ Value *PtrLSB;
- // TODO: we could skip some of this if AddrAlign >= MinWordSize.
- Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
- PMV.AlignedAddr = Builder.CreateIntToPtr(
- Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType,
- "AlignedAddr");
- PMV.AlignedAddrAlignment = Align(MinWordSize);
+ if (AddrAlign < MinWordSize) {
+ PMV.AlignedAddr = Builder.CreateIntrinsic(
+ Intrinsic::ptrmask, {PtrTy, IntTy},
+ {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
+ "AlignedAddr");
+
+ Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
+ PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
+ } else {
+    // If the alignment is high enough, the low bits are known to be 0.
+ PMV.AlignedAddr = Addr;
+ PtrLSB = ConstantInt::getNullValue(IntTy);
+ }
- Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
if (DL.isLittleEndian()) {
// turn bytes into bits
PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
@@ -727,28 +758,36 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
PMV.Mask = Builder.CreateShl(
ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
"Mask");
+
PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
+
+ // Cast for typed pointers.
+ PMV.AlignedAddr =
+ Builder.CreateBitCast(PMV.AlignedAddr, WordPtrType, "AlignedAddr");
+
return PMV;
}
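// Editor's note: a worked example of the mask computation above, assuming a
// little-endian target with a 4-byte minimum cmpxchg width. For an i8 atomic
// at address 0x1003:
//   AlignedAddr = 0x1003 & ~(4 - 1) = 0x1000   (via llvm.ptrmask)
//   PtrLSB      = 0x1003 & 3 = 3
//   ShiftAmt    = 3 << 3 = 24                   (bytes to bits)
//   Mask        = 0xFF << 24 = 0xFF000000
//   Inv_Mask    = 0x00FFFFFF
// The i8 value is then operated on inside the i32 word selected by Mask.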
-static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
const PartwordMaskValues &PMV) {
assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
if (PMV.WordType == PMV.ValueType)
return WideWord;
Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
- Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted");
- return Trunc;
+ Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
+ return Builder.CreateBitCast(Trunc, PMV.ValueType);
}
-static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
Value *Updated, const PartwordMaskValues &PMV) {
assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
if (PMV.WordType == PMV.ValueType)
return Updated;
+ Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
+
Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
Value *Shift =
Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
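// Editor's note: the scalar equivalent of the extract/insert pair above, as
// a minimal self-contained sketch (plain C++, assuming an 8-bit value kept
// in a 32-bit word):
//
// #include <cstdint>
// static uint32_t extractScalar(uint32_t Word, unsigned ShiftAmt) {
//   return (Word >> ShiftAmt) & 0xFF; // lshr + trunc
// }
// static uint32_t insertScalar(uint32_t Word, uint8_t V, unsigned ShiftAmt,
//                              uint32_t InvMask) {
//   // And with Inv_Mask clears the old byte; or merges in the shifted one.
//   return (Word & InvMask) | ((uint32_t)V << ShiftAmt);
// }
//
// For FP values the IR version additionally bitcasts between ValueType and
// IntValueType, since shift and mask operations only exist on integers.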
@@ -761,7 +800,7 @@ static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord,
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
- IRBuilder<> &Builder, Value *Loaded,
+ IRBuilderBase &Builder, Value *Loaded,
Value *Shifted_Inc, Value *Inc,
const PartwordMaskValues &PMV) {
// TODO: update to use
@@ -790,10 +829,16 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
- case AtomicRMWInst::UMin: {
- // Finally, comparison ops will operate on the full value, so
- // truncate down to the original size, and expand out again after
- // doing the operation.
+ case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub:
+ case AtomicRMWInst::FMin:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::UIncWrap:
+ case AtomicRMWInst::UDecWrap: {
+ // Finally, other ops will operate on the full value, so truncate down to
+ // the original size, and expand out again after doing the
+ // operation. Bitcasts will be inserted for FP values.
Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
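// Editor's note: for the FP cases newly routed through here, the
// per-iteration sequence in IR terms, as a sketch assuming an f16 fadd held
// inside an i32 word:
//   %ext  = lshr i32 %Loaded, %ShiftAmt   ; extractMaskedValue
//   %int  = trunc i32 %ext to i16
//   %fp   = bitcast i16 %int to half      ; IntValueType -> ValueType
//   %res  = fadd half %fp, %operand       ; buildAtomicRMWValue
//   %rint = bitcast half %res to i16      ; insertMaskedValue starts here
//   %zext = zext i16 %rint to i32
// and the result is merged back into %Loaded with shl/and/or under Mask.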
@@ -816,17 +861,23 @@ void AtomicExpand::expandPartwordAtomicRMW(
AtomicOrdering MemOpOrder = AI->getOrdering();
SyncScope::ID SSID = AI->getSyncScopeID();
- IRBuilder<> Builder(AI);
+ ReplacementIRBuilder Builder(AI, *DL);
PartwordMaskValues PMV =
createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
- Value *ValOperand_Shifted =
- Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
- PMV.ShiftAmt, "ValOperand_Shifted");
+ Value *ValOperand_Shifted = nullptr;
+ if (AI->getOperation() == AtomicRMWInst::Xchg ||
+ AI->getOperation() == AtomicRMWInst::Add ||
+ AI->getOperation() == AtomicRMWInst::Sub ||
+ AI->getOperation() == AtomicRMWInst::Nand) {
+ ValOperand_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+ }
- auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
+ auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
ValOperand_Shifted, AI->getValOperand(), PMV);
};
@@ -850,7 +901,7 @@ void AtomicExpand::expandPartwordAtomicRMW(
// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
- IRBuilder<> Builder(AI);
+ ReplacementIRBuilder Builder(AI, *DL);
AtomicRMWInst::BinOp Op = AI->getOperation();
assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
@@ -925,7 +976,7 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
BasicBlock *BB = CI->getParent();
Function *F = BB->getParent();
- IRBuilder<> Builder(CI);
+ ReplacementIRBuilder Builder(CI, *DL);
LLVMContext &Ctx = Builder.getContext();
BasicBlock *EndBB =
@@ -999,7 +1050,7 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
Builder.SetInsertPoint(CI);
Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
- Value *Res = UndefValue::get(CI->getType());
+ Value *Res = PoisonValue::get(CI->getType());
Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
Res = Builder.CreateInsertValue(Res, Success, 1);
@@ -1011,8 +1062,8 @@ bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
void AtomicExpand::expandAtomicOpToLLSC(
Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
AtomicOrdering MemOpOrder,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
- IRBuilder<> Builder(I);
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
+ ReplacementIRBuilder Builder(I, *DL);
Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
MemOpOrder, PerformOp);
@@ -1021,7 +1072,7 @@ void AtomicExpand::expandAtomicOpToLLSC(
}
void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
- IRBuilder<> Builder(AI);
+ ReplacementIRBuilder Builder(AI, *DL);
PartwordMaskValues PMV =
createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
@@ -1047,7 +1098,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
}
void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
- IRBuilder<> Builder(CI);
+ ReplacementIRBuilder Builder(CI, *DL);
PartwordMaskValues PMV = createMaskInstrs(
Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
@@ -1063,7 +1114,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
CI->getMergedOrdering());
Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
- Value *Res = UndefValue::get(CI->getType());
+ Value *Res = PoisonValue::get(CI->getType());
Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
Value *Success = Builder.CreateICmpEQ(
CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
@@ -1074,9 +1125,9 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
}
Value *AtomicExpand::insertRMWLLSCLoop(
- IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
+ IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
AtomicOrdering MemOpOrder,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
LLVMContext &Ctx = Builder.getContext();
BasicBlock *BB = Builder.GetInsertBlock();
Function *F = BB->getParent();
@@ -1134,7 +1185,7 @@ AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
M->getDataLayout());
- IRBuilder<> Builder(CI);
+ ReplacementIRBuilder Builder(CI, *DL);
Value *Addr = CI->getPointerOperand();
Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
@@ -1155,7 +1206,7 @@ AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
- Value *Res = UndefValue::get(CI->getType());
+ Value *Res = PoisonValue::get(CI->getType());
Res = Builder.CreateInsertValue(Res, OldVal, 0);
Res = Builder.CreateInsertValue(Res, Succ, 1);
@@ -1258,8 +1309,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
- // This grabs the DebugLoc from CI
- IRBuilder<> Builder(CI);
+ ReplacementIRBuilder Builder(CI, *DL);
// The split call above "helpfully" added a branch at the end of BB (to the
// wrong place), but we might want a fence too. It's easiest to just remove
@@ -1326,7 +1376,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// Make sure later instructions don't get reordered with a fence if
// necessary.
Builder.SetInsertPoint(SuccessBB);
- if (ShouldInsertFencesForAtomic)
+ if (ShouldInsertFencesForAtomic ||
+ TLI->shouldInsertTrailingFenceForAtomicStore(CI))
TLI->emitTrailingFence(Builder, CI, SuccessOrder);
Builder.CreateBr(ExitBB);
@@ -1400,7 +1451,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// Some use of the full struct return that we don't understand has happened,
// so we've got to reconstruct it properly.
Value *Res;
- Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
+ Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
Res = Builder.CreateInsertValue(Res, Success, 1);
CI->replaceAllUsesWith(Res);
@@ -1439,9 +1490,9 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
}
Value *AtomicExpand::insertRMWCmpXchgLoop(
- IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
+ IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
AtomicOrdering MemOpOrder, SyncScope::ID SSID,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
+ function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
CreateCmpXchgInstFun CreateCmpXchg) {
LLVMContext &Ctx = Builder.getContext();
BasicBlock *BB = Builder.GetInsertBlock();
@@ -1524,11 +1575,11 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg) {
- IRBuilder<> Builder(AI);
+ ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
AI->getOrdering(), AI->getSyncScopeID(),
- [&](IRBuilder<> &Builder, Value *Loaded) {
+ [&](IRBuilderBase &Builder, Value *Loaded) {
return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
AI->getValOperand());
},
@@ -1634,19 +1685,19 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
case AtomicRMWInst::BAD_BINOP:
llvm_unreachable("Should not have BAD_BINOP.");
case AtomicRMWInst::Xchg:
- return makeArrayRef(LibcallsXchg);
+ return ArrayRef(LibcallsXchg);
case AtomicRMWInst::Add:
- return makeArrayRef(LibcallsAdd);
+ return ArrayRef(LibcallsAdd);
case AtomicRMWInst::Sub:
- return makeArrayRef(LibcallsSub);
+ return ArrayRef(LibcallsSub);
case AtomicRMWInst::And:
- return makeArrayRef(LibcallsAnd);
+ return ArrayRef(LibcallsAnd);
case AtomicRMWInst::Or:
- return makeArrayRef(LibcallsOr);
+ return ArrayRef(LibcallsOr);
case AtomicRMWInst::Xor:
- return makeArrayRef(LibcallsXor);
+ return ArrayRef(LibcallsXor);
case AtomicRMWInst::Nand:
- return makeArrayRef(LibcallsNand);
+ return ArrayRef(LibcallsNand);
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
@@ -1655,6 +1706,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
case AtomicRMWInst::FMin:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
+ case AtomicRMWInst::UIncWrap:
+ case AtomicRMWInst::UDecWrap:
    // No atomic libcalls are available for min/max, FP, or wrapping ops.
return {};
}
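// Editor's note: a hedged illustration of the mapping this function feeds.
// For operations with a libcall row, a sized entry is picked by operand
// width, e.g. an `atomicrmw add i32` can become a call to
// __atomic_fetch_add_4; operations that return the empty list above instead
// go through the generic CAS-loop fallback built on
// __atomic_compare_exchange.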
@@ -1678,7 +1731,7 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
// CAS libcall, via a CAS loop, instead.
if (!Success) {
expandAtomicRMWToCmpXchg(
- I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded,
+ I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
// Create the CAS instruction normally...
@@ -1893,7 +1946,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// The final result from the CAS is {load of 'expected' alloca, bool result
// from call}
Type *FinalResultTy = I->getType();
- Value *V = UndefValue::get(FinalResultTy);
+ Value *V = PoisonValue::get(FinalResultTy);
Value *ExpectedOut = Builder.CreateAlignedLoad(
CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
index 958212a0e448..e7e73606de07 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -68,17 +68,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
+#include <optional>
using namespace llvm;
@@ -130,9 +130,9 @@ INITIALIZE_PASS(BasicBlockSections, "bbsections-prepare",
// This function updates and optimizes the branching instructions of every basic
// block in a given function to account for changes in the layout.
-static void updateBranches(
- MachineFunction &MF,
- const SmallVector<MachineBasicBlock *, 4> &PreLayoutFallThroughs) {
+static void
+updateBranches(MachineFunction &MF,
+ const SmallVector<MachineBasicBlock *> &PreLayoutFallThroughs) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
SmallVector<MachineOperand, 4> Cond;
for (auto &MBB : MF) {
@@ -167,7 +167,7 @@ static void updateBranches(
bool getBBClusterInfoForFunction(
const MachineFunction &MF,
BasicBlockSectionsProfileReader *BBSectionsProfileReader,
- std::vector<Optional<BBClusterInfo>> &V) {
+ DenseMap<unsigned, BBClusterInfo> &V) {
  // Find the associated cluster information.
std::pair<bool, SmallVector<BBClusterInfo, 4>> P =
@@ -182,13 +182,8 @@ bool getBBClusterInfoForFunction(
return true;
}
- V.resize(MF.getNumBlockIDs());
- for (auto bbClusterInfo : P.second) {
- // Bail out if the cluster information contains invalid MBB numbers.
- if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs())
- return false;
- V[bbClusterInfo.MBBNumber] = bbClusterInfo;
- }
+ for (const BBClusterInfo &BBCI : P.second)
+ V[BBCI.BBID] = BBCI;
return true;
}
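// Editor's note: a minimal sketch of the map-based lookup this change
// enables (field values hypothetical); sparse or non-contiguous BBIDs no
// longer force a vector sized to MF.getNumBlockIDs():
//
// DenseMap<unsigned, BBClusterInfo> Map;
// Map.try_emplace(/*Key=*/7, BBClusterInfo{/*BBID=*/7, /*ClusterID=*/1,
//                                          /*PositionInCluster=*/0});
// auto It = Map.find(7);
// if (It != Map.end())
//   dbgs() << It->second.ClusterID << "\n"; // prints 1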
@@ -199,16 +194,17 @@ bool getBBClusterInfoForFunction(
// clusters, they are moved into a single "Exception" section. Eventually,
// clusters are ordered in increasing order of their IDs, with the "Exception"
// and "Cold" succeeding all other clusters.
-// FuncBBClusterInfo represent the cluster information for basic blocks. If this
-// is empty, it means unique sections for all basic blocks in the function.
+// FuncBBClusterInfo represents the cluster information for basic blocks. It
+// maps from the BBID of each basic block to its cluster information. If this
+// is empty, it means unique sections for all basic blocks in the function.
static void
assignSections(MachineFunction &MF,
- const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
+ const DenseMap<unsigned, BBClusterInfo> &FuncBBClusterInfo) {
assert(MF.hasBBSections() && "BB Sections is not set for function.");
// This variable stores the section ID of the cluster containing eh_pads (if
// all eh_pads are one cluster). If more than one cluster contain eh_pads, we
// set it equal to ExceptionSectionID.
- Optional<MBBSectionID> EHPadsSectionID;
+ std::optional<MBBSectionID> EHPadsSectionID;
for (auto &MBB : MF) {
// With the 'all' option, every basic block is placed in a unique section.
@@ -218,15 +214,21 @@ assignSections(MachineFunction &MF,
if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All ||
FuncBBClusterInfo.empty()) {
// If unique sections are desired for all basic blocks of the function, we
- // set every basic block's section ID equal to its number (basic block
- // id). This further ensures that basic blocks are ordered canonically.
- MBB.setSectionID({static_cast<unsigned int>(MBB.getNumber())});
- } else if (FuncBBClusterInfo[MBB.getNumber()])
- MBB.setSectionID(FuncBBClusterInfo[MBB.getNumber()]->ClusterID);
- else {
- // BB goes into the special cold section if it is not specified in the
- // cluster info map.
- MBB.setSectionID(MBBSectionID::ColdSectionID);
+ // set every basic block's section ID equal to its original position in
+ // the layout (which is equal to its number). This ensures that basic
+ // blocks are ordered canonically.
+ MBB.setSectionID(MBB.getNumber());
+ } else {
+ // TODO: Replace `getBBIDOrNumber` with `getBBID` once version 1 is
+ // deprecated.
+ auto I = FuncBBClusterInfo.find(MBB.getBBIDOrNumber());
+ if (I != FuncBBClusterInfo.end()) {
+ MBB.setSectionID(I->second.ClusterID);
+ } else {
+ // BB goes into the special cold section if it is not specified in the
+ // cluster info map.
+ MBB.setSectionID(MBBSectionID::ColdSectionID);
+ }
}
if (MBB.isEHPad() && EHPadsSectionID != MBB.getSectionID() &&
@@ -249,12 +251,14 @@ assignSections(MachineFunction &MF,
void llvm::sortBasicBlocksAndUpdateBranches(
MachineFunction &MF, MachineBasicBlockComparator MBBCmp) {
- SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs(
- MF.getNumBlockIDs());
+ [[maybe_unused]] const MachineBasicBlock *EntryBlock = &MF.front();
+ SmallVector<MachineBasicBlock *> PreLayoutFallThroughs(MF.getNumBlockIDs());
for (auto &MBB : MF)
PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
MF.sort(MBBCmp);
+ assert(&MF.front() == EntryBlock &&
+ "Entry block should not be displaced by basic block sections");
// Set IsBeginSection and IsEndSection according to the assigned section IDs.
MF.assignBeginEndSections();
@@ -317,11 +321,14 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
if (BBSectionsType == BasicBlockSection::List &&
hasInstrProfHashMismatch(MF))
return true;
-
- // Renumber blocks before sorting them for basic block sections. This is
- // useful during sorting, basic blocks in the same section will retain the
- // default order. This renumbering should also be done for basic block
- // labels to match the profiles with the correct blocks.
+  // Renumber blocks before sorting them. This is useful because, during
+  // sorting, basic blocks in the same section will then retain the default
+  // order.
+ // This renumbering should also be done for basic block labels to match the
+ // profiles with the correct blocks.
+ // For LLVM_BB_ADDR_MAP versions 2 and higher, this renumbering serves
+ // the different purpose of accessing the original layout positions and
+ // finding the original fallthroughs.
+ // TODO: Change the above comment accordingly when version 1 is deprecated.
MF.RenumberBlocks();
if (BBSectionsType == BasicBlockSection::Labels) {
@@ -331,7 +338,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
BBSectionsProfileReader = &getAnalysis<BasicBlockSectionsProfileReader>();
- std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo;
+ // Map from BBID of blocks to their cluster information.
+ DenseMap<unsigned, BBClusterInfo> FuncBBClusterInfo;
if (BBSectionsType == BasicBlockSection::List &&
!getBBClusterInfoForFunction(MF, BBSectionsProfileReader,
FuncBBClusterInfo))
@@ -371,8 +379,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
// If the two basic block are in the same section, the order is decided by
// their position within the section.
if (XSectionID.Type == MBBSectionID::SectionType::Default)
- return FuncBBClusterInfo[X.getNumber()]->PositionInCluster <
- FuncBBClusterInfo[Y.getNumber()]->PositionInCluster;
+ return FuncBBClusterInfo.lookup(X.getBBIDOrNumber()).PositionInCluster <
+ FuncBBClusterInfo.lookup(Y.getBBIDOrNumber()).PositionInCluster;
return X.getNumber() < Y.getNumber();
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index c2acf115998b..5bc8d82debc3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,23 +93,23 @@ static Error getBBClusterInfo(const MemoryBuffer *MBuf,
if (FI == ProgramBBClusterInfo.end())
return invalidProfileError(
"Cluster list does not follow a function name specifier.");
- SmallVector<StringRef, 4> BBIndexes;
- S.split(BBIndexes, ' ');
+ SmallVector<StringRef, 4> BBIDs;
+ S.split(BBIDs, ' ');
// Reset current cluster position.
CurrentPosition = 0;
- for (auto BBIndexStr : BBIndexes) {
- unsigned long long BBIndex;
- if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex))
+ for (auto BBIDStr : BBIDs) {
+ unsigned long long BBID;
+ if (getAsUnsignedInteger(BBIDStr, 10, BBID))
return invalidProfileError(Twine("Unsigned integer expected: '") +
- BBIndexStr + "'.");
- if (!FuncBBIDs.insert(BBIndex).second)
+ BBIDStr + "'.");
+ if (!FuncBBIDs.insert(BBID).second)
return invalidProfileError(Twine("Duplicate basic block id found '") +
- BBIndexStr + "'.");
- if (!BBIndex && CurrentPosition)
+ BBIDStr + "'.");
+ if (BBID == 0 && CurrentPosition)
return invalidProfileError("Entry BB (0) does not begin a cluster.");
- FI->second.emplace_back(BBClusterInfo{
- ((unsigned)BBIndex), CurrentCluster, CurrentPosition++});
+ FI->second.emplace_back(
+ BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++});
}
CurrentCluster++;
} else { // This is a function name specifier.
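// Editor's note: a plausible profile snippet for the parser above, assuming
// the usual `!function` / `!!cluster` line syntax of
// -basic-block-sections=list (the cluster lines are the space-separated BBID
// lists handled here):
//   !foo
//   !!0 1 2
//   !!4
// This places blocks 0, 1, 2 of foo in cluster 0, block 4 in cluster 1, and
// any remaining blocks in the cold section.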
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index 07be03d2dab9..d491691135dc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1875,7 +1875,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
addRegAndItsAliases(Reg, TRI, Uses);
} else {
if (Uses.erase(Reg)) {
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
Uses.erase(*SubRegs); // Use sub-registers to be conservative
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
index 29508f8f35a6..016c81dc5aa4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -23,6 +23,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -87,7 +88,9 @@ class BranchRelaxation : public MachineFunctionPass {
bool relaxBranchInstructions();
void scanFunction();
- MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &BB);
+ MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &OrigMBB);
+ MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &OrigMBB,
+ const BasicBlock *BB);
MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI,
MachineBasicBlock *DestBB);
@@ -201,12 +204,20 @@ void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
}
}
-/// Insert a new empty basic block and insert it after \BB
-MachineBasicBlock *BranchRelaxation::createNewBlockAfter(MachineBasicBlock &BB) {
+/// Insert a new empty MachineBasicBlock and insert it after \p OrigMBB
+MachineBasicBlock *
+BranchRelaxation::createNewBlockAfter(MachineBasicBlock &OrigMBB) {
+  return createNewBlockAfter(OrigMBB, OrigMBB.getBasicBlock());
+}
+
+/// Insert a new empty MachineBasicBlock with \p BB as its BasicBlock
+/// and insert it after \p OrigMBB
+MachineBasicBlock *
+BranchRelaxation::createNewBlockAfter(MachineBasicBlock &OrigMBB,
+ const BasicBlock *BB) {
// Create a new MBB for the code after the OrigBB.
- MachineBasicBlock *NewBB =
- MF->CreateMachineBasicBlock(BB.getBasicBlock());
- MF->insert(++BB.getIterator(), NewBB);
+ MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(++OrigMBB.getIterator(), NewBB);
// Insert an entry into BlockInfo to align it properly with the block numbers.
BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
@@ -431,7 +442,7 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
MachineBasicBlock *MBB = MI.getParent();
-
+ SmallVector<MachineOperand, 4> Cond;
unsigned OldBrSize = TII->getInstSizeInBytes(MI);
MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
@@ -466,7 +477,8 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
// Create the optional restore block and, initially, place it at the end of
// function. That block will be placed later if it's used; otherwise, it will
// be erased.
- MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back());
+ MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back(),
+ DestBB->getBasicBlock());
TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
DestOffset - SrcOffset, RS.get());
@@ -482,10 +494,11 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
// restore blocks are just duplicated for each far branch.
assert(!DestBB->isEntryBlock());
MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
- if (auto *FT = PrevBB->getFallThrough()) {
+ // Fall through only if PrevBB has no unconditional branch as one of its
+ // terminators.
+ if (auto *FT = PrevBB->getLogicalFallThrough()) {
assert(FT == DestBB);
TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
- // Recalculate the block size.
BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
}
// Now, RestoreBB could be placed directly before DestBB.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 57170c58db14..310273173647 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -135,6 +135,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// Get the undef operand's register class
const TargetRegisterClass *OpRC =
TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
+ assert(OpRC && "Not a valid register class");
  // If the instruction has a true dependency, we can hide the false dependency
// behind it.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 42523c47a671..25741686a829 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -18,7 +18,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
@@ -89,10 +88,10 @@ class CFIInstrInserter : public MachineFunctionPass {
#define INVALID_OFFSET INT_MAX
/// contains the location where CSR register is saved.
struct CSRSavedLocation {
- CSRSavedLocation(Optional<unsigned> R, Optional<int> O)
+ CSRSavedLocation(std::optional<unsigned> R, std::optional<int> O)
: Reg(R), Offset(O) {}
- Optional<unsigned> Reg;
- Optional<int> Offset;
+ std::optional<unsigned> Reg;
+ std::optional<int> Offset;
};
/// Contains cfa offset and register values valid at entry and exit of basic
@@ -148,7 +147,7 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
MF.getSubtarget().getFrameLowering()->getInitialCFAOffset(MF);
// Initial CFA register value i.e. the one valid at the beginning of the
// function.
- unsigned InitialRegister =
+ Register InitialRegister =
MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF);
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
unsigned NumRegs = TRI.getNumRegs();
@@ -187,8 +186,8 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
// Determine cfa offset and register set by the block.
for (MachineInstr &MI : *MBBInfo.MBB) {
if (MI.isCFIInstruction()) {
- Optional<unsigned> CSRReg;
- Optional<int> CSROffset;
+ std::optional<unsigned> CSRReg;
+ std::optional<int> CSROffset;
unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
const MCCFIInstruction &CFI = Instrs[CFIIndex];
switch (CFI.getOperation()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 519b24c21d7a..615687abad81 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -61,7 +61,7 @@ Register VirtRegAuxInfo::copyHint(const MachineInstr *MI, unsigned Reg,
if (!HReg)
return 0;
- if (Register::isVirtualRegister(HReg))
+ if (HReg.isVirtual())
return Sub == HSub ? HReg : Register();
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
@@ -107,7 +107,7 @@ bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI,
// If the original (pre-splitting) registers match this
// copy came from a split.
- if (!Register::isVirtualRegister(Reg) || VRM.getOriginal(Reg) != Original)
+ if (!Reg.isVirtual() || VRM.getOriginal(Reg) != Original)
return false;
// Follow the copy live-in value.
@@ -278,7 +278,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
if (TargetHint.first == 0 && TargetHint.second)
MRI.clearSimpleHint(LI.reg());
- std::set<Register> HintedRegs;
+ SmallSet<Register, 4> HintedRegs;
for (const auto &Hint : CopyHints) {
if (!HintedRegs.insert(Hint.Reg).second ||
(TargetHint.first != 0 && Hint.Reg == TargetHint.second))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
index f74ff30ab2e1..ce1ef571c9df 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -231,7 +231,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
// when i64 and f64 are both passed in GPRs.
StackOffset = SavedStackOffset;
MaxStackArgAlign = SavedMaxStackArgAlign;
- Locs.resize(NumLocs);
+ Locs.truncate(NumLocs);
}
void CCState::analyzeMustTailForwardedRegisters(
@@ -240,8 +240,8 @@ void CCState::analyzeMustTailForwardedRegisters(
  // Oftentimes calling conventions will not use register parameters for
// variadic functions, so we need to assume we're not variadic so that we get
// all the registers that might be used in a non-variadic call.
- SaveAndRestore<bool> SavedVarArg(IsVarArg, false);
- SaveAndRestore<bool> SavedMustTail(AnalyzingMustTailForwardedRegs, true);
+ SaveAndRestore SavedVarArg(IsVarArg, false);
+ SaveAndRestore SavedMustTail(AnalyzingMustTailForwardedRegs, true);
for (MVT RegVT : RegParmTypes) {
SmallVector<MCPhysReg, 8> RemainingRegs;
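// Editor's note: the change above relies on class template argument
// deduction for SaveAndRestore; a minimal self-contained illustration:
//
// #include "llvm/Support/SaveAndRestore.h"
// void demo() {
//   bool Flag = false;
//   {
//     llvm::SaveAndRestore Saved(Flag, true); // deduced SaveAndRestore<bool>
//     // Flag is true for the rest of this scope.
//   }
//   // Saved's destructor has restored Flag to false here.
// }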
@@ -270,19 +270,20 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
CCState CCInfo2(CallerCC, false, MF, RVLocs2, C);
CCInfo2.AnalyzeCallResult(Ins, CallerFn);
- if (RVLocs1.size() != RVLocs2.size())
- return false;
- for (unsigned I = 0, E = RVLocs1.size(); I != E; ++I) {
- const CCValAssign &Loc1 = RVLocs1[I];
- const CCValAssign &Loc2 = RVLocs2[I];
-
- if ( // Must both be in registers, or both in memory
- Loc1.isRegLoc() != Loc2.isRegLoc() ||
- // Must fill the same part of their locations
- Loc1.getLocInfo() != Loc2.getLocInfo() ||
- // Memory offset/register number must be the same
- Loc1.getExtraInfo() != Loc2.getExtraInfo())
+ auto AreCompatible = [](const CCValAssign &Loc1, const CCValAssign &Loc2) {
+ assert(!Loc1.isPendingLoc() && !Loc2.isPendingLoc() &&
+ "The location must have been decided by now");
+ // Must fill the same part of their locations.
+ if (Loc1.getLocInfo() != Loc2.getLocInfo())
return false;
- }
- return true;
+ // Must both be in the same registers, or both in memory at the same offset.
+ if (Loc1.isRegLoc() && Loc2.isRegLoc())
+ return Loc1.getLocReg() == Loc2.getLocReg();
+ if (Loc1.isMemLoc() && Loc2.isMemLoc())
+ return Loc1.getLocMemOffset() == Loc2.getLocMemOffset();
+ llvm_unreachable("Unknown location kind");
+ };
+
+ return std::equal(RVLocs1.begin(), RVLocs1.end(), RVLocs2.begin(),
+ RVLocs2.end(), AreCompatible);
}
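// Editor's note: the four-iterator std::equal overload subsumes the deleted
// explicit size check: with ranges of different lengths it returns false
// without applying the predicate to unmatched tails. A small illustration:
//
// #include <algorithm>
// #include <vector>
// static bool sameInts(const std::vector<int> &A, const std::vector<int> &B) {
//   return std::equal(A.begin(), A.end(), B.begin(), B.end(),
//                     [](int X, int Y) { return X == Y; });
// }
// // sameInts({1, 2, 3}, {1, 2}) == false, with no out-of-bounds access.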
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
index 5050395fbc0f..398ff56f737c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -19,6 +19,7 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeAssignmentTrackingAnalysisPass(Registry);
initializeAtomicExpandPass(Registry);
initializeBasicBlockSectionsPass(Registry);
initializeBranchFolderPassPass(Registry);
@@ -36,6 +37,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyIfPredicatorPass(Registry);
initializeEarlyMachineLICMPass(Registry);
initializeEarlyTailDuplicatePass(Registry);
+ initializeExpandLargeDivRemLegacyPassPass(Registry);
+ initializeExpandLargeFpConvertLegacyPassPass(Registry);
initializeExpandMemCmpPassPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFEntryInserterPass(Registry);
@@ -68,6 +71,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineBlockFrequencyInfoPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
+ initializeMachineCFGPrinterPass(Registry);
initializeMachineCSEPass(Registry);
initializeMachineCombinerPass(Registry);
initializeMachineCopyPropagationPass(Registry);
@@ -75,18 +79,23 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineCycleInfoWrapperPassPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachineFunctionPrinterPassPass(Registry);
+ initializeMachineLateInstrsCleanupPass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoWrapperPassPass(Registry);
initializeMachineOptimizationRemarkEmitterPassPass(Registry);
initializeMachineOutlinerPass(Registry);
initializeMachinePipelinerPass(Registry);
+ initializeMachineSanitizerBinaryMetadataPass(Registry);
initializeModuloScheduleTestPass(Registry);
initializeMachinePostDominatorTreePass(Registry);
initializeMachineRegionInfoPassPass(Registry);
initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
+ initializeMachineUniformityAnalysisPassPass(Registry);
+ initializeMachineUniformityInfoPrinterPassPass(Registry);
initializeMachineVerifierPassPass(Registry);
+ initializeObjCARCContractLegacyPassPass(Registry);
initializeOptimizePHIsPass(Registry);
initializePEIPass(Registry);
initializePHIEliminationPass(Registry);
@@ -113,6 +122,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeSjLjEHPreparePass(Registry);
initializeSlotIndexesPass(Registry);
initializeStackColoringPass(Registry);
+ initializeStackFrameLayoutAnalysisPassPass(Registry);
initializeStackMapLivenessPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
@@ -120,7 +130,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeTailDuplicatePass(Registry);
initializeTargetPassConfigPass(Registry);
initializeTwoAddressInstructionPassPass(Registry);
- initializeTypePromotionPass(Registry);
+ initializeTypePromotionLegacyPass(Registry);
initializeUnpackMachineBundlesPass(Registry);
initializeUnreachableBlockElimLegacyPassPass(Registry);
initializeUnreachableMachineBlockElimPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index 8f185a161bd0..a5215969c0dd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -17,6 +17,9 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+
+#define DEBUG_TYPE "codegen-common"
using namespace llvm;
@@ -100,8 +103,8 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
// Make sure that the copy dest is not a vreg when the copy source is a
// physical register.
- if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
- Register::isPhysicalRegister(OPI2->getReg())))
+ if (!OPI2->isReg() ||
+ (!OPI->getReg().isPhysical() && OPI2->getReg().isPhysical()))
return false;
return true;
@@ -197,3 +200,88 @@ unsigned llvm::getInvertedFPClassTest(unsigned Test) {
}
return 0;
}
+
+static MachineOperand *getSalvageOpsForCopy(const MachineRegisterInfo &MRI,
+ MachineInstr &Copy) {
+ assert(Copy.getOpcode() == TargetOpcode::COPY && "Must be a COPY");
+
+ return &Copy.getOperand(1);
+}
+
+static MachineOperand *getSalvageOpsForTrunc(const MachineRegisterInfo &MRI,
+ MachineInstr &Trunc,
+ SmallVectorImpl<uint64_t> &Ops) {
+ assert(Trunc.getOpcode() == TargetOpcode::G_TRUNC && "Must be a G_TRUNC");
+
+ const auto FromLLT = MRI.getType(Trunc.getOperand(1).getReg());
+ const auto ToLLT = MRI.getType(Trunc.defs().begin()->getReg());
+
+ // TODO: Support non-scalar types.
+ if (!FromLLT.isScalar()) {
+ return nullptr;
+ }
+
+ auto ExtOps = DIExpression::getExtOps(FromLLT.getSizeInBits(),
+ ToLLT.getSizeInBits(), false);
+ Ops.append(ExtOps.begin(), ExtOps.end());
+ return &Trunc.getOperand(1);
+}
+
+static MachineOperand *salvageDebugInfoImpl(const MachineRegisterInfo &MRI,
+ MachineInstr &MI,
+ SmallVectorImpl<uint64_t> &Ops) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_TRUNC:
+ return getSalvageOpsForTrunc(MRI, MI, Ops);
+ case TargetOpcode::COPY:
+ return getSalvageOpsForCopy(MRI, MI);
+ default:
+ return nullptr;
+ }
+}
+
+void llvm::salvageDebugInfoForDbgValue(const MachineRegisterInfo &MRI,
+ MachineInstr &MI,
+ ArrayRef<MachineOperand *> DbgUsers) {
+  // This is an arbitrarily chosen limit on the maximum size of a debug
+  // expression we will salvage up to, used for performance reasons.
+ const unsigned MaxExpressionSize = 128;
+
+ for (auto *DefMO : DbgUsers) {
+ MachineInstr *DbgMI = DefMO->getParent();
+ if (DbgMI->isIndirectDebugValue()) {
+ continue;
+ }
+
+ int UseMOIdx = DbgMI->findRegisterUseOperandIdx(DefMO->getReg());
+ assert(UseMOIdx != -1 && DbgMI->hasDebugOperandForReg(DefMO->getReg()) &&
+ "Must use salvaged instruction as its location");
+
+ // TODO: Support DBG_VALUE_LIST.
+ if (DbgMI->getOpcode() != TargetOpcode::DBG_VALUE) {
+ assert(DbgMI->getOpcode() == TargetOpcode::DBG_VALUE_LIST &&
+ "Must be either DBG_VALUE or DBG_VALUE_LIST");
+ continue;
+ }
+
+ const DIExpression *SalvagedExpr = DbgMI->getDebugExpression();
+
+ SmallVector<uint64_t, 16> Ops;
+ auto Op0 = salvageDebugInfoImpl(MRI, MI, Ops);
+ if (!Op0)
+ continue;
+ SalvagedExpr = DIExpression::appendOpsToArg(SalvagedExpr, Ops, 0, true);
+
+ bool IsValidSalvageExpr =
+ SalvagedExpr->getNumElements() <= MaxExpressionSize;
+ if (IsValidSalvageExpr) {
+ auto &UseMO = DbgMI->getOperand(UseMOIdx);
+ UseMO.setReg(Op0->getReg());
+ UseMO.setSubReg(Op0->getSubReg());
+ DbgMI->getDebugExpressionOp().setMetadata(SalvagedExpr);
+
+ LLVM_DEBUG(dbgs() << "SALVAGE: " << *DbgMI << '\n');
+ }
+ }
+}
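// Editor's note: a hedged before/after sketch of the G_TRUNC salvage path
// above (register numbers and variable name illustrative):
//   %small:_(s32) = G_TRUNC %big:_(s64)
//   DBG_VALUE %small, $noreg, !"x", !DIExpression()
// After the def of %small is salvaged, the DBG_VALUE points at %big, with
// the truncation re-expressed via the DIExpression::getExtOps-generated
// conversion ops appended to the expression by appendOpsToArg:
//   DBG_VALUE %big, $noreg, !"x", !DIExpression(<convert s64 -> s32 ops>)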
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b8f6fc9bbcde..dd431cc6f4f5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -65,6 +65,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -97,6 +98,7 @@
#include <iterator>
#include <limits>
#include <memory>
+#include <optional>
#include <utility>
#include <vector>
@@ -106,8 +108,8 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "codegenprepare"
STATISTIC(NumBlocksElim, "Number of blocks eliminated");
-STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
-STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
"sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
@@ -120,35 +122,36 @@ STATISTIC(NumMemoryInstsPhiCreated,
STATISTIC(NumMemoryInstsSelectCreated,
"Number of select created when address "
"computations were sunk to memory instructions");
-STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
-STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumAndsAdded,
"Number of and mask instructions added to form ext loads");
STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
-STATISTIC(NumRetsDup, "Number of return instructions duplicated");
+STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
static cl::opt<bool> DisableBranchOpts(
- "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
- cl::desc("Disable branch optimizations in CodeGenPrepare"));
+ "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable branch optimizations in CodeGenPrepare"));
static cl::opt<bool>
DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
cl::desc("Disable GC optimizations in CodeGenPrepare"));
-static cl::opt<bool> DisableSelectToBranch(
- "disable-cgp-select2branch", cl::Hidden, cl::init(false),
- cl::desc("Disable select to branch conversion."));
+static cl::opt<bool>
+ DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable select to branch conversion."));
-static cl::opt<bool> AddrSinkUsingGEPs(
- "addr-sink-using-gep", cl::Hidden, cl::init(true),
- cl::desc("Address sinking in CGP using GEPs."));
+static cl::opt<bool>
+ AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
+ cl::desc("Address sinking in CGP using GEPs."));
-static cl::opt<bool> EnableAndCmpSinking(
- "enable-andcmp-sinking", cl::Hidden, cl::init(true),
- cl::desc("Enable sinkinig and/cmp into branches."));
+static cl::opt<bool>
+ EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
+                        cl::desc("Enable sinking and/cmp into branches."));
static cl::opt<bool> DisableStoreExtract(
"disable-cgp-store-extract", cl::Hidden, cl::init(false),
@@ -204,10 +207,11 @@ static cl::opt<bool> ForceSplitStore(
"force-split-store", cl::Hidden, cl::init(false),
cl::desc("Force store splitting no matter what the target query says."));
-static cl::opt<bool>
-EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
+static cl::opt<bool> EnableTypePromotionMerge(
+ "cgp-type-promotion-merge", cl::Hidden,
cl::desc("Enable merging of redundant sexts when one is dominating"
- " the other."), cl::init(true));
+ " the other."),
+ cl::init(true));
static cl::opt<bool> DisableComplexAddrModes(
"disable-complex-addr-modes", cl::Hidden, cl::init(false),
@@ -215,12 +219,12 @@ static cl::opt<bool> DisableComplexAddrModes(
"in optimizeMemoryInst."));
static cl::opt<bool>
-AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
- cl::desc("Allow creation of Phis in Address sinking."));
+ AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
+ cl::desc("Allow creation of Phis in Address sinking."));
-static cl::opt<bool>
-AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
- cl::desc("Allow creation of selects in Address sinking."));
+static cl::opt<bool> AddrSinkNewSelects(
+ "addr-sink-new-select", cl::Hidden, cl::init(true),
+ cl::desc("Allow creation of selects in Address sinking."));
static cl::opt<bool> AddrSinkCombineBaseReg(
"addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
@@ -252,200 +256,219 @@ static cl::opt<bool>
cl::desc("Enable BFI update verification for "
"CodeGenPrepare."));
-static cl::opt<bool> OptimizePhiTypes(
- "cgp-optimize-phi-types", cl::Hidden, cl::init(false),
- cl::desc("Enable converting phi types in CodeGenPrepare"));
+static cl::opt<bool>
+ OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(false),
+ cl::desc("Enable converting phi types in CodeGenPrepare"));
+
+static cl::opt<unsigned>
+ HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
+                            cl::desc("Least number of BBs for a function to "
+                                     "be considered huge."));
namespace {
enum ExtType {
- ZeroExtension, // Zero extension has been seen.
- SignExtension, // Sign extension has been seen.
- BothExtension // This extension type is used if we saw sext after
- // ZeroExtension had been set, or if we saw zext after
- // SignExtension had been set. It makes the type
- // information of a promoted instruction invalid.
+ ZeroExtension, // Zero extension has been seen.
+ SignExtension, // Sign extension has been seen.
+ BothExtension // This extension type is used if we saw sext after
+ // ZeroExtension had been set, or if we saw zext after
+ // SignExtension had been set. It makes the type
+ // information of a promoted instruction invalid.
+};
+
+enum ModifyDT {
+  NotModifyDT, // Do not modify any dominator tree.
+  ModifyBBDT,  // Modify the basic block dominator tree.
+  ModifyInstDT // Modify the instruction "dominator" within a basic block.
+               // This usually means we move/delete/insert instructions
+               // in a basic block, so we should re-iterate the instructions
+               // in such a basic block.
};
using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
using SExts = SmallVector<Instruction *, 16>;
-using ValueToSExts = DenseMap<Value *, SExts>;
+using ValueToSExts = MapVector<Value *, SExts>;
class TypePromotionTransaction;
- class CodeGenPrepare : public FunctionPass {
- const TargetMachine *TM = nullptr;
- const TargetSubtargetInfo *SubtargetInfo;
- const TargetLowering *TLI = nullptr;
- const TargetRegisterInfo *TRI;
- const TargetTransformInfo *TTI = nullptr;
- const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
- const TargetLibraryInfo *TLInfo;
- const LoopInfo *LI;
- std::unique_ptr<BlockFrequencyInfo> BFI;
- std::unique_ptr<BranchProbabilityInfo> BPI;
- ProfileSummaryInfo *PSI;
-
- /// As we scan instructions optimizing them, this is the next instruction
- /// to optimize. Transforms that can invalidate this should update it.
- BasicBlock::iterator CurInstIterator;
-
- /// Keeps track of non-local addresses that have been sunk into a block.
- /// This allows us to avoid inserting duplicate code for blocks with
- /// multiple load/stores of the same address. The usage of WeakTrackingVH
- /// enables SunkAddrs to be treated as a cache whose entries can be
- /// invalidated if a sunken address computation has been erased.
- ValueMap<Value*, WeakTrackingVH> SunkAddrs;
-
- /// Keeps track of all instructions inserted for the current function.
- SetOfInstrs InsertedInsts;
-
- /// Keeps track of the type of the related instruction before their
- /// promotion for the current function.
- InstrToOrigTy PromotedInsts;
-
- /// Keep track of instructions removed during promotion.
- SetOfInstrs RemovedInsts;
-
- /// Keep track of sext chains based on their initial value.
- DenseMap<Value *, Instruction *> SeenChainsForSExt;
-
- /// Keep track of GEPs accessing the same data structures such as structs or
- /// arrays that are candidates to be split later because of their large
- /// size.
- MapVector<
- AssertingVH<Value>,
- SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
- LargeOffsetGEPMap;
-
- /// Keep track of new GEP base after splitting the GEPs having large offset.
- SmallSet<AssertingVH<Value>, 2> NewGEPBases;
-
- /// Map serial numbers to Large offset GEPs.
- DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
-
- /// Keep track of SExt promoted.
- ValueToSExts ValToSExtendedUses;
-
- /// True if the function has the OptSize attribute.
- bool OptSize;
-
- /// DataLayout for the Function being processed.
- const DataLayout *DL = nullptr;
-
- /// Building the dominator tree can be expensive, so we only build it
- /// lazily and update it when required.
- std::unique_ptr<DominatorTree> DT;
+class CodeGenPrepare : public FunctionPass {
+ const TargetMachine *TM = nullptr;
+ const TargetSubtargetInfo *SubtargetInfo;
+ const TargetLowering *TLI = nullptr;
+ const TargetRegisterInfo *TRI;
+ const TargetTransformInfo *TTI = nullptr;
+ const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
+ const TargetLibraryInfo *TLInfo;
+ const LoopInfo *LI;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ ProfileSummaryInfo *PSI;
- public:
- static char ID; // Pass identification, replacement for typeid
+ /// As we scan instructions optimizing them, this is the next instruction
+ /// to optimize. Transforms that can invalidate this should update it.
+ BasicBlock::iterator CurInstIterator;
- CodeGenPrepare() : FunctionPass(ID) {
- initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
- }
+ /// Keeps track of non-local addresses that have been sunk into a block.
+ /// This allows us to avoid inserting duplicate code for blocks with
+ /// multiple load/stores of the same address. The usage of WeakTrackingVH
+ /// enables SunkAddrs to be treated as a cache whose entries can be
+ /// invalidated if a sunken address computation has been erased.
+ ValueMap<Value *, WeakTrackingVH> SunkAddrs;
- bool runOnFunction(Function &F) override;
+ /// Keeps track of all instructions inserted for the current function.
+ SetOfInstrs InsertedInsts;
- StringRef getPassName() const override { return "CodeGen Prepare"; }
+ /// Keeps track of the type of the related instruction before their
+ /// promotion for the current function.
+ InstrToOrigTy PromotedInsts;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // FIXME: When we can selectively preserve passes, preserve the domtree.
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetPassConfig>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>();
- }
+ /// Keep track of instructions removed during promotion.
+ SetOfInstrs RemovedInsts;
- private:
- template <typename F>
- void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
- // Substituting can cause recursive simplifications, which can invalidate
- // our iterator. Use a WeakTrackingVH to hold onto it in case this
- // happens.
- Value *CurValue = &*CurInstIterator;
- WeakTrackingVH IterHandle(CurValue);
+ /// Keep track of sext chains based on their initial value.
+ DenseMap<Value *, Instruction *> SeenChainsForSExt;
- f();
+ /// Keep track of GEPs accessing the same data structures such as structs or
+ /// arrays that are candidates to be split later because of their large
+ /// size.
+ MapVector<AssertingVH<Value>,
+ SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
+ LargeOffsetGEPMap;
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- if (IterHandle != CurValue) {
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
- }
+ /// Keep track of new GEP base after splitting the GEPs having large offset.
+ SmallSet<AssertingVH<Value>, 2> NewGEPBases;
+
+ /// Map serial numbers to Large offset GEPs.
+ DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
+
+ /// Keep track of SExt promoted.
+ ValueToSExts ValToSExtendedUses;
+
+ /// True if the function has the OptSize attribute.
+ bool OptSize;
+
+ /// DataLayout for the Function being processed.
+ const DataLayout *DL = nullptr;
+
+ /// Building the dominator tree can be expensive, so we only build it
+ /// lazily and update it when required.
+ std::unique_ptr<DominatorTree> DT;
+
+public:
+  /// If we encounter a huge function, we need to limit the build time.
+  bool IsHugeFunc = false;
+
+  /// FreshBBs is like a worklist; it collects the updated BBs which need
+  /// to be optimized again.
+  /// Note: to limit the build time of this pass, whenever a BB is updated
+  /// in a huge function, we need to insert it into FreshBBs.
+ SmallSet<BasicBlock *, 32> FreshBBs;
+
+ static char ID; // Pass identification, replacement for typeid
+
+ CodeGenPrepare() : FunctionPass(ID) {
+ initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override { return "CodeGen Prepare"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // FIXME: When we can selectively preserve passes, preserve the domtree.
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>();
+ }
+
+private:
+ template <typename F>
+ void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
+ // Substituting can cause recursive simplifications, which can invalidate
+ // our iterator. Use a WeakTrackingVH to hold onto it in case this
+ // happens.
+ Value *CurValue = &*CurInstIterator;
+ WeakTrackingVH IterHandle(CurValue);
+
+ f();
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurValue) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
}
+ }
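+
+  // A typical use wraps a simplification that may erase the instruction the
+  // iterator points at (sketch; `DeadInst` is a hypothetical value):
+  //   resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+  //     RecursivelyDeleteTriviallyDeadInstructions(DeadInst);
+  //   });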
- // Get the DominatorTree, building if necessary.
- DominatorTree &getDT(Function &F) {
- if (!DT)
- DT = std::make_unique<DominatorTree>(F);
- return *DT;
- }
-
- void removeAllAssertingVHReferences(Value *V);
- bool eliminateAssumptions(Function &F);
- bool eliminateFallThrough(Function &F);
- bool eliminateMostlyEmptyBlocks(Function &F);
- BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
- bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
- void eliminateMostlyEmptyBlock(BasicBlock *BB);
- bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
- bool isPreheader);
- bool makeBitReverse(Instruction &I);
- bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
- bool optimizeInst(Instruction *I, bool &ModifiedDT);
- bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- Type *AccessTy, unsigned AddrSpace);
- bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
- bool optimizeInlineAsmInst(CallInst *CS);
- bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
- bool optimizeExt(Instruction *&I);
- bool optimizeExtUses(Instruction *I);
- bool optimizeLoadExt(LoadInst *Load);
- bool optimizeShiftInst(BinaryOperator *BO);
- bool optimizeFunnelShift(IntrinsicInst *Fsh);
- bool optimizeSelectInst(SelectInst *SI);
- bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
- bool optimizeSwitchType(SwitchInst *SI);
- bool optimizeSwitchPhiConstants(SwitchInst *SI);
- bool optimizeSwitchInst(SwitchInst *SI);
- bool optimizeExtractElementInst(Instruction *Inst);
- bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
- bool fixupDbgValue(Instruction *I);
- bool placeDbgValues(Function &F);
- bool placePseudoProbes(Function &F);
- bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
- LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
- bool tryToPromoteExts(TypePromotionTransaction &TPT,
- const SmallVectorImpl<Instruction *> &Exts,
- SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
- unsigned CreatedInstsCost = 0);
- bool mergeSExts(Function &F);
- bool splitLargeGEPOffsets();
- bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
- SmallPtrSetImpl<Instruction *> &DeletedInstrs);
- bool optimizePhiTypes(Function &F);
- bool performAddressTypePromotion(
- Instruction *&Inst,
- bool AllowPromotionWithoutCommonHeader,
- bool HasPromoted, TypePromotionTransaction &TPT,
- SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
- bool splitBranchCondition(Function &F, bool &ModifiedDT);
- bool simplifyOffsetableRelocate(GCStatepointInst &I);
-
- bool tryToSinkFreeOperands(Instruction *I);
- bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0,
- Value *Arg1, CmpInst *Cmp,
- Intrinsic::ID IID);
- bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
- bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
- bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
- void verifyBFIUpdates(Function &F);
- };
+ // Get the DominatorTree, building if necessary.
+ DominatorTree &getDT(Function &F) {
+ if (!DT)
+ DT = std::make_unique<DominatorTree>(F);
+ return *DT;
+ }
+
+ void removeAllAssertingVHReferences(Value *V);
+ bool eliminateAssumptions(Function &F);
+ bool eliminateFallThrough(Function &F);
+ bool eliminateMostlyEmptyBlocks(Function &F);
+ BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
+ bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+ void eliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
+ bool isPreheader);
+ bool makeBitReverse(Instruction &I);
+ bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
+ bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
+ bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
+ unsigned AddrSpace);
+ bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
+ bool optimizeInlineAsmInst(CallInst *CS);
+ bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
+ bool optimizeExt(Instruction *&I);
+ bool optimizeExtUses(Instruction *I);
+ bool optimizeLoadExt(LoadInst *Load);
+ bool optimizeShiftInst(BinaryOperator *BO);
+ bool optimizeFunnelShift(IntrinsicInst *Fsh);
+ bool optimizeSelectInst(SelectInst *SI);
+ bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
+ bool optimizeSwitchType(SwitchInst *SI);
+ bool optimizeSwitchPhiConstants(SwitchInst *SI);
+ bool optimizeSwitchInst(SwitchInst *SI);
+ bool optimizeExtractElementInst(Instruction *Inst);
+ bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
+ bool fixupDbgValue(Instruction *I);
+ bool placeDbgValues(Function &F);
+ bool placePseudoProbes(Function &F);
+ bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
+ LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
+ bool tryToPromoteExts(TypePromotionTransaction &TPT,
+ const SmallVectorImpl<Instruction *> &Exts,
+ SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
+ unsigned CreatedInstsCost = 0);
+ bool mergeSExts(Function &F);
+ bool splitLargeGEPOffsets();
+ bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
+ SmallPtrSetImpl<Instruction *> &DeletedInstrs);
+ bool optimizePhiTypes(Function &F);
+ bool performAddressTypePromotion(
+ Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
+ bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
+ bool simplifyOffsetableRelocate(GCStatepointInst &I);
+
+ bool tryToSinkFreeOperands(Instruction *I);
+ bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
+ CmpInst *Cmp, Intrinsic::ID IID);
+ bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
+ bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
+ bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
+ void verifyBFIUpdates(Function &F);
+};
} // end anonymous namespace
@@ -459,8 +482,8 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
- "Optimize for code generation", false, false)
+INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation",
+ false, false)
FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
@@ -474,6 +497,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Clear per function information.
InsertedInsts.clear();
PromotedInsts.clear();
+ FreshBBs.clear();
TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
SubtargetInfo = TM->getSubtargetImpl(F);
@@ -488,7 +512,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BBSectionsProfileReader =
getAnalysisIfAvailable<BasicBlockSectionsProfileReader>();
OptSize = F.hasOptSize();
- // Use the basic-block-sections profile to promote hot functions to .text.hot if requested.
+ // Use the basic-block-sections profile to promote hot functions to .text.hot
+ // if requested.
if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
BBSectionsProfileReader->isFunctionHot(F.getName())) {
F.setSectionPrefix("hot");
@@ -515,11 +540,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
- BasicBlock* BB = &*F.begin();
+ BasicBlock *BB = &*F.begin();
while (BB != nullptr) {
// bypassSlowDivision may create new BBs, but we don't want to reapply the
// optimization to those blocks.
- BasicBlock* Next = BB->getNextNode();
+ BasicBlock *Next = BB->getNextNode();
// F.hasOptSize is already checked in the outer if statement.
if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
@@ -536,7 +561,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// unconditional branch.
EverMadeChange |= eliminateMostlyEmptyBlocks(F);
- bool ModifiedDT = false;
+ ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
if (!DisableBranchOpts)
EverMadeChange |= splitBranchCondition(F, ModifiedDT);
@@ -545,18 +570,51 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
EverMadeChange |=
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
+  // If we are optimizing a huge function, we need to consider the build time,
+  // because the basic algorithm's complexity is near O(N!).
+ IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
+
bool MadeChange = true;
+ bool FuncIterated = false;
while (MadeChange) {
MadeChange = false;
DT.reset();
+
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
- bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration);
+ if (FuncIterated && !FreshBBs.contains(&BB))
+ continue;
- // Restart BB iteration if the dominator tree of the Function was changed
- if (ModifiedDTOnIteration)
- break;
+ ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
+ bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
+
+ MadeChange |= Changed;
+ if (IsHugeFunc) {
+        // If the BB is updated, it may still have a chance to be optimized.
+        // This usually happens during sink optimization.
+ // For example:
+ //
+ // bb0:
+ // %and = and i32 %a, 4
+ // %cmp = icmp eq i32 %and, 0
+ //
+        // If the %cmp sinks to another BB, the %and will have a chance to sink.
+ if (Changed)
+ FreshBBs.insert(&BB);
+ else if (FuncIterated)
+ FreshBBs.erase(&BB);
+
+ if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
+ DT.reset();
+ } else {
+ // For small/normal functions, we restart BB iteration if the dominator
+ // tree of the Function was changed.
+ if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
+ break;
+ }
}
+    // We have iterated over all BBs in the function (this only matters for
+    // huge functions).
+ FuncIterated = IsHugeFunc;
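+    // From the second sweep on, a huge function therefore only revisits the
+    // BBs re-queued in FreshBBs above (e.g. a block freshly split by a
+    // transform), instead of rescanning every block.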
+
if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
MadeChange |= mergeSExts(F);
if (!LargeOffsetGEPMap.empty())
@@ -586,11 +644,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Use a set vector to get deterministic iteration order. The order the
// blocks are removed may affect whether or not PHI nodes in successors
// are removed.
- SmallSetVector<BasicBlock*, 8> WorkList;
+ SmallSetVector<BasicBlock *, 8> WorkList;
for (BasicBlock &BB : F) {
SmallVector<BasicBlock *, 2> Successors(successors(&BB));
MadeChange |= ConstantFoldTerminator(&BB, true);
- if (!MadeChange) continue;
+ if (!MadeChange)
+ continue;
for (BasicBlock *Succ : Successors)
if (pred_empty(Succ))
@@ -601,7 +660,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= !WorkList.empty();
while (!WorkList.empty()) {
BasicBlock *BB = WorkList.pop_back_val();
- SmallVector<BasicBlock*, 2> Successors(successors(BB));
+ SmallVector<BasicBlock *, 2> Successors(successors(BB));
DeleteDeadBlock(BB);
@@ -715,7 +774,8 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
BasicBlock *SinglePred = BB->getSinglePredecessor();
// Don't merge if BB's address is taken.
- if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
+ if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
+ continue;
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
if (Term && !Term->isConditional()) {
@@ -725,6 +785,12 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
// Merge BB into SinglePred and delete it.
MergeBlockIntoPredecessor(BB);
Preds.insert(SinglePred);
+
+ if (IsHugeFunc) {
+ // Update FreshBBs to optimize the merged BB.
+ FreshBBs.insert(SinglePred);
+ FreshBBs.erase(BB);
+ }
}
}
@@ -837,9 +903,8 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
// such empty block (BB), ISel will place COPY instructions in BB, not in the
// predecessor of BB.
BasicBlock *Pred = BB->getUniquePredecessor();
- if (!Pred ||
- !(isa<SwitchInst>(Pred->getTerminator()) ||
- isa<IndirectBrInst>(Pred->getTerminator())))
+ if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
+ isa<IndirectBrInst>(Pred->getTerminator())))
return true;
if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
@@ -924,10 +989,11 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
// and DestBB may have conflicting incoming values for the block. If so, we
// can't merge the block.
const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
- if (!DestBBPN) return true; // no conflict.
+ if (!DestBBPN)
+ return true; // no conflict.
// Collect the preds of BB.
- SmallPtrSet<const BasicBlock*, 16> BBPreds;
+ SmallPtrSet<const BasicBlock *, 16> BBPreds;
if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
// It is faster to get preds from a PHI than with pred_iterator.
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
@@ -939,7 +1005,7 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
// Walk the preds of DestBB.
for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
- if (BBPreds.count(Pred)) { // Common predecessor?
+ if (BBPreds.count(Pred)) { // Common predecessor?
for (const PHINode &PN : DestBB->phis()) {
const Value *V1 = PN.getIncomingValueForBlock(Pred);
const Value *V2 = PN.getIncomingValueForBlock(BB);
@@ -950,7 +1016,8 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
V2 = V2PN->getIncomingValueForBlock(Pred);
// If there is a conflict, bail out.
- if (V1 != V2) return false;
+ if (V1 != V2)
+ return false;
}
}
}
@@ -958,6 +1025,22 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
return true;
}
+/// Replace all uses of Old with New, and push the users' BBs into FreshBBs.
+static void replaceAllUsesWith(Value *Old, Value *New,
+ SmallSet<BasicBlock *, 32> &FreshBBs,
+ bool IsHuge) {
+ auto *OldI = dyn_cast<Instruction>(Old);
+ if (OldI) {
+ for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (IsHuge)
+ FreshBBs.insert(User->getParent());
+ }
+ }
+ Old->replaceAllUsesWith(New);
+}
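+// Usage sketch: callers hand in the pass-level state so the bookkeeping stays
+// in one place, e.g. replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc); in
+// huge-function mode each user's parent block is re-queued before the RAUW.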
+
/// Eliminate a basic block that has only phi's and an unconditional branch in
/// it.
void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
@@ -978,6 +1061,12 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
// Note: BB(=SinglePred) will not be deleted on this path.
// DestBB(=its single successor) is the one that was deleted.
LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
+
+ if (IsHugeFunc) {
+ // Update FreshBBs to optimize the merged BB.
+ FreshBBs.insert(SinglePred);
+ FreshBBs.erase(DestBB);
+ }
return;
}
}
@@ -1129,31 +1218,34 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
// cases like this:
// bb1:
// ...
- // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
- // br label %merge
+  //   %g1 = call coldcc i8 addrspace(1)*
+  //         @llvm.experimental.gc.relocate.p1i8(...)
+  //   br label %merge
//
// bb2:
// ...
- // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
- // br label %merge
+  //   %g2 = call coldcc i8 addrspace(1)*
+  //         @llvm.experimental.gc.relocate.p1i8(...)
+  //   br label %merge
//
// merge:
// %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
// %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
//
- // In this case, we can not find the bitcast any more. So we insert a new bitcast
- // no matter there is already one or not. In this way, we can handle all cases, and
- // the extra bitcast should be optimized away in later passes.
+  // In this case, we cannot find the bitcast any more. So we insert a new
+  // bitcast whether there is already one or not. In this way, we can handle
+  // all cases, and the extra bitcast should be optimized away in later
+  // passes.
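+  // Roughly, the replacement emitted below is (sketch, names hypothetical):
+  //   %base.cast = bitcast <ty of relocated base> to <ty of original base>
+  //   %derived.new = getelementptr <elty>, %base.cast, <collected offsets>
+  //   %derived.cast = bitcast %derived.new to <ty of original derived>
+  // and any redundant bitcast is expected to fold away in later passes.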
Value *ActualRelocatedBase = RelocatedBase;
if (RelocatedBase->getType() != Base->getType()) {
ActualRelocatedBase =
Builder.CreateBitCast(RelocatedBase, Base->getType());
}
- Value *Replacement = Builder.CreateGEP(
- Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
+ Value *Replacement =
+ Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
+ ArrayRef(OffsetV));
Replacement->takeName(ToReplace);
- // If the newly generated derived pointer's type does not match the original derived
- // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
+ // If the newly generated derived pointer's type does not match the original
+ // derived pointer's type, cast the new derived pointer to match it. Same
+ // reasoning as above.
Value *ActualReplacement = Replacement;
if (Replacement->getType() != ToReplace->getType()) {
ActualReplacement =
@@ -1216,11 +1308,11 @@ static bool SinkCast(CastInst *CI) {
BasicBlock *DefBB = CI->getParent();
/// InsertedCasts - Only insert a cast in each block once.
- DenseMap<BasicBlock*, CastInst*> InsertedCasts;
+ DenseMap<BasicBlock *, CastInst *> InsertedCasts;
bool MadeChange = false;
for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
- UI != E; ) {
+ UI != E;) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
@@ -1246,7 +1338,8 @@ static bool SinkCast(CastInst *CI) {
continue;
// If this user is in the same block as the cast, don't change the cast.
- if (UserBB == DefBB) continue;
+ if (UserBB == DefBB)
+ continue;
// If we have already inserted a cast into this block, use it.
CastInst *&InsertedCast = InsertedCasts[UserBB];
@@ -1300,7 +1393,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
// If this is an extension, it will be a zero or sign extension, which
// isn't a noop.
- if (SrcVT.bitsLT(DstVT)) return false;
+ if (SrcVT.bitsLT(DstVT))
+ return false;
// If these values will be promoted, find out what they will be promoted
// to. This helps us consider truncates on PPC as noop copies when they
@@ -1322,7 +1416,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
// Match a simple increment by constant operation. Note that if a sub is
// matched, the step is negated (as if the step had been canonicalized to
// an add, even though we leave the instruction alone.)
-bool matchIncrement(const Instruction* IVInc, Instruction *&LHS,
+bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
Constant *&Step) {
if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
@@ -1339,21 +1433,21 @@ bool matchIncrement(const Instruction* IVInc, Instruction *&LHS,
/// If given \p PN is an inductive variable with value IVInc coming from the
/// backedge, and on each iteration it gets increased by Step, return pair
-/// <IVInc, Step>. Otherwise, return None.
-static Optional<std::pair<Instruction *, Constant *> >
+/// <IVInc, Step>. Otherwise, return std::nullopt.
+static std::optional<std::pair<Instruction *, Constant *>>
getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
const Loop *L = LI->getLoopFor(PN->getParent());
if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
- return None;
+ return std::nullopt;
auto *IVInc =
dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
- return None;
+ return std::nullopt;
Instruction *LHS = nullptr;
Constant *Step = nullptr;
if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
return std::make_pair(IVInc, Step);
- return None;
+ return std::nullopt;
}
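+// Example (sketch): for `%iv = phi i32 [ 0, %ph ], [ %iv.next, %latch ]` with
+// `%iv.next = add i32 %iv, 4`, this returns {%iv.next, i32 4}; a matched sub
+// has its step negated by matchIncrement(), as if canonicalized to an add.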
static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
@@ -1440,12 +1534,12 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
if (BO->getOpcode() != Instruction::Xor) {
Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
- BO->replaceAllUsesWith(Math);
+ replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
} else
assert(BO->hasOneUse() &&
"Patterns with XOr should use the BO only in the compare");
Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
- Cmp->replaceAllUsesWith(OV);
+ replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
Cmp->eraseFromParent();
BO->eraseFromParent();
return true;
@@ -1484,7 +1578,7 @@ static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
/// Try to combine the compare into a call to the llvm.uadd.with.overflow
/// intrinsic. Return true if any changes were made.
bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
- bool &ModifiedDT) {
+ ModifyDT &ModifiedDT) {
Value *A, *B;
BinaryOperator *Add;
if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
@@ -1511,12 +1605,12 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
return false;
// Reset callers - do not crash by iterating over a dead instruction.
- ModifiedDT = true;
+ ModifiedDT = ModifyDT::ModifyInstDT;
return true;
}
bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
- bool &ModifiedDT) {
+ ModifyDT &ModifiedDT) {
// We are not expecting non-canonical/degenerate code. Just bail out.
Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
if (isa<Constant>(A) && isa<Constant>(B))
@@ -1574,7 +1668,7 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
return false;
// Reset callers - do not crash by iterating over a dead instruction.
- ModifiedDT = true;
+ ModifiedDT = ModifyDT::ModifyInstDT;
return true;
}
@@ -1593,11 +1687,11 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
return false;
// Only insert a cmp in each block once.
- DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
+ DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
bool MadeChange = false;
for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
- UI != E; ) {
+ UI != E;) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
@@ -1613,7 +1707,8 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
BasicBlock *DefBB = Cmp->getParent();
// If this user is in the same block as the cmp, don't change the cmp.
- if (UserBB == DefBB) continue;
+ if (UserBB == DefBB)
+ continue;
// If we have already inserted a cmp into this block, use it.
CmpInst *&InsertedCmp = InsertedCmps[UserBB];
@@ -1621,10 +1716,9 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
if (!InsertedCmp) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
assert(InsertPt != UserBB->end());
- InsertedCmp =
- CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
- Cmp->getOperand(0), Cmp->getOperand(1), "",
- &*InsertPt);
+ InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
+ Cmp->getOperand(0), Cmp->getOperand(1), "",
+ &*InsertPt);
// Propagate the debug info.
InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
}
@@ -1731,7 +1825,7 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
return true;
}
-bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
+bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
@@ -1752,14 +1846,13 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
/// this operation can be combined.
///
/// Return true if any changes are made.
-static bool sinkAndCmp0Expression(Instruction *AndI,
- const TargetLowering &TLI,
+static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
SetOfInstrs &InsertedInsts) {
// Double-check that we're not trying to optimize an instruction that was
// already optimized by some other part of this pass.
assert(!InsertedInsts.count(AndI) &&
"Attempting to optimize already optimized and instruction");
- (void) InsertedInsts;
+ (void)InsertedInsts;
// Nothing to do for single use in same basic block.
if (AndI->hasOneUse() &&
@@ -1795,7 +1888,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI,
// one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
// others, so we don't need to keep track of which BBs we insert into.
for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
- UI != E; ) {
+ UI != E;) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
@@ -1976,11 +2069,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
// not have i16 compare.
// cmp i16 trunc.result, opnd2
//
- if (isa<TruncInst>(User) && shiftIsLegal
+ if (isa<TruncInst>(User) &&
+ shiftIsLegal
// If the type of the truncate is legal, no truncate will be
// introduced in other basic blocks.
- &&
- (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
+ && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
MadeChange =
SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
@@ -2037,20 +2130,21 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
/// If the transform is performed, return true and set ModifiedDT to true.
static bool despeculateCountZeros(IntrinsicInst *CountZeros,
const TargetLowering *TLI,
- const DataLayout *DL,
- bool &ModifiedDT) {
+ const DataLayout *DL, ModifyDT &ModifiedDT,
+ SmallSet<BasicBlock *, 32> &FreshBBs,
+ bool IsHugeFunc) {
// If a zero input is undefined, it doesn't make sense to despeculate that.
if (match(CountZeros->getOperand(1), m_One()))
return false;
// If it's cheap to speculate, there's nothing to do.
+ Type *Ty = CountZeros->getType();
auto IntrinsicID = CountZeros->getIntrinsicID();
- if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
- (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
+ if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
+ (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
return false;
// Only handle legal scalar cases. Anything else requires too much work.
- Type *Ty = CountZeros->getType();
unsigned SizeInBits = Ty->getScalarSizeInBits();
if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
return false;
@@ -2063,12 +2157,16 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// The intrinsic will be sunk behind a compare against zero and branch.
BasicBlock *StartBlock = CountZeros->getParent();
BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
+ if (IsHugeFunc)
+ FreshBBs.insert(CallBlock);
// Create another block after the count zero intrinsic. A PHI will be added
// in this block to select the result of the intrinsic or the bit-width
// constant if the input to the intrinsic is zero.
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
+ if (IsHugeFunc)
+ FreshBBs.insert(EndBlock);
// Set up a builder to create a compare, conditional branch, and PHI.
IRBuilder<> Builder(CountZeros->getContext());
@@ -2089,7 +2187,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// or the bit width of the operand.
Builder.SetInsertPoint(&EndBlock->front());
PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
- CountZeros->replaceAllUsesWith(PN);
+ replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
PN->addIncoming(BitWidth, StartBlock);
PN->addIncoming(CountZeros, CallBlock);
@@ -2098,11 +2196,11 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// undefined zero argument to 'true'. This will also prevent reprocessing the
// intrinsic; we only despeculate when a zero input is defined.
CountZeros->setArgOperand(1, Builder.getTrue());
- ModifiedDT = true;
+ ModifiedDT = ModifyDT::ModifyBBDT;
return true;
}
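+// Resulting shape for an i32 cttz (sketch; block names from the splits above):
+//   entry:      %cmpz = icmp eq i32 %x, 0
+//               br i1 %cmpz, label %cond.end, label %cond.false
+//   cond.false: %z = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+//               br label %cond.end
+//   cond.end:   %ctz = phi i32 [ 32, %entry ], [ %z, %cond.false ]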
-bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
+bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
BasicBlock *BB = CI->getParent();
// Lower inline assembly if we can.
@@ -2152,23 +2250,22 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
GlobalVariable *GV;
if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
GV->getPointerAlignment(*DL) < PrefAlign &&
- DL->getTypeAllocSize(GV->getValueType()) >=
- MinSize + Offset2)
+ DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
GV->setAlignment(PrefAlign);
}
- // If this is a memcpy (or similar) then we may be able to improve the
- // alignment
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
- Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
- MaybeAlign MIDestAlign = MI->getDestAlign();
- if (!MIDestAlign || DestAlign > *MIDestAlign)
- MI->setDestAlignment(DestAlign);
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
- MaybeAlign MTISrcAlign = MTI->getSourceAlign();
- Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
- if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
- MTI->setSourceAlignment(SrcAlign);
- }
+ }
+ // If this is a memcpy (or similar) then we may be able to improve the
+ // alignment.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+ Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
+ MaybeAlign MIDestAlign = MI->getDestAlign();
+ if (!MIDestAlign || DestAlign > *MIDestAlign)
+ MI->setDestAlignment(DestAlign);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ MaybeAlign MTISrcAlign = MTI->getSourceAlign();
+ Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
+ if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
+ MTI->setSourceAlignment(SrcAlign);
}
}
@@ -2176,8 +2273,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// cold block. This interacts with our handling for loads and stores to
// ensure that we can fold all uses of a potential addressing computation
// into their uses. TODO: generalize this to work over profiling data
- if (CI->hasFnAttr(Attribute::Cold) &&
- !OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
+ if (CI->hasFnAttr(Attribute::Cold) && !OptSize &&
+ !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
for (auto &Arg : CI->args()) {
if (!Arg->getType()->isPointerTy())
continue;
@@ -2188,7 +2285,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
switch (II->getIntrinsicID()) {
- default: break;
+ default:
+ break;
case Intrinsic::assume:
llvm_unreachable("llvm.assume should have been removed already");
case Intrinsic::experimental_widenable_condition: {
@@ -2228,25 +2326,27 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
Value *ArgVal = II->getArgOperand(0);
auto it = LargeOffsetGEPMap.find(II);
if (it != LargeOffsetGEPMap.end()) {
- // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
- // Make sure not to have to deal with iterator invalidation
- // after possibly adding ArgVal to LargeOffsetGEPMap.
- auto GEPs = std::move(it->second);
- LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
- LargeOffsetGEPMap.erase(II);
+ // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
+ // Make sure not to have to deal with iterator invalidation
+ // after possibly adding ArgVal to LargeOffsetGEPMap.
+ auto GEPs = std::move(it->second);
+ LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
+ LargeOffsetGEPMap.erase(II);
}
- II->replaceAllUsesWith(ArgVal);
+ replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
II->eraseFromParent();
return true;
}
case Intrinsic::cttz:
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
- return despeculateCountZeros(II, TLI, DL, ModifiedDT);
+ return despeculateCountZeros(II, TLI, DL, ModifiedDT, FreshBBs,
+ IsHugeFunc);
case Intrinsic::fshl:
case Intrinsic::fshr:
return optimizeFunnelShift(II);
+ case Intrinsic::dbg_assign:
case Intrinsic::dbg_value:
return fixupDbgValue(II);
case Intrinsic::vscale: {
@@ -2255,12 +2355,13 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// to benefit from cheap constant propagation.
Type *ScalableVectorTy =
VectorType::get(Type::getInt8Ty(II->getContext()), 1, true);
- if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinSize() == 8) {
+ if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinValue() == 8) {
auto *Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());
auto *One = ConstantInt::getSigned(II->getType(), 1);
auto *CGep =
ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One);
- II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType()));
+ replaceAllUsesWith(II, ConstantExpr::getPtrToInt(CGep, II->getType()),
+ FreshBBs, IsHugeFunc);
II->eraseFromParent();
return true;
}
@@ -2284,7 +2385,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
}
// From here on out we're working with named functions.
- if (!CI->getCalledFunction()) return false;
+ if (!CI->getCalledFunction())
+ return false;
// Lower all default uses of _chk calls. This is very similar
// to what InstCombineCalls does, but here we are only lowering calls
@@ -2293,7 +2395,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
FortifiedLibCallSimplifier Simplifier(TLInfo, true);
IRBuilder<> Builder(CI);
if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
- CI->replaceAllUsesWith(V);
+ replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
CI->eraseFromParent();
return true;
}
@@ -2331,7 +2433,11 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
/// @endcode
-bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
+bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
+ ModifyDT &ModifiedDT) {
+ if (!BB->getTerminator())
+ return false;
+
ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
if (!RetI)
return false;
@@ -2383,7 +2489,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
/// call.
const Function *F = BB->getParent();
- SmallVector<BasicBlock*, 4> TailCallBBs;
+ SmallVector<BasicBlock *, 4> TailCallBBs;
if (PN) {
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
// Look through bitcasts.
@@ -2397,7 +2503,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
TailCallBBs.push_back(PredBB);
}
} else {
- SmallPtrSet<BasicBlock*, 4> VisitedBBs;
+ SmallPtrSet<BasicBlock *, 4> VisitedBBs;
for (BasicBlock *Pred : predecessors(BB)) {
if (!VisitedBBs.insert(Pred).second)
continue;
@@ -2425,7 +2531,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
BFI->setBlockFreq(
BB,
(BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)).getFrequency());
- ModifiedDT = Changed = true;
+ ModifiedDT = ModifyDT::ModifyBBDT;
+ Changed = true;
++NumRetsDup;
}
@@ -2451,16 +2558,15 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
bool InBounds = true;
enum FieldName {
- NoField = 0x00,
- BaseRegField = 0x01,
- BaseGVField = 0x02,
- BaseOffsField = 0x04,
+ NoField = 0x00,
+ BaseRegField = 0x01,
+ BaseGVField = 0x02,
+ BaseOffsField = 0x04,
ScaledRegField = 0x08,
- ScaleField = 0x10,
+ ScaleField = 0x10,
MultipleFields = 0xff
};
-
ExtAddrMode() = default;
void print(raw_ostream &OS) const;
@@ -2472,8 +2578,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
if (BaseReg && other.BaseReg &&
BaseReg->getType() != other.BaseReg->getType())
return MultipleFields;
- if (BaseGV && other.BaseGV &&
- BaseGV->getType() != other.BaseGV->getType())
+ if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
return MultipleFields;
if (ScaledReg && other.ScaledReg &&
ScaledReg->getType() != other.ScaledReg->getType())
@@ -2498,7 +2603,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
if (Scale && other.Scale && Scale != other.Scale)
Result |= ScaleField;
- if (countPopulation(Result) > 1)
+ if (llvm::popcount(Result) > 1)
return MultipleFields;
else
return static_cast<FieldName>(Result);
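+    // E.g. if only BaseOffs differs, Result has just the BaseOffsField bit
+    // set and that single field is returned; two or more differing fields
+    // collapse to MultipleFields.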
@@ -2582,27 +2687,23 @@ void ExtAddrMode::print(raw_ostream &OS) const {
if (InBounds)
OS << "inbounds ";
if (BaseGV) {
- OS << (NeedPlus ? " + " : "")
- << "GV:";
+ OS << (NeedPlus ? " + " : "") << "GV:";
BaseGV->printAsOperand(OS, /*PrintType=*/false);
NeedPlus = true;
}
if (BaseOffs) {
- OS << (NeedPlus ? " + " : "")
- << BaseOffs;
+ OS << (NeedPlus ? " + " : "") << BaseOffs;
NeedPlus = true;
}
if (BaseReg) {
- OS << (NeedPlus ? " + " : "")
- << "Base:";
+ OS << (NeedPlus ? " + " : "") << "Base:";
BaseReg->printAsOperand(OS, /*PrintType=*/false);
NeedPlus = true;
}
if (Scale) {
- OS << (NeedPlus ? " + " : "")
- << Scale << "*";
+ OS << (NeedPlus ? " + " : "") << Scale << "*";
ScaledReg->printAsOperand(OS, /*PrintType=*/false);
}
@@ -3034,7 +3135,8 @@ private:
/// The ordered list of actions made so far.
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
- using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
+ using CommitPt =
+ SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
SetOfInstrs &RemovedInsts;
};
@@ -3065,24 +3167,23 @@ void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
}
-Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
- Type *Ty) {
+Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
Value *Val = Ptr->getBuiltValue();
Actions.push_back(std::move(Ptr));
return Val;
}
-Value *TypePromotionTransaction::createSExt(Instruction *Inst,
- Value *Opnd, Type *Ty) {
+Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
+ Type *Ty) {
std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
Value *Val = Ptr->getBuiltValue();
Actions.push_back(std::move(Ptr));
return Val;
}
-Value *TypePromotionTransaction::createZExt(Instruction *Inst,
- Value *Opnd, Type *Ty) {
+Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
+ Type *Ty) {
std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
Value *Val = Ptr->getBuiltValue();
Actions.push_back(std::move(Ptr));
@@ -3123,7 +3224,7 @@ namespace {
///
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
- SmallVectorImpl<Instruction*> &AddrModeInsts;
+ SmallVectorImpl<Instruction *> &AddrModeInsts;
const TargetLowering &TLI;
const TargetRegisterInfo &TRI;
const DataLayout &DL;
@@ -3165,8 +3266,8 @@ class AddressingModeMatcher {
AddressingModeMatcher(
SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, const LoopInfo &LI,
- const std::function<const DominatorTree &()> getDTFn,
- Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM,
+ const std::function<const DominatorTree &()> getDTFn, Type *AT,
+ unsigned AS, Instruction *MI, ExtAddrMode &AM,
const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
@@ -3198,11 +3299,13 @@ public:
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(
- AddrModeInsts, TLI, TRI, LI, getDTFn, AccessTy, AS, MemoryInst, Result,
- InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
- BFI).matchAddr(V, 0);
- (void)Success; assert(Success && "Couldn't select *anything*?");
+ bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
+ AccessTy, AS, MemoryInst, Result,
+ InsertedInsts, PromotedInsts, TPT,
+ LargeOffsetGEP, OptSize, PSI, BFI)
+ .matchAddr(V, 0);
+ (void)Success;
+ assert(Success && "Couldn't select *anything*?");
return Result;
}
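+  // Note: the assert above holds because matchAddr() can always fall back to
+  // treating V as a plain base register, so *some* addressing mode is found.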
@@ -3223,15 +3326,15 @@ class PhiNodeSet;
/// An iterator for PhiNodeSet.
class PhiNodeSetIterator {
- PhiNodeSet * const Set;
+ PhiNodeSet *const Set;
size_t CurrentIndex = 0;
public:
/// The constructor. Start should point to either a valid element, or be equal
/// to the size of the underlying SmallVector of the PhiNodeSet.
- PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
- PHINode * operator*() const;
- PhiNodeSetIterator& operator++();
+ PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
+ PHINode *operator*() const;
+ PhiNodeSetIterator &operator++();
bool operator==(const PhiNodeSetIterator &RHS) const;
bool operator!=(const PhiNodeSetIterator &RHS) const;
};
@@ -3250,7 +3353,7 @@ class PhiNodeSet {
friend class PhiNodeSetIterator;
using MapType = SmallDenseMap<PHINode *, size_t, 32>;
- using iterator = PhiNodeSetIterator;
+ using iterator = PhiNodeSetIterator;
/// Keeps the elements in the order of their insertion in the underlying
/// vector. To achieve constant time removal, it never deletes any element.
@@ -3309,14 +3412,10 @@ public:
iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
/// Returns the number of elements in the collection.
- size_t size() const {
- return NodeMap.size();
- }
+ size_t size() const { return NodeMap.size(); }
/// \returns 1 if the given element is in the collection, and 0 if otherwise.
- size_t count(PHINode *Ptr) const {
- return NodeMap.count(Ptr);
- }
+ size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
private:
/// Updates the CurrentIndex so that it will point to a valid element.
@@ -3339,13 +3438,13 @@ private:
PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
: Set(Set), CurrentIndex(Start) {}
-PHINode * PhiNodeSetIterator::operator*() const {
+PHINode *PhiNodeSetIterator::operator*() const {
assert(CurrentIndex < Set->NodeList.size() &&
"PhiNodeSet access out of range");
return Set->NodeList[CurrentIndex];
}
-PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
+PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
assert(CurrentIndex < Set->NodeList.size() &&
"PhiNodeSet access out of range");
++CurrentIndex;
@@ -3374,8 +3473,7 @@ class SimplificationTracker {
SmallPtrSet<SelectInst *, 32> AllSelectNodes;
public:
- SimplificationTracker(const SimplifyQuery &sq)
- : SQ(sq) {}
+ SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
Value *Get(Value *V) {
do {
@@ -3410,12 +3508,10 @@ public:
return Get(Val);
}
- void Put(Value *From, Value *To) {
- Storage.insert({ From, To });
- }
+ void Put(Value *From, Value *To) { Storage.insert({From, To}); }
void ReplacePhi(PHINode *From, PHINode *To) {
- Value* OldReplacement = Get(From);
+ Value *OldReplacement = Get(From);
while (OldReplacement != From) {
From = To;
To = dyn_cast<PHINode>(OldReplacement);
@@ -3428,7 +3524,7 @@ public:
From->eraseFromParent();
}
- PhiNodeSet& newPhiNodes() { return AllPhiNodes; }
+ PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
@@ -3483,9 +3579,7 @@ public:
: SQ(_SQ), Original(OriginalValue) {}
/// Get the combined AddrMode
- const ExtAddrMode &getAddrMode() const {
- return AddrModes[0];
- }
+ const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
/// Add a new AddrMode if it's compatible with the AddrModes we already
/// have.
@@ -3506,7 +3600,7 @@ public:
// can do just by comparing against the first one given that we only care
// about the cumulative difference.
ExtAddrMode::FieldName ThisDifferentField =
- AddrModes[0].compare(NewAddrMode);
+ AddrModes[0].compare(NewAddrMode);
if (DifferentField == ExtAddrMode::NoField)
DifferentField = ThisDifferentField;
else if (DifferentField != ThisDifferentField)
@@ -3670,10 +3764,10 @@ private:
SmallSetVector<PHIPair, 8> &Matcher,
PhiNodeSet &PhiNodesToMatch) {
SmallVector<PHIPair, 8> WorkList;
- Matcher.insert({ PHI, Candidate });
+ Matcher.insert({PHI, Candidate});
SmallSet<PHINode *, 8> MatchedPHIs;
MatchedPHIs.insert(PHI);
- WorkList.push_back({ PHI, Candidate });
+ WorkList.push_back({PHI, Candidate});
SmallSet<PHIPair, 8> Visited;
while (!WorkList.empty()) {
auto Item = WorkList.pop_back_val();
@@ -3702,15 +3796,15 @@ private:
return false;
// If we already matched them then continue.
- if (Matcher.count({ FirstPhi, SecondPhi }))
+ if (Matcher.count({FirstPhi, SecondPhi}))
continue;
      // So the values are different and do not match. So we need them to
// match. (But we register no more than one match per PHI node, so that
// we won't later try to replace them twice.)
if (MatchedPHIs.insert(FirstPhi).second)
- Matcher.insert({ FirstPhi, SecondPhi });
+ Matcher.insert({FirstPhi, SecondPhi});
      // But we must check it.
- WorkList.push_back({ FirstPhi, SecondPhi });
+ WorkList.push_back({FirstPhi, SecondPhi});
}
}
return true;
@@ -3900,7 +3994,8 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
// to see if ScaleReg is actually X+C. If so, we can turn this into adding
// X*Scale + C*Scale to addr mode. If we found available IV increment, do not
// go any further: we can reuse it and cannot eliminate it.
- ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
+ ConstantInt *CI = nullptr;
+ Value *AddLHS = nullptr;
if (isa<Instruction>(ScaleReg) && // not a constant expr.
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
!isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
@@ -3921,26 +4016,26 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
// If this is an add recurrence with a constant step, return the increment
// instruction and the canonicalized step.
- auto GetConstantStep = [this](const Value * V)
- ->Optional<std::pair<Instruction *, APInt> > {
+ auto GetConstantStep =
+ [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
auto *PN = dyn_cast<PHINode>(V);
if (!PN)
- return None;
+ return std::nullopt;
auto IVInc = getIVIncrement(PN, &LI);
if (!IVInc)
- return None;
- // TODO: The result of the intrinsics above is two-compliment. However when
+ return std::nullopt;
+      // TODO: The result of the intrinsics above is two's complement. However when
// IV inc is expressed as add or sub, iv.next is potentially a poison value.
// If it has nuw or nsw flags, we need to make sure that these flags are
// inferrable at the point of memory instruction. Otherwise we are replacing
- // well-defined two-compliment computation with poison. Currently, to avoid
+      // well-defined two's complement computation with poison. Currently, to avoid
// potentially complex analysis needed to prove this, we reject such cases.
if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
- return None;
+ return std::nullopt;
if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
return std::make_pair(IVInc->first, ConstantStep->getValue());
- return None;
+ return std::nullopt;
};
// Try to account for the following special case:
@@ -4043,8 +4138,7 @@ class TypePromotionHelper {
/// Utility function to add a promoted instruction \p ExtOpnd to
/// \p PromotedInsts and record the type of extension we have seen.
static void addPromotedInst(InstrToOrigTy &PromotedInsts,
- Instruction *ExtOpnd,
- bool IsSExt) {
+ Instruction *ExtOpnd, bool IsSExt) {
ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
if (It != PromotedInsts.end()) {
@@ -4066,8 +4160,7 @@ class TypePromotionHelper {
/// cannot use the information we had on the original type.
/// BothExtension doesn't match any extension type.
static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
- Instruction *Opnd,
- bool IsSExt) {
+ Instruction *Opnd, bool IsSExt) {
ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
@@ -4431,7 +4524,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
// If yes, create a new one.
LLVM_DEBUG(dbgs() << "More operands to ext\n");
Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
- : TPT.createZExt(Ext, Opnd, Ext->getType());
+ : TPT.createZExt(Ext, Opnd, Ext->getType());
if (!isa<Instruction>(ValForExtOpnd)) {
TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
continue;
@@ -4496,7 +4589,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned Depth,
bool *MovedAway) {
// Avoid exponential behavior on extremely deep expression trees.
- if (Depth >= 5) return false;
+ if (Depth >= 5)
+ return false;
// By default, all matched instructions stay in place.
if (MovedAway)
@@ -4525,8 +4619,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
return matchAddr(AddrInst->getOperand(0), Depth);
return false;
case Instruction::AddrSpaceCast: {
- unsigned SrcAS
- = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
+ unsigned SrcAS =
+ AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
return matchAddr(AddrInst->getOperand(0), Depth);
@@ -4544,8 +4638,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
TPT.getRestorationPoint();
AddrMode.InBounds = false;
- if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
- matchAddr(AddrInst->getOperand(0), Depth+1))
+ if (matchAddr(AddrInst->getOperand(1), Depth + 1) &&
+ matchAddr(AddrInst->getOperand(0), Depth + 1))
return true;
// Restore the old addr mode info.
@@ -4554,8 +4648,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
TPT.rollback(LastKnownGood);
// Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
- matchAddr(AddrInst->getOperand(1), Depth+1))
+ if (matchAddr(AddrInst->getOperand(0), Depth + 1) &&
+ matchAddr(AddrInst->getOperand(1), Depth + 1))
return true;
// Otherwise we definitely can't merge the ADD in.
@@ -4564,9 +4658,9 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
TPT.rollback(LastKnownGood);
break;
}
- //case Instruction::Or:
- // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
- //break;
+ // case Instruction::Or:
+ // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+ // break;
case Instruction::Mul:
case Instruction::Shl: {
// Can only handle X*C and X << C.
@@ -4592,7 +4686,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx =
- cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+ cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
ConstantOffset += SL->getElementOffset(Idx);
} else {
TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType());
@@ -4600,7 +4694,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
// The optimisations below currently only work for fixed offsets.
if (TS.isScalable())
return false;
- int64_t TypeSize = TS.getFixedSize();
+ int64_t TypeSize = TS.getFixedValue();
if (ConstantInt *CI =
dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
const APInt &CVal = CI->getValue();
@@ -4627,7 +4721,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (ConstantOffset == 0 ||
TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
// Check to see if we can fold the base pointer in too.
- if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
+ if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
if (!cast<GEPOperator>(AddrInst)->isInBounds())
AddrMode.InBounds = false;
return true;
@@ -4667,7 +4761,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
AddrMode.InBounds = false;
// Match the base operand of the GEP.
- if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
+ if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
// If it couldn't be matched, just stuff the value in a register.
if (AddrMode.HasBaseReg) {
AddrMode = BackupAddrMode;
@@ -4927,14 +5021,15 @@ static bool FindAllMemoryUses(
if (CI->hasFnAttr(Attribute::Cold)) {
// If this is a cold call, we can sink the addressing calculation into
// the cold path. See optimizeCallInst
- bool OptForSize = OptSize ||
- llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+ bool OptForSize =
+ OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
if (!OptForSize)
continue;
}
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
- if (!IA) return true;
+ if (!IA)
+ return true;
// If this is a memory operand, we're cool, otherwise bail out.
if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
@@ -4954,14 +5049,16 @@ static bool FindAllMemoryUses(
/// folding it into. If so, there is no cost to include it in the addressing
/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
/// instruction already.
-bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
+ Value *KnownLive1,
Value *KnownLive2) {
// If Val is either of the known-live values, we know it is live!
if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
return true;
// All values other than instructions and arguments (e.g. constants) are live.
- if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+ if (!isa<Instruction>(Val) && !isa<Argument>(Val))
+ return true;
// If Val is a constant sized alloca in the entry block, it is live, this is
// true because it is just a reference to the stack/frame pointer, which is
@@ -4997,10 +5094,10 @@ bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
/// X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case. This would make Y die earlier.
-bool AddressingModeMatcher::
-isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
- ExtAddrMode &AMAfter) {
- if (IgnoreProfitability) return true;
+bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
+ Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
+ if (IgnoreProfitability)
+ return true;
// AMBefore is the addressing mode before this instruction was folded into it,
// and AMAfter is the addressing mode after the instruction was folded. Get
@@ -5030,10 +5127,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// for another (at worst.) In this context, folding an addressing mode into
// the use is just a particularly nice way of sinking it.
SmallVector<std::pair<Value *, Type *>, 16> MemoryUses;
- SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
- PSI, BFI))
- return false; // Has a non-memory, non-foldable use!
+ SmallPtrSet<Instruction *, 16> ConsideredInsts;
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, PSI,
+ BFI))
+ return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
// computation involving only operations that could theoretically be folded
@@ -5044,7 +5141,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// (i.e. cold call sites), this serves as a way to prevent excessive code
// growth since most architectures have some reasonable small and fast way to
// compute an effective address. (i.e LEA on x86)
- SmallVector<Instruction*, 32> MatchedAddrModeInsts;
+ SmallVector<Instruction *, 32> MatchedAddrModeInsts;
for (const std::pair<Value *, Type *> &Pair : MemoryUses) {
Value *Address = Pair.first;
Type *AddressAccessTy = Pair.second;
@@ -5064,7 +5161,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
LargeOffsetGEP, OptSize, PSI, BFI);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.matchAddr(Address, 0);
- (void)Success; assert(Success && "Couldn't select *anything*?");
+ (void)Success;
+ assert(Success && "Couldn't select *anything*?");
// The match was to check the profitability, the changes made are not
// part of the original matcher. Therefore, they should be dropped
@@ -5114,15 +5212,15 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Try to collapse single-value PHI nodes. This is necessary to undo
// unprofitable PRE transformations.
- SmallVector<Value*, 8> worklist;
- SmallPtrSet<Value*, 16> Visited;
+ SmallVector<Value *, 8> worklist;
+ SmallPtrSet<Value *, 16> Visited;
worklist.push_back(Addr);
// Use a worklist to iteratively look through PHI and select nodes, and
// ensure that the addressing mode obtained from the non-PHI/select roots of
// the graph are compatible.
bool PhiOrSelectSeen = false;
- SmallVector<Instruction*, 16> AddrModeInsts;
+ SmallVector<Instruction *, 16> AddrModeInsts;
const SimplifyQuery SQ(*DL, TLInfo);
AddressingModeCombiner AddrModes(SQ, Addr);
TypePromotionTransaction TPT(RemovedInsts);
@@ -5202,12 +5300,12 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
ExtAddrMode AddrMode = AddrModes.getAddrMode();
// If all the instructions matched are already in this BB, don't do anything.
- // If we saw a Phi node then it is not local definitely, and if we saw a select
- // then we want to push the address calculation past it even if it's already
- // in this BB.
+ // If we saw a Phi node then it is definitely not local, and if we saw a
+ // select then we want to push the address calculation past it even if it's
+ // already in this BB.
if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
return IsNonLocalValue(V, MemoryInst->getParent());
- })) {
+ })) {
LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
<< "\n");
return Modified;
@@ -5226,7 +5324,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
- Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+ Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
if (SunkAddr) {
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
@@ -5306,8 +5404,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
}
}
- if (!ResultPtr &&
- !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
+ if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
+ !AddrMode.BaseOffs) {
SunkAddr = Constant::getNullValue(Addr->getType());
} else if (!ResultPtr) {
return Modified;
@@ -5336,7 +5434,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// done.
} else {
assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
- cast<IntegerType>(V->getType())->getBitWidth() &&
+ cast<IntegerType>(V->getType())->getBitWidth() &&
"We can't transform if ScaledReg is too narrow");
V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
}
@@ -5582,11 +5680,10 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
// If the final index isn't a vector, emit a scalar GEP containing all ops
// and a vector GEP with all zeroes final index.
if (!Ops[FinalIndex]->getType()->isVectorTy()) {
- NewAddr = Builder.CreateGEP(SourceTy, Ops[0],
- makeArrayRef(Ops).drop_front());
+ NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
auto *SecondTy = GetElementPtrInst::getIndexedType(
- SourceTy, makeArrayRef(Ops).drop_front());
+ SourceTy, ArrayRef(Ops).drop_front());
NewAddr =
Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
} else {
@@ -5597,10 +5694,9 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
if (Ops.size() != 2) {
// Replace the last index with 0.
Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
- Base = Builder.CreateGEP(SourceTy, Base,
- makeArrayRef(Ops).drop_front());
+ Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
SourceTy = GetElementPtrInst::getIndexedType(
- SourceTy, makeArrayRef(Ops).drop_front());
+ SourceTy, ArrayRef(Ops).drop_front());
}
// Now create the GEP with scalar pointer and vector index.
@@ -5836,7 +5932,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
bool inserted = false;
for (auto &Pt : CurPts) {
if (getDT(F).dominates(Inst, Pt)) {
- Pt->replaceAllUsesWith(Inst);
+ replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
RemovedInsts.insert(Pt);
Pt->removeFromParent();
Pt = Inst;
@@ -5848,7 +5944,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
// Give up if we need to merge in a common dominator as the
// experiments show it is not profitable.
continue;
- Inst->replaceAllUsesWith(Pt);
+ replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
RemovedInsts.insert(Inst);
Inst->removeFromParent();
inserted = true;
@@ -6000,7 +6096,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
if (GEP->getType() != I8PtrTy)
NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
}
- GEP->replaceAllUsesWith(NewGEP);
+ replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
LargeOffsetGEPID.erase(GEP);
LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
GEP->eraseFromParent();
@@ -6026,6 +6122,7 @@ bool CodeGenPrepare::optimizePhiType(
SmallVector<Instruction *, 4> Worklist;
Worklist.push_back(cast<Instruction>(I));
SmallPtrSet<PHINode *, 4> PhiNodes;
+ SmallPtrSet<ConstantData *, 4> Constants;
PhiNodes.insert(I);
Visited.insert(I);
SmallPtrSet<Instruction *, 4> Defs;
@@ -6068,9 +6165,10 @@ bool CodeGenPrepare::optimizePhiType(
AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
!isa<ExtractElementInst>(OpBC->getOperand(0));
}
- } else if (!isa<UndefValue>(V)) {
+ } else if (auto *OpC = dyn_cast<ConstantData>(V))
+ Constants.insert(OpC);
+ else
return false;
- }
}
}
@@ -6102,7 +6200,8 @@ bool CodeGenPrepare::optimizePhiType(
}
}
- if (!ConvertTy || !AnyAnchored || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
+ if (!ConvertTy || !AnyAnchored ||
+ !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
return false;
LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
@@ -6111,7 +6210,8 @@ bool CodeGenPrepare::optimizePhiType(
// Create all the new phi nodes of the new type, and bitcast any loads to the
// correct type.
ValueToValueMap ValMap;
- ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy);
+ for (ConstantData *C : Constants)
+ ValMap[C] = ConstantExpr::getCast(Instruction::BitCast, C, ConvertTy);
for (Instruction *D : Defs) {
if (isa<BitCastInst>(D)) {
ValMap[D] = D->getOperand(0);
@@ -6136,7 +6236,7 @@ bool CodeGenPrepare::optimizePhiType(
for (Instruction *U : Uses) {
if (isa<BitCastInst>(U)) {
DeletedInstrs.insert(U);
- U->replaceAllUsesWith(ValMap[U->getOperand(0)]);
+ replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
} else {
U->setOperand(0,
new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
@@ -6164,7 +6264,7 @@ bool CodeGenPrepare::optimizePhiTypes(Function &F) {
// Remove any old phi's that have been converted.
for (auto *I : DeletedInstrs) {
- I->replaceAllUsesWith(PoisonValue::get(I->getType()));
+ replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
I->eraseFromParent();
}
@@ -6367,7 +6467,8 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
// Figure out which BB this ext is used in.
BasicBlock *UserBB = UI->getParent();
- if (UserBB == DefBB) continue;
+ if (UserBB == DefBB)
+ continue;
DefIsLiveOut = true;
break;
}
@@ -6378,7 +6479,8 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
for (User *U : Src->users()) {
Instruction *UI = cast<Instruction>(U);
BasicBlock *UserBB = UI->getParent();
- if (UserBB == DefBB) continue;
+ if (UserBB == DefBB)
+ continue;
// Be conservative. We don't want this xform to end up introducing
// reloads just before load / store instructions.
if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
@@ -6386,7 +6488,7 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
}
// InsertedTruncs - Only insert one trunc in each block once.
- DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
+ DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
bool MadeChange = false;
for (Use &U : Src->uses()) {
@@ -6394,7 +6496,8 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
// Figure out which BB this ext is used in.
BasicBlock *UserBB = User->getParent();
- if (UserBB == DefBB) continue;
+ if (UserBB == DefBB)
+ continue;
// Both src and def are live in this block. Rewrite the use.
Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
@@ -6576,7 +6679,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
// Replace all uses of load with new and (except for the use of load in the
// new and itself).
- Load->replaceAllUsesWith(NewAnd);
+ replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
NewAnd->setOperand(0, Load);
// Remove any and instructions that are now redundant.
@@ -6584,7 +6687,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
// Check that the and mask is the same as the one we decided to put on the
// new and.
if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
- And->replaceAllUsesWith(NewAnd);
+ replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
if (&*CurInstIterator == And)
CurInstIterator = std::next(And->getIterator());
And->eraseFromParent();
@@ -6602,8 +6705,7 @@ static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
// If it's safe to speculatively execute, then it should not have side
// effects; therefore, it's safe to sink and possibly *not* execute.
return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
- TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >=
- TargetTransformInfo::TCC_Expensive;
+ TTI->isExpensiveToSpeculativelyExecute(I);
}
/// Returns true if a SelectInst should be turned into an explicit branch.
@@ -6620,7 +6722,7 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
// If metadata tells us that the select condition is obviously predictable,
// then we want to replace the select with a branch.
uint64_t TrueWeight, FalseWeight;
- if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
uint64_t Max = std::max(TrueWeight, FalseWeight);
uint64_t Sum = TrueWeight + FalseWeight;
if (Sum != 0) {
@@ -6651,9 +6753,9 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
/// false value of \p SI. If the true/false value of \p SI is defined by any
/// select instructions in \p Selects, look through the defining select
/// instruction until the true/false value is not defined in \p Selects.
-static Value *getTrueOrFalseValue(
- SelectInst *SI, bool isTrue,
- const SmallPtrSet<const Instruction *, 2> &Selects) {
+static Value *
+getTrueOrFalseValue(SelectInst *SI, bool isTrue,
+ const SmallPtrSet<const Instruction *, 2> &Selects) {
Value *V = nullptr;
for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
@@ -6695,7 +6797,7 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
- Shift->replaceAllUsesWith(NewSel);
+ replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
Shift->eraseFromParent();
return true;
}
@@ -6727,10 +6829,10 @@ bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
IRBuilder<> Builder(Fsh);
Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
- Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, TVal });
- Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, FVal });
+ Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
+ Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
- Fsh->replaceAllUsesWith(NewSel);
+ replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
Fsh->eraseFromParent();
return true;
}
@@ -6741,6 +6843,10 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
if (DisableSelectToBranch)
return false;
+ // If the SelectOptimize pass is enabled, selects have already been optimized.
+ if (!getCGPassBuilderOption().DisableSelectOptimize)
+ return false;
+
// Find all consecutive select instructions that share the same condition.
SmallVector<SelectInst *, 2> ASI;
ASI.push_back(SI);
@@ -6813,6 +6919,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
BasicBlock *StartBlock = SI->getParent();
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+ if (IsHugeFunc)
+ FreshBBs.insert(EndBlock);
BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
// Delete the unconditional branch that was just created by the split.
@@ -6833,6 +6941,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
EndBlock->getParent(), EndBlock);
TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ if (IsHugeFunc)
+ FreshBBs.insert(TrueBlock);
TrueBranch->setDebugLoc(SI->getDebugLoc());
}
auto *TrueInst = cast<Instruction>(SI->getTrueValue());
@@ -6842,6 +6952,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
if (FalseBlock == nullptr) {
FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
EndBlock->getParent(), EndBlock);
+ if (IsHugeFunc)
+ FreshBBs.insert(FalseBlock);
FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
FalseBranch->setDebugLoc(SI->getDebugLoc());
}
@@ -6858,6 +6970,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
EndBlock->getParent(), EndBlock);
+ if (IsHugeFunc)
+ FreshBBs.insert(FalseBlock);
auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
FalseBranch->setDebugLoc(SI->getDebugLoc());
}
@@ -6897,7 +7011,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
PN->setDebugLoc(SI->getDebugLoc());
- SI->replaceAllUsesWith(PN);
+ replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
SI->eraseFromParent();
INS.erase(SI);
++NumSelectsExpanded;
@@ -6935,9 +7049,10 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
- SVI->replaceAllUsesWith(BC2);
+ replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
RecursivelyDeleteTriviallyDeadInstructions(
- SVI, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); });
+ SVI, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
// Also hoist the bitcast up to its operand if it they are not in the same
// block.
@@ -6987,6 +7102,18 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
Instruction *NI = UI->clone();
+
+ if (IsHugeFunc) {
+ // We just cloned an instruction; the defs of its operands may now be
+ // sinkable into this BB, so put those defs' BBs into FreshBBs for
+ // re-optimization.
+ for (unsigned I = 0; I < NI->getNumOperands(); ++I) {
+ auto *OpDef = dyn_cast<Instruction>(NI->getOperand(I));
+ if (!OpDef)
+ continue;
+ FreshBBs.insert(OpDef->getParent());
+ }
+ }
+
NewInstructions[UI] = NI;
MaybeDead.insert(UI);
LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
@@ -7057,8 +7184,9 @@ bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
SI->setCondition(ExtInst);
for (auto Case : SI->cases()) {
const APInt &NarrowConst = Case.getCaseValue()->getValue();
- APInt WideConst = (ExtType == Instruction::ZExt) ?
- NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
+ APInt WideConst = (ExtType == Instruction::ZExt)
+ ? NarrowConst.zext(RegWidth)
+ : NarrowConst.sext(RegWidth);
Case.setValue(ConstantInt::get(Context, WideConst));
}
@@ -7255,11 +7383,11 @@ class VectorPromoteHelper {
// The scalar chain of computation has to pay for the transition
// scalar to vector.
// The vector chain has to account for the combining cost.
+ enum TargetTransformInfo::TargetCostKind CostKind =
+ TargetTransformInfo::TCK_RecipThroughput;
InstructionCost ScalarCost =
- TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
+ TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
InstructionCost VectorCost = StoreExtractCombineCost;
- enum TargetTransformInfo::TargetCostKind CostKind =
- TargetTransformInfo::TCK_RecipThroughput;
for (const auto &Inst : InstsToBePromoted) {
// Compute the cost.
// By construction, all instructions being promoted are arithmetic ones.
@@ -7268,17 +7396,16 @@ class VectorPromoteHelper {
Value *Arg0 = Inst->getOperand(0);
bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
isa<ConstantFP>(Arg0);
- TargetTransformInfo::OperandValueKind Arg0OVK =
- IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
- : TargetTransformInfo::OK_AnyValue;
- TargetTransformInfo::OperandValueKind Arg1OVK =
- !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
- : TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
+ if (IsArg0Constant)
+ Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
+ else
+ Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
+
ScalarCost += TTI.getArithmeticInstrCost(
- Inst->getOpcode(), Inst->getType(), CostKind, Arg0OVK, Arg1OVK);
+ Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
- CostKind,
- Arg0OVK, Arg1OVK);
+ CostKind, Arg0Info, Arg1Info);
}
LLVM_DEBUG(
dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
@@ -7662,9 +7789,8 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
// type, and the second operand is a constant.
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
gep_type_iterator I = gep_type_begin(*GEP);
- return GEP->getNumOperands() == 2 &&
- I.isSequential() &&
- isa<ConstantInt>(GEP->getOperand(1));
+ return GEP->getNumOperands() == 2 && I.isSequential() &&
+ isa<ConstantInt>(GEP->getOperand(1));
}
// Try unmerging GEPs to reduce liveness interference (register pressure) across
@@ -7737,8 +7863,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
// Check that GEPI is a cheap one.
if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
- TargetTransformInfo::TCK_SizeAndLatency)
- > TargetTransformInfo::TCC_Basic)
+ TargetTransformInfo::TCK_SizeAndLatency) >
+ TargetTransformInfo::TCC_Basic)
return false;
Value *GEPIOp = GEPI->getOperand(0);
// Check that GEPIOp is an instruction that's also defined in SrcBlock.
@@ -7749,21 +7875,22 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
return false;
// Check that GEP is used outside the block, meaning it's alive on the
// IndirectBr edge(s).
- if (find_if(GEPI->users(), [&](User *Usr) {
+ if (llvm::none_of(GEPI->users(), [&](User *Usr) {
if (auto *I = dyn_cast<Instruction>(Usr)) {
if (I->getParent() != SrcBlock) {
return true;
}
}
return false;
- }) == GEPI->users().end())
+ }))
return false;
// The second elements of the GEP chains to be unmerged.
std::vector<GetElementPtrInst *> UGEPIs;
// Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
// on IndirectBr edges.
for (User *Usr : GEPIOp->users()) {
- if (Usr == GEPI) continue;
+ if (Usr == GEPI)
+ continue;
// Check if Usr is an Instruction. If not, give up.
if (!isa<Instruction>(Usr))
return false;
@@ -7787,8 +7914,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
return false;
ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
- TargetTransformInfo::TCK_SizeAndLatency)
- > TargetTransformInfo::TCC_Basic)
+ TargetTransformInfo::TCK_SizeAndLatency) >
+ TargetTransformInfo::TCC_Basic)
return false;
UGEPIs.push_back(UGEPI);
}
@@ -7807,9 +7934,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
for (GetElementPtrInst *UGEPI : UGEPIs) {
UGEPI->setOperand(0, GEPI);
ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
- Constant *NewUGEPIIdx =
- ConstantInt::get(GEPIIdx->getType(),
- UGEPIIdx->getValue() - GEPIIdx->getValue());
+ Constant *NewUGEPIIdx = ConstantInt::get(
+ GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
UGEPI->setOperand(1, NewUGEPIIdx);
// If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
// inbounds to avoid UB.
@@ -7827,7 +7953,9 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
return true;
}
-static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) {
+static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
+ SmallSet<BasicBlock *, 32> &FreshBBs,
+ bool IsHugeFunc) {
// Try and convert
// %c = icmp ult %x, 8
// br %c, bla, blb
@@ -7868,7 +7996,7 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) {
ConstantInt::get(UI->getType(), 0));
LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
- Cmp->replaceAllUsesWith(NewCmp);
+ replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
return true;
}
if (Cmp->isEquality() &&
@@ -7881,14 +8009,14 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) {
ConstantInt::get(UI->getType(), 0));
LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
- Cmp->replaceAllUsesWith(NewCmp);
+ replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
return true;
}
}
return false;
}
-bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
+bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
if (InsertedInsts.count(I))
@@ -7901,7 +8029,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
// trivial PHI, go ahead and zap it here.
if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
LargeOffsetGEPMap.erase(P);
- P->replaceAllUsesWith(V);
+ replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
P->eraseFromParent();
++NumPHIsElim;
return true;
@@ -7922,6 +8050,11 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
return true;
+ if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) &&
+ TLI->optimizeExtendOrTruncateConversion(I,
+ LI->getLoopFor(I->getParent())))
+ return true;
+
if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
/// Sink a zext or sext into its user blocks if the target type doesn't
/// fit in one register
@@ -7930,6 +8063,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
+ if (TLI->optimizeExtendOrTruncateConversion(
+ I, LI->getLoopFor(I->getParent())))
+ return true;
+
bool MadeChange = optimizeExt(I);
return MadeChange | optimizeExtUses(I);
}
@@ -7959,15 +8096,14 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
}
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- unsigned AS = RMW->getPointerAddressSpace();
- return optimizeMemoryInst(I, RMW->getPointerOperand(),
- RMW->getType(), AS);
+ unsigned AS = RMW->getPointerAddressSpace();
+ return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
}
if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
- unsigned AS = CmpX->getPointerAddressSpace();
- return optimizeMemoryInst(I, CmpX->getPointerOperand(),
- CmpX->getCompareOperand()->getType(), AS);
+ unsigned AS = CmpX->getPointerAddressSpace();
+ return optimizeMemoryInst(I, CmpX->getPointerOperand(),
+ CmpX->getCompareOperand()->getType(), AS);
}
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
@@ -7991,7 +8127,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
GEPI->getName(), GEPI);
NC->setDebugLoc(GEPI->getDebugLoc());
- GEPI->replaceAllUsesWith(NC);
+ replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
GEPI->eraseFromParent();
++NumGEPsElim;
optimizeInst(NC, ModifiedDT);
@@ -8024,7 +8160,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
F->takeName(FI);
CmpI->setOperand(Const0 ? 1 : 0, F);
}
- FI->replaceAllUsesWith(CmpI);
+ replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
FI->eraseFromParent();
return true;
}
@@ -8051,7 +8187,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
case Instruction::ExtractElement:
return optimizeExtractElementInst(cast<ExtractElementInst>(I));
case Instruction::Br:
- return optimizeBranch(cast<BranchInst>(I), *TLI);
+ return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
}
return false;
@@ -8065,29 +8201,43 @@ bool CodeGenPrepare::makeBitReverse(Instruction &I) {
TLI->getValueType(*DL, I.getType(), true)))
return false;
- SmallVector<Instruction*, 4> Insts;
+ SmallVector<Instruction *, 4> Insts;
if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
return false;
Instruction *LastInst = Insts.back();
- I.replaceAllUsesWith(LastInst);
+ replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
RecursivelyDeleteTriviallyDeadInstructions(
- &I, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); });
+ &I, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
return true;
}
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
-bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
SunkAddrs.clear();
bool MadeChange = false;
- CurInstIterator = BB.begin();
- while (CurInstIterator != BB.end()) {
- MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
- if (ModifiedDT)
- return true;
- }
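+ // Re-scan the block while changes stay instruction-local (ModifyInstDT);
+ // a CFG-level change (ModifyBBDT) ends the loop instead.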
+ do {
+ CurInstIterator = BB.begin();
+ ModifiedDT = ModifyDT::NotModifyDT;
+ while (CurInstIterator != BB.end()) {
+ MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
+ if (ModifiedDT != ModifyDT::NotModifyDT) {
+ // For huge functions we tend to quickly go through the inner
+ // optimization opportunities in the BB, so we go back to the BB head
+ // to re-optimize each instruction instead of going back to the
+ // function head.
+ if (IsHugeFunc) {
+ DT.reset();
+ getDT(*BB.getParent());
+ break;
+ } else {
+ return true;
+ }
+ }
+ }
+ } while (ModifiedDT == ModifyDT::ModifyInstDT);
bool MadeBitReverse = true;
while (MadeBitReverse) {
@@ -8176,7 +8326,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
dbgs()
<< "Unable to find valid location for Debug Value, undefing:\n"
<< *DVI);
- DVI->setUndef();
+ DVI->setKillLocation();
break;
}
@@ -8247,7 +8397,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
///
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
-bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
+bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
return false;
@@ -8298,6 +8448,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
auto *TmpBB =
BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
BB.getParent(), BB.getNextNode());
+ if (IsHugeFunc)
+ FreshBBs.insert(TmpBB);
// Update original basic block by using the first condition directly by the
// branch instruction and removing the no longer needed and/or instruction.
@@ -8333,7 +8485,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
// Replace the old BB with the new BB.
TBB->replacePhiUsesWith(&BB, TmpBB);
- // Add another incoming edge form the new BB.
+ // Add another incoming edge from the new BB.
for (PHINode &PN : FBB->phis()) {
auto *Val = PN.getIncomingValueForBlock(&BB);
PN.addIncoming(Val, TmpBB);
@@ -8362,18 +8514,20 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
// Another choice is to assume TrueProb for BB1 equals to TrueProb for
// TmpBB, but the math is more complicated.
uint64_t TrueWeight, FalseWeight;
- if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
+ if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
uint64_t NewTrueWeight = TrueWeight;
uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
scaleWeights(NewTrueWeight, NewFalseWeight);
- Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
- .createBranchWeights(TrueWeight, FalseWeight));
+ Br1->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(Br1->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight));
NewTrueWeight = TrueWeight;
NewFalseWeight = 2 * FalseWeight;
scaleWeights(NewTrueWeight, NewFalseWeight);
- Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
- .createBranchWeights(TrueWeight, FalseWeight));
+ Br2->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(Br2->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight));
}
} else {
// Codegen X & Y as:
@@ -8395,22 +8549,24 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
// assumes that
// FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
uint64_t TrueWeight, FalseWeight;
- if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
+ if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
uint64_t NewFalseWeight = FalseWeight;
scaleWeights(NewTrueWeight, NewFalseWeight);
- Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
- .createBranchWeights(TrueWeight, FalseWeight));
+ Br1->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(Br1->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight));
NewTrueWeight = 2 * TrueWeight;
NewFalseWeight = FalseWeight;
scaleWeights(NewTrueWeight, NewFalseWeight);
- Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
- .createBranchWeights(TrueWeight, FalseWeight));
+ Br2->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(Br2->getContext())
+ .createBranchWeights(TrueWeight, FalseWeight));
}
}
- ModifiedDT = true;
+ ModifiedDT = ModifyDT::ModifyBBDT;
MadeChange = true;
LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index fd52191882cb..48cd8e998ec9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
+#include <optional>
using namespace llvm;
@@ -40,14 +41,15 @@ using namespace llvm;
return *NAME##View; \
}
+// Temporary macro for incremental transition to std::optional.
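+// For a hypothetical CGOPT_EXP(bool, Foo), this declares getFoo() plus
+// std::optional<bool> getExplicitFoo(), which is std::nullopt unless the
+// flag actually occurred on the command line.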
#define CGOPT_EXP(TY, NAME) \
CGOPT(TY, NAME) \
- Optional<TY> codegen::getExplicit##NAME() { \
+ std::optional<TY> codegen::getExplicit##NAME() { \
if (NAME##View->getNumOccurrences()) { \
TY res = *NAME##View; \
return res; \
} \
- return None; \
+ return std::nullopt; \
}
CGOPT(std::string, MArch)
@@ -357,7 +359,7 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
"relax-elf-relocations",
cl::desc(
"Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"),
- cl::init(false));
+ cl::init(true));
CGBINDOPT(RelaxELFRelocations);
static cl::opt<bool> DataSections(
@@ -590,8 +592,8 @@ std::string codegen::getFeaturesStr() {
if (getMCPU() == "native") {
StringMap<bool> HostFeatures;
if (sys::getHostCPUFeatures(HostFeatures))
- for (auto &F : HostFeatures)
- Features.AddFeature(F.first(), F.second);
+ for (const auto &[Feature, IsEnabled] : HostFeatures)
+ Features.AddFeature(Feature, IsEnabled);
}
for (auto const &MAttr : getMAttrs())
@@ -610,8 +612,8 @@ std::vector<std::string> codegen::getFeatureList() {
if (getMCPU() == "native") {
StringMap<bool> HostFeatures;
if (sys::getHostCPUFeatures(HostFeatures))
- for (auto &F : HostFeatures)
- Features.AddFeature(F.first(), F.second);
+ for (const auto &[Feature, IsEnabled] : HostFeatures)
+ Features.AddFeature(Feature, IsEnabled);
}
for (auto const &MAttr : getMAttrs())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
new file mode 100644
index 000000000000..9b1f7117fa57
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -0,0 +1,889 @@
+//===- ComplexDeinterleavingPass.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Identification:
+// This step is responsible for finding the patterns that can be lowered to
+// complex instructions, and building a graph to represent the complex
+// structures. Starting from the "Converging Shuffle" (a shuffle that
+// reinterleaves the complex components, with a mask of <0, 2, 1, 3>), the
+// operands are evaluated and identified as "Composite Nodes" (collections of
+// instructions that can potentially be lowered to a single complex
+// instruction). This is performed by checking the real and imaginary components
+// and tracking the data flow for each component while following the operand
+// pairs. Validation of each node is expected to happen upon creation, and any
+// validation errors should halt traversal and prevent further graph
+// construction.
+//
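+// As a rough sketch of the shape being matched (types and value names here
+// are illustrative only), for a <4 x float> holding two interleaved complex
+// values the graph is rooted at IR such as:
+//
+//   %real = shufflevector <4 x float> %a, <4 x float> poison, <0, 2>
+//   %imag = shufflevector <4 x float> %a, <4 x float> poison, <1, 3>
+//   ... complex arithmetic on %real and %imag ...
+//   %res  = shufflevector <2 x float> %re, <2 x float> %im, <0, 2, 1, 3>
+//
+// where %res is the converging shuffle that identification starts from.
+//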
+// Replacement:
+// This step traverses the graph built up by identification, delegating to the
+// target to validate and generate the correct intrinsics, and plumbs them
+// together connecting each end of the new intrinsics graph to the existing
+// use-def chain. This step is assumed to finish successfully, as all
+// information is expected to be correct by this point.
+//
+//
+// Internal data structure:
+// ComplexDeinterleavingGraph:
+// Keeps references to all the valid CompositeNodes formed as part of the
+// transformation, and every Instruction contained within said nodes. It also
+// holds onto a reference to the root Instruction, and the root node that should
+// replace it.
+//
+// ComplexDeinterleavingCompositeNode:
+// A CompositeNode represents a single transformation point; each node should
+// transform into a single complex instruction (ignoring vector splitting, which
+// would generate more instructions per node). They are identified in a
+// depth-first manner, traversing and identifying the operands of each
+// instruction in the order they appear in the IR.
+// Each node maintains a reference to its Real and Imaginary instructions,
+// as well as any additional instructions that make up the identified operation
+// (Internal instructions should only have uses within their containing node).
+// A Node also contains the rotation and operation type that it represents.
+// Operands contains pointers to other CompositeNodes, acting as the edges in
+// the graph. ReplacementValue is the transformed Value* that has been emitted
+// to the IR.
+//
+// Note: If the operation of a Node is Shuffle, only the Real, Imaginary, and
+// ReplacementValue fields of that Node are relevant, where the ReplacementValue
+// should be pre-populated.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ComplexDeinterleavingPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "complex-deinterleaving"
+
+STATISTIC(NumComplexTransformations, "Number of complex patterns transformed");
+
+static cl::opt<bool> ComplexDeinterleavingEnabled(
+ "enable-complex-deinterleaving",
+ cl::desc("Enable generation of complex instructions"), cl::init(true),
+ cl::Hidden);
+
+/// Checks the given mask, and determines whether said mask is interleaving.
+///
+/// To be interleaving, a mask must alternate between `i` and `i + (Length /
+/// 2)`, and must contain all numbers within the range of `[0..Length)` (e.g. a
+/// 4x vector interleaving mask would be <0, 2, 1, 3>).
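+/// An 8x vector interleaving mask would likewise be <0, 4, 1, 5, 2, 6, 3, 7>.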
+static bool isInterleavingMask(ArrayRef<int> Mask);
+
+/// Checks the given mask, and determines whether said mask is deinterleaving.
+///
+/// To be deinterleaving, a mask must increment in steps of 2, and either start
+/// with 0 or 1.
+/// (e.g. an 8x vector deinterleaving mask would be either <0, 2, 4, 6> or
+/// <1, 3, 5, 7>).
+static bool isDeinterleavingMask(ArrayRef<int> Mask);
+
+namespace {
+
+class ComplexDeinterleavingLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ ComplexDeinterleavingLegacyPass(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM) {
+ initializeComplexDeinterleavingLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Complex Deinterleaving Pass";
+ }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+private:
+ const TargetMachine *TM;
+};
+
+class ComplexDeinterleavingGraph;
+struct ComplexDeinterleavingCompositeNode {
+
+ ComplexDeinterleavingCompositeNode(ComplexDeinterleavingOperation Op,
+ Instruction *R, Instruction *I)
+ : Operation(Op), Real(R), Imag(I) {}
+
+private:
+ friend class ComplexDeinterleavingGraph;
+ using NodePtr = std::shared_ptr<ComplexDeinterleavingCompositeNode>;
+ using RawNodePtr = ComplexDeinterleavingCompositeNode *;
+
+public:
+ ComplexDeinterleavingOperation Operation;
+ Instruction *Real;
+ Instruction *Imag;
+
+ // Instructions that should only exist within this node; there should be no
+ // users of these instructions outside the node. An example of these would be
+ // the multiply instructions of a partial multiply operation.
+ SmallVector<Instruction *> InternalInstructions;
+ ComplexDeinterleavingRotation Rotation;
+ SmallVector<RawNodePtr> Operands;
+ Value *ReplacementNode = nullptr;
+
+ void addInstruction(Instruction *I) { InternalInstructions.push_back(I); }
+ void addOperand(NodePtr Node) { Operands.push_back(Node.get()); }
+
+ bool hasAllInternalUses(SmallPtrSet<Instruction *, 16> &AllInstructions);
+
+ void dump() { dump(dbgs()); }
+ void dump(raw_ostream &OS) {
+ auto PrintValue = [&](Value *V) {
+ if (V) {
+ OS << "\"";
+ V->print(OS, true);
+ OS << "\"\n";
+ } else
+ OS << "nullptr\n";
+ };
+ auto PrintNodeRef = [&](RawNodePtr Ptr) {
+ if (Ptr)
+ OS << Ptr << "\n";
+ else
+ OS << "nullptr\n";
+ };
+
+ OS << "- CompositeNode: " << this << "\n";
+ OS << " Real: ";
+ PrintValue(Real);
+ OS << " Imag: ";
+ PrintValue(Imag);
+ OS << " ReplacementNode: ";
+ PrintValue(ReplacementNode);
+ OS << " Operation: " << (int)Operation << "\n";
+ OS << " Rotation: " << ((int)Rotation * 90) << "\n";
+ OS << " Operands: \n";
+ for (const auto &Op : Operands) {
+ OS << " - ";
+ PrintNodeRef(Op);
+ }
+ OS << " InternalInstructions:\n";
+ for (const auto &I : InternalInstructions) {
+ OS << " - \"";
+ I->print(OS, true);
+ OS << "\"\n";
+ }
+ }
+};
+
+class ComplexDeinterleavingGraph {
+public:
+ using NodePtr = ComplexDeinterleavingCompositeNode::NodePtr;
+ using RawNodePtr = ComplexDeinterleavingCompositeNode::RawNodePtr;
+ explicit ComplexDeinterleavingGraph(const TargetLowering *tl) : TL(tl) {}
+
+private:
+ const TargetLowering *TL;
+ Instruction *RootValue;
+ NodePtr RootNode;
+ SmallVector<NodePtr> CompositeNodes;
+ SmallPtrSet<Instruction *, 16> AllInstructions;
+
+ NodePtr prepareCompositeNode(ComplexDeinterleavingOperation Operation,
+ Instruction *R, Instruction *I) {
+ return std::make_shared<ComplexDeinterleavingCompositeNode>(Operation, R,
+ I);
+ }
+
+ NodePtr submitCompositeNode(NodePtr Node) {
+ CompositeNodes.push_back(Node);
+ AllInstructions.insert(Node->Real);
+ AllInstructions.insert(Node->Imag);
+ for (auto *I : Node->InternalInstructions)
+ AllInstructions.insert(I);
+ return Node;
+ }
+
+ NodePtr getContainingComposite(Value *R, Value *I) {
+ for (const auto &CN : CompositeNodes) {
+ if (CN->Real == R && CN->Imag == I)
+ return CN;
+ }
+ return nullptr;
+ }
+
+ /// Identifies a complex partial multiply pattern and its rotation, based on
+ /// the following patterns
+ ///
+ /// 0: r: cr + ar * br
+ /// i: ci + ar * bi
+ /// 90: r: cr - ai * bi
+ /// i: ci + ai * br
+ /// 180: r: cr - ar * br
+ /// i: ci - ar * bi
+ /// 270: r: cr + ai * bi
+ /// i: ci - ai * br
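+ ///
+ /// Each pattern is one half of a complex multiply-accumulate: chaining the
+ /// rotation-0 partial with the rotation-90 partial yields c + a * b, which
+ /// is how e.g. AArch64's FCMLA instruction pairs are emitted.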
+ NodePtr identifyPartialMul(Instruction *Real, Instruction *Imag);
+
+ /// Identify the other branch of a Partial Mul, taking the CommonOperandI that
+ /// is partially known from identifyPartialMul, filling in the other half of
+ /// the complex pair.
+ NodePtr identifyNodeWithImplicitAdd(
+ Instruction *I, Instruction *J,
+ std::pair<Instruction *, Instruction *> &CommonOperandI);
+
+ /// Identifies a complex add pattern and its rotation, based on the following
+ /// patterns.
+ ///
+ /// 90: r: ar - bi
+ /// i: ai + br
+ /// 270: r: ar + bi
+ /// i: ai - br
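+ ///
+ /// In complex terms, 90 computes a + i*b and 270 computes a - i*b; these
+ /// are the two rotations provided by e.g. AArch64's FCADD.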
+ NodePtr identifyAdd(Instruction *Real, Instruction *Imag);
+
+ NodePtr identifyNode(Instruction *I, Instruction *J);
+
+ Value *replaceNode(RawNodePtr Node);
+
+public:
+ void dump() { dump(dbgs()); }
+ void dump(raw_ostream &OS) {
+ for (const auto &Node : CompositeNodes)
+ Node->dump(OS);
+ }
+
+ /// Returns false if the deinterleaving operation should be cancelled for the
+ /// current graph.
+ bool identifyNodes(Instruction *RootI);
+
+ /// Perform the actual replacement of the underlying instruction graph.
+ void replaceNodes();
+};
+
+class ComplexDeinterleaving {
+public:
+ ComplexDeinterleaving(const TargetLowering *tl, const TargetLibraryInfo *tli)
+ : TL(tl), TLI(tli) {}
+ bool runOnFunction(Function &F);
+
+private:
+ bool evaluateBasicBlock(BasicBlock *B);
+
+ const TargetLowering *TL = nullptr;
+ const TargetLibraryInfo *TLI = nullptr;
+};
+
+} // namespace
+
+char ComplexDeinterleavingLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ComplexDeinterleavingLegacyPass, DEBUG_TYPE,
+ "Complex Deinterleaving", false, false)
+INITIALIZE_PASS_END(ComplexDeinterleavingLegacyPass, DEBUG_TYPE,
+ "Complex Deinterleaving", false, false)
+
+PreservedAnalyses ComplexDeinterleavingPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+ auto &TLI = AM.getResult<llvm::TargetLibraryAnalysis>(F);
+ if (!ComplexDeinterleaving(TL, &TLI).runOnFunction(F))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ return PA;
+}
+
+FunctionPass *llvm::createComplexDeinterleavingPass(const TargetMachine *TM) {
+ return new ComplexDeinterleavingLegacyPass(TM);
+}
+
+bool ComplexDeinterleavingLegacyPass::runOnFunction(Function &F) {
+ const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+ auto TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ return ComplexDeinterleaving(TL, &TLI).runOnFunction(F);
+}
+
+bool ComplexDeinterleaving::runOnFunction(Function &F) {
+ if (!ComplexDeinterleavingEnabled) {
+ LLVM_DEBUG(
+ dbgs() << "Complex deinterleaving has been explicitly disabled.\n");
+ return false;
+ }
+
+ if (!TL->isComplexDeinterleavingSupported()) {
+ LLVM_DEBUG(
+ dbgs() << "Complex deinterleaving has been disabled, target does "
+ "not support lowering of complex number operations.\n");
+ return false;
+ }
+
+ bool Changed = false;
+ for (auto &B : F)
+ Changed |= evaluateBasicBlock(&B);
+
+ return Changed;
+}
+
+static bool isInterleavingMask(ArrayRef<int> Mask) {
+ // If the size is not even, it's not an interleaving mask
+ if ((Mask.size() & 1))
+ return false;
+
+ int HalfNumElements = Mask.size() / 2;
+ for (int Idx = 0; Idx < HalfNumElements; ++Idx) {
+ int MaskIdx = Idx * 2;
+ if (Mask[MaskIdx] != Idx || Mask[MaskIdx + 1] != (Idx + HalfNumElements))
+ return false;
+ }
+
+ return true;
+}
+
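+// Note that this only validates the stride-2 step; the requirement that the
+// real mask start at 0 and the imaginary mask start at 1 is checked by the
+// caller (identifyNode).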
+static bool isDeinterleavingMask(ArrayRef<int> Mask) {
+ int Offset = Mask[0];
+ int HalfNumElements = Mask.size() / 2;
+
+ for (int Idx = 1; Idx < HalfNumElements; ++Idx) {
+ if (Mask[Idx] != (Idx * 2) + Offset)
+ return false;
+ }
+
+ return true;
+}
+
+bool ComplexDeinterleaving::evaluateBasicBlock(BasicBlock *B) {
+ bool Changed = false;
+
+ SmallVector<Instruction *> DeadInstrRoots;
+
+ for (auto &I : *B) {
+ auto *SVI = dyn_cast<ShuffleVectorInst>(&I);
+ if (!SVI)
+ continue;
+
+ // Look for a shufflevector that takes separate vectors of the real and
+ // imaginary components and recombines them into a single vector.
+ if (!isInterleavingMask(SVI->getShuffleMask()))
+ continue;
+
+ ComplexDeinterleavingGraph Graph(TL);
+ if (!Graph.identifyNodes(SVI))
+ continue;
+
+ Graph.replaceNodes();
+ DeadInstrRoots.push_back(SVI);
+ Changed = true;
+ }
+
+ for (const auto &I : DeadInstrRoots) {
+ if (!I || I->getParent() == nullptr)
+ continue;
+ llvm::RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ }
+
+ return Changed;
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd(
+ Instruction *Real, Instruction *Imag,
+ std::pair<Instruction *, Instruction *> &PartialMatch) {
+ LLVM_DEBUG(dbgs() << "identifyNodeWithImplicitAdd " << *Real << " / " << *Imag
+ << "\n");
+
+ if (!Real->hasOneUse() || !Imag->hasOneUse()) {
+ LLVM_DEBUG(dbgs() << " - Mul operand has multiple uses.\n");
+ return nullptr;
+ }
+
+ if (Real->getOpcode() != Instruction::FMul ||
+ Imag->getOpcode() != Instruction::FMul) {
+ LLVM_DEBUG(dbgs() << " - Real or imaginary instruction is not fmul\n");
+ return nullptr;
+ }
+
+ Instruction *R0 = dyn_cast<Instruction>(Real->getOperand(0));
+ Instruction *R1 = dyn_cast<Instruction>(Real->getOperand(1));
+ Instruction *I0 = dyn_cast<Instruction>(Imag->getOperand(0));
+ Instruction *I1 = dyn_cast<Instruction>(Imag->getOperand(1));
+ if (!R0 || !R1 || !I0 || !I1) {
+ LLVM_DEBUG(dbgs() << " - Mul operand not Instruction\n");
+ return nullptr;
+ }
+
+ // A +/+ has a rotation of 0. If any of the operands are fneg, we flip the
+ // rotations and use the operand.
+ unsigned Negs = 0;
+ SmallVector<Instruction *> FNegs;
+ if (R0->getOpcode() == Instruction::FNeg ||
+ R1->getOpcode() == Instruction::FNeg) {
+ Negs |= 1;
+ if (R0->getOpcode() == Instruction::FNeg) {
+ FNegs.push_back(R0);
+ R0 = dyn_cast<Instruction>(R0->getOperand(0));
+ } else {
+ FNegs.push_back(R1);
+ R1 = dyn_cast<Instruction>(R1->getOperand(0));
+ }
+ if (!R0 || !R1)
+ return nullptr;
+ }
+ if (I0->getOpcode() == Instruction::FNeg ||
+ I1->getOpcode() == Instruction::FNeg) {
+ Negs |= 2;
+ Negs ^= 1;
+ if (I0->getOpcode() == Instruction::FNeg) {
+ FNegs.push_back(I0);
+ I0 = dyn_cast<Instruction>(I0->getOperand(0));
+ } else {
+ FNegs.push_back(I1);
+ I1 = dyn_cast<Instruction>(I1->getOperand(0));
+ }
+ if (!I0 || !I1)
+ return nullptr;
+ }
+
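+ // At this point Negs encodes the rotation directly: 0b00 -> 0, 0b01 -> 90,
+ // 0b10 -> 180, 0b11 -> 270 (the "Negs ^= 1" above maps the both-negated
+ // case to 180 rather than 270).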
+ ComplexDeinterleavingRotation Rotation = (ComplexDeinterleavingRotation)Negs;
+
+ Instruction *CommonOperand;
+ Instruction *UncommonRealOp;
+ Instruction *UncommonImagOp;
+
+ if (R0 == I0 || R0 == I1) {
+ CommonOperand = R0;
+ UncommonRealOp = R1;
+ } else if (R1 == I0 || R1 == I1) {
+ CommonOperand = R1;
+ UncommonRealOp = R0;
+ } else {
+ LLVM_DEBUG(dbgs() << " - No equal operand\n");
+ return nullptr;
+ }
+
+ UncommonImagOp = (CommonOperand == I0) ? I1 : I0;
+ if (Rotation == ComplexDeinterleavingRotation::Rotation_90 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_270)
+ std::swap(UncommonRealOp, UncommonImagOp);
+
+ // Between identifyPartialMul and here we need to have found a complete valid
+ // pair from the CommonOperand of each part.
+ if (Rotation == ComplexDeinterleavingRotation::Rotation_0 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_180)
+ PartialMatch.first = CommonOperand;
+ else
+ PartialMatch.second = CommonOperand;
+
+ if (!PartialMatch.first || !PartialMatch.second) {
+ LLVM_DEBUG(dbgs() << " - Incomplete partial match\n");
+ return nullptr;
+ }
+
+ NodePtr CommonNode = identifyNode(PartialMatch.first, PartialMatch.second);
+ if (!CommonNode) {
+ LLVM_DEBUG(dbgs() << " - No CommonNode identified\n");
+ return nullptr;
+ }
+
+ NodePtr UncommonNode = identifyNode(UncommonRealOp, UncommonImagOp);
+ if (!UncommonNode) {
+ LLVM_DEBUG(dbgs() << " - No UncommonNode identified\n");
+ return nullptr;
+ }
+
+ NodePtr Node = prepareCompositeNode(
+ ComplexDeinterleavingOperation::CMulPartial, Real, Imag);
+ Node->Rotation = Rotation;
+ Node->addOperand(CommonNode);
+ Node->addOperand(UncommonNode);
+ Node->InternalInstructions.append(FNegs);
+ return submitCompositeNode(Node);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
+ Instruction *Imag) {
+ LLVM_DEBUG(dbgs() << "identifyPartialMul " << *Real << " / " << *Imag
+ << "\n");
+ // Determine rotation
+ ComplexDeinterleavingRotation Rotation;
+ if (Real->getOpcode() == Instruction::FAdd &&
+ Imag->getOpcode() == Instruction::FAdd)
+ Rotation = ComplexDeinterleavingRotation::Rotation_0;
+ else if (Real->getOpcode() == Instruction::FSub &&
+ Imag->getOpcode() == Instruction::FAdd)
+ Rotation = ComplexDeinterleavingRotation::Rotation_90;
+ else if (Real->getOpcode() == Instruction::FSub &&
+ Imag->getOpcode() == Instruction::FSub)
+ Rotation = ComplexDeinterleavingRotation::Rotation_180;
+ else if (Real->getOpcode() == Instruction::FAdd &&
+ Imag->getOpcode() == Instruction::FSub)
+ Rotation = ComplexDeinterleavingRotation::Rotation_270;
+ else {
+ LLVM_DEBUG(dbgs() << " - Unhandled rotation.\n");
+ return nullptr;
+ }
+
+ if (!Real->getFastMathFlags().allowContract() ||
+ !Imag->getFastMathFlags().allowContract()) {
+ LLVM_DEBUG(dbgs() << " - Contract is missing from the FastMath flags.\n");
+ return nullptr;
+ }
+
+ Value *CR = Real->getOperand(0);
+ Instruction *RealMulI = dyn_cast<Instruction>(Real->getOperand(1));
+ if (!RealMulI)
+ return nullptr;
+ Value *CI = Imag->getOperand(0);
+ Instruction *ImagMulI = dyn_cast<Instruction>(Imag->getOperand(1));
+ if (!ImagMulI)
+ return nullptr;
+
+ if (!RealMulI->hasOneUse() || !ImagMulI->hasOneUse()) {
+ LLVM_DEBUG(dbgs() << " - Mul instruction has multiple uses\n");
+ return nullptr;
+ }
+
+ Instruction *R0 = dyn_cast<Instruction>(RealMulI->getOperand(0));
+ Instruction *R1 = dyn_cast<Instruction>(RealMulI->getOperand(1));
+ Instruction *I0 = dyn_cast<Instruction>(ImagMulI->getOperand(0));
+ Instruction *I1 = dyn_cast<Instruction>(ImagMulI->getOperand(1));
+ if (!R0 || !R1 || !I0 || !I1) {
+ LLVM_DEBUG(dbgs() << " - Mul operand not Instruction\n");
+ return nullptr;
+ }
+
+ Instruction *CommonOperand;
+ Instruction *UncommonRealOp;
+ Instruction *UncommonImagOp;
+
+ if (R0 == I0 || R0 == I1) {
+ CommonOperand = R0;
+ UncommonRealOp = R1;
+ } else if (R1 == I0 || R1 == I1) {
+ CommonOperand = R1;
+ UncommonRealOp = R0;
+ } else {
+ LLVM_DEBUG(dbgs() << " - No equal operand\n");
+ return nullptr;
+ }
+
+ UncommonImagOp = (CommonOperand == I0) ? I1 : I0;
+ if (Rotation == ComplexDeinterleavingRotation::Rotation_90 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_270)
+ std::swap(UncommonRealOp, UncommonImagOp);
+
+ std::pair<Instruction *, Instruction *> PartialMatch(
+ (Rotation == ComplexDeinterleavingRotation::Rotation_0 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_180)
+ ? CommonOperand
+ : nullptr,
+ (Rotation == ComplexDeinterleavingRotation::Rotation_90 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_270)
+ ? CommonOperand
+ : nullptr);
+ NodePtr CNode = identifyNodeWithImplicitAdd(
+ cast<Instruction>(CR), cast<Instruction>(CI), PartialMatch);
+ if (!CNode) {
+ LLVM_DEBUG(dbgs() << " - No cnode identified\n");
+ return nullptr;
+ }
+
+ NodePtr UncommonRes = identifyNode(UncommonRealOp, UncommonImagOp);
+ if (!UncommonRes) {
+ LLVM_DEBUG(dbgs() << " - No UncommonRes identified\n");
+ return nullptr;
+ }
+
+ assert(PartialMatch.first && PartialMatch.second);
+ NodePtr CommonRes = identifyNode(PartialMatch.first, PartialMatch.second);
+ if (!CommonRes) {
+ LLVM_DEBUG(dbgs() << " - No CommonRes identified\n");
+ return nullptr;
+ }
+
+ NodePtr Node = prepareCompositeNode(
+ ComplexDeinterleavingOperation::CMulPartial, Real, Imag);
+ Node->addInstruction(RealMulI);
+ Node->addInstruction(ImagMulI);
+ Node->Rotation = Rotation;
+ Node->addOperand(CommonRes);
+ Node->addOperand(UncommonRes);
+ Node->addOperand(CNode);
+ return submitCompositeNode(Node);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyAdd(Instruction *Real, Instruction *Imag) {
+ LLVM_DEBUG(dbgs() << "identifyAdd " << *Real << " / " << *Imag << "\n");
+
+ // Determine rotation
+ ComplexDeinterleavingRotation Rotation;
+ if ((Real->getOpcode() == Instruction::FSub &&
+ Imag->getOpcode() == Instruction::FAdd) ||
+ (Real->getOpcode() == Instruction::Sub &&
+ Imag->getOpcode() == Instruction::Add))
+ Rotation = ComplexDeinterleavingRotation::Rotation_90;
+ else if ((Real->getOpcode() == Instruction::FAdd &&
+ Imag->getOpcode() == Instruction::FSub) ||
+ (Real->getOpcode() == Instruction::Add &&
+ Imag->getOpcode() == Instruction::Sub))
+ Rotation = ComplexDeinterleavingRotation::Rotation_270;
+ else {
+ LLVM_DEBUG(dbgs() << " - Unhandled case, rotation is not assigned.\n");
+ return nullptr;
+ }
+
+ auto *AR = dyn_cast<Instruction>(Real->getOperand(0));
+ auto *BI = dyn_cast<Instruction>(Real->getOperand(1));
+ auto *AI = dyn_cast<Instruction>(Imag->getOperand(0));
+ auto *BR = dyn_cast<Instruction>(Imag->getOperand(1));
+
+ if (!AR || !AI || !BR || !BI) {
+ LLVM_DEBUG(dbgs() << " - Not all operands are instructions.\n");
+ return nullptr;
+ }
+
+ NodePtr ResA = identifyNode(AR, AI);
+ if (!ResA) {
+ LLVM_DEBUG(dbgs() << " - AR/AI is not identified as a composite node.\n");
+ return nullptr;
+ }
+ NodePtr ResB = identifyNode(BR, BI);
+ if (!ResB) {
+ LLVM_DEBUG(dbgs() << " - BR/BI is not identified as a composite node.\n");
+ return nullptr;
+ }
+
+ NodePtr Node =
+ prepareCompositeNode(ComplexDeinterleavingOperation::CAdd, Real, Imag);
+ Node->Rotation = Rotation;
+ Node->addOperand(ResA);
+ Node->addOperand(ResB);
+ return submitCompositeNode(Node);
+}
+
+static bool isInstructionPairAdd(Instruction *A, Instruction *B) {
+ unsigned OpcA = A->getOpcode();
+ unsigned OpcB = B->getOpcode();
+
+ return (OpcA == Instruction::FSub && OpcB == Instruction::FAdd) ||
+ (OpcA == Instruction::FAdd && OpcB == Instruction::FSub) ||
+ (OpcA == Instruction::Sub && OpcB == Instruction::Add) ||
+ (OpcA == Instruction::Add && OpcB == Instruction::Sub);
+}
+
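+// Matches any binary operator whose two operands are both fmuls; whether the
+// outer opcodes form a valid fadd/fsub combination is decided later, when
+// identifyPartialMul derives the rotation.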
+static bool isInstructionPairMul(Instruction *A, Instruction *B) {
+ auto Pattern =
+ m_BinOp(m_FMul(m_Value(), m_Value()), m_FMul(m_Value(), m_Value()));
+
+ return match(A, Pattern) && match(B, Pattern);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyNode(Instruction *Real, Instruction *Imag) {
+ LLVM_DEBUG(dbgs() << "identifyNode on " << *Real << " / " << *Imag << "\n");
+ if (NodePtr CN = getContainingComposite(Real, Imag)) {
+ LLVM_DEBUG(dbgs() << " - Folding to existing node\n");
+ return CN;
+ }
+
+ auto *RealShuffle = dyn_cast<ShuffleVectorInst>(Real);
+ auto *ImagShuffle = dyn_cast<ShuffleVectorInst>(Imag);
+ if (RealShuffle && ImagShuffle) {
+ Value *RealOp1 = RealShuffle->getOperand(1);
+ if (!isa<UndefValue>(RealOp1) && !isa<ConstantAggregateZero>(RealOp1)) {
+ LLVM_DEBUG(dbgs() << " - RealOp1 is not undef or zero.\n");
+ return nullptr;
+ }
+ Value *ImagOp1 = ImagShuffle->getOperand(1);
+ if (!isa<UndefValue>(ImagOp1) && !isa<ConstantAggregateZero>(ImagOp1)) {
+ LLVM_DEBUG(dbgs() << " - ImagOp1 is not undef or zero.\n");
+ return nullptr;
+ }
+
+ Value *RealOp0 = RealShuffle->getOperand(0);
+ Value *ImagOp0 = ImagShuffle->getOperand(0);
+
+ if (RealOp0 != ImagOp0) {
+ LLVM_DEBUG(dbgs() << " - Shuffle operands are not equal.\n");
+ return nullptr;
+ }
+
+ ArrayRef<int> RealMask = RealShuffle->getShuffleMask();
+ ArrayRef<int> ImagMask = ImagShuffle->getShuffleMask();
+ if (!isDeinterleavingMask(RealMask) || !isDeinterleavingMask(ImagMask)) {
+ LLVM_DEBUG(dbgs() << " - Masks are not deinterleaving.\n");
+ return nullptr;
+ }
+
+ if (RealMask[0] != 0 || ImagMask[0] != 1) {
+ LLVM_DEBUG(dbgs() << " - Masks do not have the correct initial value.\n");
+ return nullptr;
+ }
+
+ // Type checking, the shuffle type should be a vector type of the same
+ // scalar type, but half the size
+ auto CheckType = [&](ShuffleVectorInst *Shuffle) {
+ Value *Op = Shuffle->getOperand(0);
+ auto *ShuffleTy = cast<FixedVectorType>(Shuffle->getType());
+ auto *OpTy = cast<FixedVectorType>(Op->getType());
+
+ if (OpTy->getScalarType() != ShuffleTy->getScalarType())
+ return false;
+ if ((ShuffleTy->getNumElements() * 2) != OpTy->getNumElements())
+ return false;
+
+ return true;
+ };
+
+ auto CheckDeinterleavingShuffle = [&](ShuffleVectorInst *Shuffle) -> bool {
+ if (!CheckType(Shuffle))
+ return false;
+
+ ArrayRef<int> Mask = Shuffle->getShuffleMask();
+ int Last = *Mask.rbegin();
+
+ Value *Op = Shuffle->getOperand(0);
+ auto *OpTy = cast<FixedVectorType>(Op->getType());
+ int NumElements = OpTy->getNumElements();
+
+ // Ensure that the deinterleaving shuffle only pulls from the first
+ // shuffle operand.
+ return Last < NumElements;
+ };
+
+ if (RealShuffle->getType() != ImagShuffle->getType()) {
+ LLVM_DEBUG(dbgs() << " - Shuffle types aren't equal.\n");
+ return nullptr;
+ }
+ if (!CheckDeinterleavingShuffle(RealShuffle)) {
+ LLVM_DEBUG(dbgs() << " - RealShuffle is invalid type.\n");
+ return nullptr;
+ }
+ if (!CheckDeinterleavingShuffle(ImagShuffle)) {
+ LLVM_DEBUG(dbgs() << " - ImagShuffle is invalid type.\n");
+ return nullptr;
+ }
+
+ NodePtr PlaceholderNode =
+ prepareCompositeNode(llvm::ComplexDeinterleavingOperation::Shuffle,
+ RealShuffle, ImagShuffle);
+ PlaceholderNode->ReplacementNode = RealShuffle->getOperand(0);
+ return submitCompositeNode(PlaceholderNode);
+ }
+ if (RealShuffle || ImagShuffle)
+ return nullptr;
+
+ auto *VTy = cast<FixedVectorType>(Real->getType());
+ auto *NewVTy =
+ FixedVectorType::get(VTy->getScalarType(), VTy->getNumElements() * 2);
+
+ if (TL->isComplexDeinterleavingOperationSupported(
+ ComplexDeinterleavingOperation::CMulPartial, NewVTy) &&
+ isInstructionPairMul(Real, Imag)) {
+ return identifyPartialMul(Real, Imag);
+ }
+
+ if (TL->isComplexDeinterleavingOperationSupported(
+ ComplexDeinterleavingOperation::CAdd, NewVTy) &&
+ isInstructionPairAdd(Real, Imag)) {
+ return identifyAdd(Real, Imag);
+ }
+
+ return nullptr;
+}
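
The shuffle handling above accepts only the classic deinterleave pair: both shuffles read the same double-width vector, RealMask starts at 0, ImagMask starts at 1, and every mask strides by two. isDeinterleavingMask itself is defined earlier in the pass; this standalone sketch only illustrates the mask shape it accepts (assumed illustration):

#include <cassert>
#include <vector>

// A deinterleaving mask steps by two from its first element, so
// <0,2,4,...> selects the real lanes and <1,3,5,...> the imaginary ones.
static bool looksDeinterleaving(const std::vector<int> &Mask) {
  for (size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I] != Mask[0] + 2 * static_cast<int>(I))
      return false;
  return true;
}

int main() {
  assert(looksDeinterleaving({0, 2, 4, 6}));  // RealMask, Mask[0] == 0
  assert(looksDeinterleaving({1, 3, 5, 7}));  // ImagMask, Mask[0] == 1
  assert(!looksDeinterleaving({0, 1, 2, 3})); // contiguous extract: rejected
  return 0;
}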
+
+bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) {
+ Instruction *Real;
+ Instruction *Imag;
+ if (!match(RootI, m_Shuffle(m_Instruction(Real), m_Instruction(Imag))))
+ return false;
+
+ RootValue = RootI;
+ AllInstructions.insert(RootI);
+ RootNode = identifyNode(Real, Imag);
+
+ LLVM_DEBUG({
+ Function *F = RootI->getFunction();
+ BasicBlock *B = RootI->getParent();
+ dbgs() << "Complex deinterleaving graph for " << F->getName()
+ << "::" << B->getName() << ".\n";
+ dump(dbgs());
+ dbgs() << "\n";
+ });
+
+ // Check all instructions have internal uses
+ for (const auto &Node : CompositeNodes) {
+ if (!Node->hasAllInternalUses(AllInstructions)) {
+ LLVM_DEBUG(dbgs() << " - Invalid internal uses\n");
+ return false;
+ }
+ }
+ return RootNode != nullptr;
+}
+
+Value *ComplexDeinterleavingGraph::replaceNode(
+ ComplexDeinterleavingGraph::RawNodePtr Node) {
+ if (Node->ReplacementNode)
+ return Node->ReplacementNode;
+
+ Value *Input0 = replaceNode(Node->Operands[0]);
+ Value *Input1 = replaceNode(Node->Operands[1]);
+ Value *Accumulator =
+ Node->Operands.size() > 2 ? replaceNode(Node->Operands[2]) : nullptr;
+
+ assert(Input0->getType() == Input1->getType() &&
+ "Node inputs need to be of the same type");
+
+ Node->ReplacementNode = TL->createComplexDeinterleavingIR(
+ Node->Real, Node->Operation, Node->Rotation, Input0, Input1, Accumulator);
+
+ assert(Node->ReplacementNode && "Target failed to create Intrinsic call.");
+ NumComplexTransformations += 1;
+ return Node->ReplacementNode;
+}
+
+void ComplexDeinterleavingGraph::replaceNodes() {
+ Value *R = replaceNode(RootNode.get());
+ assert(R && "Unable to find replacement for RootValue");
+ RootValue->replaceAllUsesWith(R);
+}
+
+bool ComplexDeinterleavingCompositeNode::hasAllInternalUses(
+ SmallPtrSet<Instruction *, 16> &AllInstructions) {
+ if (Operation == ComplexDeinterleavingOperation::Shuffle)
+ return true;
+
+ for (auto *User : Real->users()) {
+ if (!AllInstructions.contains(cast<Instruction>(User)))
+ return false;
+ }
+ for (auto *User : Imag->users()) {
+ if (!AllInstructions.contains(cast<Instruction>(User)))
+ return false;
+ }
+ for (auto *I : InternalInstructions) {
+ for (auto *User : I->users()) {
+ if (!AllInstructions.contains(cast<Instruction>(User)))
+ return false;
+ }
+ }
+ return true;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index ce00be634e9a..e36db43567c5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -30,10 +31,9 @@ namespace {
class DeadMachineInstructionElim : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
- const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
- BitVector LivePhysRegs;
+ LiveRegUnits LivePhysRegs;
public:
static char ID; // Pass identification, replacement for typeid
@@ -78,15 +78,14 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
for (const MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.isDef()) {
Register Reg = MO.getReg();
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
// Don't delete live physreg defs, or any reserved register defs.
- if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
+ if (!LivePhysRegs.available(Reg) || MRI->isReserved(Reg))
return false;
} else {
if (MO.isDead()) {
#ifndef NDEBUG
- // Baisc check on the register. All of them should be
- // 'undef'.
+ // Basic check on the register. All of them should be 'undef'.
for (auto &U : MRI->use_nodbg_operands(Reg))
assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
#endif
@@ -108,6 +107,13 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+
+ MRI = &MF.getRegInfo();
+
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TII = ST.getInstrInfo();
+ LivePhysRegs.init(*ST.getRegisterInfo());
+
bool AnyChanges = eliminateDeadMI(MF);
while (AnyChanges && eliminateDeadMI(MF))
;
@@ -116,27 +122,16 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
bool AnyChanges = false;
- MRI = &MF.getRegInfo();
- TRI = MF.getSubtarget().getRegisterInfo();
- TII = MF.getSubtarget().getInstrInfo();
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
// be cleaned up.
for (MachineBasicBlock *MBB : post_order(&MF)) {
- // Start out assuming that reserved registers are live out of this block.
- LivePhysRegs = MRI->getReservedRegs();
-
- // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
- // live across blocks, but some targets (x86) can have flags live out of a
- // block.
- for (const MachineBasicBlock *Succ : MBB->successors())
- for (const auto &LI : Succ->liveins())
- LivePhysRegs.set(LI.PhysReg);
+ LivePhysRegs.addLiveOuts(*MBB);
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
- for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(*MBB))) {
+ for (MachineInstr &MI : make_early_inc_range(reverse(*MBB))) {
// If the instruction is dead, delete it!
if (isDead(&MI)) {
LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI);
@@ -149,34 +144,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
continue;
}
- // Record the physreg defs.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef()) {
- Register Reg = MO.getReg();
- if (Register::isPhysicalRegister(Reg)) {
- // Check the subreg set, not the alias set, because a def
- // of a super-register may still be partially live after
- // this def.
- for (MCSubRegIterator SR(Reg, TRI,/*IncludeSelf=*/true);
- SR.isValid(); ++SR)
- LivePhysRegs.reset(*SR);
- }
- } else if (MO.isRegMask()) {
- // Register mask of preserved registers. All clobbers are dead.
- LivePhysRegs.clearBitsNotInMask(MO.getRegMask());
- }
- }
- // Record the physreg uses, after the defs, in case a physreg is
- // both defined and used in the same instruction.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isUse()) {
- Register Reg = MO.getReg();
- if (Register::isPhysicalRegister(Reg)) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- LivePhysRegs.set(*AI);
- }
- }
- }
+ LivePhysRegs.stepBackward(MI);
}
}
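
The rewrite above swaps the hand-rolled BitVector tracking for LiveRegUnits, which works per register unit (so sub- and super-register aliasing is handled for free) and knows how to step backwards across an instruction's defs, regmasks and uses. A minimal sketch of the idiom, assuming the standard LLVM headers (not taken from the patch):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"

using namespace llvm;

// True if no unit of Reg is live immediately above Stop, scanning MBB
// bottom-up the same way eliminateDeadMI now does.
static bool regIsDeadAbove(MachineBasicBlock &MBB, const MachineInstr &Stop,
                           MCRegister Reg, const TargetRegisterInfo &TRI) {
  LiveRegUnits Live(TRI); // one bit per register unit, not per register
  Live.addLiveOuts(MBB);  // union of successor live-ins and pristine regs
  for (MachineInstr &MI : llvm::reverse(MBB)) {
    Live.stepBackward(MI); // clear defs and regmask clobbers, then add uses
    if (&MI == &Stop)
      break;
  }
  return Live.available(Reg);
}

Note that the pass still checks MRI->isReserved(Reg) separately: addLiveOuts does not treat reserved registers as live.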
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
index 565c8b405f82..bbb89855cfff 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -189,7 +189,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
if (!MO.readsReg())
return;
Register MOReg = MO.getReg();
- if (!Register::isVirtualRegister(MOReg))
+ if (!MOReg.isVirtual())
return;
unsigned MOSubReg = MO.getSubReg();
@@ -213,7 +213,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI,
LaneBitmask UsedLanes) {
for (const MachineOperand &MO : MI.uses()) {
- if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO);
addUsedLanesOnOperand(MO, UsedOnMO);
@@ -280,7 +280,7 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
return;
const MachineOperand &Def = *MI.defs().begin();
Register DefReg = Def.getReg();
- if (!Register::isVirtualRegister(DefReg))
+ if (!DefReg.isVirtual())
return;
unsigned DefRegIdx = Register::virtReg2Index(DefReg);
if (!DefinedByCopy.test(DefRegIdx))
@@ -376,12 +376,12 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
continue;
LaneBitmask MODefinedLanes;
- if (Register::isPhysicalRegister(MOReg)) {
+ if (MOReg.isPhysical()) {
MODefinedLanes = LaneBitmask::getAll();
} else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) {
MODefinedLanes = LaneBitmask::getAll();
} else {
- assert(Register::isVirtualRegister(MOReg));
+ assert(MOReg.isVirtual());
if (MRI->hasOneDef(MOReg)) {
const MachineOperand &MODef = *MRI->def_begin(MOReg);
const MachineInstr &MODefMI = *MODef.getParent();
@@ -425,7 +425,7 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
Register DefReg = Def.getReg();
// The used lanes of COPY-like instruction operands are determined by the
// following dataflow analysis.
- if (Register::isVirtualRegister(DefReg)) {
+ if (DefReg.isVirtual()) {
// But ignore copies across incompatible register classes.
bool CrossCopy = false;
if (lowersToCopies(UseMI)) {
@@ -465,7 +465,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
return false;
const MachineOperand &Def = MI.getOperand(0);
Register DefReg = Def.getReg();
- if (!Register::isVirtualRegister(DefReg))
+ if (!DefReg.isVirtual())
return false;
unsigned DefRegIdx = Register::virtReg2Index(DefReg);
if (!DefinedByCopy.test(DefRegIdx))
@@ -477,7 +477,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
return false;
Register MOReg = MO.getReg();
- if (Register::isVirtualRegister(MOReg)) {
+ if (MOReg.isVirtual()) {
const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
*CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO);
}
@@ -488,7 +488,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
// First pass: Populate defs/uses of vregs with initial values
unsigned NumVirtRegs = MRI->getNumVirtRegs();
for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
- unsigned Reg = Register::index2VirtReg(RegIdx);
+ Register Reg = Register::index2VirtReg(RegIdx);
// Determine used/defined lanes and add copy instructions to worklist.
VRegInfo &Info = VRegInfos[RegIdx];
@@ -502,7 +502,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
Worklist.pop_front();
WorklistMembers.reset(RegIdx);
VRegInfo &Info = VRegInfos[RegIdx];
- unsigned Reg = Register::index2VirtReg(RegIdx);
+ Register Reg = Register::index2VirtReg(RegIdx);
// Transfer UsedLanes to operands of DefMI (backwards dataflow).
MachineOperand &Def = *MRI->def_begin(Reg);
@@ -516,7 +516,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
LLVM_DEBUG({
dbgs() << "Defined/Used lanes:\n";
for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
- unsigned Reg = Register::index2VirtReg(RegIdx);
+ Register Reg = Register::index2VirtReg(RegIdx);
const VRegInfo &Info = VRegInfos[RegIdx];
dbgs() << printReg(Reg, nullptr)
<< " Used: " << PrintLaneMask(Info.UsedLanes)
@@ -534,7 +534,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
unsigned RegIdx = Register::virtReg2Index(Reg);
const VRegInfo &RegInfo = VRegInfos[RegIdx];
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
index c108f0088d43..00626604d81c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -262,12 +262,12 @@ bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) {
Register Reg = MO.getReg();
// Remember clobbered regunits.
- if (MO.isDef() && Register::isPhysicalRegister(Reg))
+ if (MO.isDef() && Reg.isPhysical())
for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
++Units)
ClobberedRegUnits.set(*Units);
- if (!MO.readsReg() || !Register::isVirtualRegister(Reg))
+ if (!MO.readsReg() || !Reg.isVirtual())
continue;
MachineInstr *DefMI = MRI->getVRegDef(Reg);
if (!DefMI || DefMI->getParent() != Head)
@@ -321,9 +321,15 @@ bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) {
return false;
}
- // Check that instruction is predicable and that it is not already
- // predicated.
- if (!TII->isPredicable(*I) || TII->isPredicated(*I)) {
+ // Check that instruction is predicable
+ if (!TII->isPredicable(*I)) {
+ LLVM_DEBUG(dbgs() << "Isn't predicable: " << *I);
+ return false;
+ }
+
+ // Check that instruction is not already predicated.
+ if (TII->isPredicated(*I) && !TII->canPredicatePredicatedInstr(*I)) {
+ LLVM_DEBUG(dbgs() << "Is already predicated: " << *I);
return false;
}
@@ -381,7 +387,7 @@ bool SSAIfConv::findInsertionPoint() {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
- if (!Register::isPhysicalRegister(Reg))
+ if (!Reg.isPhysical())
continue;
// I clobbers Reg, so it isn't live before I.
if (MO.isDef())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
new file mode 100644
index 000000000000..057b5311db70
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
@@ -0,0 +1,139 @@
+//===--- ExpandLargeDivRem.cpp - Expand large div/rem ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands div/rem instructions with a bitwidth above a threshold
+// into inline code generated by the IntegerDivision utilities.
+// This is useful for targets like x86_64 that cannot lower divisions
+// with more than 128 bits or targets like x86_32 that cannot lower divisions
+// with more than 64 bits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+ ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,
+ cl::init(llvm::IntegerType::MAX_INT_BITS),
+ cl::desc("div and rem instructions on integers with "
+ "more than <N> bits are expanded."));
+
+static bool isConstantPowerOfTwo(llvm::Value *V, bool SignedOp) {
+ auto *C = dyn_cast<ConstantInt>(V);
+ if (!C)
+ return false;
+
+ APInt Val = C->getValue();
+ if (SignedOp && Val.isNegative())
+ Val = -Val;
+ return Val.isPowerOf2();
+}
+
+static bool isSigned(unsigned int Opcode) {
+ return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
+}
+
+static bool runImpl(Function &F, const TargetLowering &TLI) {
+ SmallVector<BinaryOperator *, 4> Replace;
+ bool Modified = false;
+
+ unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
+ if (ExpandDivRemBits != llvm::IntegerType::MAX_INT_BITS)
+ MaxLegalDivRemBitWidth = ExpandDivRemBits;
+
+ if (MaxLegalDivRemBitWidth >= llvm::IntegerType::MAX_INT_BITS)
+ return false;
+
+ for (auto &I : instructions(F)) {
+ switch (I.getOpcode()) {
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem: {
+ // TODO: This doesn't handle vectors.
+ auto *IntTy = dyn_cast<IntegerType>(I.getType());
+ if (!IntTy || IntTy->getIntegerBitWidth() <= MaxLegalDivRemBitWidth)
+ continue;
+
+ // The backend has peephole optimizations for powers of two.
+ if (isConstantPowerOfTwo(I.getOperand(1), isSigned(I.getOpcode())))
+ continue;
+
+ Replace.push_back(&cast<BinaryOperator>(I));
+ Modified = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (Replace.empty())
+ return false;
+
+ while (!Replace.empty()) {
+ BinaryOperator *I = Replace.pop_back_val();
+
+ if (I->getOpcode() == Instruction::UDiv ||
+ I->getOpcode() == Instruction::SDiv) {
+ expandDivision(I);
+ } else {
+ expandRemainder(I);
+ }
+ }
+
+ return Modified;
+}
+
+namespace {
+class ExpandLargeDivRemLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ ExpandLargeDivRemLegacyPass() : FunctionPass(ID) {
+ initializeExpandLargeDivRemLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ return runImpl(F, *TLI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // namespace
+
+char ExpandLargeDivRemLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ExpandLargeDivRemLegacyPass, "expand-large-div-rem",
+ "Expand large div/rem", false, false)
+INITIALIZE_PASS_END(ExpandLargeDivRemLegacyPass, "expand-large-div-rem",
+ "Expand large div/rem", false, false)
+
+FunctionPass *llvm::createExpandLargeDivRemPass() {
+ return new ExpandLargeDivRemLegacyPass();
+}
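
The expansion only triggers when the target reports a finite maximum. A hedged sketch of the opt-in for a hypothetical target (the setMaxDivRemBitWidthSupported setter lives on TargetLoweringBase; the class and subtarget names here are illustrative only):

// In a hypothetical target's TargetLowering constructor:
MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                   const MyTargetSubtarget &STI)
    : TargetLowering(TM) {
  // Any div/rem wider than this is expanded to inline shift/subtract
  // loops by expand-large-div-rem before instruction selection.
  setMaxDivRemBitWidthSupported(STI.is64Bit() ? 128 : 64);
}

For experiments, the hidden -expand-div-rem-bits=<N> option declared above overrides whatever the target reports.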
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
new file mode 100644
index 000000000000..ca8056a53139
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
@@ -0,0 +1,664 @@
+//===--- ExpandLargeFpConvert.cpp - Expand large fp convert ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,
+// ‘sitofp .. to’ instructions with a bitwidth above a threshold into
+// inline generated code. This is useful for targets like x86_64 that cannot
+// lower fp conversions with more than 128 bits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+ ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
+ cl::init(llvm::IntegerType::MAX_INT_BITS),
+ cl::desc("fp convert instructions on integers with "
+ "more than <N> bits are expanded."));
+
+/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
+/// the generated code. This currently generates code similarly to compiler-rt's
+/// implementations.
+///
+/// Example IR generated from compiler-rt/fixsfdi.c is shown below:
+/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
+/// entry:
+/// %0 = bitcast float %a to i32
+/// %conv.i = zext i32 %0 to i64
+/// %tobool.not = icmp sgt i32 %0, -1
+/// %conv = select i1 %tobool.not, i64 1, i64 -1
+/// %and = lshr i64 %conv.i, 23
+/// %shr = and i64 %and, 255
+/// %and2 = and i64 %conv.i, 8388607
+/// %or = or i64 %and2, 8388608
+/// %cmp = icmp ult i64 %shr, 127
+/// br i1 %cmp, label %cleanup, label %if.end
+///
+/// if.end: ; preds = %entry
+/// %sub = add nuw nsw i64 %shr, 4294967169
+/// %conv5 = and i64 %sub, 4294967232
+/// %cmp6.not = icmp eq i64 %conv5, 0
+/// br i1 %cmp6.not, label %if.end12, label %if.then8
+///
+/// if.then8: ; preds = %if.end
+/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
+/// br label %cleanup
+///
+/// if.end12: ; preds = %if.end
+/// %cmp13 = icmp ult i64 %shr, 150
+/// br i1 %cmp13, label %if.then15, label %if.else
+///
+/// if.then15: ; preds = %if.end12
+/// %sub16 = sub nuw nsw i64 150, %shr
+/// %shr17 = lshr i64 %or, %sub16
+/// %mul = mul nsw i64 %shr17, %conv
+/// br label %cleanup
+///
+/// if.else: ; preds = %if.end12
+/// %sub18 = add nsw i64 %shr, -150
+/// %shl = shl i64 %or, %sub18
+/// %mul19 = mul nsw i64 %shl, %conv
+/// br label %cleanup
+///
+/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
+/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
+/// ret i64 %retval.0
+/// }
+///
+/// Replace fp to integer with generated code.
+static void expandFPToI(Instruction *FPToI) {
+ IRBuilder<> Builder(FPToI);
+ auto *FloatVal = FPToI->getOperand(0);
+ IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
+
+ unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
+ unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
+
+  // FIXME: fp16's range is covered by i32, so `fptoi half` can convert
+  // to i32 first, followed by a sext/zext to the target integer type.
+ Value *A1 = nullptr;
+ if (FloatVal->getType()->isHalfTy()) {
+ if (FPToI->getOpcode() == Instruction::FPToUI) {
+ Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));
+ A1 = Builder.CreateZExt(A0, IntTy);
+ } else { // FPToSI
+ Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));
+ A1 = Builder.CreateSExt(A0, IntTy);
+ }
+ FPToI->replaceAllUsesWith(A1);
+ FPToI->dropAllReferences();
+ FPToI->eraseFromParent();
+ return;
+ }
+
+  // fp80 conversion is implemented by first extending to fp128 and then
+  // doing the conversion.
+ FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
+ unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
+ unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
+ unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
+ Value *ImplicitBit = Builder.CreateShl(
+ Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
+ Value *SignificandMask =
+ Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
+ Value *NegOne = Builder.CreateSExt(
+ ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
+ Value *NegInf =
+ Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
+ ConstantInt::getSigned(IntTy, BitWidth - 1));
+
+ BasicBlock *Entry = Builder.GetInsertBlock();
+ Function *F = Entry->getParent();
+ Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
+ BasicBlock *End =
+ Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
+ BasicBlock *IfEnd =
+ BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
+ BasicBlock *IfThen5 =
+ BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
+ BasicBlock *IfEnd9 =
+ BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
+ BasicBlock *IfThen12 =
+ BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
+ BasicBlock *IfElse =
+ BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
+
+ Entry->getTerminator()->eraseFromParent();
+
+ // entry:
+ Builder.SetInsertPoint(Entry);
+ Value *FloatVal0 = FloatVal;
+  // fp80 conversion is implemented by first extending to fp128 and then
+  // doing the conversion.
+ if (FloatVal->getType()->isX86_FP80Ty())
+ FloatVal0 =
+ Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
+ Value *ARep0 =
+ Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
+ Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
+ Value *PosOrNeg = Builder.CreateICmpSGT(
+ ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
+ Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
+ ConstantInt::getSigned(IntTy, -1));
+ Value *And =
+ Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
+ Value *And2 = Builder.CreateAnd(
+ And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
+ Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
+ Value *Or = Builder.CreateOr(Abs, ImplicitBit);
+ Value *Cmp =
+ Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
+ Builder.CreateCondBr(Cmp, End, IfEnd);
+
+ // if.end:
+ Builder.SetInsertPoint(IfEnd);
+ Value *Add1 = Builder.CreateAdd(
+ And2, ConstantInt::getSigned(IntTy, -int64_t(ExponentBias + BitWidth)));
+ Value *Cmp3 =
+ Builder.CreateICmpULT(Add1, ConstantInt::getSigned(IntTy, -BitWidth));
+ Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
+
+ // if.then5:
+ Builder.SetInsertPoint(IfThen5);
+ Value *PosInf = Builder.CreateXor(NegOne, NegInf);
+ Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
+ Builder.CreateBr(End);
+
+ // if.end9:
+ Builder.SetInsertPoint(IfEnd9);
+ Value *Cmp10 = Builder.CreateICmpULT(
+ And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
+ Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
+
+ // if.then12:
+ Builder.SetInsertPoint(IfThen12);
+ Value *Sub13 = Builder.CreateSub(
+ Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
+ Value *Shr14 = Builder.CreateLShr(Or, Sub13);
+ Value *Mul = Builder.CreateMul(Shr14, Sign);
+ Builder.CreateBr(End);
+
+ // if.else:
+ Builder.SetInsertPoint(IfElse);
+ Value *Sub15 = Builder.CreateAdd(
+ And2,
+ ConstantInt::getSigned(IntTy, -(ExponentBias + FPMantissaWidth)));
+ Value *Shl = Builder.CreateShl(Or, Sub15);
+ Value *Mul16 = Builder.CreateMul(Shl, Sign);
+ Builder.CreateBr(End);
+
+ // cleanup:
+ Builder.SetInsertPoint(End, End->begin());
+ PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
+
+ Retval0->addIncoming(Cond8, IfThen5);
+ Retval0->addIncoming(Mul, IfThen12);
+ Retval0->addIncoming(Mul16, IfElse);
+ Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
+
+ FPToI->replaceAllUsesWith(Retval0);
+ FPToI->dropAllReferences();
+ FPToI->eraseFromParent();
+}
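
Concretely, the blocks built above compute the same thing as compiler-rt's fixsfdi: split off sign, biased exponent and significand, return 0 when |x| < 1, saturate when the exponent exceeds the target width (the if.then5 select above), and otherwise shift the significand into place. A plain C++ sketch for the float -> i64 case (assumed illustration; the pass emits equivalent IR for arbitrary widths):

#include <cstdint>
#include <cstring>
#include <limits>

static int64_t floatToI64(float F) {
  uint32_t Rep;
  std::memcpy(&Rep, &F, sizeof(Rep));                 // the IR uses bitcast
  int64_t Sign = static_cast<int32_t>(Rep) >= 0 ? 1 : -1;
  uint32_t Exponent = (Rep >> 23) & 0xff;             // biased exponent
  uint64_t Significand = (Rep & 0x7fffff) | 0x800000; // restore implicit bit
  if (Exponent < 127)                                 // |F| < 1 -> 0
    return 0;
  if (Exponent >= 127 + 64)                           // too wide -> saturate
    return Sign > 0 ? std::numeric_limits<int64_t>::max()
                    : std::numeric_limits<int64_t>::min();
  if (Exponent < 127 + 23)                            // drop fraction bits
    return Sign * static_cast<int64_t>(Significand >> (127 + 23 - Exponent));
  // Like compiler-rt, the exact INT64_MIN boundary relies on wrap-around.
  return Sign * static_cast<int64_t>(Significand << (Exponent - (127 + 23)));
}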
+
+/// Generate code to convert an integer to an fp number, replacing S(U)IToFP
+/// with the generated code. This currently generates code similarly to
+/// compiler-rt's implementations. This implementation implicitly assumes that
+/// the integer width is larger than the fp width.
+///
+/// Example IR generated from compiler-rt/floatdisf.c is shown below:
+/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
+/// entry:
+/// %cmp = icmp eq i64 %a, 0
+/// br i1 %cmp, label %return, label %if.end
+///
+/// if.end: ; preds = %entry
+/// %shr = ashr i64 %a, 63
+/// %xor = xor i64 %shr, %a
+/// %sub = sub nsw i64 %xor, %shr
+/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
+/// %cast = trunc i64 %0 to i32
+/// %sub1 = sub nuw nsw i32 64, %cast
+/// %sub2 = xor i32 %cast, 63
+/// %cmp3 = icmp ult i32 %cast, 40
+/// br i1 %cmp3, label %if.then4, label %if.else
+///
+/// if.then4: ; preds = %if.end
+/// switch i32 %sub1, label %sw.default [
+/// i32 25, label %sw.bb
+/// i32 26, label %sw.epilog
+/// ]
+///
+/// sw.bb: ; preds = %if.then4
+/// %shl = shl i64 %sub, 1
+/// br label %sw.epilog
+///
+/// sw.default: ; preds = %if.then4
+/// %sub5 = sub nsw i64 38, %0
+/// %sh_prom = and i64 %sub5, 4294967295
+/// %shr6 = lshr i64 %sub, %sh_prom
+/// %shr9 = lshr i64 274877906943, %0
+/// %and = and i64 %shr9, %sub
+/// %cmp10 = icmp ne i64 %and, 0
+/// %conv11 = zext i1 %cmp10 to i64
+/// %or = or i64 %shr6, %conv11
+/// br label %sw.epilog
+///
+/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
+/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
+/// %1 = lshr i64 %a.addr.0, 2
+/// %2 = and i64 %1, 1
+/// %or16 = or i64 %2, %a.addr.0
+/// %inc = add nsw i64 %or16, 1
+/// %3 = and i64 %inc, 67108864
+/// %tobool.not = icmp eq i64 %3, 0
+/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
+/// %spec.select = ashr i64 %inc, %spec.select.v
+/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
+/// br label %if.end26
+///
+/// if.else: ; preds = %if.end
+/// %sub23 = add nuw nsw i64 %0, 4294967256
+/// %sh_prom24 = and i64 %sub23, 4294967295
+/// %shl25 = shl i64 %sub, %sh_prom24
+/// br label %if.end26
+///
+/// if.end26: ; preds = %sw.epilog, %if.else
+/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
+/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
+/// %conv27 = trunc i64 %shr to i32
+/// %and28 = and i32 %conv27, -2147483648
+/// %add = shl nuw nsw i32 %e.0, 23
+/// %shl29 = add nuw nsw i32 %add, 1065353216
+/// %conv31 = trunc i64 %a.addr.1 to i32
+/// %and32 = and i32 %conv31, 8388607
+/// %or30 = or i32 %and32, %and28
+/// %or33 = or i32 %or30, %shl29
+/// %4 = bitcast i32 %or33 to float
+/// br label %return
+///
+/// return: ; preds = %entry, %if.end26
+/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
+/// ret float %retval.0
+/// }
+///
+/// Replace integer to fp with generated code.
+static void expandIToFP(Instruction *IToFP) {
+ IRBuilder<> Builder(IToFP);
+ auto *IntVal = IToFP->getOperand(0);
+ IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
+
+ unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
+ unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
+  // fp80 conversion is implemented by first converting to fp128, followed
+  // by an fptrunc to fp80.
+ FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
+  // FIXME: As there are no related builtins added in compiler-rt,
+  // we currently use the fp32 <-> fp16 lib calls to implement this.
+ FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
+ unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
+ bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
+
+ assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
+ "assumes integer width is larger than fp.");
+
+ Value *Temp1 =
+ Builder.CreateShl(Builder.getIntN(BitWidth, 1),
+ Builder.getIntN(BitWidth, FPMantissaWidth + 3));
+
+ BasicBlock *Entry = Builder.GetInsertBlock();
+ Function *F = Entry->getParent();
+ Entry->setName(Twine(Entry->getName(), "itofp-entry"));
+ BasicBlock *End =
+ Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
+ BasicBlock *IfEnd =
+ BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
+ BasicBlock *IfThen4 =
+ BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
+ BasicBlock *SwBB =
+ BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
+ BasicBlock *SwDefault =
+ BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
+ BasicBlock *SwEpilog =
+ BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
+ BasicBlock *IfThen20 =
+ BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
+ BasicBlock *IfElse =
+ BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
+ BasicBlock *IfEnd26 =
+ BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
+
+ Entry->getTerminator()->eraseFromParent();
+
+ Function *CTLZ =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
+ ConstantInt *True = Builder.getTrue();
+
+ // entry:
+ Builder.SetInsertPoint(Entry);
+ Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
+ Builder.CreateCondBr(Cmp, End, IfEnd);
+
+ // if.end:
+ Builder.SetInsertPoint(IfEnd);
+ Value *Shr =
+ Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
+ Value *Xor = Builder.CreateXor(Shr, IntVal);
+ Value *Sub = Builder.CreateSub(Xor, Shr);
+ Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
+ Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
+ int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
+ Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
+ FloatWidth == 128 ? Call : Cast);
+ Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
+ FloatWidth == 128 ? Call : Cast);
+ Value *Cmp3 = Builder.CreateICmpSGT(
+ Sub2, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
+ Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
+
+ // if.then4:
+ Builder.SetInsertPoint(IfThen4);
+ llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
+ SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
+ SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
+
+ // sw.bb:
+ Builder.SetInsertPoint(SwBB);
+ Value *Shl =
+ Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
+ Builder.CreateBr(SwEpilog);
+
+ // sw.default:
+ Builder.SetInsertPoint(SwDefault);
+ Value *Sub5 = Builder.CreateSub(
+ Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
+ FloatWidth == 128 ? Call : Cast);
+ Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
+ Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
+ FloatWidth == 128 ? Sub5 : ShProm);
+ Value *Sub8 =
+ Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
+ Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
+ Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
+ Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
+ FloatWidth == 128 ? Sub8 : ShProm9);
+ Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
+ Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
+ Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
+ Value *Or = Builder.CreateOr(Shr6, Conv11);
+ Builder.CreateBr(SwEpilog);
+
+ // sw.epilog:
+ Builder.SetInsertPoint(SwEpilog);
+ PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
+ AAddr0->addIncoming(Or, SwDefault);
+ AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
+ AAddr0->addIncoming(Shl, SwBB);
+ Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
+ Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));
+ Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));
+ Value *Conv16 = Builder.CreateZExt(A2, IntTy);
+ Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
+ Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
+ Value *Shr18 = nullptr;
+ if (IsSigned)
+ Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
+ else
+ Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
+ Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
+ Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
+ Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
+ Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
+ Value *ExtractT64 = nullptr;
+ if (FloatWidth > 80)
+ ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
+ else
+ ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
+ Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
+
+  // if.then20:
+ Builder.SetInsertPoint(IfThen20);
+ Value *Shr21 = nullptr;
+ if (IsSigned)
+ Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
+ else
+ Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
+ Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
+ Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
+ Value *ExtractT62 = nullptr;
+ if (FloatWidth > 80)
+ ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));
+ else
+ ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));
+ Builder.CreateBr(IfEnd26);
+
+ // if.else:
+ Builder.SetInsertPoint(IfElse);
+ Value *Sub24 = Builder.CreateAdd(
+ FloatWidth == 128 ? Call : Cast,
+ ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
+ -(BitWidth - FPMantissaWidth - 1)));
+ Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
+ Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
+ FloatWidth == 128 ? Sub24 : ShProm25);
+ Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
+ Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
+ Value *ExtractT66 = nullptr;
+ if (FloatWidth > 80)
+ ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));
+ else
+ ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
+ Builder.CreateBr(IfEnd26);
+
+ // if.end26:
+ Builder.SetInsertPoint(IfEnd26);
+ PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
+ AAddr1Off0->addIncoming(ExtractT, IfThen20);
+ AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
+ AAddr1Off0->addIncoming(ExtractT61, IfElse);
+ PHINode *AAddr1Off32 = nullptr;
+ if (FloatWidth > 32) {
+ AAddr1Off32 =
+ Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
+ AAddr1Off32->addIncoming(ExtractT62, IfThen20);
+ AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
+ AAddr1Off32->addIncoming(ExtractT66, IfElse);
+ }
+ PHINode *E0 = nullptr;
+ if (FloatWidth <= 80) {
+ E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
+ E0->addIncoming(Sub1, IfThen20);
+ E0->addIncoming(Sub2, SwEpilog);
+ E0->addIncoming(Sub2, IfElse);
+ }
+ Value *And29 = nullptr;
+ if (FloatWidth > 80) {
+ Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
+ Builder.getIntN(BitWidth, 63));
+ And29 = Builder.CreateAnd(Shr, Temp2, "and29");
+ } else {
+ Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));
+ And29 = Builder.CreateAnd(
+ Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));
+ }
+ unsigned TempMod = FPMantissaWidth % 32;
+ Value *And34 = nullptr;
+ Value *Shl30 = nullptr;
+ if (FloatWidth > 80) {
+ TempMod += 32;
+ Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));
+ Shl30 = Builder.CreateAdd(
+ Add,
+ Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
+ And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));
+ } else {
+ Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));
+ Shl30 = Builder.CreateAdd(
+ Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));
+ And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
+ Builder.getIntN(32, (1 << TempMod) - 1));
+ }
+ Value *Or35 = nullptr;
+ if (FloatWidth > 80) {
+ Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));
+ Value *Or31 = Builder.CreateOr(And29Trunc, And34);
+ Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
+ Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
+ Builder.getIntN(128, FPMantissaWidth));
+ Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
+ Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
+ Or35 = Builder.CreateOr(Or34, A6);
+ } else {
+ Value *Or31 = Builder.CreateOr(And34, And29);
+ Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
+ }
+ Value *A4 = nullptr;
+ if (IToFP->getType()->isDoubleTy()) {
+ Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
+ Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
+ Value *And1 =
+ Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
+ Value *Or1 = Builder.CreateOr(Shl1, And1);
+ A4 = Builder.CreateBitCast(Or1, IToFP->getType());
+ } else if (IToFP->getType()->isX86_FP80Ty()) {
+ Value *A40 =
+ Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
+ A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
+ } else if (IToFP->getType()->isHalfTy()) {
+ // Deal with "half" situation. This is a workaround since we don't have
+ // floattihf.c currently as referring.
+ Value *A40 =
+ Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
+ A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
+ } else // float type
+ A4 = Builder.CreateBitCast(Or35, IToFP->getType());
+ Builder.CreateBr(End);
+
+ // return:
+ Builder.SetInsertPoint(End, End->begin());
+ PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
+ Retval0->addIncoming(A4, IfEnd26);
+ Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
+
+ IToFP->replaceAllUsesWith(Retval0);
+ IToFP->dropAllReferences();
+ IToFP->eraseFromParent();
+}
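
The integer-to-fp direction is the classic count-leading-zeros normalisation with round-to-nearest-even; that is what the llvm.ctlz call and the or/inc/shift sequence in sw.epilog implement. Sketched after compiler-rt's floatdisf.c for the i64 -> float case (assumed illustration; the generated IR handles wider types the same way):

#include <cstdint>
#include <cstring>

static float i64ToFloat(int64_t A) {
  if (A == 0)
    return 0.0f;
  const int N = 64, MantDig = 24;          // FLT_MANT_DIG
  uint32_t Sign = A < 0 ? 0x80000000u : 0u;
  uint64_t Abs = A < 0 ? 0 - static_cast<uint64_t>(A) : static_cast<uint64_t>(A);
  int SD = N - __builtin_clzll(Abs);       // GCC/Clang builtin for llvm.ctlz
  int E = SD - 1;                          // unbiased exponent
  if (SD > MantDig) {
    // Keep MantDig + 2 bits: a round bit and a sticky bit below the result.
    if (SD == MantDig + 1)
      Abs <<= 1;
    else if (SD != MantDig + 2)
      Abs = (Abs >> (SD - (MantDig + 2))) |
            ((Abs & (UINT64_MAX >> (N + MantDig + 2 - SD))) != 0);
    Abs |= (Abs & 4) != 0;                 // round half to even
    ++Abs;                                 // rounding may carry outwards
    Abs >>= 2;                             // drop the round and sticky bits
    if (Abs & (UINT64_C(1) << MantDig)) {  // carry produced an extra bit
      Abs >>= 1;
      ++E;
    }
  } else {
    Abs <<= (MantDig - SD);
  }
  uint32_t Bits = Sign | (static_cast<uint32_t>(E + 127) << 23) |
                  (static_cast<uint32_t>(Abs) & 0x7fffff);
  float F;
  std::memcpy(&F, &Bits, sizeof(F));       // the IR uses bitcast
  return F;
}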
+
+static bool runImpl(Function &F, const TargetLowering &TLI) {
+ SmallVector<Instruction *, 4> Replace;
+ bool Modified = false;
+
+ unsigned MaxLegalFpConvertBitWidth =
+ TLI.getMaxLargeFPConvertBitWidthSupported();
+ if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
+ MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
+
+ if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
+ return false;
+
+ for (auto &I : instructions(F)) {
+ switch (I.getOpcode()) {
+ case Instruction::FPToUI:
+ case Instruction::FPToSI: {
+ // TODO: This pass doesn't handle vectors.
+ if (I.getOperand(0)->getType()->isVectorTy())
+ continue;
+
+ auto *IntTy = dyn_cast<IntegerType>(I.getType());
+ if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
+ continue;
+
+ Replace.push_back(&I);
+ Modified = true;
+ break;
+ }
+ case Instruction::UIToFP:
+ case Instruction::SIToFP: {
+ // TODO: This pass doesn't handle vectors.
+ if (I.getOperand(0)->getType()->isVectorTy())
+ continue;
+
+ auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType());
+ if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
+ continue;
+
+ Replace.push_back(&I);
+ Modified = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (Replace.empty())
+ return false;
+
+ while (!Replace.empty()) {
+ Instruction *I = Replace.pop_back_val();
+ if (I->getOpcode() == Instruction::FPToUI ||
+ I->getOpcode() == Instruction::FPToSI) {
+ expandFPToI(I);
+ } else {
+ expandIToFP(I);
+ }
+ }
+
+ return Modified;
+}
+
+namespace {
+class ExpandLargeFpConvertLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) {
+ initializeExpandLargeFpConvertLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ return runImpl(F, *TLI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // namespace
+
+char ExpandLargeFpConvertLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
+ "Expand large fp convert", false, false)
+INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
+ "Expand large fp convert", false, false)
+
+FunctionPass *llvm::createExpandLargeFpConvertPass() {
+ return new ExpandLargeFpConvertLegacyPass();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
index b2639636dda7..3838eaadd1d2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -28,6 +28,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
+#include <optional>
using namespace llvm;
@@ -877,15 +878,14 @@ ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
const TargetLowering *TL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DominatorTree *DT) {
- Optional<DomTreeUpdater> DTU;
+ std::optional<DomTreeUpdater> DTU;
if (DT)
DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
const DataLayout& DL = F.getParent()->getDataLayout();
bool MadeChanges = false;
for (auto BBIt = F.begin(); BBIt != F.end();) {
- if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI,
- DTU ? DTU.getPointer() : nullptr)) {
+ if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
MadeChanges = true;
// If changes were made, restart the function from the beginning, since
// the structure of the function was changed.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index 086b4a4dcc47..cc63984158c8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -93,9 +93,9 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
assert(SubIdx != 0 && "Invalid index for insert_subreg");
Register DstSubReg = TRI->getSubReg(DstReg, SubIdx);
- assert(Register::isPhysicalRegister(DstReg) &&
+ assert(DstReg.isPhysical() &&
"Insert destination must be in a physical register");
- assert(Register::isPhysicalRegister(InsReg) &&
+ assert(InsReg.isPhysical() &&
"Inserted value must be in a physical register");
LLVM_DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index db4d42bf3ca4..5ee76ff567fb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include <optional>
using namespace llvm;
@@ -122,7 +123,7 @@ static bool maySpeculateLanes(VPIntrinsic &VPI) {
if (isa<VPReductionIntrinsic>(VPI))
return false;
// Fallback to whether the intrinsic is speculatable.
- Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
+ std::optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, &VPI);
}
@@ -166,25 +167,27 @@ struct CachingVPExpander {
/// length of the operation.
void discardEVLParameter(VPIntrinsic &PI);
- /// \brief Lower this VP binary operator to a unpredicated binary operator.
+ /// Lower this VP binary operator to a unpredicated binary operator.
Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &PI);
- /// \brief Lower this VP reduction to a call to an unpredicated reduction
- /// intrinsic.
+ /// Lower this VP reduction to a call to an unpredicated reduction intrinsic.
Value *expandPredicationInReduction(IRBuilder<> &Builder,
VPReductionIntrinsic &PI);
- /// \brief Lower this VP memory operation to a non-VP intrinsic.
+ /// Lower this VP memory operation to a non-VP intrinsic.
Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
VPIntrinsic &VPI);
- /// \brief Query TTI and expand the vector predication in \p P accordingly.
+ /// Lower this VP comparison to a call to an unpredicated comparison.
+ Value *expandPredicationInComparison(IRBuilder<> &Builder,
+ VPCmpIntrinsic &PI);
+
+ /// Query TTI and expand the vector predication in \p P accordingly.
Value *expandPredication(VPIntrinsic &PI);
- /// \brief Determine how and whether the VPIntrinsic \p VPI shall be
- /// expanded. This overrides TTI with the cl::opts listed at the top of this
- /// file.
+ /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This
+ /// overrides TTI with the cl::opts listed at the top of this file.
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
bool UsingTTIOverrides;
@@ -293,7 +296,7 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
APInt::getSignedMinValue(EltBits));
case Intrinsic::vp_reduce_fmax:
Negative = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Intrinsic::vp_reduce_fmin: {
FastMathFlags Flags = VPI.getFastMathFlags();
const fltSemantics &Semantics = EltTy->getFltSemantics();
@@ -420,7 +423,7 @@ CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
StoreInst *NewStore =
Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false);
if (AlignOpt.has_value())
- NewStore->setAlignment(AlignOpt.value());
+ NewStore->setAlignment(*AlignOpt);
NewMemoryInst = NewStore;
} else
NewMemoryInst = Builder.CreateMaskedStore(
@@ -432,7 +435,7 @@ CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
LoadInst *NewLoad =
Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false);
if (AlignOpt.has_value())
- NewLoad->setAlignment(AlignOpt.value());
+ NewLoad->setAlignment(*AlignOpt);
NewMemoryInst = NewLoad;
} else
NewMemoryInst = Builder.CreateMaskedLoad(
@@ -462,6 +465,24 @@ CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
return NewMemoryInst;
}
+Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder,
+ VPCmpIntrinsic &VPI) {
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
+ "Implicitly dropping %evl in non-speculatable operator!");
+
+ assert(*VPI.getFunctionalOpcode() == Instruction::ICmp ||
+ *VPI.getFunctionalOpcode() == Instruction::FCmp);
+
+ Value *Op0 = VPI.getOperand(0);
+ Value *Op1 = VPI.getOperand(1);
+ auto Pred = VPI.getPredicate();
+
+ auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1);
+
+ replaceOperation(*NewCmp, VPI);
+ return NewCmp;
+}
+
void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
@@ -538,6 +559,9 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
return expandPredicationInReduction(Builder, *VPRI);
+ if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
+ return expandPredicationInComparison(Builder, *VPCmp);
+
switch (VPI.getIntrinsicID()) {
default:
break;
@@ -598,7 +622,7 @@ CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
return VPStrat;
}
-/// \brief Expand llvm.vp.* intrinsics as requested by \p TTI.
+/// Expand llvm.vp.* intrinsics as requested by \p TTI.
bool CachingVPExpander::expandVectorPredication() {
SmallVector<TransformJob, 16> Worklist;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index 252910fd9462..55d939de426e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -422,7 +422,7 @@ public:
LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore);
TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, IsKill, FI,
- RC, &TRI);
+ RC, &TRI, Register());
}
}
@@ -431,7 +431,7 @@ public:
const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
int FI = RegToSlotIdx[Reg];
if (It != MBB->end()) {
- TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI);
+ TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register());
return;
}
@@ -439,7 +439,7 @@ public:
// and then swap them.
assert(!MBB->empty() && "Empty block");
--It;
- TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI);
+ TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register());
MachineInstr *Reload = It->getPrevNode();
int Dummy = 0;
(void)Dummy;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 6a0d1c33d3e3..356d208fc881 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -61,6 +61,10 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_EXTRACT:
+ case TargetOpcode::G_SELECT:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC:
+ case TargetOpcode::G_SEXT_INREG:
return true;
}
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index a432e4ed7fb7..64e2d517e3b9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -107,7 +107,7 @@ void CSEMIRBuilder::profileMBBOpcode(GISelInstProfileBuilder &B,
void CSEMIRBuilder::profileEverything(unsigned Opc, ArrayRef<DstOp> DstOps,
ArrayRef<SrcOp> SrcOps,
- Optional<unsigned> Flags,
+ std::optional<unsigned> Flags,
GISelInstProfileBuilder &B) const {
profileMBBOpcode(B, Opc);
@@ -170,7 +170,7 @@ CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps,
MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
ArrayRef<DstOp> DstOps,
ArrayRef<SrcOp> SrcOps,
- Optional<unsigned> Flag) {
+ std::optional<unsigned> Flag) {
switch (Opc) {
default:
break;
@@ -210,8 +210,8 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
break;
}
- if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
- SrcOps[1].getReg(), *getMRI()))
+ if (std::optional<APInt> Cst = ConstantFoldBinOp(
+ Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI()))
return buildConstant(DstOps[0], *Cst);
break;
}
@@ -230,7 +230,7 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
// Try to constant fold these.
assert(SrcOps.size() == 2 && "Invalid sources");
assert(DstOps.size() == 1 && "Invalid dsts");
- if (Optional<APFloat> Cst = ConstantFoldFPBinOp(
+ if (std::optional<APFloat> Cst = ConstantFoldFPBinOp(
Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI()))
return buildFConstant(DstOps[0], *Cst);
break;
@@ -251,7 +251,7 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
// Try to constant fold these.
assert(SrcOps.size() == 1 && "Invalid sources");
assert(DstOps.size() == 1 && "Invalid dsts");
- if (Optional<APFloat> Cst = ConstantFoldIntToFloat(
+ if (std::optional<APFloat> Cst = ConstantFoldIntToFloat(
Opc, DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getReg(), *getMRI()))
return buildFConstant(DstOps[0], *Cst);
break;
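
The remaining changes in this file are the mechanical llvm::Optional to std::optional migration. The pattern in plain, standalone C++ (the function is illustrative, not LLVM API):

#include <cstdint>
#include <optional>

// std::nullopt now plays the role of llvm::None; the fold returns a value
// only when it actually succeeded.
static std::optional<int64_t> tryFoldAdd(int64_t A, int64_t B, bool Foldable) {
  if (!Foldable)
    return std::nullopt;
  return A + B;
}

int main() {
  if (std::optional<int64_t> C = tryFoldAdd(2, 3, /*Foldable=*/true))
    return *C == 5 ? 0 : 1;
  return 1;
}
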
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 6c36c6445c65..89872259cfca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -70,6 +70,15 @@ ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call,
return Flags;
}
+ISD::ArgFlagsTy
+CallLowering::getAttributesForReturn(const CallBase &Call) const {
+ ISD::ArgFlagsTy Flags;
+ addFlagsUsingAttrFn(Flags, [&Call](Attribute::AttrKind Attr) {
+ return Call.hasRetAttr(Attr);
+ });
+ return Flags;
+}
+
void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
const AttributeList &Attrs,
unsigned OpIdx) const {
@@ -141,7 +150,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
Register ReturnHintAlignReg;
Align ReturnHintAlign;
- Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, ISD::ArgFlagsTy{}};
+ Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, getAttributesForReturn(CB)};
if (!Info.OrigRet.Ty->isVoidTy()) {
setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
@@ -155,6 +164,12 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
}
}
+ auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi);
+ if (Bundle && CB.isIndirectCall()) {
+ Info.CFIType = cast<ConstantInt>(Bundle->Inputs[0]);
+ assert(Info.CFIType->getType()->isIntegerTy(32) && "Invalid CFI type");
+ }
+
Info.CB = &CB;
Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
Info.CallConv = CallConv;
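
The new block picks the KCFI type hash off the call's kcfi operand bundle for indirect calls. On the producer side, a frontend attaches that bundle roughly like this (a sketch; the hash value is a caller-supplied placeholder, not a real KCFI type id):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static CallInst *emitKCFICall(IRBuilder<> &B, FunctionCallee Callee,
                              ArrayRef<Value *> Args, uint32_t TypeHash) {
  // The bundle carries a single i32 constant; the lowering above asserts this.
  SmallVector<Value *, 1> BundleArgs = {B.getInt32(TypeHash)};
  OperandBundleDef KCFIBundle("kcfi", BundleArgs);
  return B.CreateCall(Callee, Args, {KCFIBundle});
}
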
@@ -291,8 +306,8 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
Register UnmergeSrcReg;
if (LCMTy != PartLLT) {
assert(DstRegs.size() == 1);
- return B.buildDeleteTrailingVectorElements(DstRegs[0],
- B.buildMerge(LCMTy, SrcRegs));
+ return B.buildDeleteTrailingVectorElements(
+ DstRegs[0], B.buildMergeLikeInstr(LCMTy, SrcRegs));
} else {
// We don't need to widen anything if we're extracting a scalar which was
// promoted to a vector e.g. s8 -> v4s8 -> s8
@@ -371,11 +386,11 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
assert(OrigRegs.size() == 1);
LLT OrigTy = MRI.getType(OrigRegs[0]);
- unsigned SrcSize = PartLLT.getSizeInBits().getFixedSize() * Regs.size();
+ unsigned SrcSize = PartLLT.getSizeInBits().getFixedValue() * Regs.size();
if (SrcSize == OrigTy.getSizeInBits())
- B.buildMerge(OrigRegs[0], Regs);
+ B.buildMergeValues(OrigRegs[0], Regs);
else {
- auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs);
+ auto Widened = B.buildMergeLikeInstr(LLT::scalar(SrcSize), Regs);
B.buildTrunc(OrigRegs[0], Widened);
}
@@ -443,7 +458,8 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
- auto Merge = B.buildMerge(RealDstEltTy, Regs.take_front(PartsPerElt));
+ auto Merge =
+ B.buildMergeLikeInstr(RealDstEltTy, Regs.take_front(PartsPerElt));
// Fix the type in case this is really a vector of pointers.
MRI.setType(Merge.getReg(0), RealDstEltTy);
EltMerges.push_back(Merge.getReg(0));
@@ -489,6 +505,15 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
return;
}
+ if (SrcTy.isVector() && PartTy.isVector() &&
+ PartTy.getScalarSizeInBits() == SrcTy.getScalarSizeInBits() &&
+ SrcTy.getNumElements() < PartTy.getNumElements()) {
+ // A coercion like: v2f32 -> v4f32.
+ Register DstReg = DstRegs.front();
+ B.buildPadVectorWithUndefElements(DstReg, SrcReg);
+ return;
+ }
+
LLT GCDTy = getGCDType(SrcTy, PartTy);
if (GCDTy == PartTy) {
// If this already evenly divisible, we can create a simple unmerge.
@@ -525,7 +550,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
SmallVector<Register, 8> MergeParts(1, SrcReg);
for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize)
MergeParts.push_back(Undef);
- UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0);
+ UnmergeSrc = B.buildMergeLikeInstr(LCMTy, MergeParts).getReg(0);
}
}
@@ -656,7 +681,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
if (VA.needsCustom()) {
std::function<void()> Thunk;
unsigned NumArgRegs = Handler.assignCustomValue(
- Args[i], makeArrayRef(ArgLocs).slice(j), &Thunk);
+ Args[i], ArrayRef(ArgLocs).slice(j), &Thunk);
if (Thunk)
DelayedOutgoingRegAssignments.emplace_back(Thunk);
if (!NumArgRegs)
@@ -1196,7 +1221,7 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
DstTy = DstTy.getScalarType();
return (SrcTy.isPointer() && DstTy.isScalar()) ||
- (DstTy.isScalar() && SrcTy.isPointer());
+ (DstTy.isPointer() && SrcTy.isScalar());
}
void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
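
The last hunk fixes a copy-paste bug in isCopyCompatibleType: both sides of the || previously tested the same direction (pointer source, scalar destination), so scalar-to-pointer copies were wrongly rejected. The corrected predicate, modeled standalone:

// Standalone model of the fixed check: compatible iff exactly one of the two
// (scalarized) types is a pointer, in either direction.
struct SimpleTy { bool IsPointer; };
static bool isCopyCompatible(SimpleTy Src, SimpleTy Dst) {
  return (Src.IsPointer && !Dst.IsPointer) || (Dst.IsPointer && !Src.IsPointer);
}
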
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 1a5fe3e84c17..748fa273d499 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -52,7 +53,9 @@ class WorkListMaintainer : public GISelChangeObserver {
WorkListTy &WorkList;
/// The instructions that have been created but we want to report once they
/// have their operands. This is only maintained if debug output is requested.
- SmallPtrSet<const MachineInstr *, 4> CreatedInstrs;
+#ifndef NDEBUG
+ SetVector<const MachineInstr *> CreatedInstrs;
+#endif
public:
WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {}
@@ -132,6 +135,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
// Erase dead insts before even adding to the list.
if (isTriviallyDead(CurMI, *MRI)) {
LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
+ llvm::salvageDebugInfo(*MRI, CurMI);
CurMI.eraseFromParent();
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 05a25bc3078e..af4bb1634746 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -27,10 +27,13 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include <cmath>
+#include <optional>
#include <tuple>
#define DEBUG_TYPE "gi-combiner"
@@ -46,11 +49,12 @@ static cl::opt<bool>
"legal for the GlobalISel combiner"));
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
- MachineIRBuilder &B, GISelKnownBits *KB,
- MachineDominatorTree *MDT,
+ MachineIRBuilder &B, bool IsPreLegalize,
+ GISelKnownBits *KB, MachineDominatorTree *MDT,
const LegalizerInfo *LI)
: Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
- MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
+ MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
+ RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
(void)this->KB;
}
@@ -93,8 +97,8 @@ static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
/// \param MemOffset2Idx maps memory offsets to address offsets.
/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
///
-/// \returns true if the map corresponds to a big endian byte pattern, false
-/// if it corresponds to a little endian byte pattern, and None otherwise.
+/// \returns true if the map corresponds to a big endian byte pattern, false if
+/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
///
/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
/// are as follows:
@@ -104,24 +108,24 @@ static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
/// 1 1 2
/// 2 2 1
/// 3 3 0
-static Optional<bool>
+static std::optional<bool>
isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
int64_t LowestIdx) {
// Need at least two byte positions to decide on endianness.
unsigned Width = MemOffset2Idx.size();
if (Width < 2)
- return None;
+ return std::nullopt;
bool BigEndian = true, LittleEndian = true;
for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
if (MemOffsetAndIdx == MemOffset2Idx.end())
- return None;
+ return std::nullopt;
const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
assert(Idx >= 0 && "Expected non-negative byte offset?");
LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
if (!BigEndian && !LittleEndian)
- return None;
+ return std::nullopt;
}
assert((BigEndian != LittleEndian) &&
@@ -129,7 +133,7 @@ isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
return BigEndian;
}
-bool CombinerHelper::isPreLegalize() const { return !LI; }
+bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
assert(LI && "Must have LegalizerInfo to query isLegal!");
@@ -385,7 +389,7 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
if (Ops.size() == 1)
Builder.buildCopy(NewDstReg, Ops[0]);
else
- Builder.buildMerge(NewDstReg, Ops);
+ Builder.buildMergeLikeInstr(NewDstReg, Ops);
MI.eraseFromParent();
replaceRegWith(MRI, DstReg, NewDstReg);
@@ -485,6 +489,24 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
return false;
}
+static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
+ unsigned CandidateLoadOpc;
+ switch (ExtOpc) {
+ case TargetOpcode::G_ANYEXT:
+ CandidateLoadOpc = TargetOpcode::G_LOAD;
+ break;
+ case TargetOpcode::G_SEXT:
+ CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
+ break;
+ case TargetOpcode::G_ZEXT:
+ CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
+ break;
+ default:
+ llvm_unreachable("Unexpected extend opc");
+ }
+ return CandidateLoadOpc;
+}
+
bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
PreferredTuple &Preferred) {
// We match the loads and follow the uses to the extend instead of matching
@@ -535,11 +557,12 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
continue;
// Check for legality.
- if (LI) {
+ if (!isPreLegalize()) {
LegalityQuery::MemDesc MMDesc(MMO);
+ unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
- if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
+ if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
.Action != LegalizeActions::Legal)
continue;
}
@@ -587,12 +610,8 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
};
Observer.changingInstr(MI);
- MI.setDesc(
- Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
- ? TargetOpcode::G_SEXTLOAD
- : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
- ? TargetOpcode::G_ZEXTLOAD
- : TargetOpcode::G_LOAD));
+ unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
+ MI.setDesc(Builder.getTII().get(LoadOpc));
// Rewrite all the uses to fix up the types.
auto &LoadValue = MI.getOperand(0);
@@ -1266,12 +1285,12 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
LegalizerHelper::LegalizeResult::Legalized;
}
-static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
- const Register Op,
- const MachineRegisterInfo &MRI) {
+static std::optional<APFloat>
+constantFoldFpUnary(unsigned Opcode, LLT DstTy, const Register Op,
+ const MachineRegisterInfo &MRI) {
const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
if (!MaybeCst)
- return None;
+ return std::nullopt;
APFloat V = MaybeCst->getValueAPF();
switch (Opcode) {
@@ -1308,8 +1327,8 @@ static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
return V;
}
-bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
- Optional<APFloat> &Cst) {
+bool CombinerHelper::matchCombineConstantFoldFpUnary(
+ MachineInstr &MI, std::optional<APFloat> &Cst) {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
@@ -1317,8 +1336,8 @@ bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
return Cst.has_value();
}
-void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
- Optional<APFloat> &Cst) {
+void CombinerHelper::applyCombineConstantFoldFpUnary(
+ MachineInstr &MI, std::optional<APFloat> &Cst) {
assert(Cst && "Optional is unexpectedly empty!");
Builder.setInstrAndDebugLoc(MI);
MachineFunction &MF = Builder.getMF();
@@ -1580,6 +1599,13 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
Register Shift1 =
Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
+ // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
+ // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
+ // old shift1 when building shift2. So if we erased MatchInfo.Shift2 at the
+ // end, we would actually remove the old shift1 and crash later. Erase it
+ // earlier to avoid that.

+ MatchInfo.Shift2->eraseFromParent();
+
Register Shift2Const = MI.getOperand(2).getReg();
Register Shift2 = Builder
.buildInstr(Opcode, {DestType},
@@ -1589,8 +1615,7 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
Register Dest = MI.getOperand(0).getReg();
Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
- // These were one use so it's safe to remove them.
- MatchInfo.Shift2->eraseFromParent();
+ // This was one use so it's safe to remove it.
MatchInfo.Logic->eraseFromParent();
MI.eraseFromParent();
@@ -1706,7 +1731,7 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
auto &Unmerge = cast<GUnmerge>(MI);
Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
- auto *SrcInstr = getOpcodeDef<GMergeLikeOp>(SrcReg, MRI);
+ auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
if (!SrcInstr)
return false;
@@ -1947,7 +1972,7 @@ void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
}
auto Zero = Builder.buildConstant(HalfTy, 0);
- Builder.buildMerge(DstReg, { Narrowed, Zero });
+ Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
} else if (MI.getOpcode() == TargetOpcode::G_SHL) {
Register Narrowed = Unmerge.getReg(0);
// dst = G_SHL s64:x, C for C >= 32
@@ -1960,7 +1985,7 @@ void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
}
auto Zero = Builder.buildConstant(HalfTy, 0);
- Builder.buildMerge(DstReg, { Zero, Narrowed });
+ Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
} else {
assert(MI.getOpcode() == TargetOpcode::G_ASHR);
auto Hi = Builder.buildAShr(
@@ -1970,13 +1995,13 @@ void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
if (ShiftVal == HalfSize) {
// (G_ASHR i64:x, 32) ->
// G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
- Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
+ Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
} else if (ShiftVal == Size - 1) {
// Don't need a second shift.
// (G_ASHR i64:x, 63) ->
// %narrowed = (G_ASHR hi_32(x), 31)
// G_MERGE_VALUES %narrowed, %narrowed
- Builder.buildMerge(DstReg, { Hi, Hi });
+ Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
} else {
auto Lo = Builder.buildAShr(
HalfTy, Unmerge.getReg(1),
@@ -1984,7 +2009,7 @@ void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
// (G_ASHR i64:x, C) ->, for C >= 32
// G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
- Builder.buildMerge(DstReg, { Lo, Hi });
+ Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
}
}
@@ -2019,12 +2044,6 @@ void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
MI.eraseFromParent();
}
-bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
- assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
- Register SrcReg = MI.getOperand(1).getReg();
- return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
-}
-
void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
Register DstReg = MI.getOperand(0).getReg();
@@ -2195,19 +2214,6 @@ void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
MI.eraseFromParent();
}
-bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
- assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
- Register SrcReg = MI.getOperand(1).getReg();
- return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
-}
-
-bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
- assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
- Src = MI.getOperand(1).getReg();
- Register AbsSrc;
- return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
-}
-
bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
BuildFnTy &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
@@ -2260,44 +2266,109 @@ void CombinerHelper::applyCombineTruncOfExt(
MI.eraseFromParent();
}
-bool CombinerHelper::matchCombineTruncOfShl(
- MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- Register ShiftSrc;
- Register ShiftAmt;
-
- if (MRI.hasOneNonDBGUse(SrcReg) &&
- mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
- isLegalOrBeforeLegalizer(
- {TargetOpcode::G_SHL,
- {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
- KnownBits Known = KB->getKnownBits(ShiftAmt);
- unsigned Size = DstTy.getSizeInBits();
- if (Known.countMaxActiveBits() <= Log2_32(Size)) {
- MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
- return true;
- }
- }
- return false;
+static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
+ const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
+ const unsigned TruncSize = TruncTy.getScalarSizeInBits();
+
+ // ShiftTy > 32 > TruncTy -> 32
+ if (ShiftSize > 32 && TruncSize < 32)
+ return ShiftTy.changeElementSize(32);
+
+ // TODO: We could also reduce to 16 bits, but that's more target-dependent.
+ // Some targets like it, some don't, some only like it under certain
+ // conditions/processor versions, etc.
+ // A TL hook might be needed for this.
+
+ // Don't combine
+ return ShiftTy;
}
-void CombinerHelper::applyCombineTruncOfShl(
- MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+bool CombinerHelper::matchCombineTruncOfShift(
+ MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
+
+ if (!MRI.hasOneNonDBGUse(SrcReg))
+ return false;
+
+ LLT SrcTy = MRI.getType(SrcReg);
LLT DstTy = MRI.getType(DstReg);
- MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
- Register ShiftSrc = MatchInfo.first;
- Register ShiftAmt = MatchInfo.second;
+ MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
+ const auto &TL = getTargetLowering();
+
+ LLT NewShiftTy;
+ switch (SrcMI->getOpcode()) {
+ default:
+ return false;
+ case TargetOpcode::G_SHL: {
+ NewShiftTy = DstTy;
+
+ // Make sure new shift amount is legal.
+ KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
+ if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
+ return false;
+ break;
+ }
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR: {
+ // For right shifts, we conservatively do not do the transform if the TRUNC
+ // has any STORE users. The reason is that if we change the type of the
+ // shift, we may break the truncstore combine.
+ //
+ // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
+ for (auto &User : MRI.use_instructions(DstReg))
+ if (User.getOpcode() == TargetOpcode::G_STORE)
+ return false;
+
+ NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
+ if (NewShiftTy == SrcTy)
+ return false;
+
+ // Make sure we won't lose information by truncating the high bits.
+ KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
+ if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
+ DstTy.getScalarSizeInBits()))
+ return false;
+ break;
+ }
+ }
+
+ if (!isLegalOrBeforeLegalizer(
+ {SrcMI->getOpcode(),
+ {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
+ return false;
+
+ MatchInfo = std::make_pair(SrcMI, NewShiftTy);
+ return true;
+}
+
+void CombinerHelper::applyCombineTruncOfShift(
+ MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
Builder.setInstrAndDebugLoc(MI);
- auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
- Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
- MI.eraseFromParent();
+
+ MachineInstr *ShiftMI = MatchInfo.first;
+ LLT NewShiftTy = MatchInfo.second;
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ Register ShiftAmt = ShiftMI->getOperand(2).getReg();
+ Register ShiftSrc = ShiftMI->getOperand(1).getReg();
+ ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
+
+ Register NewShift =
+ Builder
+ .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
+ .getReg(0);
+
+ if (NewShiftTy == DstTy)
+ replaceRegWith(MRI, Dst, NewShift);
+ else
+ Builder.buildTrunc(Dst, NewShift);
+
+ eraseInst(MI);
}
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
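
Worked example for the generalized trunc-of-shift combine: %t:s16 = G_TRUNC(G_LSHR %x:s64, %k) can be narrowed to an s32 shift plus a trunc, provided the known maximum of %k is at most 32 - 16, so truncating the shift source loses no demanded bits. The mid-type selection, modeled on plain integers (a sketch of the logic, not the LLVM helper):

#include <cassert>

// Mirror of the mid-type choice: only right shifts wider than 32 bits that
// feed a trunc below 32 bits are retyped to 32 bits; everything else keeps
// its width (no combine).
static unsigned midShiftWidth(unsigned ShiftBits, unsigned TruncBits) {
  if (ShiftBits > 32 && TruncBits < 32)
    return 32;
  return ShiftBits;
}

int main() {
  assert(midShiftWidth(64, 16) == 32); // s64 shift feeding an s16 trunc
  assert(midShiftWidth(64, 32) == 64); // trunc to exactly 32 bits: unchanged
  assert(midShiftWidth(32, 16) == 32); // shift already 32 bits wide: unchanged
}
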
@@ -2332,6 +2403,19 @@ bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
MRI);
}
+bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) {
+ assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
+ MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
+ "Expected an insert/extract element op");
+ LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
+ unsigned IdxIdx =
+ MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
+ auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
+ if (!Idx)
+ return false;
+ return Idx->getZExtValue() >= VecTy.getNumElements();
+}
+
bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
GSelect &SelMI = cast<GSelect>(MI);
auto Cst =
@@ -2579,7 +2663,7 @@ bool CombinerHelper::matchCombineInsertVecElts(
while (mi_match(
CurrInst->getOperand(0).getReg(), MRI,
m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
- if (IntImm >= NumElts)
+ if (IntImm >= NumElts || IntImm < 0)
return false;
if (!MatchInfo[IntImm])
MatchInfo[IntImm] = TmpReg;
@@ -2738,9 +2822,9 @@ bool CombinerHelper::matchAshrShlToSextInreg(
assert(MI.getOpcode() == TargetOpcode::G_ASHR);
int64_t ShlCst, AshrCst;
Register Src;
- // FIXME: detect splat constant vectors.
if (!mi_match(MI.getOperand(0).getReg(), MRI,
- m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
+ m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
+ m_ICstOrSplat(AshrCst))))
return false;
if (ShlCst != AshrCst)
return false;
@@ -2812,12 +2896,6 @@ bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
return false;
Register AndDst = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(AndDst);
-
- // FIXME: This should be removed once GISelKnownBits supports vectors.
- if (DstTy.isVector())
- return false;
-
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
KnownBits LHSBits = KB->getKnownBits(LHS);
@@ -2858,12 +2936,6 @@ bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
return false;
Register OrDst = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(OrDst);
-
- // FIXME: This should be removed once GISelKnownBits supports vectors.
- if (DstTy.isVector())
- return false;
-
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
KnownBits LHSBits = KB->getKnownBits(LHS);
@@ -3190,14 +3262,12 @@ bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
}
Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
- Observer.erasingInstr(*Select);
- Select->eraseFromParent();
MI.eraseFromParent();
return true;
}
-Optional<SmallVector<Register, 8>>
+std::optional<SmallVector<Register, 8>>
CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
// We want to detect if Root is part of a tree which represents a bunch
@@ -3239,7 +3309,7 @@ CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
// In the combine, we want to eliminate the entire tree.
if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
- return None;
+ return std::nullopt;
// If it's a G_OR, save it and continue to walk. If it's not, then it's
// something that may be a load + arithmetic.
@@ -3256,7 +3326,7 @@ CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
// We're going to try and merge each register into a wider power-of-2 type,
// so we ought to have an even number of registers.
if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
- return None;
+ return std::nullopt;
return RegsToVisit;
}
@@ -3268,7 +3338,7 @@ CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
/// e.g. x[i] << 24
///
/// \returns The load instruction and the byte offset it is moved into.
-static Optional<std::pair<GZExtLoad *, int64_t>>
+static std::optional<std::pair<GZExtLoad *, int64_t>>
matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
const MachineRegisterInfo &MRI) {
assert(MRI.hasOneNonDBGUse(Reg) &&
@@ -3282,20 +3352,20 @@ matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
}
if (Shift % MemSizeInBits != 0)
- return None;
+ return std::nullopt;
// TODO: Handle other types of loads.
auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
if (!Load)
- return None;
+ return std::nullopt;
if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
- return None;
+ return std::nullopt;
return std::make_pair(Load, Shift / MemSizeInBits);
}
-Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
+std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
CombinerHelper::findLoadOffsetsForLoadOrCombine(
SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
@@ -3335,7 +3405,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
// shifted) value.
auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
if (!LoadAndPos)
- return None;
+ return std::nullopt;
GZExtLoad *Load;
int64_t DstPos;
std::tie(Load, DstPos) = *LoadAndPos;
@@ -3346,14 +3416,14 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
if (!MBB)
MBB = LoadMBB;
if (LoadMBB != MBB)
- return None;
+ return std::nullopt;
// Make sure that the MachineMemOperands of every seen load are compatible.
auto &LoadMMO = Load->getMMO();
if (!MMO)
MMO = &LoadMMO;
if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
- return None;
+ return std::nullopt;
// Find out what the base pointer and index for the load is.
Register LoadPtr;
@@ -3366,7 +3436,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
// Don't combine things like a[i], a[i] -> a bigger load.
if (!SeenIdx.insert(Idx).second)
- return None;
+ return std::nullopt;
// Every load must share the same base pointer; don't combine things like:
//
@@ -3374,7 +3444,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
if (!BasePtr.isValid())
BasePtr = LoadPtr;
if (BasePtr != LoadPtr)
- return None;
+ return std::nullopt;
if (Idx < LowestIdx) {
LowestIdx = Idx;
@@ -3386,7 +3456,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
//
// a[i] << 16, a[i + k] << 16 -> a bigger load.
if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
- return None;
+ return std::nullopt;
Loads.insert(Load);
// Keep track of the position of the earliest/latest loads in the pattern.
@@ -3421,9 +3491,9 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
if (Loads.count(&MI))
continue;
if (MI.isLoadFoldBarrier())
- return None;
+ return std::nullopt;
if (Iter++ == MaxIter)
- return None;
+ return std::nullopt;
}
return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
@@ -3487,7 +3557,7 @@ bool CombinerHelper::matchLoadOrCombine(
// pattern. If it does, then we can represent it using a load + possibly a
// BSWAP.
bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
- Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
+ std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
if (!IsBigEndian)
return false;
bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
@@ -3527,7 +3597,7 @@ bool CombinerHelper::matchLoadOrCombine(
// Load must be allowed and fast on the target.
LLVMContext &C = MF.getFunction().getContext();
auto &DL = MF.getDataLayout();
- bool Fast = false;
+ unsigned Fast = 0;
if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
!Fast)
return false;
@@ -3548,11 +3618,12 @@ bool CombinerHelper::matchLoadOrCombine(
/// value found.
/// On match, returns the start byte offset of the \p SrcVal that is being
/// stored.
-static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
- MachineRegisterInfo &MRI) {
+static std::optional<int64_t>
+getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+ MachineRegisterInfo &MRI) {
Register TruncVal;
if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
- return None;
+ return std::nullopt;
// The shift amount must be a constant multiple of the narrow type.
// It is translated to the offset address in the wide source value "y".
@@ -3570,21 +3641,21 @@ static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal
SrcVal = TruncVal;
return 0; // If it's the lowest index store.
}
- return None;
+ return std::nullopt;
}
unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
if (ShiftAmt % NarrowBits != 0)
- return None;
+ return std::nullopt;
const unsigned Offset = ShiftAmt / NarrowBits;
if (SrcVal.isValid() && FoundSrcVal != SrcVal)
- return None;
+ return std::nullopt;
if (!SrcVal.isValid())
SrcVal = FoundSrcVal;
else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
- return None;
+ return std::nullopt;
return Offset;
}
@@ -3732,7 +3803,7 @@ bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
const auto &DL = LastStore.getMF()->getDataLayout();
auto &C = LastStore.getMF()->getFunction().getContext();
// Check that a store of the wide type is both allowed and fast on the target
- bool Fast = false;
+ unsigned Fast = 0;
bool Allowed = getTargetLowering().allowsMemoryAccess(
C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
if (!Allowed || !Fast)
@@ -3917,33 +3988,30 @@ bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
// and find the source register that the index maps to.
Register SrcVec = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcVec);
- if (!isLegalOrBeforeLegalizer(
- {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
- return false;
auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
return false;
unsigned VecIdx = Cst->Value.getZExtValue();
- MachineInstr *BuildVecMI =
- getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI);
- if (!BuildVecMI) {
- BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI);
- if (!BuildVecMI)
- return false;
- LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg());
- if (!isLegalOrBeforeLegalizer(
- {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}}))
- return false;
+
+ // Check if we have a build_vector or build_vector_trunc with an optional
+ // trunc in front.
+ MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
+ if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
+ SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
}
+ if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
+ SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
+ return false;
+
EVT Ty(getMVTForLLT(SrcTy));
if (!MRI.hasOneNonDBGUse(SrcVec) &&
!getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
return false;
- Reg = BuildVecMI->getOperand(VecIdx + 1).getReg();
+ Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
return true;
}
@@ -4146,7 +4214,7 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
- Optional<bool> KnownVal;
+ std::optional<bool> KnownVal;
switch (Pred) {
default:
llvm_unreachable("Unexpected G_ICMP predicate?");
@@ -4542,7 +4610,7 @@ bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
// G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
// if and only if (G_PTR_ADD X, C) has one use.
Register LHSBase;
- Optional<ValueAndVReg> LHSCstOff;
+ std::optional<ValueAndVReg> LHSCstOff;
if (!mi_match(MI.getBaseReg(), MRI,
m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
return false;
@@ -4554,8 +4622,10 @@ bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
// doesn't happen.
LHSPtrAdd->moveBefore(&MI);
Register RHSReg = MI.getOffsetReg();
+ // Setting the VReg directly would cause a type mismatch if it comes from an
+ // extend/trunc.
+ auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
Observer.changingInstr(MI);
- MI.getOperand(2).setReg(LHSCstOff->VReg);
+ MI.getOperand(2).setReg(NewCst.getReg(0));
Observer.changedInstr(MI);
Observer.changingInstr(*LHSPtrAdd);
LHSPtrAdd->getOperand(2).setReg(RHSReg);
@@ -4781,6 +4851,83 @@ bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
+bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
+ // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
+ assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
+ MI.getOpcode() == TargetOpcode::G_SADDE ||
+ MI.getOpcode() == TargetOpcode::G_USUBE ||
+ MI.getOpcode() == TargetOpcode::G_SSUBE);
+ if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
+ return false;
+ MatchInfo = [&](MachineIRBuilder &B) {
+ unsigned NewOpcode;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_UADDE:
+ NewOpcode = TargetOpcode::G_UADDO;
+ break;
+ case TargetOpcode::G_SADDE:
+ NewOpcode = TargetOpcode::G_SADDO;
+ break;
+ case TargetOpcode::G_USUBE:
+ NewOpcode = TargetOpcode::G_USUBO;
+ break;
+ case TargetOpcode::G_SSUBE:
+ NewOpcode = TargetOpcode::G_SSUBO;
+ break;
+ }
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(NewOpcode));
+ MI.removeOperand(4);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SUB);
+ Register Dst = MI.getOperand(0).getReg();
+ // (x + y) - z -> x (if y == z)
+ // (x + y) - z -> y (if x == z)
+ Register X, Y, Z;
+ if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
+ Register ReplaceReg;
+ int64_t CstX, CstY;
+ if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
+ mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
+ ReplaceReg = X;
+ else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
+ mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
+ ReplaceReg = Y;
+ if (ReplaceReg) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
+ return true;
+ }
+ }
+
+ // x - (y + z) -> 0 - y (if x == z)
+ // x - (y + z) -> 0 - z (if x == y)
+ if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
+ Register ReplaceReg;
+ int64_t CstX;
+ if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
+ mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
+ ReplaceReg = Y;
+ else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
+ mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
+ ReplaceReg = Z;
+ if (ReplaceReg) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Zero = B.buildConstant(MRI.getType(Dst), 0);
+ B.buildSub(Dst, Zero, ReplaceReg);
+ };
+ return true;
+ }
+ }
+ return false;
+}
+
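
The matchSubAddSameReg fold above relies on identities that hold exactly in wrapping two's-complement arithmetic, which is what G_ADD/G_SUB provide. A quick standalone check:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF, Y = 42;
  assert((X + Y) - Y == X);               // (x + y) - z -> x   when y == z
  assert(X - (Y + X) == uint32_t(0) - Y); // x - (y + z) -> 0-y when x == z
}
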
MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UDIV);
auto &UDiv = cast<GenericMachineInstr>(MI);
@@ -4801,34 +4948,33 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
auto BuildUDIVPattern = [&](const Constant *C) {
auto *CI = cast<ConstantInt>(C);
const APInt &Divisor = CI->getValue();
- UnsignedDivisionByConstantInfo magics =
- UnsignedDivisionByConstantInfo::get(Divisor);
+
+ bool SelNPQ = false;
+ APInt Magic(Divisor.getBitWidth(), 0);
unsigned PreShift = 0, PostShift = 0;
- // If the divisor is even, we can avoid using the expensive fixup by
- // shifting the divided value upfront.
- if (magics.IsAdd && !Divisor[0]) {
- PreShift = Divisor.countTrailingZeros();
- // Get magic number for the shifted divisor.
- magics =
- UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
- assert(!magics.IsAdd && "Should use cheap fixup now");
- }
+ // Magic algorithm doesn't work for division by 1. We need to emit a select
+ // at the end.
+ // TODO: Use undef values for divisor of 1.
+ if (!Divisor.isOneValue()) {
+ UnsignedDivisionByConstantInfo magics =
+ UnsignedDivisionByConstantInfo::get(Divisor);
- unsigned SelNPQ;
- if (!magics.IsAdd || Divisor.isOneValue()) {
- assert(magics.ShiftAmount < Divisor.getBitWidth() &&
+ Magic = std::move(magics.Magic);
+
+ assert(magics.PreShift < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
- PostShift = magics.ShiftAmount;
- SelNPQ = false;
- } else {
- PostShift = magics.ShiftAmount - 1;
- SelNPQ = true;
+ assert(magics.PostShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
+ PreShift = magics.PreShift;
+ PostShift = magics.PostShift;
+ SelNPQ = magics.IsAdd;
}
PreShifts.push_back(
MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
- MagicFactors.push_back(MIB.buildConstant(ScalarTy, magics.Magic).getReg(0));
+ MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
NPQFactors.push_back(
MIB.buildConstant(ScalarTy,
SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
@@ -4935,6 +5081,108 @@ void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
}
+bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
+ Register Dst = MI.getOperand(0).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(Dst);
+
+ auto &MF = *MI.getMF();
+ AttributeList Attr = MF.getFunction().getAttributes();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+ return false;
+
+ // Don't do this for minsize because the instruction sequence is usually
+ // larger.
+ if (MF.getFunction().hasMinSize())
+ return false;
+
+ // If the sdiv has an 'exact' flag we can use a simpler lowering.
+ if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
+ return matchUnaryPredicate(
+ MRI, RHS, [](const Constant *C) { return C && !C->isZeroValue(); });
+ }
+
+ // Don't support the general case for now.
+ return false;
+}
+
+void CombinerHelper::applySDivByConst(MachineInstr &MI) {
+ auto *NewMI = buildSDivUsingMul(MI);
+ replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
+MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
+ auto &SDiv = cast<GenericMachineInstr>(MI);
+ Register Dst = SDiv.getReg(0);
+ Register LHS = SDiv.getReg(1);
+ Register RHS = SDiv.getReg(2);
+ LLT Ty = MRI.getType(Dst);
+ LLT ScalarTy = Ty.getScalarType();
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
+ auto &MIB = Builder;
+ MIB.setInstrAndDebugLoc(MI);
+
+ bool UseSRA = false;
+ SmallVector<Register, 16> Shifts, Factors;
+
+ auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
+ bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
+
+ auto BuildSDIVPattern = [&](const Constant *C) {
+ // Don't recompute inverses for each splat element.
+ if (IsSplat && !Factors.empty()) {
+ Shifts.push_back(Shifts[0]);
+ Factors.push_back(Factors[0]);
+ return true;
+ }
+
+ auto *CI = cast<ConstantInt>(C);
+ APInt Divisor = CI->getValue();
+ unsigned Shift = Divisor.countTrailingZeros();
+ if (Shift) {
+ Divisor.ashrInPlace(Shift);
+ UseSRA = true;
+ }
+
+ // Calculate the multiplicative inverse modulo BW.
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = Divisor.getBitWidth();
+ APInt Factor = Divisor.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
+ Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
+ return true;
+ };
+
+ // Collect all magic values from the build vector.
+ bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
+ (void)Matched;
+ assert(Matched && "Expected unary predicate match to succeed");
+
+ Register Shift, Factor;
+ if (Ty.isVector()) {
+ Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
+ Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
+ } else {
+ Shift = Shifts[0];
+ Factor = Factors[0];
+ }
+
+ Register Res = LHS;
+
+ if (UseSRA)
+ Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
+
+ return MIB.buildMul(Ty, Res, Factor);
+}
+
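
buildSDivUsingMul handles the exact-division case: strip the power-of-two part of the divisor with an exact arithmetic shift, then multiply by the odd factor's multiplicative inverse mod 2^W. A standalone check for W = 32 and divisor 6, where the inverse of 3 is 0xAAAAAAAB (3 * 0xAAAAAAAB == 1 mod 2^32):

#include <cassert>
#include <cstdint>

int main() {
  int32_t X = 6 * 123457;          // exact sdiv requires a known multiple
  uint32_t Inv3 = 0xAAAAAAABu;     // multiplicative inverse of 3 (mod 2^32)
  int32_t Res = (int32_t)((uint32_t)(X >> 1) * Inv3); // ashr-exact, then mul
  assert(Res == X / 6);
}
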
bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UMULH);
Register RHS = MI.getOperand(2).getReg();
@@ -5014,6 +5262,38 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
return true;
}
+bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ Register LHS = MI.getOperand(1).getReg();
+ MatchInfo = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ const auto LHSCst = Ty.isVector()
+ ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
+ : getFConstantVRegValWithLookThrough(LHS, MRI);
+ if (!LHSCst)
+ return false;
+
+ // -0.0 is always allowed
+ if (LHSCst->Value.isNegZero())
+ return true;
+
+ // +0.0 is only allowed if nsz is set.
+ if (LHSCst->Value.isPosZero())
+ return MI.getFlag(MachineInstr::FmNsz);
+
+ return false;
+}
+
+void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
+ Builder.setInstrAndDebugLoc(MI);
+ Register Dst = MI.getOperand(0).getReg();
+ Builder.buildFNeg(
+ Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
+ eraseInst(MI);
+}
+
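
The signed-zero restriction in matchFsubToFneg is exactly where 0.0 - x and fneg x disagree: at x == +0.0 the subtraction produces +0.0 while the negation produces -0.0, so the +0.0 form is only valid under nsz. A standalone illustration:

#include <cassert>
#include <cmath>

int main() {
  double X = 0.0;
  assert(!std::signbit(0.0 - X)); // +0.0 - +0.0 == +0.0 (round-to-nearest)
  assert(std::signbit(-X));       // fneg(+0.0) == -0.0: differs, needs nsz
  assert(std::signbit(-0.0 - X)); // -0.0 - +0.0 == -0.0: always matches fneg
}
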
/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
/// due to global flags or MachineInstr flags.
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
@@ -5045,7 +5325,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
return false;
// Floating-point multiply-add with intermediate rounding.
- HasFMAD = (LI && TLI.isFMADLegal(MI, DstType));
+ HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
@@ -5670,6 +5950,241 @@ bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}
+bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
+ Register &MatchInfo) {
+ // This combine folds the following patterns:
+ //
+ // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
+ // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
+ // into
+ // x
+ // if
+ // k == sizeof(VecEltTy)/2
+ // type(x) == type(dst)
+ //
+ // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
+ // into
+ // x
+ // if
+ // type(x) == type(dst)
+
+ LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT DstEltTy = DstVecTy.getElementType();
+
+ Register Lo, Hi;
+
+ if (mi_match(
+ MI, MRI,
+ m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
+ MatchInfo = Lo;
+ return MRI.getType(MatchInfo) == DstVecTy;
+ }
+
+ std::optional<ValueAndVReg> ShiftAmount;
+ const auto LoPattern = m_GBitcast(m_Reg(Lo));
+ const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
+ if (mi_match(
+ MI, MRI,
+ m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
+ m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
+ if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
+ MatchInfo = Lo;
+ return MRI.getType(MatchInfo) == DstVecTy;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
+ Register &MatchInfo) {
+ // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
+ // if type(x) == type(G_TRUNC)
+ if (!mi_match(MI.getOperand(1).getReg(), MRI,
+ m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
+ return false;
+
+ return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
+}
+
+bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
+ Register &MatchInfo) {
+ // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
+ // y if K == size of vector element type
+ std::optional<ValueAndVReg> ShiftAmt;
+ if (!mi_match(MI.getOperand(1).getReg(), MRI,
+ m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
+ m_GCst(ShiftAmt))))
+ return false;
+
+ LLT MatchTy = MRI.getType(MatchInfo);
+ return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
+ MatchTy == MRI.getType(MI.getOperand(0).getReg());
+}
+
+unsigned CombinerHelper::getFPMinMaxOpcForSelect(
+ CmpInst::Predicate Pred, LLT DstTy,
+ SelectPatternNaNBehaviour VsNaNRetVal) const {
+ assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
+ "Expected a NaN behaviour?");
+ // Choose an opcode based off of legality or the behaviour when one of the
+ // LHS/RHS may be NaN.
+ switch (Pred) {
+ default:
+ return 0;
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE:
+ if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
+ return TargetOpcode::G_FMAXNUM;
+ if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
+ return TargetOpcode::G_FMAXIMUM;
+ if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
+ return TargetOpcode::G_FMAXNUM;
+ if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
+ return TargetOpcode::G_FMAXIMUM;
+ return 0;
+ case CmpInst::FCMP_ULT:
+ case CmpInst::FCMP_ULE:
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_OLE:
+ if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
+ return TargetOpcode::G_FMINNUM;
+ if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
+ return TargetOpcode::G_FMINIMUM;
+ if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
+ return TargetOpcode::G_FMINNUM;
+ if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
+ return 0;
+ return TargetOpcode::G_FMINIMUM;
+ }
+}
+
+CombinerHelper::SelectPatternNaNBehaviour
+CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
+ bool IsOrderedComparison) const {
+ bool LHSSafe = isKnownNeverNaN(LHS, MRI);
+ bool RHSSafe = isKnownNeverNaN(RHS, MRI);
+ // Completely unsafe.
+ if (!LHSSafe && !RHSSafe)
+ return SelectPatternNaNBehaviour::NOT_APPLICABLE;
+ if (LHSSafe && RHSSafe)
+ return SelectPatternNaNBehaviour::RETURNS_ANY;
+ // An ordered comparison will return false when given a NaN, so it
+ // returns the RHS.
+ if (IsOrderedComparison)
+ return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
+ : SelectPatternNaNBehaviour::RETURNS_OTHER;
+ // An unordered comparison will return true when given a NaN, so it
+ // returns the LHS.
+ return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
+ : SelectPatternNaNBehaviour::RETURNS_NAN;
+}
+
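
The NaN reasoning above reduces to fcmp semantics: ordered predicates are false on NaN, so the select falls through to the RHS; unordered predicates are true on NaN, so the select yields the LHS. In plain C++, where < is an ordered comparison:

#include <cassert>
#include <cmath>

int main() {
  double NaN = std::nan(""), Y = 1.0;
  assert(!(NaN < Y));  // ordered compare with NaN -> false -> select picks RHS
  assert(!(NaN >= Y)); // so unordered "ult", i.e. !(x >= y) -> true -> LHS
}
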
+bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
+ Register TrueVal, Register FalseVal,
+ BuildFnTy &MatchInfo) {
+ // Match: select (fcmp cond x, y) x, y
+ // select (fcmp cond x, y) y, x
+ // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
+ LLT DstTy = MRI.getType(Dst);
+ // Bail out early on pointers, since we'll never want to fold to a min/max.
+ if (DstTy.isPointer())
+ return false;
+ // Match a floating point compare with a less-than/greater-than predicate.
+ // TODO: Allow multiple users of the compare if they are all selects.
+ CmpInst::Predicate Pred;
+ Register CmpLHS, CmpRHS;
+ if (!mi_match(Cond, MRI,
+ m_OneNonDBGUse(
+ m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
+ CmpInst::isEquality(Pred))
+ return false;
+ SelectPatternNaNBehaviour ResWithKnownNaNInfo =
+ computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
+ if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
+ return false;
+ if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+ std::swap(CmpLHS, CmpRHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
+ ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
+ else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
+ ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
+ }
+ if (TrueVal != CmpLHS || FalseVal != CmpRHS)
+ return false;
+ // Decide what type of max/min this should be based off of the predicate.
+ unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
+ if (!Opc || !isLegal({Opc, {DstTy}}))
+ return false;
+ // Comparisons between signed zero and zero may have different results...
+ // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
+ if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
+ // We don't know if a comparison between two 0s will give us a consistent
+ // result. Be conservative and only proceed if at least one side is
+ // non-zero.
+ auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
+ if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
+ KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
+ if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
+ return false;
+ }
+ }
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
+ };
+ return true;
+}
+
+bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // TODO: Handle integer cases.
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ // Condition may be fed by a truncated compare.
+ Register Cond = MI.getOperand(1).getReg();
+ Register MaybeTrunc;
+ if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
+ Cond = MaybeTrunc;
+ Register Dst = MI.getOperand(0).getReg();
+ Register TrueVal = MI.getOperand(2).getReg();
+ Register FalseVal = MI.getOperand(3).getReg();
+ return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
+}
+
+bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+ // (X + Y) == X --> Y == 0
+ // (X + Y) != X --> Y != 0
+ // (X - Y) == X --> Y == 0
+ // (X - Y) != X --> Y != 0
+ // (X ^ Y) == X --> Y == 0
+ // (X ^ Y) != X --> Y != 0
+ Register Dst = MI.getOperand(0).getReg();
+ CmpInst::Predicate Pred;
+ Register X, Y, OpLHS, OpRHS;
+ bool MatchedSub = mi_match(
+ Dst, MRI,
+ m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
+ if (MatchedSub && X != OpLHS)
+ return false;
+ if (!MatchedSub) {
+ if (!mi_match(Dst, MRI,
+ m_c_GICmp(m_Pred(Pred), m_Reg(X),
+ m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
+ m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
+ return false;
+ Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
+ }
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Zero = B.buildConstant(MRI.getType(Y), 0);
+ B.buildICmp(Pred, Dst, Y, Zero);
+ };
+ return CmpInst::isEquality(Pred) && Y.isValid();
+}
+
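
The equality fold above is again exact in wrapping arithmetic: for add, sub, and xor, op(X, Y) == X holds iff Y == 0, because each operation is invertible in X. A brief standalone check:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xCAFEBABE;
  for (uint32_t Y : {0u, 1u, 0xFFFFFFFFu}) {
    assert(((X + Y) == X) == (Y == 0));
    assert(((X - Y) == X) == (Y == 0));
    assert(((X ^ Y) == X) == (Y == 0));
  }
}
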
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 4f03af0fce82..bfbe7e1c3e55 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -39,8 +39,7 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
case TargetOpcode::G_ASSERT_ALIGN: {
// TODO: Min with source
- int64_t LogAlign = MI->getOperand(2).getImm();
- return Align(1ull << LogAlign);
+ return Align(MI->getOperand(2).getImm());
}
case TargetOpcode::G_FRAME_INDEX: {
int FrameIdx = MI->getOperand(1).getIndex();
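
Both G_ASSERT_ALIGN hunks in this file adapt to the immediate now encoding the alignment value itself rather than its log2; Log2_64 recovers the old form where the known-bits math still wants it. Equivalent standalone arithmetic:

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint64_t AlignImm = 16; // previously this operand carried log2(16) == 4
  unsigned LogOfAlign = std::bit_width(AlignImm) - 1; // Log2_64, powers of two
  assert(LogOfAlign == 4 && (1ull << LogOfAlign) == AlignImm);
}
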
@@ -286,7 +285,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
LLT Ty = MRI.getType(MI.getOperand(1).getReg());
if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
break;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case TargetOpcode::G_ADD: {
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
@@ -447,7 +446,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
if (DstTy.isVector())
break;
// Fall through and handle them the same as zext/trunc.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TargetOpcode::G_ASSERT_ZEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_TRUNC: {
@@ -472,9 +471,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_ASSERT_ALIGN: {
- int64_t LogOfAlign = MI.getOperand(2).getImm();
- if (LogOfAlign == 0)
- break;
+ int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());
// TODO: Should use maximum with source
// If a node is guaranteed to be aligned, set low zero bits accordingly as
@@ -533,7 +530,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
// We can bound the space the count needs. Also, bits known to be zero can't
// contribute to the population.
unsigned BitsPossiblySet = Known2.countMaxPopulation();
- unsigned LowBits = Log2_32(BitsPossiblySet)+1;
+ unsigned LowBits = llvm::bit_width(BitsPossiblySet);
Known.Zero.setBitsFrom(LowBits);
// TODO: we could bound Known.One using the lower bound on the number of
// bits which might be set provided by popcnt KnownOne2.
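
llvm::bit_width(x) mirrors C++20 std::bit_width: the number of bits needed to represent x, i.e. floor(log2(x)) + 1 for nonzero x and 0 for x == 0, which is exactly the bound the popcount result needs:

#include <bit>
#include <cassert>

int main() {
  assert(std::bit_width(7u) == 3); // a popcount of at most 7 fits in 3 bits
  assert(std::bit_width(8u) == 4);
  assert(std::bit_width(0u) == 0); // no bits can be set at all
}
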
@@ -714,6 +711,18 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
break;
}
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_ICMP: {
+ bool IsFP = Opcode == TargetOpcode::G_FCMP;
+ if (TyBits == 1)
+ break;
+ auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
+ if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
+ return TyBits; // All bits are sign bits.
+ if (BC == TargetLowering::ZeroOrOneBooleanContent)
+ return TyBits - 1; // Every always-zero bit is a sign bit.
+ break;
+ }
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
default: {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 2f9187bbf2ad..7d811dc0ad8f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -16,7 +16,9 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
@@ -61,6 +63,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -81,6 +84,7 @@
#include <cassert>
#include <cstdint>
#include <iterator>
+#include <optional>
#include <string>
#include <utility>
#include <vector>
@@ -167,6 +171,7 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<StackProtector>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
if (OptLevel != CodeGenOpt::None) {
AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
@@ -1064,7 +1069,7 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
LLT SwitchTy = getLLTForMVT(BB.RegVT);
Register Cmp;
- unsigned PopCount = countPopulation(B.Mask);
+ unsigned PopCount = llvm::popcount(B.Mask);
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
@@ -1301,16 +1306,12 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
}
auto &TLI = *MF->getSubtarget().getTargetLowering();
- MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);
+ MachineMemOperand::Flags Flags =
+ TLI.getLoadMemOperandFlags(LI, *DL, AC, LibInfo);
if (AA && !(Flags & MachineMemOperand::MOInvariant)) {
if (AA->pointsToConstantMemory(
MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) {
Flags |= MachineMemOperand::MOInvariant;
-
- // FIXME: pointsToConstantMemory probably does not imply dereferenceable,
- // but the previous usage implied it did. Probably should check
- // isDereferenceableAndAlignedPointer.
- Flags |= MachineMemOperand::MODereferenceable;
}
}
@@ -1882,10 +1883,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder) {
if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
if (ORE->enabled()) {
- const Function &F = *MI->getParent()->getParent();
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- if (MemoryOpRemark::canHandle(MI, TLI)) {
- MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, TLI);
+ if (MemoryOpRemark::canHandle(MI, *LibInfo)) {
+ MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, *LibInfo);
R.visit(MI);
}
}
@@ -2301,7 +2300,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// Convert the metadata argument to a constant integer
Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
- Optional<RoundingMode> RoundMode =
+ std::optional<RoundingMode> RoundMode =
convertStrToRoundingMode(cast<MDString>(MD)->getString());
// Add the Rounding mode as an integer
@@ -2313,6 +2312,17 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
+ case Intrinsic::is_fpclass: {
+ Value *FpValue = CI.getOperand(0);
+ ConstantInt *TestMaskValue = cast<ConstantInt>(CI.getOperand(1));
+
+ MIRBuilder
+ .buildInstr(TargetOpcode::G_IS_FPCLASS, {getOrCreateVReg(CI)},
+ {getOrCreateVReg(*FpValue)})
+ .addImm(TestMaskValue->getZExtValue());
+
+ return true;
+ }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
@@ -2352,7 +2362,7 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
&CB, &MIRBuilder.getMBB(), Arg));
- Args.emplace_back(makeArrayRef(SwiftInVReg));
+ Args.emplace_back(ArrayRef(SwiftInVReg));
SwiftErrorVReg =
SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg);
continue;
@@ -2362,10 +2372,8 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
if (auto *CI = dyn_cast<CallInst>(&CB)) {
if (ORE->enabled()) {
- const Function &F = *CI->getParent()->getParent();
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- if (MemoryOpRemark::canHandle(CI, TLI)) {
- MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, TLI);
+ if (MemoryOpRemark::canHandle(CI, *LibInfo)) {
+ MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, *LibInfo);
R.visit(CI);
}
}
@@ -2403,6 +2411,10 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
return false;
+ // FIXME: support statepoints and related.
+ if (isa<GCStatepointInst, GCRelocateInst, GCResultInst>(U))
+ return false;
+
if (CI.isInlineAsm())
return translateInlineAsm(CI, MIRBuilder);
@@ -2475,8 +2487,16 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
LLT MemTy = Info.memVT.isSimple()
? getLLTForMVT(Info.memVT.getSimpleVT())
: LLT::scalar(Info.memVT.getStoreSizeInBits());
- MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
- Info.flags, MemTy, Alignment));
+
+ // TODO: We currently just fall back to address space 0 if getTgtMemIntrinsic
+ // didn't yield anything useful.
+ MachinePointerInfo MPI;
+ if (Info.ptrVal)
+ MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
+ else if (Info.fallbackAddressSpace)
+ MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
+ MIB.addMemOperand(
+ MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
}
return true;
@@ -2566,14 +2586,12 @@ bool IRTranslator::translateInvoke(const User &U,
bool LowerInlineAsm = I.isInlineAsm();
bool NeedEHLabel = true;
- // If it can't throw then use a fast-path without emitting EH labels.
- if (LowerInlineAsm)
- NeedEHLabel = (cast<InlineAsm>(I.getCalledOperand()))->canThrow();
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
// the region covered by the try.
MCSymbol *BeginSymbol = nullptr;
if (NeedEHLabel) {
+ MIRBuilder.buildInstr(TargetOpcode::G_INVOKE_REGION_START);
BeginSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
}
@@ -2808,7 +2826,7 @@ bool IRTranslator::translateExtractElement(const User &U,
Register Idx;
if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
if (CI->getBitWidth() != PreferredVecIdxWidth) {
- APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
+ APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
Idx = getOrCreateVReg(*NewIdxCI);
}
@@ -2817,7 +2835,7 @@ bool IRTranslator::translateExtractElement(const User &U,
Idx = getOrCreateVReg(*U.getOperand(1));
if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
- Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0);
+ Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0);
}
MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
return true;
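
A note on the two hunks above: widening a narrow extract-element index switches from sign- to zero-extension, consistent with element indices being treated as unsigned; sign-extending a large narrow index would produce a negative value. An 8-bit example:

    #include <cstdint>

    int32_t  sext8(uint8_t B) { return static_cast<int8_t>(B); } // 0xC8 -> -56
    uint32_t zext8(uint8_t B) { return B; }                      // 0xC8 -> 200
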
@@ -2934,6 +2952,12 @@ bool IRTranslator::translateAtomicRMW(const User &U,
case AtomicRMWInst::FMin:
Opcode = TargetOpcode::G_ATOMICRMW_FMIN;
break;
+ case AtomicRMWInst::UIncWrap:
+ Opcode = TargetOpcode::G_ATOMICRMW_UINC_WRAP;
+ break;
+ case AtomicRMWInst::UDecWrap:
+ Opcode = TargetOpcode::G_ATOMICRMW_UDEC_WRAP;
+ break;
}
MIRBuilder.buildAtomicRMW(
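
A note on the new atomicrmw cases: uinc_wrap and udec_wrap are wrapping unsigned increment/decrement. Per the LLVM LangRef the stored value is computed as below; this sketch models only the value function, while the real operation is a single atomic read-modify-write:

    #include <cstdint>

    uint32_t uincWrap(uint32_t Old, uint32_t Val) {
      return Old >= Val ? 0 : Old + 1;                // wraps to 0 past Val
    }

    uint32_t udecWrap(uint32_t Old, uint32_t Val) {
      return (Old == 0 || Old > Val) ? Val : Old - 1; // wraps to Val below 0
    }
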
@@ -3003,6 +3027,7 @@ void IRTranslator::finishPendingPhis() {
bool IRTranslator::translate(const Instruction &Inst) {
CurBuilder->setDebugLoc(Inst.getDebugLoc());
+ CurBuilder->setPCSections(Inst.getMetadata(LLVMContext::MD_pcsections));
auto &TLI = *MF->getSubtarget().getTargetLowering();
if (TLI.fallBackToDAGISel(Inst))
@@ -3393,6 +3418,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
FuncInfo.BPI = nullptr;
}
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+ MF->getFunction());
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);
const auto &TLI = *MF->getSubtarget().getTargetLowering();
@@ -3437,7 +3465,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MF->push_back(MBB);
if (BB.hasAddressTaken())
- MBB->setHasAddressTaken();
+ MBB->setAddressTakenIRBlock(const_cast<BasicBlock *>(&BB));
if (!HasMustTailInVarArgFn)
HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 28f3b425c67d..f780050ca3f1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -160,6 +160,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// If so, erase it.
if (isTriviallyDead(MI, MRI)) {
LLVM_DEBUG(dbgs() << "Is dead; erasing.\n");
+ salvageDebugInfo(MRI, MI);
MI.eraseFromParent();
continue;
}
@@ -183,6 +184,11 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ if (MI.getOpcode() == TargetOpcode::G_INVOKE_REGION_START) {
+ MI.eraseFromParent();
+ continue;
+ }
+
if (!ISel->select(MI)) {
// FIXME: It would be nice to dump all inserted instructions. It's
// not obvious how, esp. considering select() can insert after MI.
@@ -229,8 +235,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
Register SrcReg = MI.getOperand(1).getReg();
Register DstReg = MI.getOperand(0).getReg();
- if (Register::isVirtualRegister(SrcReg) &&
- Register::isVirtualRegister(DstReg)) {
+ if (SrcReg.isVirtual() && DstReg.isVirtual()) {
auto SrcRC = MRI.getRegClass(SrcReg);
auto DstRC = MRI.getRegClass(DstReg);
if (SrcRC == DstRC) {
@@ -247,7 +252,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// that the size of the now-constrained vreg is unchanged and that it has a
// register class.
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned VReg = Register::index2VirtReg(I);
+ Register VReg = Register::index2VirtReg(I);
MachineInstr *MI = nullptr;
if (!MRI.def_empty(VReg))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
index 6271a4514c27..8cfb1b786c24 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
@@ -264,7 +264,7 @@ LegacyLegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Siz
// Special case for scalarization:
if (Vec == SizeAndActionsVec({{1, FewerElements}}))
return {1, FewerElements};
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NarrowScalar: {
// The following needs to be a loop, as for now, we do allow needing to
// go over "Unsupported" bit sizes before finding a legalizable bit size.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index f09e5b7ce783..1a13f39c100c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -225,6 +225,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
assert(isPreISelGenericOpcode(MI.getOpcode()) &&
"Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
+ salvageDebugInfo(MRI, MI);
eraseInstr(MI, MRI, &LocObserver);
continue;
}
@@ -272,6 +273,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
assert(isPreISelGenericOpcode(MI.getOpcode()) &&
"Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
+ salvageDebugInfo(MRI, MI);
eraseInstr(MI, MRI, &LocObserver);
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 52ee13757f27..8a1fce2d3d65 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -33,6 +33,8 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include <numeric>
+#include <optional>
#define DEBUG_TYPE "legalizer"
@@ -233,7 +235,7 @@ void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
// Requested sub-vectors of NarrowTy.
for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
- VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
+ VRegs.push_back(MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
}
// Leftover element(s).
@@ -242,7 +244,8 @@ void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
} else {
LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
- VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0));
+ VRegs.push_back(
+ MIRBuilder.buildMergeLikeInstr(LeftoverTy, Pieces).getReg(0));
}
}
@@ -255,7 +258,7 @@ void LegalizerHelper::insertParts(Register DstReg,
assert(LeftoverRegs.empty());
if (!ResultTy.isVector()) {
- MIRBuilder.buildMerge(DstReg, PartRegs);
+ MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
return;
}
@@ -304,7 +307,7 @@ void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
else
appendVectorElts(AllElts, Leftover);
- MIRBuilder.buildMerge(DstReg, AllElts);
+ MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
}
/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
@@ -421,7 +424,7 @@ LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
if (NumSubParts == 1)
Remerge[I] = SubMerge[0];
else
- Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
+ Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
// In the sign extend padding case, re-use the first all-signbit merge.
if (AllMergePartsArePadding && !AllPadReg)
@@ -440,11 +443,11 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
// the result.
if (DstTy == LCMTy) {
- MIRBuilder.buildMerge(DstReg, RemergeRegs);
+ MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
return;
}
- auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
+ auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
if (DstTy.isScalar() && LCMTy.isScalar()) {
MIRBuilder.buildTrunc(DstReg, Remerge);
return;
@@ -458,7 +461,7 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
MIRBuilder.buildUnmerge(UnmergeDefs,
- MIRBuilder.buildMerge(LCMTy, RemergeRegs));
+ MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
return;
}
@@ -497,6 +500,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
} while (0)
switch (Opcode) {
+ case TargetOpcode::G_MUL:
+ RTLIBCASE_INT(MUL_I);
case TargetOpcode::G_SDIV:
RTLIBCASE_INT(SDIV_I);
case TargetOpcode::G_UDIV:
@@ -795,6 +800,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_MUL:
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SREM:
@@ -936,7 +942,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (DstTy.isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
else
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -1008,7 +1014,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
}
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
MI.eraseFromParent();
return Legalized;
}
@@ -1169,7 +1175,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changingInstr(MI);
for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
- MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
SrcRegs[i / 2]);
}
@@ -1183,7 +1189,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
}
MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
- MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
@@ -1360,7 +1366,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// Gather the destination registers into the final destination.
Register DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -1380,7 +1386,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(DstPart.getReg(0));
}
- MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
Observer.changedInstr(MI);
MI.eraseFromParent();
@@ -1565,7 +1571,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
// %9:_(s6) = G_MERGE_VALUES %6, %7, %7
// %10:_(s12) = G_MERGE_VALUES %8, %9
- const int GCD = greatestCommonDivisor(SrcSize, WideSize);
+ const int GCD = std::gcd(SrcSize, WideSize);
LLT GCDTy = LLT::scalar(GCD);
SmallVector<Register, 8> Parts;
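
A note: std::gcd from <numeric> (one of the new includes in this file) is a drop-in replacement for llvm::greatestCommonDivisor, e.g.:

    #include <numeric>

    static_assert(std::gcd(6, 4) == 2);
    static_assert(std::gcd(48, 32) == 16);
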
@@ -1597,16 +1603,17 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
// Build merges of each piece.
ArrayRef<Register> Slicer(Unmerges);
for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
- auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
+ auto Merge =
+ MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
NewMergeRegs.push_back(Merge.getReg(0));
}
// A truncate may be necessary if the requested type doesn't evenly divide the
// original result type.
if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
- MIRBuilder.buildMerge(DstReg, NewMergeRegs);
+ MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
} else {
- auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
+ auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
}
@@ -1734,7 +1741,7 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
RemergeParts.emplace_back(Parts[Idx]);
}
- MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
RemergeParts.clear();
}
}
@@ -1838,7 +1845,7 @@ LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
unsigned Opcode;
unsigned ExtOpcode;
- Optional<Register> CarryIn = None;
+ std::optional<Register> CarryIn;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode!");
@@ -1884,9 +1891,9 @@ LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
Observer.changingInstr(MI);
- widenScalarDst(MI, WideTy, 1);
if (CarryIn)
widenScalarSrc(MI, WideTy, 4, BoolExtOp);
+ widenScalarDst(MI, WideTy, 1);
Observer.changedInstr(MI);
return Legalized;
@@ -2454,7 +2461,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changingInstr(MI);
for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
- MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
}
@@ -2675,7 +2682,7 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) {
} else
getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
- MIRBuilder.buildMerge(Dst, SrcRegs);
+ MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -2683,7 +2690,7 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) {
if (DstTy.isVector()) {
SmallVector<Register, 8> SrcRegs;
getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
- MIRBuilder.buildMerge(Dst, SrcRegs);
+ MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -3315,7 +3322,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_FSUB: {
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_STRICT_FSUB: {
Register Res = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Res);
@@ -3326,9 +3334,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return UnableToLegalize;
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
- Register Neg = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildFNeg(Neg, RHS);
- MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
+ auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
+
+ if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
+ MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
+ else
+ MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
+
MI.eraseFromParent();
return Legalized;
}
@@ -3515,6 +3527,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerAbsToAddXor(MI);
case G_SELECT:
return lowerSelect(MI);
+ case G_IS_FPCLASS:
+ return lowerISFPCLASS(MI);
case G_SDIVREM:
case G_UDIVREM:
return lowerDIVREM(MI);
@@ -3748,7 +3762,7 @@ LegalizerHelper::fewerElementsVectorMultiEltType(
mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
} else {
for (unsigned i = 0; i < NumDefs; ++i)
- MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]);
+ MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
}
MI.eraseFromParent();
@@ -3773,7 +3787,7 @@ LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
UseIdx += 2, ++UseNo) {
MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
- MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
}
@@ -3796,7 +3810,7 @@ LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
if (NumLeftovers) {
mergeMixedSubvectors(MI.getReg(0), OutputRegs);
} else {
- MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
+ MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
}
MI.eraseFromParent();
@@ -3899,10 +3913,11 @@ LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
++i, Offset += NumNarrowTyElts) {
ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
- NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
+ NarrowTyElts.push_back(
+ MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
}
- MIRBuilder.buildMerge(DstReg, NarrowTyElts);
+ MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
MI.eraseFromParent();
return Legalized;
}
@@ -3930,10 +3945,11 @@ LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
SmallVector<Register, 8> Sources;
for (unsigned j = 0; j < NumElts; ++j)
Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
- NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0));
+ NarrowTyElts.push_back(
+ MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
}
- MIRBuilder.buildMerge(DstReg, NarrowTyElts);
+ MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
MI.eraseFromParent();
return Legalized;
}
@@ -4214,10 +4230,16 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_SSUBO:
case G_SADDE:
case G_SSUBE:
+ case G_STRICT_FADD:
+ case G_STRICT_FSUB:
+ case G_STRICT_FMUL:
+ case G_STRICT_FMA:
return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
+ case G_IS_FPCLASS:
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
case G_SELECT:
if (MRI.getType(MI.getOperand(1).getReg()).isVector())
return fewerElementsVectorMultiEltType(GMI, NumElts);
@@ -4307,7 +4329,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
// The input vector this mask element indexes into.
unsigned Input = (unsigned)Idx / NewElts;
- if (Input >= array_lengthof(Inputs)) {
+ if (Input >= std::size(Inputs)) {
// The mask element does not index into any input vector.
Ops.push_back(-1);
continue;
@@ -4318,7 +4340,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
// Find or create a shuffle vector operand to hold this input.
unsigned OpNo;
- for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
if (InputUsed[OpNo] == Input) {
// This input vector is already an operand.
break;
@@ -4329,7 +4351,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
}
}
- if (OpNo >= array_lengthof(InputUsed)) {
+ if (OpNo >= std::size(InputUsed)) {
// More than two input vectors used! Give up on trying to create a
// shuffle vector. Insert all elements into a BUILD_VECTOR instead.
UseBuildVector = true;
@@ -4352,7 +4374,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
// The input vector this mask element indexes into.
unsigned Input = (unsigned)Idx / NewElts;
- if (Input >= array_lengthof(Inputs)) {
+ if (Input >= std::size(Inputs)) {
// The mask element is "undef" or indexes off the end of the input.
SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
continue;
@@ -4570,7 +4592,7 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
if (Amt.isZero()) {
- MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
MI.eraseFromParent();
return Legalized;
}
@@ -4643,7 +4665,7 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
}
}
- MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
MI.eraseFromParent();
return Legalized;
@@ -4754,7 +4776,7 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
llvm_unreachable("not a shift");
}
- MIRBuilder.buildMerge(DstReg, ResultRegs);
+ MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -4820,7 +4842,10 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMINIMUM:
- case TargetOpcode::G_FMAXIMUM: {
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_STRICT_FADD:
+ case TargetOpcode::G_STRICT_FSUB:
+ case TargetOpcode::G_STRICT_FMUL: {
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 1);
moreElementsVectorSrc(MI, MoreTy, 2);
@@ -4829,6 +4854,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}
case TargetOpcode::G_FMA:
+ case TargetOpcode::G_STRICT_FMA:
case TargetOpcode::G_FSHR:
case TargetOpcode::G_FSHL: {
Observer.changingInstr(MI);
@@ -4922,12 +4948,72 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
}
}
+/// Expand the source vectors to the size of the destination vector.
+static LegalizerHelper::LegalizeResult
+equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ unsigned MaskNumElts = Mask.size();
+ unsigned SrcNumElts = SrcTy.getNumElements();
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DestEltTy = DstTy.getElementType();
+
+ // TODO: Normalize the shuffle vector since mask and vector length don't
+ // match.
+ if (MaskNumElts <= SrcNumElts) {
+ return LegalizerHelper::LegalizeResult::UnableToLegalize;
+ }
+
+ unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
+ unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
+ LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
+
+ // Create new source vectors by concatenating the initial
+ // source vectors with undefined vectors of the same size.
+ auto Undef = MIRBuilder.buildUndef(SrcTy);
+ SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
+ SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
+ MOps1[0] = MI.getOperand(1).getReg();
+ MOps2[0] = MI.getOperand(2).getReg();
+
+ auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
+ auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
+
+ // Readjust mask for new input vector length.
+ SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
+ for (unsigned I = 0; I != MaskNumElts; ++I) {
+ int Idx = Mask[I];
+ if (Idx >= static_cast<int>(SrcNumElts))
+ Idx += PaddedMaskNumElts - SrcNumElts;
+ MappedOps[I] = Idx;
+ }
+
+ // If we got more elements than required, extract subvector.
+ if (MaskNumElts != PaddedMaskNumElts) {
+ auto Shuffle =
+ MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
+
+ SmallVector<Register, 16> Elts(MaskNumElts);
+ for (unsigned I = 0; I < MaskNumElts; ++I) {
+ Elts[I] =
+ MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
+ .getReg(0);
+ }
+ MIRBuilder.buildBuildVector(DstReg, Elts);
+ } else {
+ MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
+ }
+
+ MI.eraseFromParent();
+ return LegalizerHelper::LegalizeResult::Legalized;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
unsigned int TypeIdx, LLT MoreTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
-
Register DstReg = MI.getOperand(0).getReg();
Register Src1Reg = MI.getOperand(1).getReg();
Register Src2Reg = MI.getOperand(2).getReg();
@@ -4938,6 +5024,14 @@ LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
unsigned NumElts = DstTy.getNumElements();
unsigned WidenNumElts = MoreTy.getNumElements();
+ if (DstTy.isVector() && Src1Ty.isVector() &&
+ DstTy.getNumElements() > Src1Ty.getNumElements()) {
+ return equalizeVectorShuffleLengths(MI, MIRBuilder);
+ }
+
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
// Expect a canonicalized shuffle.
if (DstTy != Src1Ty || DstTy != Src2Ty)
return UnableToLegalize;
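
A note on equalizeVectorShuffleLengths above: when the shuffle result is wider than the sources, each source is concatenated with undef up to the padded width, and mask entries pointing into the second source must shift up by the amount of padding. The index arithmetic, extracted into standalone C++:

    #include <vector>

    // Indices >= SrcNumElts address the second source; after padding, that
    // source begins PaddedNumElts elements in rather than SrcNumElts.
    std::vector<int> remapMask(std::vector<int> Mask, int SrcNumElts,
                               int PaddedNumElts) {
      for (int &Idx : Mask)
        if (Idx >= SrcNumElts)
          Idx += PaddedNumElts - SrcNumElts;
      return Mask; // e.g. {0,2,1,3} with Src=2, Padded=4 becomes {0,4,1,5}
    }
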
@@ -5115,8 +5209,8 @@ LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
CarryIn = CarryOut;
}
insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
- makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
- makeArrayRef(DstRegs).drop_front(NarrowParts));
+ ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
+ ArrayRef(DstRegs).drop_front(NarrowParts));
MI.eraseFromParent();
return Legalized;
@@ -5149,7 +5243,7 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
// Take only high half of registers if this is high mul.
ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -5239,7 +5333,7 @@ LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
if (MRI.getType(DstReg).isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
else if (DstRegs.size() > 1)
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
else
MIRBuilder.buildCopy(DstReg, DstRegs[0]);
MI.eraseFromParent();
@@ -5321,10 +5415,10 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
Register DstReg = MI.getOperand(0).getReg();
if (WideSize > RegTy.getSizeInBits()) {
Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
- MIRBuilder.buildMerge(MergeReg, DstRegs);
+ MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
MIRBuilder.buildTrunc(DstReg, MergeReg);
} else
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
@@ -6582,7 +6676,7 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
if (InsertVal) {
SrcRegs[IdxVal] = MI.getOperand(2).getReg();
- MIRBuilder.buildMerge(DstReg, SrcRegs);
+ MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
} else {
MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
}
@@ -6754,7 +6848,7 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
if (SubVectorElts.size() == 1)
MIRBuilder.buildCopy(Dst, SubVectorElts[0]);
else
- MIRBuilder.buildMerge(Dst, SubVectorElts);
+ MIRBuilder.buildMergeLikeInstr(Dst, SubVectorElts);
MI.eraseFromParent();
return Legalized;
@@ -6827,7 +6921,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
DstElts.push_back(UnmergeSrc.getReg(Idx));
}
- MIRBuilder.buildMerge(Dst, DstElts);
+ MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
MI.eraseFromParent();
return Legalized;
}
@@ -7216,6 +7310,166 @@ LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
return Legalized;
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+ uint64_t Mask = MI.getOperand(2).getImm();
+
+ if (Mask == 0) {
+ MIRBuilder.buildConstant(DstReg, 0);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ if ((Mask & fcAllFlags) == fcAllFlags) {
+ MIRBuilder.buildConstant(DstReg, 1);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ unsigned BitSize = SrcTy.getScalarSizeInBits();
+ const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
+
+ LLT IntTy = LLT::scalar(BitSize);
+ if (SrcTy.isVector())
+ IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
+ auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
+
+ // Various masks.
+ APInt SignBit = APInt::getSignMask(BitSize);
+ APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
+ APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
+ APInt ExpMask = Inf;
+ APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
+ APInt QNaNBitMask =
+ APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
+ APInt InvertionMask = APInt::getAllOnesValue(DstTy.getScalarSizeInBits());
+
+ auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
+ auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
+ auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
+ auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
+ auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
+
+ auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
+ auto Sign =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
+
+ auto Res = MIRBuilder.buildConstant(DstTy, 0);
+ const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
+ Res = MIRBuilder.buildOr(DstTy, Res, ToAppend);
+ };
+
+ // Tests that involve more than one class should be processed first.
+ if ((Mask & fcFinite) == fcFinite) {
+ // finite(V) ==> abs(V) u< exp_mask
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
+ ExpMaskC));
+ Mask &= ~fcFinite;
+ } else if ((Mask & fcFinite) == fcPosFinite) {
+ // finite(V) && V > 0 ==> V u< exp_mask
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
+ ExpMaskC));
+ Mask &= ~fcPosFinite;
+ } else if ((Mask & fcFinite) == fcNegFinite) {
+ // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
+ auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
+ ExpMaskC);
+ auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
+ appendToRes(And);
+ Mask &= ~fcNegFinite;
+ }
+
+ // Check for individual classes.
+ if (unsigned PartialCheck = Mask & fcZero) {
+ if (PartialCheck == fcPosZero)
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, ZeroC));
+ else if (PartialCheck == fcZero)
+ appendToRes(
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
+ else // fcNegZero
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, SignBitC));
+ }
+
+ if (unsigned PartialCheck = Mask & fcInf) {
+ if (PartialCheck == fcPosInf)
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, InfC));
+ else if (PartialCheck == fcInf)
+ appendToRes(
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
+ else { // fcNegInf
+ APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
+ auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ AsInt, NegInfC));
+ }
+ }
+
+ if (unsigned PartialCheck = Mask & fcNan) {
+ auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
+ if (PartialCheck == fcNan) {
+ // isnan(V) ==> abs(V) u> int(inf)
+ appendToRes(
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
+ } else if (PartialCheck == fcQNan) {
+ // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
+ InfWithQnanBitC));
+ } else { // fcSNan
+ // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
+ // abs(V) u< (unsigned(Inf) | quiet_bit)
+ auto IsNan =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
+ auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
+ Abs, InfWithQnanBitC);
+ appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
+ }
+ }
+
+ if (unsigned PartialCheck = Mask & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
+ auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
+ auto OneC = MIRBuilder.buildConstant(IntTy, 1);
+ auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
+ auto SubnormalRes =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
+ MIRBuilder.buildConstant(IntTy, AllOneMantissa));
+ if (PartialCheck == fcNegSubnormal)
+ SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
+ appendToRes(SubnormalRes);
+ }
+
+ if (unsigned PartialCheck = Mask & fcNormal) {
+ // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
+ // (max_exp-1))
+ APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
+ auto ExpMinusOne = MIRBuilder.buildSub(
+ IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
+ APInt MaxExpMinusOne = ExpMask - ExpLSB;
+ auto NormalRes =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
+ MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
+ if (PartialCheck == fcNegNormal)
+ NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
+ else if (PartialCheck == fcPosNormal) {
+ auto PosSign = MIRBuilder.buildXor(
+ DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
+ NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
+ }
+ appendToRes(NormalRes);
+ }
+
+ MIRBuilder.buildCopy(DstReg, Res);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
// Implement vector G_SELECT in terms of XOR, AND, OR.
Register DstReg = MI.getOperand(0).getReg();
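
A note on lowerISFPCLASS above: all of its integer comparisons rest on the IEEE-754 bit layout, where clearing the sign bit yields an unsigned value ordered zero < subnormal < normal < infinity < NaN. The "isnan(V) ==> abs(V) u> int(inf)" line, checked for float in standalone C++20:

    #include <bit>
    #include <cstdint>

    bool isNanViaBits(float F) {
      uint32_t Bits = std::bit_cast<uint32_t>(F);
      uint32_t Abs = Bits & 0x7fffffffu; // clear the sign bit ("ValueMask")
      return Abs > 0x7f800000u;          // strictly above bits(+inf) => NaN
    }
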
@@ -7227,6 +7481,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
if (!DstTy.isVector())
return UnableToLegalize;
+ bool IsEltPtr = DstTy.getElementType().isPointer();
+ if (IsEltPtr) {
+ LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
+ LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
+ Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
+ Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
+ DstTy = NewTy;
+ }
+
if (MaskTy.isScalar()) {
// Turn the scalar condition into a vector condition mask.
@@ -7234,10 +7497,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
// The condition was potentially zero extended before, but we want a sign
// extended boolean.
- if (MaskTy.getSizeInBits() <= DstTy.getScalarSizeInBits() &&
- MaskTy != LLT::scalar(1)) {
+ if (MaskTy != LLT::scalar(1))
MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
- }
// Continue the sign extension (or truncate) to match the data type.
MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
@@ -7256,7 +7517,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
- MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
+ if (IsEltPtr) {
+ auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
+ MIRBuilder.buildIntToPtr(DstReg, Or);
+ } else {
+ MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
+ }
MI.eraseFromParent();
return Legalized;
}
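
A note on the pointer-element handling above: the expansion computes Res = (Op1 & Mask) | (Op2 & ~Mask), which needs integer lanes (hence the ptrtoint/inttoptr bracketing) and a mask whose true lanes are all-ones (hence the sign extension earlier in the function). A lane-wise model:

    #include <cstdint>

    uint32_t selectLane(bool Cond, uint32_t A, uint32_t B) {
      uint32_t M = Cond ? 0xffffffffu : 0u; // sign-extended i1 lane
      return (A & M) | (B & ~M);
    }
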
@@ -7378,7 +7644,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
// If the new LLT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
- bool Fast;
+ unsigned Fast;
// Need to get a VT equivalent for allowMisalignedMemoryAccesses().
MVT VT = getMVTForLLT(Ty);
if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 6adb7ddb5b66..4b6c3a156709 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -126,7 +126,7 @@ static bool mutationIsSane(const LegalizeRule &Rule,
case FewerElements:
if (!OldTy.isVector())
return false;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case MoreElements: {
// MoreElements can go from scalar to vector.
const ElementCount OldElts = OldTy.isVector() ?
@@ -296,7 +296,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(
std::initializer_list<unsigned> Opcodes) {
unsigned Representative = *Opcodes.begin();
- assert(!llvm::empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() &&
+ assert(Opcodes.size() >= 2 &&
"Initializer list must have at least two opcodes");
for (unsigned Op : llvm::drop_begin(Opcodes))
@@ -330,7 +330,7 @@ LegalizerInfo::getAction(const MachineInstr &MI,
const MachineRegisterInfo &MRI) const {
SmallVector<LLT, 8> Types;
SmallBitVector SeenTypes(8);
- const MCOperandInfo *OpInfo = MI.getDesc().OpInfo;
+ ArrayRef<MCOperandInfo> OpInfo = MI.getDesc().operands();
// FIXME: probably we'll need to cache the results here somehow?
for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) {
if (!OpInfo[i].isGenericType())
@@ -379,14 +379,14 @@ void LegalizerInfo::verify(const MCInstrInfo &MII) const {
for (unsigned Opcode = FirstOp; Opcode <= LastOp; ++Opcode) {
const MCInstrDesc &MCID = MII.get(Opcode);
const unsigned NumTypeIdxs = std::accumulate(
- MCID.opInfo_begin(), MCID.opInfo_end(), 0U,
+ MCID.operands().begin(), MCID.operands().end(), 0U,
[](unsigned Acc, const MCOperandInfo &OpInfo) {
return OpInfo.isGenericType()
? std::max(OpInfo.getGenericTypeIndex() + 1U, Acc)
: Acc;
});
const unsigned NumImmIdxs = std::accumulate(
- MCID.opInfo_begin(), MCID.opInfo_end(), 0U,
+ MCID.operands().begin(), MCID.operands().end(), 0U,
[](unsigned Acc, const MCOperandInfo &OpInfo) {
return OpInfo.isGenericImm()
? std::max(OpInfo.getGenericImmIndex() + 1U, Acc)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index be1bc865d1e1..7c6eac8c8ce0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -306,7 +306,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
bool AnyMerged = false;
do {
unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
- unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize();
+ unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
// Compute the biggest store we can generate to handle the number of stores.
unsigned MergeSizeBits;
for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
@@ -352,13 +352,13 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
const unsigned NumStores = Stores.size();
LLT SmallTy = MRI->getType(FirstStore->getValueReg());
LLT WideValueTy =
- LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedSize());
+ LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedValue());
// For each store, compute pairwise merged debug locs.
- DebugLoc MergedLoc;
- for (unsigned AIdx = 0, BIdx = 1; BIdx < NumStores; ++AIdx, ++BIdx)
- MergedLoc = DILocation::getMergedLocation(Stores[AIdx]->getDebugLoc(),
- Stores[BIdx]->getDebugLoc());
+ DebugLoc MergedLoc = Stores.front()->getDebugLoc();
+ for (auto *Store : drop_begin(Stores))
+ MergedLoc = DILocation::getMergedLocation(MergedLoc, Store->getDebugLoc());
+
Builder.setInstr(*Stores.back());
Builder.setDebugLoc(MergedLoc);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index c1287693e74d..bf4dcc2c2459 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -181,6 +181,17 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
MI->removeFromParent();
MBB.insert(II, MI);
Changed = true;
+
+ // If the instruction (constant) being localized has a single user, we can
+ // propagate the debug location from that user.
+ if (Users.size() == 1) {
+ const auto &DefDL = MI->getDebugLoc();
+ const auto &UserDL = (*Users.begin())->getDebugLoc();
+
+ if ((!DefDL || DefDL.getLine() == 0) && UserDL && UserDL.getLine() != 0) {
+ MI->setDebugLoc(UserDL);
+ }
+ }
}
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 2e22dae35e5a..9100e064f30f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -27,6 +27,7 @@ void MachineIRBuilder::setMF(MachineFunction &MF) {
State.MRI = &MF.getRegInfo();
State.TII = MF.getSubtarget().getInstrInfo();
State.DL = DebugLoc();
+ State.PCSections = nullptr;
State.II = MachineBasicBlock::iterator();
State.Observer = nullptr;
}
@@ -36,8 +37,7 @@ void MachineIRBuilder::setMF(MachineFunction &MF) {
//------------------------------------------------------------------------------
MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) {
- MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode));
- return MIB;
+ return BuildMI(getMF(), {getDL(), getPCSections()}, getTII().get(Opcode));
}
MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
@@ -96,13 +96,23 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
"Expected inlined-at fields to agree");
auto MIB = buildInstrNoInsert(TargetOpcode::DBG_VALUE);
- if (auto *CI = dyn_cast<ConstantInt>(&C)) {
+
+ auto *NumericConstant = [&] () -> const Constant* {
+ if (const auto *CE = dyn_cast<ConstantExpr>(&C))
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ return CE->getOperand(0);
+ return &C;
+ }();
+
+ if (auto *CI = dyn_cast<ConstantInt>(NumericConstant)) {
if (CI->getBitWidth() > 64)
MIB.addCImm(CI);
else
MIB.addImm(CI->getZExtValue());
- } else if (auto *CFP = dyn_cast<ConstantFP>(&C)) {
+ } else if (auto *CFP = dyn_cast<ConstantFP>(NumericConstant)) {
MIB.addFPImm(CFP);
+ } else if (isa<ConstantPointerNull>(NumericConstant)) {
+ MIB.addImm(0);
} else {
// Insert $noreg if we didn't find a usable constant and had to drop it.
MIB.addReg(Register());
@@ -187,7 +197,7 @@ MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res,
return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1});
}
-Optional<MachineInstrBuilder>
+std::optional<MachineInstrBuilder>
MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0,
const LLT ValueTy, uint64_t Value) {
assert(Res == 0 && "Res is a result argument");
@@ -195,7 +205,7 @@ MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0,
if (Value == 0) {
Res = Op0;
- return None;
+ return std::nullopt;
}
Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0));
@@ -233,7 +243,7 @@ MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res,
unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size();
for (unsigned i = 0; i < NumberOfPadElts; ++i)
Regs.push_back(Undef);
- return buildMerge(Res, Regs);
+ return buildMergeLikeInstr(Res, Regs);
}
MachineInstrBuilder
@@ -252,7 +262,7 @@ MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res,
auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
for (unsigned i = 0; i < ResTy.getNumElements(); ++i)
Regs.push_back(Unmerge.getReg(i));
- return buildMerge(Res, Regs);
+ return buildMergeLikeInstr(Res, Regs);
}
MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
@@ -587,8 +597,8 @@ MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) {
return buildInstr(TargetOpcode::G_IMPLICIT_DEF, {Res}, {});
}
-MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
- ArrayRef<Register> Ops) {
+MachineInstrBuilder MachineIRBuilder::buildMergeValues(const DstOp &Res,
+ ArrayRef<Register> Ops) {
// Unfortunately to convert from ArrayRef<LLT> to ArrayRef<SrcOp>,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
@@ -598,10 +608,32 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
}
MachineInstrBuilder
-MachineIRBuilder::buildMerge(const DstOp &Res,
- std::initializer_list<SrcOp> Ops) {
+MachineIRBuilder::buildMergeLikeInstr(const DstOp &Res,
+ ArrayRef<Register> Ops) {
+ // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<SrcOp>,
+ // we need some temporary storage for the DstOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ assert(TmpVec.size() > 1);
+ return buildInstr(getOpcodeForMerge(Res, TmpVec), Res, TmpVec);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildMergeLikeInstr(const DstOp &Res,
+ std::initializer_list<SrcOp> Ops) {
assert(Ops.size() > 1);
- return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, Ops);
+ return buildInstr(getOpcodeForMerge(Res, Ops), Res, Ops);
+}
+
+unsigned MachineIRBuilder::getOpcodeForMerge(const DstOp &DstOp,
+ ArrayRef<SrcOp> SrcOps) const {
+ if (DstOp.getLLTTy(*getMRI()).isVector()) {
+ if (SrcOps[0].getLLTTy(*getMRI()).isVector())
+ return TargetOpcode::G_CONCAT_VECTORS;
+ return TargetOpcode::G_BUILD_VECTOR;
+ }
+
+ return TargetOpcode::G_MERGE_VALUES;
}
MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
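
A note on the split above: buildMerge becomes buildMergeValues, which always emits G_MERGE_VALUES, and buildMergeLikeInstr, which lets getOpcodeForMerge pick the opcode from the types:

    // Decision implemented by getOpcodeForMerge:
    //   scalar destination                  -> G_MERGE_VALUES
    //   vector destination, scalar sources  -> G_BUILD_VECTOR
    //   vector destination, vector sources  -> G_CONCAT_VECTORS

This mirrors the dispatch deleted from buildInstr's G_MERGE_VALUES case further down, which now asserts that the destination is not a vector.
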
@@ -664,6 +696,9 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ if (TmpVec[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ Res.getLLTTy(*getMRI()).getElementType().getSizeInBits())
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec);
}
@@ -752,9 +787,9 @@ MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
return buildInstr(TargetOpcode::G_TRUNC, Res, Op);
}
-MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res,
- const SrcOp &Op,
- Optional<unsigned> Flags) {
+MachineInstrBuilder
+MachineIRBuilder::buildFPTrunc(const DstOp &Res, const SrcOp &Op,
+ std::optional<unsigned> Flags) {
return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags);
}
@@ -769,16 +804,15 @@ MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
const DstOp &Res,
const SrcOp &Op0,
const SrcOp &Op1,
- Optional<unsigned> Flags) {
+ std::optional<unsigned> Flags) {
return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags);
}
-MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res,
- const SrcOp &Tst,
- const SrcOp &Op0,
- const SrcOp &Op1,
- Optional<unsigned> Flags) {
+MachineInstrBuilder
+MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst,
+ const SrcOp &Op0, const SrcOp &Op1,
+ std::optional<unsigned> Flags) {
return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags);
}
@@ -1019,10 +1053,10 @@ void MachineIRBuilder::validateSelectOp(const LLT ResTy, const LLT TstTy,
#endif
}
-MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
- ArrayRef<DstOp> DstOps,
- ArrayRef<SrcOp> SrcOps,
- Optional<unsigned> Flags) {
+MachineInstrBuilder
+MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ std::optional<unsigned> Flags) {
switch (Opc) {
default:
break;
@@ -1150,7 +1184,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
break;
}
case TargetOpcode::G_MERGE_VALUES: {
- assert(!SrcOps.empty() && "invalid trivial sequence");
+ assert(SrcOps.size() >= 2 && "invalid trivial sequence");
assert(DstOps.size() == 1 && "Invalid Dst");
assert(llvm::all_of(SrcOps,
[&, this](const SrcOp &Op) {
@@ -1162,13 +1196,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
"input operands do not cover output register");
- if (SrcOps.size() == 1)
- return buildCast(DstOps[0], SrcOps[0]);
- if (DstOps[0].getLLTTy(*getMRI()).isVector()) {
- if (SrcOps[0].getLLTTy(*getMRI()).isVector())
- return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps);
- return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps);
- }
+ assert(!DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ "vectors should be built with G_CONCAT_VECTOR or G_BUILD_VECTOR");
break;
}
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
@@ -1228,9 +1257,6 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI());
}) &&
"type mismatch in input list");
- if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
- DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits())
- return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps);
break;
}
case TargetOpcode::G_CONCAT_VECTORS: {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index bce850ee212c..080f3ca540f2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -153,8 +153,7 @@ bool RegBankSelect::repairReg(
if (MO.isDef())
std::swap(Src, Dst);
- assert((RepairPt.getNumInsertPoints() == 1 ||
- Register::isPhysicalRegister(Dst)) &&
+ assert((RepairPt.getNumInsertPoints() == 1 || Dst.isPhysical()) &&
"We are about to create several defs for Dst");
// Build the instruction used to repair, then clone it at the right
@@ -398,7 +397,7 @@ void RegBankSelect::tryAvoidingSplit(
// Check if this is a physical or virtual register.
Register Reg = MO.getReg();
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
// We are going to split every outgoing edges.
// Check that this is possible.
// FIXME: The machine representation is currently broken
@@ -458,6 +457,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
LLVM_DEBUG(dbgs() << "Mapping is too expensive from the start\n");
return Cost;
}
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
// Moreover, to realize this mapping, the register bank of each operand must
// match this mapping. In other words, we may need to locally reassign the
@@ -471,6 +471,10 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
Register Reg = MO.getReg();
if (!Reg)
continue;
+ LLT Ty = MRI.getType(Reg);
+ if (!Ty.isValid())
+ continue;
+
LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n');
const RegisterBankInfo::ValueMapping &ValMapping =
InstrMapping.getOperandMapping(OpIdx);
@@ -603,6 +607,9 @@ bool RegBankSelect::applyMapping(
MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank);
break;
case RepairingPlacement::Insert:
+ // Don't insert additional instructions for debug instructions.
+ if (MI.isDebugInstr())
+ break;
OpdMapper.createVRegs(OpIdx);
if (!repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx)))
return false;
@@ -667,31 +674,7 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) {
return applyMapping(MI, *BestMapping, RepairPts);
}
-bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
- // If the ISel pipeline failed, do not bother running that pass.
- if (MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::FailedISel))
- return false;
-
- LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
- const Function &F = MF.getFunction();
- Mode SaveOptMode = OptMode;
- if (F.hasOptNone())
- OptMode = Mode::Fast;
- init(MF);
-
-#ifndef NDEBUG
- // Check that our input is fully legal: we require the function to have the
- // Legalized property, so it should be.
- // FIXME: This should be in the MachineVerifier.
- if (!DisableGISelLegalityCheck)
- if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) {
- reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
- "instruction is not legal", *MI);
- return false;
- }
-#endif
-
+bool RegBankSelect::assignRegisterBanks(MachineFunction &MF) {
// Walk the function and assign register banks to all operands.
// Use a RPOT to make sure all registers are assigned before we choose
// the best mapping of the current instruction.
@@ -716,10 +699,6 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
if (MI.isInlineAsm())
continue;
- // Ignore debug info.
- if (MI.isDebugInstr())
- continue;
-
// Ignore IMPLICIT_DEF which must have a regclass.
if (MI.isImplicitDef())
continue;
@@ -732,6 +711,42 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
}
}
+ return true;
+}
+
+bool RegBankSelect::checkFunctionIsLegal(MachineFunction &MF) const {
+#ifndef NDEBUG
+ if (!DisableGISelLegalityCheck) {
+ if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) {
+ reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+ "instruction is not legal", *MI);
+ return false;
+ }
+ }
+#endif
+ return true;
+}
+
+bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running that pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
+ const Function &F = MF.getFunction();
+ Mode SaveOptMode = OptMode;
+ if (F.hasOptNone())
+ OptMode = Mode::Fast;
+ init(MF);
+
+#ifndef NDEBUG
+ if (!checkFunctionIsLegal(MF))
+ return false;
+#endif
+
+ assignRegisterBanks(MF);
+
OptMode = SaveOptMode;
return false;
}
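For context on the runOnMachineFunction split above: bank assignment has to walk the function in reverse post-order so that a value's def receives a bank before any of its users are costed. A compressed, illustrative view of that driver loop, not a drop-in replacement for the pass body:

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/ADT/STLExtras.h"

    // Illustrative: visit each block after its (non-back-edge) predecessors,
    // so defs have banks assigned before their uses are mapped.
    ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
    for (MachineBasicBlock *MBB : RPOT)
      for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
        if (MI.isInlineAsm() || MI.isImplicitDef()) // skips shown above
          continue;
        // assignInstr(MI) picks the cheapest mapping and may insert
        // repairing copies around MI; early-inc iteration tolerates that.
      }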
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 013c8700e8ae..07448548c295 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -12,7 +12,7 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -32,6 +32,8 @@
#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
+#include <numeric>
+#include <optional>
#define DEBUG_TYPE "globalisel-utils"
@@ -55,7 +57,7 @@ Register llvm::constrainOperandRegClass(
const TargetRegisterClass &RegClass, MachineOperand &RegMO) {
Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
- assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
+ assert(Reg.isVirtual() && "PhysReg not implemented");
// Save the old register class to check whether
// the change notifications will be required.
@@ -107,7 +109,7 @@ Register llvm::constrainOperandRegClass(
MachineOperand &RegMO, unsigned OpIdx) {
Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
- assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
+ assert(Reg.isVirtual() && "PhysReg not implemented");
const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI, MF);
// Some of the target independent instructions, like COPY, may not impose any
@@ -169,7 +171,7 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
Register Reg = MO.getReg();
// Physical registers don't need to be constrained.
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
continue;
// Register operands with a value of 0 (e.g. predicate operands) don't need
@@ -233,7 +235,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
continue;
Register Reg = MO.getReg();
- if (Register::isPhysicalRegister(Reg) || !MRI.use_nodbg_empty(Reg))
+ if (Reg.isPhysical() || !MRI.use_nodbg_empty(Reg))
return false;
}
return true;
@@ -283,31 +285,31 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
reportGISelFailure(MF, TPC, MORE, R);
}
-Optional<APInt> llvm::getIConstantVRegVal(Register VReg,
- const MachineRegisterInfo &MRI) {
- Optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough(
+std::optional<APInt> llvm::getIConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
+ std::optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough(
VReg, MRI, /*LookThroughInstrs*/ false);
assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&
"Value found while looking through instrs");
if (!ValAndVReg)
- return None;
+ return std::nullopt;
return ValAndVReg->Value;
}
-Optional<int64_t>
+std::optional<int64_t>
llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) {
- Optional<APInt> Val = getIConstantVRegVal(VReg, MRI);
+ std::optional<APInt> Val = getIConstantVRegVal(VReg, MRI);
if (Val && Val->getBitWidth() <= 64)
return Val->getSExtValue();
- return None;
+ return std::nullopt;
}
namespace {
typedef std::function<bool(const MachineInstr *)> IsOpcodeFn;
-typedef std::function<Optional<APInt>(const MachineInstr *MI)> GetAPCstFn;
+typedef std::function<std::optional<APInt>(const MachineInstr *MI)> GetAPCstFn;
-Optional<ValueAndVReg> getConstantVRegValWithLookThrough(
+std::optional<ValueAndVReg> getConstantVRegValWithLookThrough(
Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode,
GetAPCstFn getAPCstValue, bool LookThroughInstrs = true,
bool LookThroughAnyExt = false) {
@@ -319,8 +321,8 @@ Optional<ValueAndVReg> getConstantVRegValWithLookThrough(
switch (MI->getOpcode()) {
case TargetOpcode::G_ANYEXT:
if (!LookThroughAnyExt)
- return None;
- LLVM_FALLTHROUGH;
+ return std::nullopt;
+ [[fallthrough]];
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
@@ -331,22 +333,22 @@ Optional<ValueAndVReg> getConstantVRegValWithLookThrough(
break;
case TargetOpcode::COPY:
VReg = MI->getOperand(1).getReg();
- if (Register::isPhysicalRegister(VReg))
- return None;
+ if (VReg.isPhysical())
+ return std::nullopt;
break;
case TargetOpcode::G_INTTOPTR:
VReg = MI->getOperand(1).getReg();
break;
default:
- return None;
+ return std::nullopt;
}
}
if (!MI || !IsConstantOpcode(MI))
- return None;
+ return std::nullopt;
- Optional<APInt> MaybeVal = getAPCstValue(MI);
+ std::optional<APInt> MaybeVal = getAPCstValue(MI);
if (!MaybeVal)
- return None;
+ return std::nullopt;
APInt &Val = *MaybeVal;
while (!SeenOpcodes.empty()) {
std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val();
@@ -386,31 +388,31 @@ bool isAnyConstant(const MachineInstr *MI) {
return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT;
}
-Optional<APInt> getCImmAsAPInt(const MachineInstr *MI) {
+std::optional<APInt> getCImmAsAPInt(const MachineInstr *MI) {
const MachineOperand &CstVal = MI->getOperand(1);
if (CstVal.isCImm())
return CstVal.getCImm()->getValue();
- return None;
+ return std::nullopt;
}
-Optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) {
+std::optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) {
const MachineOperand &CstVal = MI->getOperand(1);
if (CstVal.isCImm())
return CstVal.getCImm()->getValue();
if (CstVal.isFPImm())
return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
- return None;
+ return std::nullopt;
}
} // end anonymous namespace
-Optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough(
+std::optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough(
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant,
getCImmAsAPInt, LookThroughInstrs);
}
-Optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough(
+std::optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough(
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
bool LookThroughAnyExt) {
return getConstantVRegValWithLookThrough(
@@ -418,12 +420,12 @@ Optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough(
LookThroughAnyExt);
}
-Optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough(
+std::optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough(
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
auto Reg = getConstantVRegValWithLookThrough(
VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs);
if (!Reg)
- return None;
+ return std::nullopt;
return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(),
Reg->VReg};
}
@@ -436,13 +438,13 @@ llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
return MI->getOperand(1).getFPImm();
}
-Optional<DefinitionAndSourceRegister>
+std::optional<DefinitionAndSourceRegister>
llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
Register DefSrcReg = Reg;
auto *DefMI = MRI.getVRegDef(Reg);
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
if (!DstTy.isValid())
- return None;
+ return std::nullopt;
unsigned Opc = DefMI->getOpcode();
while (Opc == TargetOpcode::COPY || isPreISelGenericOptimizationHint(Opc)) {
Register SrcReg = DefMI->getOperand(1).getReg();
@@ -458,14 +460,14 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
const MachineRegisterInfo &MRI) {
- Optional<DefinitionAndSourceRegister> DefSrcReg =
+ std::optional<DefinitionAndSourceRegister> DefSrcReg =
getDefSrcRegIgnoringCopies(Reg, MRI);
return DefSrcReg ? DefSrcReg->MI : nullptr;
}
Register llvm::getSrcRegIgnoringCopies(Register Reg,
const MachineRegisterInfo &MRI) {
- Optional<DefinitionAndSourceRegister> DefSrcReg =
+ std::optional<DefinitionAndSourceRegister> DefSrcReg =
getDefSrcRegIgnoringCopies(Reg, MRI);
return DefSrcReg ? DefSrcReg->Reg : Register();
}
@@ -489,16 +491,17 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
return APF;
}
-Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
- const Register Op2,
- const MachineRegisterInfo &MRI) {
+std::optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode,
+ const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI) {
auto MaybeOp2Cst = getAnyConstantVRegValWithLookThrough(Op2, MRI, false);
if (!MaybeOp2Cst)
- return None;
+ return std::nullopt;
auto MaybeOp1Cst = getAnyConstantVRegValWithLookThrough(Op1, MRI, false);
if (!MaybeOp1Cst)
- return None;
+ return std::nullopt;
const APInt &C1 = MaybeOp1Cst->Value;
const APInt &C2 = MaybeOp2Cst->Value;
@@ -550,19 +553,19 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
return APIntOps::umax(C1, C2);
}
- return None;
+ return std::nullopt;
}
-Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
- const Register Op2,
- const MachineRegisterInfo &MRI) {
+std::optional<APFloat>
+llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
+ const Register Op2, const MachineRegisterInfo &MRI) {
const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
if (!Op2Cst)
- return None;
+ return std::nullopt;
const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
if (!Op1Cst)
- return None;
+ return std::nullopt;
APFloat C1 = Op1Cst->getValueAPF();
const APFloat &C2 = Op2Cst->getValueAPF();
@@ -604,7 +607,7 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
break;
}
- return None;
+ return std::nullopt;
}
SmallVector<APInt>
@@ -656,6 +659,20 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
switch (DefMI->getOpcode()) {
default:
break;
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FMAD:
+ if (SNaN)
+ return true;
+
+ // TODO: Need isKnownNeverInfinity
+ return false;
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM_IEEE: {
if (SNaN)
@@ -742,9 +759,9 @@ Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,
return LiveIn;
}
-Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
- uint64_t Imm,
- const MachineRegisterInfo &MRI) {
+std::optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode,
+ const Register Op1, uint64_t Imm,
+ const MachineRegisterInfo &MRI) {
auto MaybeOp1Cst = getIConstantVRegVal(Op1, MRI);
if (MaybeOp1Cst) {
switch (Opcode) {
@@ -756,12 +773,12 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
}
}
}
- return None;
+ return std::nullopt;
}
-Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
- Register Src,
- const MachineRegisterInfo &MRI) {
+std::optional<APFloat>
+llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src,
+ const MachineRegisterInfo &MRI) {
assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP);
if (auto MaybeSrcVal = getIConstantVRegVal(Src, MRI)) {
APFloat DstVal(getFltSemanticForLLT(DstTy));
@@ -769,30 +786,30 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
APFloat::rmNearestTiesToEven);
return DstVal;
}
- return None;
+ return std::nullopt;
}
-Optional<SmallVector<unsigned>>
+std::optional<SmallVector<unsigned>>
llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
LLT Ty = MRI.getType(Src);
SmallVector<unsigned> FoldedCTLZs;
- auto tryFoldScalar = [&](Register R) -> Optional<unsigned> {
+ auto tryFoldScalar = [&](Register R) -> std::optional<unsigned> {
auto MaybeCst = getIConstantVRegVal(R, MRI);
if (!MaybeCst)
- return None;
+ return std::nullopt;
return MaybeCst->countLeadingZeros();
};
if (Ty.isVector()) {
// Try to constant fold each element.
auto *BV = getOpcodeDef<GBuildVector>(Src, MRI);
if (!BV)
- return None;
+ return std::nullopt;
for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
if (auto MaybeFold = tryFoldScalar(BV->getSourceReg(SrcIdx))) {
FoldedCTLZs.emplace_back(*MaybeFold);
continue;
}
- return None;
+ return std::nullopt;
}
return FoldedCTLZs;
}
@@ -800,12 +817,12 @@ llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
FoldedCTLZs.emplace_back(*MaybeCst);
return FoldedCTLZs;
}
- return None;
+ return std::nullopt;
}
bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
GISelKnownBits *KB) {
- Optional<DefinitionAndSourceRegister> DefSrcReg =
+ std::optional<DefinitionAndSourceRegister> DefSrcReg =
getDefSrcRegIgnoringCopies(Reg, MRI);
if (!DefSrcReg)
return false;
@@ -879,12 +896,6 @@ void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
-static unsigned getLCMSize(unsigned OrigSize, unsigned TargetSize) {
- unsigned Mul = OrigSize * TargetSize;
- unsigned GCDSize = greatestCommonDivisor(OrigSize, TargetSize);
- return Mul / GCDSize;
-}
-
LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
const unsigned OrigSize = OrigTy.getSizeInBits();
const unsigned TargetSize = TargetTy.getSizeInBits();
@@ -899,8 +910,8 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
const LLT TargetElt = TargetTy.getElementType();
if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
- int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(),
- TargetTy.getNumElements());
+ int GCDElts =
+ std::gcd(OrigTy.getNumElements(), TargetTy.getNumElements());
// Prefer the original element type.
ElementCount Mul = OrigTy.getElementCount() * TargetTy.getNumElements();
return LLT::vector(Mul.divideCoefficientBy(GCDElts),
@@ -911,16 +922,16 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
return OrigTy;
}
- unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+ unsigned LCMSize = std::lcm(OrigSize, TargetSize);
return LLT::fixed_vector(LCMSize / OrigElt.getSizeInBits(), OrigElt);
}
if (TargetTy.isVector()) {
- unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+ unsigned LCMSize = std::lcm(OrigSize, TargetSize);
return LLT::fixed_vector(LCMSize / OrigSize, OrigTy);
}
- unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+ unsigned LCMSize = std::lcm(OrigSize, TargetSize);
// Preserve pointer types.
if (LCMSize == OrigSize)
@@ -958,8 +969,7 @@ LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
if (TargetTy.isVector()) {
LLT TargetElt = TargetTy.getElementType();
if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
- int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
- TargetTy.getNumElements());
+ int GCD = std::gcd(OrigTy.getNumElements(), TargetTy.getNumElements());
return LLT::scalarOrVector(ElementCount::getFixed(GCD), OrigElt);
}
} else {
@@ -968,7 +978,7 @@ LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
return OrigElt;
}
- unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize);
+ unsigned GCD = std::gcd(OrigSize, TargetSize);
if (GCD == OrigElt.getSizeInBits())
return OrigElt;
@@ -986,11 +996,11 @@ LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
return OrigTy;
}
- unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize);
+ unsigned GCD = std::gcd(OrigSize, TargetSize);
return LLT::scalar(GCD);
}
-Optional<int> llvm::getSplatIndex(MachineInstr &MI) {
+std::optional<int> llvm::getSplatIndex(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
"Only G_SHUFFLE_VECTOR can have a splat index!");
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
@@ -1006,7 +1016,7 @@ Optional<int> llvm::getSplatIndex(MachineInstr &MI) {
int SplatValue = *FirstDefinedIdx;
if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()),
[&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; }))
- return None;
+ return std::nullopt;
return SplatValue;
}
@@ -1018,36 +1028,41 @@ static bool isBuildVectorOp(unsigned Opcode) {
namespace {
-Optional<ValueAndVReg> getAnyConstantSplat(Register VReg,
- const MachineRegisterInfo &MRI,
- bool AllowUndef) {
+std::optional<ValueAndVReg> getAnyConstantSplat(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
MachineInstr *MI = getDefIgnoringCopies(VReg, MRI);
if (!MI)
- return None;
+ return std::nullopt;
- if (!isBuildVectorOp(MI->getOpcode()))
- return None;
+ bool isConcatVectorsOp = MI->getOpcode() == TargetOpcode::G_CONCAT_VECTORS;
+ if (!isBuildVectorOp(MI->getOpcode()) && !isConcatVectorsOp)
+ return std::nullopt;
- Optional<ValueAndVReg> SplatValAndReg = None;
+ std::optional<ValueAndVReg> SplatValAndReg;
for (MachineOperand &Op : MI->uses()) {
Register Element = Op.getReg();
+ // If we have a G_CONCAT_VECTOR, we recursively look into the
+ // vectors that we're concatenating to see if they're splats.
auto ElementValAndReg =
- getAnyConstantVRegValWithLookThrough(Element, MRI, true, true);
+ isConcatVectorsOp
+ ? getAnyConstantSplat(Element, MRI, AllowUndef)
+ : getAnyConstantVRegValWithLookThrough(Element, MRI, true, true);
// If AllowUndef, treat undef as value that will result in a constant splat.
if (!ElementValAndReg) {
if (AllowUndef && isa<GImplicitDef>(MRI.getVRegDef(Element)))
continue;
- return None;
+ return std::nullopt;
}
// Record splat value
if (!SplatValAndReg)
SplatValAndReg = ElementValAndReg;
- // Different constant then the one already recorded, not a constant splat.
+ // Different constant than the one already recorded, not a constant splat.
if (SplatValAndReg->Value != ElementValAndReg->Value)
- return None;
+ return std::nullopt;
}
return SplatValAndReg;
@@ -1070,44 +1085,45 @@ bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
AllowUndef);
}
-Optional<APInt> llvm::getIConstantSplatVal(const Register Reg,
- const MachineRegisterInfo &MRI) {
+std::optional<APInt>
+llvm::getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI) {
if (auto SplatValAndReg =
getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) {
- Optional<ValueAndVReg> ValAndVReg =
+ std::optional<ValueAndVReg> ValAndVReg =
getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
return ValAndVReg->Value;
}
- return None;
+ return std::nullopt;
}
-Optional<APInt> getIConstantSplatVal(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
+std::optional<APInt>
+llvm::getIConstantSplatVal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
return getIConstantSplatVal(MI.getOperand(0).getReg(), MRI);
}
-Optional<int64_t>
+std::optional<int64_t>
llvm::getIConstantSplatSExtVal(const Register Reg,
const MachineRegisterInfo &MRI) {
if (auto SplatValAndReg =
getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false))
return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI);
- return None;
+ return std::nullopt;
}
-Optional<int64_t>
+std::optional<int64_t>
llvm::getIConstantSplatSExtVal(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
return getIConstantSplatSExtVal(MI.getOperand(0).getReg(), MRI);
}
-Optional<FPValueAndVReg> llvm::getFConstantSplat(Register VReg,
- const MachineRegisterInfo &MRI,
- bool AllowUndef) {
+std::optional<FPValueAndVReg>
+llvm::getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
if (auto SplatValAndReg = getAnyConstantSplat(VReg, MRI, AllowUndef))
return getFConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
- return None;
+ return std::nullopt;
}
bool llvm::isBuildVectorAllZeros(const MachineInstr &MI,
@@ -1122,17 +1138,17 @@ bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
return isBuildVectorConstantSplat(MI, MRI, -1, AllowUndef);
}
-Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
+std::optional<RegOrConstant>
+llvm::getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI) {
unsigned Opc = MI.getOpcode();
if (!isBuildVectorOp(Opc))
- return None;
+ return std::nullopt;
if (auto Splat = getIConstantSplatSExtVal(MI, MRI))
return RegOrConstant(*Splat);
auto Reg = MI.getOperand(1).getReg();
if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()),
[&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; }))
- return None;
+ return std::nullopt;
return RegOrConstant(Reg);
}
@@ -1192,7 +1208,7 @@ bool llvm::isConstantOrConstantVector(const MachineInstr &MI,
return true;
}
-Optional<APInt>
+std::optional<APInt>
llvm::isConstantOrConstantSplatVector(MachineInstr &MI,
const MachineRegisterInfo &MRI) {
Register Def = MI.getOperand(0).getReg();
@@ -1200,7 +1216,7 @@ llvm::isConstantOrConstantSplatVector(MachineInstr &MI,
return C->Value;
auto MaybeCst = getIConstantSplatSExtVal(MI, MRI);
if (!MaybeCst)
- return None;
+ return std::nullopt;
const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits();
return APInt(ScalarSize, *MaybeCst, true);
}
@@ -1283,6 +1299,18 @@ bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
llvm_unreachable("Invalid boolean contents");
}
+bool llvm::isConstFalseVal(const TargetLowering &TLI, int64_t Val,
+ bool IsVector, bool IsFP) {
+ switch (TLI.getBooleanContents(IsVector, IsFP)) {
+ case TargetLowering::UndefinedBooleanContent:
+ return ~Val & 0x1;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return Val == 0;
+ }
+ llvm_unreachable("Invalid boolean contents");
+}
+
int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
bool IsFP) {
switch (TLI.getBooleanContents(IsVector, IsFP)) {
@@ -1335,3 +1363,22 @@ void llvm::eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI,
LostDebugLocObserver *LocObserver) {
return eraseInstrs({&MI}, MRI, LocObserver);
}
+
+void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) {
+ for (auto &Def : MI.defs()) {
+ assert(Def.isReg() && "Must be a reg");
+
+ SmallVector<MachineOperand *, 16> DbgUsers;
+ for (auto &MOUse : MRI.use_operands(Def.getReg())) {
+ MachineInstr *DbgValue = MOUse.getParent();
+ // Ignore partially formed DBG_VALUEs.
+ if (DbgValue->isNonListDebugValue() && DbgValue->getNumOperands() == 4) {
+ DbgUsers.push_back(&MOUse);
+ }
+ }
+
+ if (!DbgUsers.empty()) {
+ salvageDebugInfoForDbgValue(MRI, MI, DbgUsers);
+ }
+ }
+}
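The removal of the hand-rolled getLCMSize helper earlier in this file leans on the C++17 <numeric> primitives. A self-contained equivalence check (the overflow remark describes typical implementations, not a guarantee of the standard):

    #include <cassert>
    #include <numeric>

    int main() {
      unsigned OrigSize = 24, TargetSize = 32;
      // Removed helper computed (OrigSize * TargetSize) / gcd, where the
      // product could overflow; std::lcm typically divides by the gcd
      // before multiplying.
      assert(std::gcd(OrigSize, TargetSize) == 8u);
      assert(std::lcm(OrigSize, TargetSize) == 96u);
      return 0;
    }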
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
index f5833d3b9086..2ccf2def48f8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -62,6 +62,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -181,7 +182,7 @@ namespace {
void collectUsedGlobalVariables(Module &M, StringRef Name);
/// Keep track of the GlobalVariable that must not be merged away
- SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+ SmallSetVector<const GlobalVariable *, 16> MustKeepGlobalVariables;
public:
static char ID; // Pass identification, replacement for typeid.
@@ -224,8 +225,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
llvm::stable_sort(
Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
// We don't support scalable global variables.
- return DL.getTypeAllocSize(GV1->getValueType()).getFixedSize() <
- DL.getTypeAllocSize(GV2->getValueType()).getFixedSize();
+ return DL.getTypeAllocSize(GV1->getValueType()).getFixedValue() <
+ DL.getTypeAllocSize(GV2->getValueType()).getFixedValue();
});
// If we want to just blindly group all globals together, do so.
@@ -619,9 +620,8 @@ bool GlobalMerge::doInitialization(Module &M) {
LLVM_DEBUG({
dbgs() << "Number of GV that must be kept: " <<
MustKeepGlobalVariables.size() << "\n";
- for (auto KeptGV = MustKeepGlobalVariables.begin();
- KeptGV != MustKeepGlobalVariables.end(); KeptGV++)
- dbgs() << "Kept: " << **KeptGV << "\n";
+ for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
+ dbgs() << "Kept: " << *KeptGV << "\n";
});
// Grab all non-const globals.
for (auto &GV : M.globals()) {
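The container swap above is about iteration order: a SmallPtrSet iterates in pointer/hash order, so the "Kept:" debug dump could differ from run to run, while SmallSetVector iterates in insertion order. A self-contained illustration, with ints standing in for the GlobalVariable pointers:

    #include "llvm/ADT/SetVector.h"
    #include <cstdio>

    int main() {
      llvm::SmallSetVector<int, 16> Kept;
      for (int V : {3, 1, 3, 2})
        Kept.insert(V);            // set semantics: the second 3 is dropped
      for (int V : Kept)
        std::printf("%d ", V);     // vector order: prints "3 1 2"
      return 0;
    }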
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index da6ec76bd770..fa493af0eea7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -26,8 +26,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -98,11 +96,11 @@ class ImplicitNullChecks : public MachineFunctionPass {
/// If non-None, then an instruction in \p Insts that also must be
/// hoisted.
- Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence;
+ std::optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence;
/*implicit*/ DependenceResult(
bool CanReorder,
- Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence)
+ std::optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence)
: CanReorder(CanReorder), PotentialDependence(PotentialDependence) {
assert((!PotentialDependence || CanReorder) &&
"!CanReorder && PotentialDependence.hasValue() not allowed!");
@@ -255,18 +253,18 @@ ImplicitNullChecks::computeDependence(const MachineInstr *MI,
assert(llvm::all_of(Block, canHandle) && "Check this first!");
assert(!is_contained(Block, MI) && "Block must be exclusive of MI!");
- Optional<ArrayRef<MachineInstr *>::iterator> Dep;
+ std::optional<ArrayRef<MachineInstr *>::iterator> Dep;
for (auto I = Block.begin(), E = Block.end(); I != E; ++I) {
if (canReorder(*I, MI))
continue;
- if (Dep == None) {
+ if (Dep == std::nullopt) {
// Found one possible dependency, keep track of it.
Dep = I;
} else {
// We found two dependencies, so bail out.
- return {false, None};
+ return {false, std::nullopt};
}
}
@@ -805,7 +803,7 @@ void ImplicitNullChecks::rewriteNullChecks(
// Insert an *unconditional* branch to not-null successor - we expect
// block placement to remove fallthroughs later.
TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
- /*Cond=*/None, DL);
+ /*Cond=*/std::nullopt, DL);
NumImplicitNullChecks++;
}
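One mechanical artifact of the Optional migration is visible above: `Dep == None` became `Dep == std::nullopt`, although `!Dep` is the equivalent and more idiomatic spelling. A tiny demonstration:

    #include <cassert>
    #include <optional>

    int main() {
      std::optional<int> Dep;               // no dependence recorded yet
      assert(Dep == std::nullopt && !Dep);  // same test, two spellings
      Dep = 42;                             // first dependence found
      assert(Dep.has_value() && *Dep == 42);
      return 0;
    }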
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
index 5be98e114673..012892166ae7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -40,6 +40,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
+#include <optional>
using namespace llvm;
@@ -90,7 +91,7 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
return false;
TLI = STI.getTargetLowering();
- Optional<DomTreeUpdater> DTU;
+ std::optional<DomTreeUpdater> DTU;
if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy);
@@ -198,7 +199,7 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
CommonITy = ITy;
}
- auto GetSwitchValue = [DL, CommonITy](IndirectBrInst *IBr) {
+ auto GetSwitchValue = [CommonITy](IndirectBrInst *IBr) {
return CastInst::CreatePointerCast(
IBr->getAddress(), CommonITy,
Twine(IBr->getAddress()->getName()) + ".switch_cast", IBr);
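The DomTreeUpdater change above is the standard pattern for deferring a member that has no default constructor. A self-contained analogue (Updater is a stand-in type, not an LLVM class):

    #include <optional>

    struct Updater {
      explicit Updater(int Strategy) : Strategy(Strategy) {}
      int Strategy;
    };

    int main() {
      std::optional<Updater> DTU;      // empty: nothing constructed yet
      bool HaveDomTree = true;         // stands in for getAnalysisIfAvailable
      if (HaveDomTree)
        DTU.emplace(/*Strategy=*/1);   // constructed in place, only if needed
      return DTU ? 0 : 1;
    }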
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index 3ea1d6c7f1ef..cf4fff878ad1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -104,7 +103,7 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
// Map from pair of (StackSlot and Original VNI) to a set of spills which
// have the same stackslot and have equal values defined by Original VNI.
- // These spills are mergeable and are hoist candiates.
+ // These spills are mergeable and are hoist candidates.
using MergeableSpillsMap =
MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>;
MergeableSpillsMap MergeableSpills;
@@ -270,7 +269,7 @@ static Register isFullCopyOf(const MachineInstr &MI, Register Reg) {
static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) {
for (const MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && MO.isDef() && MO.getReg().isVirtual())
LIS.getInterval(MO.getReg());
}
@@ -281,13 +280,28 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
Register Reg = Edit->getReg();
// A snippet is a tiny live range with only a single instruction using it
- // besides copies to/from Reg or spills/fills. We accept:
+ // besides copies to/from Reg or spills/fills.
+ // Exception is done for statepoint instructions which will fold fills
+ // into their operands.
+ // We accept:
//
// %snip = COPY %Reg / FILL fi#
// %snip = USE %snip
+ // %snip = STATEPOINT %snip in var arg area
// %Reg = COPY %snip / SPILL %snip, fi#
//
- if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI))
+ if (!LIS.intervalIsInOneMBB(SnipLI))
+ return false;
+
+ // The number of defs should not exceed 2, not counting defs coming from
+ // statepoint instructions.
+ unsigned NumValNums = SnipLI.getNumValNums();
+ for (auto *VNI : SnipLI.vnis()) {
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ if (MI->getOpcode() == TargetOpcode::STATEPOINT)
+ --NumValNums;
+ }
+ if (NumValNums > 2)
return false;
MachineInstr *UseMI = nullptr;
@@ -312,6 +326,9 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
if (SnipLI.reg() == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
continue;
+ if (StatepointOpers::isFoldableReg(&MI, SnipLI.reg()))
+ continue;
+
// Allow a single additional instruction.
if (UseMI && &MI != UseMI)
return false;
@@ -417,7 +434,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
MachineInstrSpan MIS(MII, MBB);
// Insert spill without kill flag immediately after def.
TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
- MRI.getRegClass(SrcReg), &TRI);
+ MRI.getRegClass(SrcReg), &TRI, Register());
LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
for (const MachineInstr &MI : make_range(MIS.begin(), MII))
getVDefInterval(MI, LIS);
@@ -894,7 +911,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
if (!MO->isReg())
continue;
Register Reg = MO->getReg();
- if (!Reg || Register::isVirtualRegister(Reg) || MRI.isReserved(Reg)) {
+ if (!Reg || Reg.isVirtual() || MRI.isReserved(Reg)) {
continue;
}
// Skip non-Defs, including undef uses and internal reads.
@@ -993,7 +1010,7 @@ void InlineSpiller::insertReload(Register NewVReg,
MachineInstrSpan MIS(MI, &MBB);
TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
- MRI.getRegClass(NewVReg), &TRI);
+ MRI.getRegClass(NewVReg), &TRI, Register());
LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);
@@ -1030,7 +1047,7 @@ void InlineSpiller::insertSpill(Register NewVReg, bool isKill,
if (IsRealSpill)
TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot,
- MRI.getRegClass(NewVReg), &TRI);
+ MRI.getRegClass(NewVReg), &TRI, Register());
else
// Don't spill undef value.
// Anything works for undef, in particular keeping the memory
@@ -1596,7 +1613,7 @@ void HoistSpillHelper::hoistAllSpills() {
MachineBasicBlock::iterator MII = IPA.getLastInsertPointIter(OrigLI, *BB);
MachineInstrSpan MIS(MII, BB);
TII.storeRegToStackSlot(*BB, MII, LiveReg, false, Slot,
- MRI.getRegClass(LiveReg), &TRI);
+ MRI.getRegClass(LiveReg), &TRI, Register());
LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
for (const MachineInstr &MI : make_range(MIS.begin(), MII))
getVDefInterval(MI, LIS);
@@ -1613,7 +1630,7 @@ void HoistSpillHelper::hoistAllSpills() {
RMEnt->removeOperand(i - 1);
}
}
- Edit.eliminateDeadDefs(SpillsToRm, None);
+ Edit.eliminateDeadDefs(SpillsToRm, std::nullopt);
}
}
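A distilled form of the revised snippet test above (illustrative; the real code queries LiveIntervals for each VNInfo's defining instruction): STATEPOINT defs are discounted before the two-value limit is applied, since a fill feeding a statepoint's variable-argument area can later be folded into the statepoint itself.

    #include <cassert>
    #include <initializer_list>

    // true = the def comes from a STATEPOINT, false = any other instruction.
    static bool withinSnippetDefLimit(std::initializer_list<bool> DefIsStatepoint) {
      unsigned NumValNums = DefIsStatepoint.size();
      for (bool IsStatepoint : DefIsStatepoint)
        if (IsStatepoint)
          --NumValNums;                // statepoint defs don't count
      return NumValNums <= 2;
    }

    int main() {
      assert(withinSnippetDefLimit({false, false}));       // classic snippet
      assert(withinSnippetDefLimit({false, false, true})); // statepoint forgiven
      assert(!withinSnippetDefLimit({false, false, false}));
      return 0;
    }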
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index a0f304659bca..0d36badfa10f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -887,7 +887,7 @@ public:
ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0),
ConstantInt::get(Type::getInt32Ty(LI->getContext()), i),
};
- int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, makeArrayRef(Idx, 2));
+ int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, ArrayRef(Idx, 2));
Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr);
}
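The makeArrayRef replacement above relies on C++17 class template argument deduction; the swap is one-for-one:

    #include "llvm/ADT/ArrayRef.h"

    int main() {
      int Idx[2] = {0, 1};
      llvm::ArrayRef<int> A = llvm::ArrayRef(Idx, 2); // was: makeArrayRef(Idx, 2)
      return A.size() == 2 ? 0 : 1;
    }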
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 808a79d9792a..61920a0e04ab 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -430,7 +430,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl");
break;
}
- case Intrinsic::flt_rounds:
+ case Intrinsic::get_rounding:
// Lower to "round to the nearest"
if (!CI->getType()->isVoidTy())
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
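The rename tracks the IR-level change of llvm.flt_rounds to llvm.get_rounding; the constant 1 the fallback lowering substitutes is the FLT_ROUNDS encoding for round-to-nearest. For reference, the standard C encoding, checked here assuming the host honors fesetround without FENV_ACCESS pragmas:

    #include <cfenv>
    #include <cfloat>
    #include <cstdio>

    int main() {
      std::fesetround(FE_TONEAREST);   // the mode the lowering assumes
      // FLT_ROUNDS: 0 = toward zero, 1 = to nearest, 2 = up, 3 = down.
      std::printf("%d\n", FLT_ROUNDS); // prints 1 on mainstream hosts
      return 0;
    }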
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
index 23220872b532..f1953c363b59 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
@@ -151,7 +151,7 @@ bool JMCInstrumenter::runOnModule(Module &M) {
bool IsELF = ModuleTriple.isOSBinFormatELF();
assert((IsELF || IsMSVC) && "Unsupported triple for JMC");
bool UseX86FastCall = IsMSVC && ModuleTriple.getArch() == Triple::x86;
- const char *const FlagSymbolSection = IsELF ? ".just.my.code" : ".msvcjmc";
+ const char *const FlagSymbolSection = IsELF ? ".data.just.my.code" : ".msvcjmc";
GlobalValue *CheckFunction = nullptr;
DenseMap<DISubprogram *, Constant *> SavedFlags(8);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 191596dbf53e..ba417322d4f6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -127,6 +127,7 @@
#include "InstrRefBasedImpl.h"
#include "LiveDebugValues.h"
+#include <optional>
using namespace llvm;
using namespace LiveDebugValues;
@@ -155,6 +156,8 @@ static cl::opt<unsigned>
cl::desc("livedebugvalues-stack-ws-limit"),
cl::init(250));
+DbgOpID DbgOpID::UndefID = DbgOpID(0xffffffff);
+
/// Tracker for converting machine value locations and variable values into
/// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs
/// specifying block live-in locations and transfers within blocks.
@@ -191,9 +194,25 @@ public:
SmallVector<MachineInstr *, 4> Insts; /// Vector of DBG_VALUEs to insert.
};
- struct LocAndProperties {
- LocIdx Loc;
+ /// Stores the resolved operands (machine locations and constants) and
+ /// qualifying meta-information needed to construct a concrete DBG_VALUE-like
+ /// instruction.
+ struct ResolvedDbgValue {
+ SmallVector<ResolvedDbgOp> Ops;
DbgValueProperties Properties;
+
+ ResolvedDbgValue(SmallVectorImpl<ResolvedDbgOp> &Ops,
+ DbgValueProperties Properties)
+ : Ops(Ops.begin(), Ops.end()), Properties(Properties) {}
+
+ /// Returns all the LocIdx values used in this struct, in the order in which
+ /// they appear as operands in the debug value; may contain duplicates.
+ auto loc_indices() const {
+ return map_range(
+ make_filter_range(
+ Ops, [](const ResolvedDbgOp &Op) { return !Op.IsConst; }),
+ [](const ResolvedDbgOp &Op) { return Op.Loc; });
+ }
};
/// Collection of transfers (DBG_VALUEs) to be inserted.
@@ -213,7 +232,7 @@ public:
/// Map from DebugVariable to it's current location and qualifying meta
/// information. To be used in conjunction with ActiveMLocs to construct
/// enough information for the DBG_VALUEs for a particular LocIdx.
- DenseMap<DebugVariable, LocAndProperties> ActiveVLocs;
+ DenseMap<DebugVariable, ResolvedDbgValue> ActiveVLocs;
/// Temporary cache of DBG_VALUEs to be entered into the Transfers collection.
SmallVector<MachineInstr *, 4> PendingDbgValues;
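loc_indices() above composes two STLExtras range adaptors; a self-contained analogue showing the shape of the result (Op is a stand-in for ResolvedDbgOp):

    #include "llvm/ADT/STLExtras.h"
    #include <cstdio>

    struct Op { bool IsConst; int Loc; };

    int main() {
      Op Ops[] = {{false, 7}, {true, 0}, {false, 7}};
      auto Locs = llvm::map_range(
          llvm::make_filter_range(Ops, [](const Op &O) { return !O.IsConst; }),
          [](const Op &O) { return O.Loc; });
      for (int L : Locs)
        std::printf("%d ", L); // "7 7" — constants skipped, duplicates kept
      return 0;
    }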
@@ -223,11 +242,15 @@ public:
/// defined in this block.
struct UseBeforeDef {
/// Value of this variable, def'd in block.
- ValueIDNum ID;
+ SmallVector<DbgOp> Values;
/// Identity of this variable.
DebugVariable Var;
/// Additional variable properties.
DbgValueProperties Properties;
+ UseBeforeDef(ArrayRef<DbgOp> Values, const DebugVariable &Var,
+ const DbgValueProperties &Properties)
+ : Values(Values.begin(), Values.end()), Var(Var),
+ Properties(Properties) {}
};
/// Map from instruction index (within the block) to the set of UseBeforeDefs
@@ -252,6 +275,153 @@ public:
ShouldEmitDebugEntryValues = TM.Options.ShouldEmitDebugEntryValues();
}
+ bool isCalleeSaved(LocIdx L) const {
+ unsigned Reg = MTracker->LocIdxToLocID[L];
+ if (Reg >= MTracker->NumRegs)
+ return false;
+ for (MCRegAliasIterator RAI(Reg, &TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+ };
+
+ // An estimate of the expected lifespan of values at a machine location, with
+ // a greater value corresponding to a longer expected lifespan, i.e. spill
+ // slots generally live longer than callee-saved registers which generally
+ // live longer than non-callee-saved registers. The minimum value of 0
+ // corresponds to an illegal location that cannot have a "lifespan" at all.
+ enum class LocationQuality : unsigned char {
+ Illegal = 0,
+ Register,
+ CalleeSavedRegister,
+ SpillSlot,
+ Best = SpillSlot
+ };
+
+ class LocationAndQuality {
+ unsigned Location : 24;
+ unsigned Quality : 8;
+
+ public:
+ LocationAndQuality() : Location(0), Quality(0) {}
+ LocationAndQuality(LocIdx L, LocationQuality Q)
+ : Location(L.asU64()), Quality(static_cast<unsigned>(Q)) {}
+ LocIdx getLoc() const {
+ if (!Quality)
+ return LocIdx::MakeIllegalLoc();
+ return LocIdx(Location);
+ }
+ LocationQuality getQuality() const { return LocationQuality(Quality); }
+ bool isIllegal() const { return !Quality; }
+ bool isBest() const { return getQuality() == LocationQuality::Best; }
+ };
+
+ // Returns the LocationQuality for the location L iff the quality of L
+ // is strictly greater than the provided minimum quality.
+ std::optional<LocationQuality>
+ getLocQualityIfBetter(LocIdx L, LocationQuality Min) const {
+ if (L.isIllegal())
+ return std::nullopt;
+ if (Min >= LocationQuality::SpillSlot)
+ return std::nullopt;
+ if (MTracker->isSpill(L))
+ return LocationQuality::SpillSlot;
+ if (Min >= LocationQuality::CalleeSavedRegister)
+ return std::nullopt;
+ if (isCalleeSaved(L))
+ return LocationQuality::CalleeSavedRegister;
+ if (Min >= LocationQuality::Register)
+ return std::nullopt;
+ return LocationQuality::Register;
+ }
+
+ /// For a variable \p Var with the live-in value \p Value, attempts to resolve
+ /// the DbgValue to a concrete DBG_VALUE, emitting that value and loading the
+ /// tracking information to track Var throughout the block.
+ /// \p ValueToLoc is a map containing the best known location for every
+ /// ValueIDNum that Value may use.
+ /// \p MBB is the basic block that we are loading the live-in value for.
+ /// \p DbgOpStore is the map containing the DbgOpID->DbgOp mapping needed to
+ /// determine the values used by Value.
+ void loadVarInloc(MachineBasicBlock &MBB, DbgOpIDMap &DbgOpStore,
+ const DenseMap<ValueIDNum, LocationAndQuality> &ValueToLoc,
+ DebugVariable Var, DbgValue Value) {
+ SmallVector<DbgOp> DbgOps;
+ SmallVector<ResolvedDbgOp> ResolvedDbgOps;
+ bool IsValueValid = true;
+ unsigned LastUseBeforeDef = 0;
+
+ // If every value used by the incoming DbgValue is available at block
+ // entry, ResolvedDbgOps will contain the machine locations/constants for
+ // those values and will be used to emit a debug location.
+ // If one or more values are not yet available, but will all be defined in
+ // this block, then LastUseBeforeDef will track the instruction index in
+ // this BB at which the last of those values is defined, and DbgOps will
+ // contain the values that we will emit when we reach that instruction.
+ // If one or more values are undef or not available throughout this block,
+ // and we can't recover as an entry value, we set IsValueValid=false and
+ // skip this variable.
+ for (DbgOpID ID : Value.getDbgOpIDs()) {
+ DbgOp Op = DbgOpStore.find(ID);
+ DbgOps.push_back(Op);
+ if (ID.isUndef()) {
+ IsValueValid = false;
+ break;
+ }
+ if (ID.isConst()) {
+ ResolvedDbgOps.push_back(Op.MO);
+ continue;
+ }
+
+ // If the value has no location, we can't make a variable location.
+ const ValueIDNum &Num = Op.ID;
+ auto ValuesPreferredLoc = ValueToLoc.find(Num);
+ if (ValuesPreferredLoc->second.isIllegal()) {
+ // If it's a def that occurs in this block, register it as a
+ // use-before-def to be resolved as we step through the block.
+ // Continue processing values so that we add any other UseBeforeDef
+ // entries needed for later.
+ if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI()) {
+ LastUseBeforeDef = std::max(LastUseBeforeDef,
+ static_cast<unsigned>(Num.getInst()));
+ continue;
+ }
+ recoverAsEntryValue(Var, Value.Properties, Num);
+ IsValueValid = false;
+ break;
+ }
+
+ // Defer modifying ActiveVLocs until after we've confirmed we have a
+ // live range.
+ LocIdx M = ValuesPreferredLoc->second.getLoc();
+ ResolvedDbgOps.push_back(M);
+ }
+
+ // If we cannot produce a valid value for the LiveIn value within this
+ // block, skip this variable.
+ if (!IsValueValid)
+ return;
+
+ // Add UseBeforeDef entry for the last value to be defined in this block.
+ if (LastUseBeforeDef) {
+ addUseBeforeDef(Var, Value.Properties, DbgOps,
+ LastUseBeforeDef);
+ return;
+ }
+
+ // The LiveIn value is available at block entry, begin tracking and record
+ // the transfer.
+ for (const ResolvedDbgOp &Op : ResolvedDbgOps)
+ if (!Op.IsConst)
+ ActiveMLocs[Op.Loc].insert(Var);
+ auto NewValue = ResolvedDbgValue{ResolvedDbgOps, Value.Properties};
+ auto Result = ActiveVLocs.insert(std::make_pair(Var, NewValue));
+ if (!Result.second)
+ Result.first->second = NewValue;
+ PendingDbgValues.push_back(
+ MTracker->emitLoc(ResolvedDbgOps, Var, Value.Properties));
+ }
+
/// Load object with live-in variable values. \p mlocs contains the live-in
/// values in each machine location, while \p vlocs the live-in variable
/// values. This method picks variable locations for the live-in variables,
@@ -259,7 +429,7 @@ public:
/// object fields to track variable locations as we step through the block.
/// FIXME: could just examine mloctracker instead of passing in \p mlocs?
void
- loadInlocs(MachineBasicBlock &MBB, ValueTable &MLocs,
+ loadInlocs(MachineBasicBlock &MBB, ValueTable &MLocs, DbgOpIDMap &DbgOpStore,
const SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs,
unsigned NumLocs) {
ActiveMLocs.clear();
@@ -269,24 +439,16 @@ public:
UseBeforeDefs.clear();
UseBeforeDefVariables.clear();
- auto isCalleeSaved = [&](LocIdx L) {
- unsigned Reg = MTracker->LocIdxToLocID[L];
- if (Reg >= MTracker->NumRegs)
- return false;
- for (MCRegAliasIterator RAI(Reg, &TRI, true); RAI.isValid(); ++RAI)
- if (CalleeSavedRegs.test(*RAI))
- return true;
- return false;
- };
-
// Map of the preferred location for each value.
- DenseMap<ValueIDNum, LocIdx> ValueToLoc;
+ DenseMap<ValueIDNum, LocationAndQuality> ValueToLoc;
// Initialize the preferred-location map with illegal locations, to be
// filled in later.
for (const auto &VLoc : VLocs)
if (VLoc.second.Kind == DbgValue::Def)
- ValueToLoc.insert({VLoc.second.ID, LocIdx::MakeIllegalLoc()});
+ for (DbgOpID OpID : VLoc.second.getDbgOpIDs())
+ if (!OpID.ID.IsConst)
+ ValueToLoc.insert({DbgOpStore.find(OpID).ID, LocationAndQuality()});
ActiveMLocs.reserve(VLocs.size());
ActiveVLocs.reserve(VLocs.size());
@@ -297,6 +459,8 @@ public:
for (auto Location : MTracker->locations()) {
LocIdx Idx = Location.Idx;
ValueIDNum &VNum = MLocs[Idx.asU64()];
+ if (VNum == ValueIDNum::EmptyValue)
+ continue;
VarLocs.push_back(VNum);
// Is there a variable that wants a location for this value? If not, skip.
@@ -304,47 +468,18 @@ public:
if (VIt == ValueToLoc.end())
continue;
- LocIdx CurLoc = VIt->second;
- // In order of preference, pick:
- // * Callee saved registers,
- // * Other registers,
- // * Spill slots.
- if (CurLoc.isIllegal() || MTracker->isSpill(CurLoc) ||
- (!isCalleeSaved(CurLoc) && isCalleeSaved(Idx.asU64()))) {
- // Insert, or overwrite if insertion failed.
- VIt->second = Idx;
- }
+ auto &Previous = VIt->second;
+ // If this is the first location with that value, pick it. Otherwise,
+ // consider whether it's a "longer term" location.
+ std::optional<LocationQuality> ReplacementQuality =
+ getLocQualityIfBetter(Idx, Previous.getQuality());
+ if (ReplacementQuality)
+ Previous = LocationAndQuality(Idx, *ReplacementQuality);
}
// Now map variables to their picked LocIdxes.
for (const auto &Var : VLocs) {
- if (Var.second.Kind == DbgValue::Const) {
- PendingDbgValues.push_back(
- emitMOLoc(*Var.second.MO, Var.first, Var.second.Properties));
- continue;
- }
-
- // If the value has no location, we can't make a variable location.
- const ValueIDNum &Num = Var.second.ID;
- auto ValuesPreferredLoc = ValueToLoc.find(Num);
- if (ValuesPreferredLoc->second.isIllegal()) {
- // If it's a def that occurs in this block, register it as a
- // use-before-def to be resolved as we step through the block.
- if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI())
- addUseBeforeDef(Var.first, Var.second.Properties, Num);
- else
- recoverAsEntryValue(Var.first, Var.second.Properties, Num);
- continue;
- }
-
- LocIdx M = ValuesPreferredLoc->second;
- auto NewValue = LocAndProperties{M, Var.second.Properties};
- auto Result = ActiveVLocs.insert(std::make_pair(Var.first, NewValue));
- if (!Result.second)
- Result.first->second = NewValue;
- ActiveMLocs[M].insert(Var.first);
- PendingDbgValues.push_back(
- MTracker->emitLoc(M, Var.first, Var.second.Properties));
+ loadVarInloc(MBB, DbgOpStore, ValueToLoc, Var.first, Var.second);
}
flushDbgValues(MBB.begin(), &MBB);
}
@@ -352,9 +487,9 @@ public:
/// Record that \p Var has value \p ID, a value that becomes available
/// later in the function.
void addUseBeforeDef(const DebugVariable &Var,
- const DbgValueProperties &Properties, ValueIDNum ID) {
- UseBeforeDef UBD = {ID, Var, Properties};
- UseBeforeDefs[ID.getInst()].push_back(UBD);
+ const DbgValueProperties &Properties,
+ const SmallVectorImpl<DbgOp> &DbgOps, unsigned Inst) {
+ UseBeforeDefs[Inst].emplace_back(DbgOps, Var, Properties);
UseBeforeDefVariables.insert(Var);
}
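The selection loops above and below both reduce to one rule: a candidate location replaces the incumbent only when its LocationQuality is strictly greater. A minimal model of that ordering (enumerator names shortened from the patch's):

    #include <cassert>

    enum class Quality : unsigned char { Illegal, Reg, CalleeSaved, Spill };

    static bool shouldReplace(Quality Incumbent, Quality Candidate) {
      return Candidate > Incumbent; // mirrors getLocQualityIfBetter's early-outs
    }

    int main() {
      assert(shouldReplace(Quality::Illegal, Quality::Reg));
      assert(shouldReplace(Quality::Reg, Quality::Spill));
      assert(!shouldReplace(Quality::Spill, Quality::Spill)); // ties keep incumbent
      return 0;
    }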
@@ -367,22 +502,77 @@ public:
if (MIt == UseBeforeDefs.end())
return;
+ // Map of values to the locations that store them for every value used by
+ // the variables that may have become available.
+ SmallDenseMap<ValueIDNum, LocationAndQuality> ValueToLoc;
+
+ // Populate ValueToLoc with illegal default mappings for every value used by
+ // any UseBeforeDef variables for this instruction.
for (auto &Use : MIt->second) {
- LocIdx L = Use.ID.getLoc();
+ if (!UseBeforeDefVariables.count(Use.Var))
+ continue;
+
+ for (DbgOp &Op : Use.Values) {
+ assert(!Op.isUndef() && "UseBeforeDef erroneously created for a "
+ "DbgValue with undef values.");
+ if (Op.IsConst)
+ continue;
+
+ ValueToLoc.insert({Op.ID, LocationAndQuality()});
+ }
+ }
- // If something goes very wrong, we might end up labelling a COPY
- // instruction or similar with an instruction number, where it doesn't
- // actually define a new value, instead it moves a value. In case this
- // happens, discard.
- if (MTracker->readMLoc(L) != Use.ID)
+ // Exit early if we have no DbgValues to produce.
+ if (ValueToLoc.empty())
+ return;
+
+ // Determine the best location for each desired value.
+ for (auto Location : MTracker->locations()) {
+ LocIdx Idx = Location.Idx;
+ ValueIDNum &LocValueID = Location.Value;
+
+ // Is there a variable that wants a location for this value? If not, skip.
+ auto VIt = ValueToLoc.find(LocValueID);
+ if (VIt == ValueToLoc.end())
continue;
- // If a different debug instruction defined the variable value / location
- // since the start of the block, don't materialize this use-before-def.
+ auto &Previous = VIt->second;
+ // If this is the first location with that value, pick it. Otherwise,
+ // consider whether it's a "longer term" location.
+ std::optional<LocationQuality> ReplacementQuality =
+ getLocQualityIfBetter(Idx, Previous.getQuality());
+ if (ReplacementQuality)
+ Previous = LocationAndQuality(Idx, *ReplacementQuality);
+ }
+
+ // Using the map of values to locations, produce a final set of values for
+ // this variable.
+ for (auto &Use : MIt->second) {
if (!UseBeforeDefVariables.count(Use.Var))
continue;
- PendingDbgValues.push_back(MTracker->emitLoc(L, Use.Var, Use.Properties));
+ SmallVector<ResolvedDbgOp> DbgOps;
+
+ for (DbgOp &Op : Use.Values) {
+ if (Op.IsConst) {
+ DbgOps.push_back(Op.MO);
+ continue;
+ }
+ LocIdx NewLoc = ValueToLoc.find(Op.ID)->second.getLoc();
+ if (NewLoc.isIllegal())
+ break;
+ DbgOps.push_back(NewLoc);
+ }
+
+ // If at least one value used by this debug value is no longer available,
+ // i.e. one of the values was killed before we finished defining all of
+ // the values used by this variable, discard.
+ if (DbgOps.size() != Use.Values.size())
+ continue;
+
+ // Otherwise, we're good to go.
+ PendingDbgValues.push_back(
+ MTracker->emitLoc(DbgOps, Use.Var, Use.Properties));
}
flushDbgValues(pos, nullptr);
}
@@ -440,8 +630,21 @@ public:
if (!ShouldEmitDebugEntryValues)
return false;
+ const DIExpression *DIExpr = Prop.DIExpr;
+
+ // We don't currently emit entry values for DBG_VALUE_LISTs.
+ if (Prop.IsVariadic) {
+ // If this debug value can be converted to be non-variadic, then do so;
+ // otherwise give up.
+ auto NonVariadicExpression =
+ DIExpression::convertToNonVariadicExpression(DIExpr);
+ if (!NonVariadicExpression)
+ return false;
+ DIExpr = *NonVariadicExpression;
+ }
+
// Is the variable appropriate for entry values (i.e., is a parameter).
- if (!isEntryValueVariable(Var, Prop.DIExpr))
+ if (!isEntryValueVariable(Var, DIExpr))
return false;
// Is the value assigned to this variable still the entry value?
@@ -450,11 +653,12 @@ public:
// Emit a variable location using an entry value expression.
DIExpression *NewExpr =
- DIExpression::prepend(Prop.DIExpr, DIExpression::EntryValue);
+ DIExpression::prepend(DIExpr, DIExpression::EntryValue);
Register Reg = MTracker->LocIdxToLocID[Num.getLoc()];
MachineOperand MO = MachineOperand::CreateReg(Reg, false);
- PendingDbgValues.push_back(emitMOLoc(MO, Var, {NewExpr, Prop.Indirect}));
+ PendingDbgValues.push_back(
+ emitMOLoc(MO, Var, {NewExpr, Prop.Indirect, false}));
return true;
}
@@ -464,62 +668,100 @@ public:
MI.getDebugLoc()->getInlinedAt());
DbgValueProperties Properties(MI);
- const MachineOperand &MO = MI.getOperand(0);
-
// Ignore non-register locations, we don't transfer those.
- if (!MO.isReg() || MO.getReg() == 0) {
+ if (MI.isUndefDebugValue() ||
+ all_of(MI.debug_operands(),
+ [](const MachineOperand &MO) { return !MO.isReg(); })) {
auto It = ActiveVLocs.find(Var);
if (It != ActiveVLocs.end()) {
- ActiveMLocs[It->second.Loc].erase(Var);
+ for (LocIdx Loc : It->second.loc_indices())
+ ActiveMLocs[Loc].erase(Var);
ActiveVLocs.erase(It);
- }
+ }
// Any use-before-defs no longer apply.
UseBeforeDefVariables.erase(Var);
return;
}
- Register Reg = MO.getReg();
- LocIdx NewLoc = MTracker->getRegMLoc(Reg);
- redefVar(MI, Properties, NewLoc);
+ SmallVector<ResolvedDbgOp> NewLocs;
+ for (const MachineOperand &MO : MI.debug_operands()) {
+ if (MO.isReg()) {
+ // Any undef regs have already been filtered out above.
+ Register Reg = MO.getReg();
+ LocIdx NewLoc = MTracker->getRegMLoc(Reg);
+ NewLocs.push_back(NewLoc);
+ } else {
+ NewLocs.push_back(MO);
+ }
+ }
+
+ redefVar(MI, Properties, NewLocs);
}
/// Handle a change in variable location within a block. Terminate the
/// variables current location, and record the value it now refers to, so
/// that we can detect location transfers later on.
void redefVar(const MachineInstr &MI, const DbgValueProperties &Properties,
- Optional<LocIdx> OptNewLoc) {
+ SmallVectorImpl<ResolvedDbgOp> &NewLocs) {
DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt());
// Any use-before-defs no longer apply.
UseBeforeDefVariables.erase(Var);
- // Erase any previous location,
+ // Erase any previous location.
auto It = ActiveVLocs.find(Var);
- if (It != ActiveVLocs.end())
- ActiveMLocs[It->second.Loc].erase(Var);
+ if (It != ActiveVLocs.end()) {
+ for (LocIdx Loc : It->second.loc_indices())
+ ActiveMLocs[Loc].erase(Var);
+ }
// If there _is_ no new location, all we had to do was erase.
- if (!OptNewLoc)
+ if (NewLocs.empty()) {
+ if (It != ActiveVLocs.end())
+ ActiveVLocs.erase(It);
return;
- LocIdx NewLoc = *OptNewLoc;
-
- // Check whether our local copy of values-by-location in #VarLocs is out of
- // date. Wipe old tracking data for the location if it's been clobbered in
- // the meantime.
- if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) {
- for (const auto &P : ActiveMLocs[NewLoc]) {
- ActiveVLocs.erase(P);
+ }
+
+ SmallVector<std::pair<LocIdx, DebugVariable>> LostMLocs;
+ for (ResolvedDbgOp &Op : NewLocs) {
+ if (Op.IsConst)
+ continue;
+
+ LocIdx NewLoc = Op.Loc;
+
+ // Check whether our local copy of values-by-location in #VarLocs is out
+ // of date. Wipe old tracking data for the location if it's been clobbered
+ // in the meantime.
+ if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) {
+ for (const auto &P : ActiveMLocs[NewLoc]) {
+ auto LostVLocIt = ActiveVLocs.find(P);
+ if (LostVLocIt != ActiveVLocs.end()) {
+ for (LocIdx Loc : LostVLocIt->second.loc_indices()) {
+ // Every active variable mapping for NewLoc will be cleared, no
+ // need to track individual variables.
+ if (Loc == NewLoc)
+ continue;
+ LostMLocs.emplace_back(Loc, P);
+ }
+ }
+ ActiveVLocs.erase(P);
+ }
+ for (const auto &LostMLoc : LostMLocs)
+ ActiveMLocs[LostMLoc.first].erase(LostMLoc.second);
+ LostMLocs.clear();
+ It = ActiveVLocs.find(Var);
+ ActiveMLocs[NewLoc.asU64()].clear();
+ VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc);
}
- ActiveMLocs[NewLoc.asU64()].clear();
- VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc);
+
+ ActiveMLocs[NewLoc].insert(Var);
}
- ActiveMLocs[NewLoc].insert(Var);
if (It == ActiveVLocs.end()) {
ActiveVLocs.insert(
- std::make_pair(Var, LocAndProperties{NewLoc, Properties}));
+ std::make_pair(Var, ResolvedDbgValue(NewLocs, Properties)));
} else {
- It->second.Loc = NewLoc;
+ It->second.Ops.assign(NewLocs);
It->second.Properties = Properties;
}
}
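
redefVar is easier to follow once the data-structure invariant is explicit: ActiveVLocs maps each variable to the locations it reads, ActiveMLocs holds the reverse mapping, and every update must touch both sides. A toy model of just that bookkeeping, with ints standing in for LocIdx and DebugVariable:

    #include <map>
    #include <set>
    #include <vector>

    using Var = int;
    using Loc = int;

    std::map<Var, std::vector<Loc>> ActiveVLocs; // variable -> locations used
    std::map<Loc, std::set<Var>> ActiveMLocs;    // location -> users

    void redefVar(Var V, const std::vector<Loc> &NewLocs) {
      // Drop the old mapping from both sides first...
      if (auto It = ActiveVLocs.find(V); It != ActiveVLocs.end())
        for (Loc L : It->second)
          ActiveMLocs[L].erase(V);
      if (NewLocs.empty()) {    // value killed: nothing more to record
        ActiveVLocs.erase(V);
        return;
      }
      // ...then install the new one on both sides.
      for (Loc L : NewLocs)
        ActiveMLocs[L].insert(V);
      ActiveVLocs[V] = NewLocs;
    }
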
@@ -551,7 +793,7 @@ public:
// Examine the remaining variable locations: if we can find the same value
// again, we can recover the location.
- Optional<LocIdx> NewLoc = None;
+ std::optional<LocIdx> NewLoc;
for (auto Loc : MTracker->locations())
if (Loc.Value == OldValue)
NewLoc = Loc.Idx;
@@ -570,28 +812,54 @@ public:
// Examine all the variables based on this location.
DenseSet<DebugVariable> NewMLocs;
+ // If no new location has been found, every variable that depends on this
+ // MLoc is dead, so end their existing MLoc->Var mappings as well.
+ SmallVector<std::pair<LocIdx, DebugVariable>> LostMLocs;
for (const auto &Var : ActiveMLocIt->second) {
auto ActiveVLocIt = ActiveVLocs.find(Var);
// Re-state the variable location: if there's no replacement then NewLoc
- // is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE
- // identifying the alternative location will be emitted.
+ // is std::nullopt and a $noreg DBG_VALUE will be created. Otherwise, a
+ // DBG_VALUE identifying the alternative location will be emitted.
const DbgValueProperties &Properties = ActiveVLocIt->second.Properties;
- PendingDbgValues.push_back(MTracker->emitLoc(NewLoc, Var, Properties));
+
+ // Produce the new list of debug ops - an empty list if no new location
+ // was found, or the existing list with the substitution MLoc -> NewLoc
+ // otherwise.
+ SmallVector<ResolvedDbgOp> DbgOps;
+ if (NewLoc) {
+ ResolvedDbgOp OldOp(MLoc);
+ ResolvedDbgOp NewOp(*NewLoc);
+ // Insert illegal ops to overwrite afterwards.
+ DbgOps.insert(DbgOps.begin(), ActiveVLocIt->second.Ops.size(),
+ ResolvedDbgOp(LocIdx::MakeIllegalLoc()));
+ replace_copy(ActiveVLocIt->second.Ops, DbgOps.begin(), OldOp, NewOp);
+ }
+
+ PendingDbgValues.push_back(MTracker->emitLoc(DbgOps, Var, Properties));
// Update machine locations <=> variable locations maps. Defer updating
- // ActiveMLocs to avoid invalidaing the ActiveMLocIt iterator.
+ // ActiveMLocs to avoid invalidating the ActiveMLocIt iterator.
if (!NewLoc) {
+ for (LocIdx Loc : ActiveVLocIt->second.loc_indices()) {
+ if (Loc != MLoc)
+ LostMLocs.emplace_back(Loc, Var);
+ }
ActiveVLocs.erase(ActiveVLocIt);
} else {
- ActiveVLocIt->second.Loc = *NewLoc;
+ ActiveVLocIt->second.Ops = DbgOps;
NewMLocs.insert(Var);
}
}
- // Commit any deferred ActiveMLoc changes.
- if (!NewMLocs.empty())
- for (auto &Var : NewMLocs)
- ActiveMLocs[*NewLoc].insert(Var);
+ // Remove variables from ActiveMLocs if they no longer use any other MLocs
+ // due to being killed by this clobber.
+ for (auto &LocVarIt : LostMLocs) {
+ auto LostMLocIt = ActiveMLocs.find(LocVarIt.first);
+ assert(LostMLocIt != ActiveMLocs.end() &&
+ "Variable was using this MLoc, but ActiveMLocs[MLoc] has no "
+ "entries?");
+ LostMLocIt->second.erase(LocVarIt.second);
+ }
// We lazily track what locations have which values; if we've found a new
// location for the clobbered value, remember it.
@@ -600,9 +868,11 @@ public:
flushDbgValues(Pos, nullptr);
- // Re-find ActiveMLocIt, iterator could have been invalidated.
- ActiveMLocIt = ActiveMLocs.find(MLoc);
+ // Commit ActiveMLoc changes.
ActiveMLocIt->second.clear();
+ if (!NewMLocs.empty())
+ for (auto &Var : NewMLocs)
+ ActiveMLocs[*NewLoc].insert(Var);
}
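
The op-list rebuild in the clobber path above pre-fills a buffer with illegal locations and then uses replace_copy to rewrite every occurrence of the clobbered location. The same idea in a self-contained form, with plain ints standing in for ResolvedDbgOp:

    #include <algorithm>
    #include <vector>

    std::vector<int> substitute(const std::vector<int> &Ops, int OldLoc,
                                int NewLoc) {
      // Pre-size the output (the real code inserts illegal ops first so the
      // destination range exists), then copy with substitution.
      std::vector<int> Out(Ops.size(), -1);
      std::replace_copy(Ops.begin(), Ops.end(), Out.begin(), OldLoc, NewLoc);
      return Out;   // substitute({3, 7, 3}, 3, 9) yields {9, 7, 9}
    }
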
/// Transfer variables based on \p Src to be based on \p Dst. This handles
@@ -619,17 +889,22 @@ public:
// Move set of active variables from one location to another.
auto MovingVars = ActiveMLocs[Src];
- ActiveMLocs[Dst] = MovingVars;
+ ActiveMLocs[Dst].insert(MovingVars.begin(), MovingVars.end());
VarLocs[Dst.asU64()] = VarLocs[Src.asU64()];
// For each variable based on Src; create a location at Dst.
+ ResolvedDbgOp SrcOp(Src);
+ ResolvedDbgOp DstOp(Dst);
for (const auto &Var : MovingVars) {
auto ActiveVLocIt = ActiveVLocs.find(Var);
assert(ActiveVLocIt != ActiveVLocs.end());
- ActiveVLocIt->second.Loc = Dst;
- MachineInstr *MI =
- MTracker->emitLoc(Dst, Var, ActiveVLocIt->second.Properties);
+ // Update all instances of Src in the variable's tracked values to Dst.
+ std::replace(ActiveVLocIt->second.Ops.begin(),
+ ActiveVLocIt->second.Ops.end(), SrcOp, DstOp);
+
+ MachineInstr *MI = MTracker->emitLoc(ActiveVLocIt->second.Ops, Var,
+ ActiveVLocIt->second.Properties);
PendingDbgValues.push_back(MI);
}
ActiveMLocs[Src].clear();
@@ -667,17 +942,43 @@ ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
ValueIDNum ValueIDNum::TombstoneValue = {UINT_MAX, UINT_MAX, UINT_MAX - 1};
#ifndef NDEBUG
-void DbgValue::dump(const MLocTracker *MTrack) const {
- if (Kind == Const) {
- MO->dump();
- } else if (Kind == NoVal) {
- dbgs() << "NoVal(" << BlockNo << ")";
- } else if (Kind == VPHI) {
- dbgs() << "VPHI(" << BlockNo << "," << MTrack->IDAsString(ID) << ")";
+void ResolvedDbgOp::dump(const MLocTracker *MTrack) const {
+ if (IsConst) {
+ dbgs() << MO;
} else {
- assert(Kind == Def);
+ dbgs() << MTrack->LocIdxToName(Loc);
+ }
+}
+void DbgOp::dump(const MLocTracker *MTrack) const {
+ if (IsConst) {
+ dbgs() << MO;
+ } else if (!isUndef()) {
dbgs() << MTrack->IDAsString(ID);
}
+}
+void DbgOpID::dump(const MLocTracker *MTrack, const DbgOpIDMap *OpStore) const {
+ if (!OpStore) {
+ dbgs() << "ID(" << asU32() << ")";
+ } else {
+ OpStore->find(*this).dump(MTrack);
+ }
+}
+void DbgValue::dump(const MLocTracker *MTrack,
+ const DbgOpIDMap *OpStore) const {
+ if (Kind == NoVal) {
+ dbgs() << "NoVal(" << BlockNo << ")";
+ } else if (Kind == VPHI || Kind == Def) {
+ if (Kind == VPHI)
+ dbgs() << "VPHI(" << BlockNo << ",";
+ else
+ dbgs() << "Def(";
+ for (unsigned Idx = 0; Idx < getDbgOpIDs().size(); ++Idx) {
+ getDbgOpID(Idx).dump(MTrack, OpStore);
+ if (Idx != 0)
+ dbgs() << ",";
+ }
+ dbgs() << ")";
+ }
if (Properties.Indirect)
dbgs() << " indir";
if (Properties.DIExpr)
@@ -789,14 +1090,14 @@ void MLocTracker::writeRegMask(const MachineOperand *MO, unsigned CurBB,
Masks.push_back(std::make_pair(MO, InstID));
}
-Optional<SpillLocationNo> MLocTracker::getOrTrackSpillLoc(SpillLoc L) {
+std::optional<SpillLocationNo> MLocTracker::getOrTrackSpillLoc(SpillLoc L) {
SpillLocationNo SpillID(SpillLocs.idFor(L));
if (SpillID.id() == 0) {
// If there is no location, and we have reached the limit of how many stack
// slots to track, then don't track this one.
if (SpillLocs.size() >= StackWorkingSetLimit)
- return None;
+ return std::nullopt;
// Spill location is untracked: create record for this one, and all
// subregister slots too.
@@ -853,120 +1154,157 @@ LLVM_DUMP_METHOD void MLocTracker::dump_mloc_map() {
}
#endif
-MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc,
- const DebugVariable &Var,
- const DbgValueProperties &Properties) {
+MachineInstrBuilder
+MLocTracker::emitLoc(const SmallVectorImpl<ResolvedDbgOp> &DbgOps,
+ const DebugVariable &Var,
+ const DbgValueProperties &Properties) {
DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
Var.getVariable()->getScope(),
const_cast<DILocation *>(Var.getInlinedAt()));
- auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE));
+
+ const MCInstrDesc &Desc = Properties.IsVariadic
+ ? TII.get(TargetOpcode::DBG_VALUE_LIST)
+ : TII.get(TargetOpcode::DBG_VALUE);
+
+#ifdef EXPENSIVE_CHECKS
+ assert(all_of(DbgOps,
+ [](const ResolvedDbgOp &Op) {
+ return Op.IsConst || !Op.Loc.isIllegal();
+ }) &&
+ "Did not expect illegal ops in DbgOps.");
+ assert((DbgOps.size() == 0 ||
+ DbgOps.size() == Properties.getLocationOpCount()) &&
+ "Expected to have either one DbgOp per MI LocationOp, or none.");
+#endif
+
+ auto GetRegOp = [](unsigned Reg) -> MachineOperand {
+ return MachineOperand::CreateReg(
+ /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true);
+ };
+
+ SmallVector<MachineOperand> MOs;
+
+ auto EmitUndef = [&]() {
+ MOs.clear();
+ MOs.assign(Properties.getLocationOpCount(), GetRegOp(0));
+ return BuildMI(MF, DL, Desc, false, MOs, Var.getVariable(),
+ Properties.DIExpr);
+ };
+
+ // Don't bother passing any real operands to BuildMI if any of them would be
+ // $noreg.
+ if (DbgOps.empty())
+ return EmitUndef();
+
+ bool Indirect = Properties.Indirect;
const DIExpression *Expr = Properties.DIExpr;
- if (!MLoc) {
- // No location -> DBG_VALUE $noreg
- MIB.addReg(0);
- MIB.addReg(0);
- } else if (LocIdxToLocID[*MLoc] >= NumRegs) {
- unsigned LocID = LocIdxToLocID[*MLoc];
- SpillLocationNo SpillID = locIDToSpill(LocID);
- StackSlotPos StackIdx = locIDToSpillIdx(LocID);
- unsigned short Offset = StackIdx.second;
-
- // TODO: support variables that are located in spill slots, with non-zero
- // offsets from the start of the spill slot. It would require some more
- // complex DIExpression calculations. This doesn't seem to be produced by
- // LLVM right now, so don't try and support it.
- // Accept no-subregister slots and subregisters where the offset is zero.
- // The consumer should already have type information to work out how large
- // the variable is.
- if (Offset == 0) {
- const SpillLoc &Spill = SpillLocs[SpillID.id()];
- unsigned Base = Spill.SpillBase;
- MIB.addReg(Base);
-
- // There are several ways we can dereference things, and several inputs
- // to consider:
- // * NRVO variables will appear with IsIndirect set, but should have
- // nothing else in their DIExpressions,
- // * Variables with DW_OP_stack_value in their expr already need an
- // explicit dereference of the stack location,
- // * Values that don't match the variable size need DW_OP_deref_size,
- // * Everything else can just become a simple location expression.
-
- // We need to use deref_size whenever there's a mismatch between the
- // size of value and the size of variable portion being read.
- // Additionally, we should use it whenever dealing with stack_value
- // fragments, to avoid the consumer having to determine the deref size
- // from DW_OP_piece.
- bool UseDerefSize = false;
- unsigned ValueSizeInBits = getLocSizeInBits(*MLoc);
- unsigned DerefSizeInBytes = ValueSizeInBits / 8;
- if (auto Fragment = Var.getFragment()) {
- unsigned VariableSizeInBits = Fragment->SizeInBits;
- if (VariableSizeInBits != ValueSizeInBits || Expr->isComplex())
- UseDerefSize = true;
- } else if (auto Size = Var.getVariable()->getSizeInBits()) {
- if (*Size != ValueSizeInBits) {
- UseDerefSize = true;
+
+ assert(DbgOps.size() == Properties.getLocationOpCount());
+
+ // If all locations are valid, accumulate them into our list of
+ // MachineOperands. For any spilled locations, either update the indirectness
+ // register or apply the appropriate transformations in the DIExpression.
+ for (size_t Idx = 0; Idx < Properties.getLocationOpCount(); ++Idx) {
+ const ResolvedDbgOp &Op = DbgOps[Idx];
+
+ if (Op.IsConst) {
+ MOs.push_back(Op.MO);
+ continue;
+ }
+
+ LocIdx MLoc = Op.Loc;
+ unsigned LocID = LocIdxToLocID[MLoc];
+ if (LocID >= NumRegs) {
+ SpillLocationNo SpillID = locIDToSpill(LocID);
+ StackSlotPos StackIdx = locIDToSpillIdx(LocID);
+ unsigned short Offset = StackIdx.second;
+
+ // TODO: support variables that are located in spill slots, with non-zero
+ // offsets from the start of the spill slot. It would require some more
+ // complex DIExpression calculations. This doesn't seem to be produced by
+ // LLVM right now, so don't try and support it.
+ // Accept no-subregister slots and subregisters where the offset is zero.
+ // The consumer should already have type information to work out how large
+ // the variable is.
+ if (Offset == 0) {
+ const SpillLoc &Spill = SpillLocs[SpillID.id()];
+ unsigned Base = Spill.SpillBase;
+
+ // There are several ways we can dereference things, and several inputs
+ // to consider:
+ // * NRVO variables will appear with IsIndirect set, but should have
+ // nothing else in their DIExpressions,
+ // * Variables with DW_OP_stack_value in their expr already need an
+ // explicit dereference of the stack location,
+ // * Values that don't match the variable size need DW_OP_deref_size,
+ // * Everything else can just become a simple location expression.
+
+ // We need to use deref_size whenever there's a mismatch between the
+ // size of value and the size of variable portion being read.
+ // Additionally, we should use it whenever dealing with stack_value
+ // fragments, to avoid the consumer having to determine the deref size
+ // from DW_OP_piece.
+ bool UseDerefSize = false;
+ unsigned ValueSizeInBits = getLocSizeInBits(MLoc);
+ unsigned DerefSizeInBytes = ValueSizeInBits / 8;
+ if (auto Fragment = Var.getFragment()) {
+ unsigned VariableSizeInBits = Fragment->SizeInBits;
+ if (VariableSizeInBits != ValueSizeInBits || Expr->isComplex())
+ UseDerefSize = true;
+ } else if (auto Size = Var.getVariable()->getSizeInBits()) {
+ if (*Size != ValueSizeInBits) {
+ UseDerefSize = true;
+ }
}
- }
- if (Properties.Indirect) {
- // This is something like an NRVO variable, where the pointer has been
- // spilt to the stack, or a dbg.addr pointing at a coroutine frame
- // field. It should end up being a memory location, with the pointer
- // to the variable loaded off the stack with a deref. It can't be a
- // DW_OP_stack_value expression.
- assert(!Expr->isImplicit());
- Expr = TRI.prependOffsetExpression(
- Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter,
- Spill.SpillOffset);
- MIB.addImm(0);
- } else if (UseDerefSize) {
- // We're loading a value off the stack that's not the same size as the
- // variable. Add / subtract stack offset, explicitly deref with a size,
- // and add DW_OP_stack_value if not already present.
- SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size,
- DerefSizeInBytes};
- Expr = DIExpression::prependOpcodes(Expr, Ops, true);
- unsigned Flags = DIExpression::StackValue | DIExpression::ApplyOffset;
- Expr = TRI.prependOffsetExpression(Expr, Flags, Spill.SpillOffset);
- MIB.addReg(0);
- } else if (Expr->isComplex()) {
- // A variable with no size ambiguity, but with extra elements in it's
- // expression. Manually dereference the stack location.
- assert(Expr->isComplex());
- Expr = TRI.prependOffsetExpression(
- Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter,
- Spill.SpillOffset);
- MIB.addReg(0);
+ SmallVector<uint64_t, 5> OffsetOps;
+ TRI.getOffsetOpcodes(Spill.SpillOffset, OffsetOps);
+ bool StackValue = false;
+
+ if (Properties.Indirect) {
+ // This is something like an NRVO variable, where the pointer has been
+ // spilt to the stack. It should end up being a memory location, with
+ // the pointer to the variable loaded off the stack with a deref:
+ assert(!Expr->isImplicit());
+ OffsetOps.push_back(dwarf::DW_OP_deref);
+ } else if (UseDerefSize && Expr->isSingleLocationExpression()) {
+ // TODO: Figure out how to handle deref size issues for variadic
+ // values.
+ // We're loading a value off the stack that's not the same size as the
+ // variable. Add / subtract stack offset, explicitly deref with a
+ // size, and add DW_OP_stack_value if not already present.
+ OffsetOps.push_back(dwarf::DW_OP_deref_size);
+ OffsetOps.push_back(DerefSizeInBytes);
+ StackValue = true;
+ } else if (Expr->isComplex() || Properties.IsVariadic) {
+ // A variable with no size ambiguity, but with extra elements in its
+ // expression. Manually dereference the stack location.
+ OffsetOps.push_back(dwarf::DW_OP_deref);
+ } else {
+ // A plain value that has been spilt to the stack, with no further
+ // context. Request a location expression, marking the DBG_VALUE as
+ // IsIndirect.
+ Indirect = true;
+ }
+
+ Expr = DIExpression::appendOpsToArg(Expr, OffsetOps, Idx, StackValue);
+ MOs.push_back(GetRegOp(Base));
} else {
- // A plain value that has been spilt to the stack, with no further
- // context. Request a location expression, marking the DBG_VALUE as
- // IsIndirect.
- Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset,
- Spill.SpillOffset);
- MIB.addImm(0);
+ // This is a stack location with a weird subregister offset: emit an
+ // undef DBG_VALUE instead.
+ return EmitUndef();
}
} else {
- // This is a stack location with a weird subregister offset: emit an undef
- // DBG_VALUE instead.
- MIB.addReg(0);
- MIB.addReg(0);
+ // Non-empty, non-stack slot, must be a plain register.
+ MOs.push_back(GetRegOp(LocID));
}
- } else {
- // Non-empty, non-stack slot, must be a plain register.
- unsigned LocID = LocIdxToLocID[*MLoc];
- MIB.addReg(LocID);
- if (Properties.Indirect)
- MIB.addImm(0);
- else
- MIB.addReg(0);
}
- MIB.addMetadata(Var.getVariable());
- MIB.addMetadata(Expr);
- return MIB;
+ return BuildMI(MF, DL, Desc, Indirect, MOs, Var.getVariable(), Expr);
}
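
The spill-slot branch of emitLoc picks one of four dereference strategies per operand. Reduced to the bare decision, it reads as follows; the booleans mirror the conditions in the hunk above, and the enum names are illustrative:

    enum class SpillStrategy { DerefPointer, DerefWithSize, Deref, Indirect };

    SpillStrategy pickSpillStrategy(bool IndirectProp, bool UseDerefSize,
                                    bool SingleLocation, bool Complex,
                                    bool IsVariadic) {
      if (IndirectProp)
        return SpillStrategy::DerefPointer;  // NRVO-style spilt pointer
      if (UseDerefSize && SingleLocation)
        return SpillStrategy::DerefWithSize; // size mismatch: DW_OP_deref_size
      if (Complex || IsVariadic)
        return SpillStrategy::Deref;         // manual DW_OP_deref
      return SpillStrategy::Indirect;        // plain spill: mark MI indirect
    }
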
/// Default construct and initialize the pass.
@@ -974,7 +1312,10 @@ InstrRefBasedLDV::InstrRefBasedLDV() = default;
bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
unsigned Reg = MTracker->LocIdxToLocID[L];
- for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ return isCalleeSavedReg(Reg);
+}
+bool InstrRefBasedLDV::isCalleeSavedReg(Register R) const {
+ for (MCRegAliasIterator RAI(R, TRI, true); RAI.isValid(); ++RAI)
if (CalleeSavedRegs.test(*RAI))
return true;
return false;
@@ -989,7 +1330,7 @@ bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
// void InstrRefBasedLDV::printVarLocInMBB(..)
#endif
-Optional<SpillLocationNo>
+std::optional<SpillLocationNo>
InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
assert(MI.hasOneMemOperand() &&
"Spill instruction does not have exactly one memory operand?");
@@ -1004,11 +1345,11 @@ InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
return MTracker->getOrTrackSpillLoc({Reg, Offset});
}
-Optional<LocIdx>
+std::optional<LocIdx>
InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) {
- Optional<SpillLocationNo> SpillLoc = extractSpillBaseRegAndOffset(MI);
+ std::optional<SpillLocationNo> SpillLoc = extractSpillBaseRegAndOffset(MI);
if (!SpillLoc)
- return None;
+ return std::nullopt;
// Where in the stack slot is this value defined -- i.e., what size of value
// is this? An important question, because it could be loaded into a register
@@ -1022,7 +1363,7 @@ InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) {
if (IdxIt == MTracker->StackSlotIdxes.end())
// That index is not tracked. This is surprising, and unlikely to ever
// occur, but the safe action is to indicate the variable is optimised out.
- return None;
+ return std::nullopt;
unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillLoc, IdxIt->second);
return MTracker->getSpillMLoc(SpillID);
@@ -1050,39 +1391,33 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
if (Scope == nullptr)
return true; // handled it; by doing nothing
- // For now, ignore DBG_VALUE_LISTs when extending ranges. Allow it to
- // contribute to locations in this block, but don't propagate further.
- // Interpret it like a DBG_VALUE $noreg.
- if (MI.isDebugValueList()) {
- if (VTracker)
- VTracker->defVar(MI, Properties, None);
- if (TTracker)
- TTracker->redefVar(MI, Properties, None);
- return true;
- }
-
- const MachineOperand &MO = MI.getOperand(0);
-
// MLocTracker needs to know that this register is read, even if it's only
// read by a debug inst.
- if (MO.isReg() && MO.getReg() != 0)
- (void)MTracker->readReg(MO.getReg());
+ for (const MachineOperand &MO : MI.debug_operands())
+ if (MO.isReg() && MO.getReg() != 0)
+ (void)MTracker->readReg(MO.getReg());
// If we're preparing for the second analysis (variables), the machine value
// locations are already solved, and we report this DBG_VALUE and the value
// it refers to to VLocTracker.
if (VTracker) {
- if (MO.isReg()) {
- // Feed defVar the new variable location, or if this is a
- // DBG_VALUE $noreg, feed defVar None.
- if (MO.getReg())
- VTracker->defVar(MI, Properties, MTracker->readReg(MO.getReg()));
- else
- VTracker->defVar(MI, Properties, None);
- } else if (MI.getOperand(0).isImm() || MI.getOperand(0).isFPImm() ||
- MI.getOperand(0).isCImm()) {
- VTracker->defVar(MI, MI.getOperand(0));
+ SmallVector<DbgOpID> DebugOps;
+ // Feed defVar the new variable location, or if this is a DBG_VALUE $noreg,
+ // feed defVar None.
+ if (!MI.isUndefDebugValue()) {
+ for (const MachineOperand &MO : MI.debug_operands()) {
+ // There should be no undef registers here, as we've screened for undef
+ // debug values.
+ if (MO.isReg()) {
+ DebugOps.push_back(DbgOpStore.insert(MTracker->readReg(MO.getReg())));
+ } else if (MO.isImm() || MO.isFPImm() || MO.isCImm()) {
+ DebugOps.push_back(DbgOpStore.insert(MO));
+ } else {
+ llvm_unreachable("Unexpected debug operand type.");
+ }
+ }
}
+ VTracker->defVar(MI, Properties, DebugOps);
}
// If performing final tracking of transfers, report this variable definition
@@ -1092,39 +1427,14 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
return true;
}
-bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
- const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns) {
- if (!MI.isDebugRef())
- return false;
-
- // Only handle this instruction when we are building the variable value
- // transfer function.
- if (!VTracker && !TTracker)
- return false;
-
- unsigned InstNo = MI.getOperand(0).getImm();
- unsigned OpNo = MI.getOperand(1).getImm();
-
- const DILocalVariable *Var = MI.getDebugVariable();
- const DIExpression *Expr = MI.getDebugExpression();
- const DILocation *DebugLoc = MI.getDebugLoc();
- const DILocation *InlinedAt = DebugLoc->getInlinedAt();
- assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
- "Expected inlined-at fields to agree");
-
- DebugVariable V(Var, Expr, InlinedAt);
-
- auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get());
- if (Scope == nullptr)
- return true; // Handled by doing nothing. This variable is never in scope.
-
- const MachineFunction &MF = *MI.getParent()->getParent();
-
+std::optional<ValueIDNum> InstrRefBasedLDV::getValueForInstrRef(
+ unsigned InstNo, unsigned OpNo, MachineInstr &MI,
+ const ValueTable *MLiveOuts, const ValueTable *MLiveIns) {
// Various optimizations may have happened to the value during codegen,
// recorded in the value substitution table. Apply any substitutions to
// the instruction / operand number in this DBG_INSTR_REF, and collect
// any subregister extractions performed during optimization.
+ const MachineFunction &MF = *MI.getParent()->getParent();
// Create dummy substitution with Src set, for lookup.
auto SoughtSub =
@@ -1143,13 +1453,12 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
// Default machine value number is <None> -- if no instruction defines
// the corresponding value, it must have been optimized out.
- Optional<ValueIDNum> NewID = None;
+ std::optional<ValueIDNum> NewID;
// Try to lookup the instruction number, and find the machine value number
// that it defines. It could be an instruction, or a PHI.
auto InstrIt = DebugInstrNumToInstr.find(InstNo);
- auto PHIIt = std::lower_bound(DebugPHINumToValue.begin(),
- DebugPHINumToValue.end(), InstNo);
+ auto PHIIt = llvm::lower_bound(DebugPHINumToValue, InstNo);
if (InstrIt != DebugInstrNumToInstr.end()) {
const MachineInstr &TargetInstr = *InstrIt->second.first;
uint64_t BlockNo = TargetInstr.getParent()->getNumber();
@@ -1158,7 +1467,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
// a register def was folded into a stack store.
if (OpNo == MachineFunction::DebugOperandMemNumber &&
TargetInstr.hasOneMemOperand()) {
- Optional<LocIdx> L = findLocationForMemOperand(TargetInstr);
+ std::optional<LocIdx> L = findLocationForMemOperand(TargetInstr);
if (L)
NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L);
} else if (OpNo != MachineFunction::DebugOperandMemNumber) {
@@ -1247,7 +1556,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
// If we didn't find anything: there's no way to express our value.
if (!NewReg) {
- NewID = None;
+ NewID = std::nullopt;
} else {
// Re-state the value as being defined within the subregister
// that we found.
@@ -1257,61 +1566,162 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
}
} else {
// If we can't handle subregisters, unset the new value.
- NewID = None;
+ NewID = std::nullopt;
}
}
- // We, we have a value number or None. Tell the variable value tracker about
- // it. The rest of this LiveDebugValues implementation acts exactly the same
- // for DBG_INSTR_REFs as DBG_VALUEs (just, the former can refer to values that
- // aren't immediately available).
- DbgValueProperties Properties(Expr, false);
+ return NewID;
+}
+
+bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns) {
+ if (!MI.isDebugRef())
+ return false;
+
+ // Only handle this instruction when we are building the variable value
+ // transfer function.
+ if (!VTracker && !TTracker)
+ return false;
+
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ const DILocation *DebugLoc = MI.getDebugLoc();
+ const DILocation *InlinedAt = DebugLoc->getInlinedAt();
+ assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
+ "Expected inlined-at fields to agree");
+
+ DebugVariable V(Var, Expr, InlinedAt);
+
+ auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get());
+ if (Scope == nullptr)
+ return true; // Handled by doing nothing. This variable is never in scope.
+
+ SmallVector<DbgOpID> DbgOpIDs;
+ for (const MachineOperand &MO : MI.debug_operands()) {
+ if (!MO.isDbgInstrRef()) {
+ assert(!MO.isReg() && "DBG_INSTR_REF should not contain registers");
+ DbgOpID ConstOpID = DbgOpStore.insert(DbgOp(MO));
+ DbgOpIDs.push_back(ConstOpID);
+ continue;
+ }
+
+ unsigned InstNo = MO.getInstrRefInstrIndex();
+ unsigned OpNo = MO.getInstrRefOpIndex();
+
+ // Default machine value number is <None> -- if no instruction defines
+ // the corresponding value, it must have been optimized out.
+ std::optional<ValueIDNum> NewID =
+ getValueForInstrRef(InstNo, OpNo, MI, MLiveOuts, MLiveIns);
+ // We have a value number or std::nullopt. If the latter, then kill the
+ // entire debug value.
+ if (NewID) {
+ DbgOpIDs.push_back(DbgOpStore.insert(*NewID));
+ } else {
+ DbgOpIDs.clear();
+ break;
+ }
+ }
+
+ // We have a DbgOpID for every value or for none. Tell the variable value
+ // tracker about it. The rest of this LiveDebugValues implementation acts
+ // exactly the same for DBG_INSTR_REFs as DBG_VALUEs (just, the former can
+ // refer to values that aren't immediately available).
+ DbgValueProperties Properties(Expr, false, true);
if (VTracker)
- VTracker->defVar(MI, Properties, NewID);
+ VTracker->defVar(MI, Properties, DbgOpIDs);
// If we're on the final pass through the function, decompose this INSTR_REF
// into a plain DBG_VALUE.
if (!TTracker)
return true;
+ // Fetch the concrete DbgOps now, as we will need them later.
+ SmallVector<DbgOp> DbgOps;
+ for (DbgOpID OpID : DbgOpIDs) {
+ DbgOps.push_back(DbgOpStore.find(OpID));
+ }
+
// Pick a location for the machine value number, if such a location exists.
// (This information could be stored in TransferTracker to make it faster).
- Optional<LocIdx> FoundLoc = None;
+ SmallDenseMap<ValueIDNum, TransferTracker::LocationAndQuality> FoundLocs;
+ SmallVector<ValueIDNum> ValuesToFind;
+ // Initialize the preferred-location map with illegal locations, to be
+ // filled in later.
+ for (const DbgOp &Op : DbgOps) {
+ if (!Op.IsConst)
+ if (FoundLocs.insert({Op.ID, TransferTracker::LocationAndQuality()})
+ .second)
+ ValuesToFind.push_back(Op.ID);
+ }
+
for (auto Location : MTracker->locations()) {
LocIdx CurL = Location.Idx;
ValueIDNum ID = MTracker->readMLoc(CurL);
- if (NewID && ID == NewID) {
- // If this is the first location with that value, pick it. Otherwise,
- // consider whether it's a "longer term" location.
- if (!FoundLoc) {
- FoundLoc = CurL;
- continue;
+ auto ValueToFindIt = find(ValuesToFind, ID);
+ if (ValueToFindIt == ValuesToFind.end())
+ continue;
+ auto &Previous = FoundLocs.find(ID)->second;
+ // If this is the first location with that value, pick it. Otherwise,
+ // consider whether it's a "longer term" location.
+ std::optional<TransferTracker::LocationQuality> ReplacementQuality =
+ TTracker->getLocQualityIfBetter(CurL, Previous.getQuality());
+ if (ReplacementQuality) {
+ Previous = TransferTracker::LocationAndQuality(CurL, *ReplacementQuality);
+ if (Previous.isBest()) {
+ ValuesToFind.erase(ValueToFindIt);
+ if (ValuesToFind.empty())
+ break;
}
-
- if (MTracker->isSpill(CurL))
- FoundLoc = CurL; // Spills are a longer term location.
- else if (!MTracker->isSpill(*FoundLoc) &&
- !MTracker->isSpill(CurL) &&
- !isCalleeSaved(*FoundLoc) &&
- isCalleeSaved(CurL))
- FoundLoc = CurL; // Callee saved regs are longer term than normal.
}
}
+ SmallVector<ResolvedDbgOp> NewLocs;
+ for (const DbgOp &DbgOp : DbgOps) {
+ if (DbgOp.IsConst) {
+ NewLocs.push_back(DbgOp.MO);
+ continue;
+ }
+ LocIdx FoundLoc = FoundLocs.find(DbgOp.ID)->second.getLoc();
+ if (FoundLoc.isIllegal()) {
+ NewLocs.clear();
+ break;
+ }
+ NewLocs.push_back(FoundLoc);
+ }
// Tell transfer tracker that the variable value has changed.
- TTracker->redefVar(MI, Properties, FoundLoc);
-
- // If there was a value with no location; but the value is defined in a
- // later instruction in this block, this is a block-local use-before-def.
- if (!FoundLoc && NewID && NewID->getBlock() == CurBB &&
- NewID->getInst() > CurInst)
- TTracker->addUseBeforeDef(V, {MI.getDebugExpression(), false}, *NewID);
+ TTracker->redefVar(MI, Properties, NewLocs);
+
+ // If there were values with no location, but all such values are defined in
+ // later instructions in this block, this is a block-local use-before-def.
+ if (!DbgOps.empty() && NewLocs.empty()) {
+ bool IsValidUseBeforeDef = true;
+ uint64_t LastUseBeforeDef = 0;
+ for (auto ValueLoc : FoundLocs) {
+ ValueIDNum NewID = ValueLoc.first;
+ LocIdx FoundLoc = ValueLoc.second.getLoc();
+ if (!FoundLoc.isIllegal())
+ continue;
+ // If we have a value with no location that is not defined in this block,
+ // then it has no location in this block, leaving this value undefined.
+ if (NewID.getBlock() != CurBB || NewID.getInst() <= CurInst) {
+ IsValidUseBeforeDef = false;
+ break;
+ }
+ LastUseBeforeDef = std::max(LastUseBeforeDef, NewID.getInst());
+ }
+ if (IsValidUseBeforeDef) {
+ TTracker->addUseBeforeDef(V, {MI.getDebugExpression(), false, true},
+ DbgOps, LastUseBeforeDef);
+ }
+ }
// Produce a DBG_VALUE representing what this DBG_INSTR_REF meant.
// This DBG_VALUE is potentially a $noreg / undefined location, if
- // FoundLoc is None.
+ // FoundLoc is illegal.
// (XXX -- could morph the DBG_INSTR_REF in the future).
- MachineInstr *DbgMI = MTracker->emitLoc(FoundLoc, V, Properties);
+ MachineInstr *DbgMI = MTracker->emitLoc(NewLocs, V, Properties);
+
TTracker->PendingDbgValues.push_back(DbgMI);
TTracker->flushDbgValues(MI.getIterator(), nullptr);
return true;
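
The FoundLocs scan above replaces the removed ad-hoc preference with a per-value quality ranking. Judging from the code it supersedes, the ordering is: any location beats none, a callee-saved register beats an ordinary one, and a spill slot beats both, being the longest-lived. A sketch under that assumption; the real LocationQuality enum may differ in detail:

    enum class Quality { Illegal = 0, Register, CalleeSavedReg, Spill };

    // A candidate wins only if strictly better. Once a value reaches
    // Quality::Spill it can leave the worklist, which is what the
    // isBest() early-exit in the loop above accomplishes.
    bool isBetter(Quality Candidate, Quality Current) {
      return static_cast<int>(Candidate) > static_cast<int>(Current);
    }
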
@@ -1335,7 +1745,8 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
// a DBG_PHI. This can happen if DBG_PHIs are malformed, or refer to a
// dead stack slot, for example.
// Record a DebugPHIRecord with an empty value + location.
- DebugPHINumToValue.push_back({InstrNum, MI.getParent(), None, None});
+ DebugPHINumToValue.push_back(
+ {InstrNum, MI.getParent(), std::nullopt, std::nullopt});
return true;
};
@@ -1364,7 +1775,7 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
Register Base;
StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base);
SpillLoc SL = {Base, Offs};
- Optional<SpillLocationNo> SpillNo = MTracker->getOrTrackSpillLoc(SL);
+ std::optional<SpillLocationNo> SpillNo = MTracker->getOrTrackSpillLoc(SL);
// We might be able to find a value, but have chosen not to, to avoid
// tracking too much stack information.
@@ -1437,8 +1848,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
SmallVector<const MachineOperand *, 4> RegMaskPtrs;
for (const MachineOperand &MO : MI.operands()) {
// Determine whether the operand is a register def.
- if (MO.isReg() && MO.isDef() && MO.getReg() &&
- Register::isPhysicalRegister(MO.getReg()) &&
+ if (MO.isReg() && MO.isDef() && MO.getReg() && MO.getReg().isPhysical() &&
!IgnoreSPAlias(MO.getReg())) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
@@ -1459,7 +1869,8 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// If this instruction writes to a spill slot, def that slot.
if (hasFoldedStackStore(MI)) {
- if (Optional<SpillLocationNo> SpillNo = extractSpillBaseRegAndOffset(MI)) {
+ if (std::optional<SpillLocationNo> SpillNo =
+ extractSpillBaseRegAndOffset(MI)) {
for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I);
LocIdx L = MTracker->getSpillMLoc(SpillID);
@@ -1501,7 +1912,8 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// Tell TTracker about any folded stack store.
if (hasFoldedStackStore(MI)) {
- if (Optional<SpillLocationNo> SpillNo = extractSpillBaseRegAndOffset(MI)) {
+ if (std::optional<SpillLocationNo> SpillNo =
+ extractSpillBaseRegAndOffset(MI)) {
for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I);
LocIdx L = MTracker->getSpillMLoc(SpillID);
@@ -1542,22 +1954,22 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
}
}
-Optional<SpillLocationNo>
+std::optional<SpillLocationNo>
InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI,
MachineFunction *MF) {
// TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
- return None;
+ return std::nullopt;
// Reject any memory operand that's aliased -- we can't guarantee its value.
auto MMOI = MI.memoperands_begin();
const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
if (PVal->isAliased(MFI))
- return None;
+ return std::nullopt;
if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
- return None; // This is not a spill instruction, since no valid size was
- // returned from either function.
+ return std::nullopt; // This is not a spill instruction, since no valid size
+ // was returned from either function.
return extractSpillBaseRegAndOffset(MI);
}
@@ -1572,11 +1984,11 @@ bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
return Reg != 0;
}
-Optional<SpillLocationNo>
+std::optional<SpillLocationNo>
InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI,
MachineFunction *MF, unsigned &Reg) {
if (!MI.hasOneMemOperand())
- return None;
+ return std::nullopt;
// FIXME: Handle folded restore instructions with more than one memory
// operand.
@@ -1584,7 +1996,7 @@ InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI,
Reg = MI.getOperand(0).getReg();
return extractSpillBaseRegAndOffset(MI);
}
- return None;
+ return std::nullopt;
}
bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
@@ -1616,12 +2028,12 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
// First, if there are any DBG_VALUEs pointing at a spill slot that is
// written to, terminate that variable location. The value in memory
// will have changed. DbgEntityHistoryCalculator doesn't try to detect this.
- if (Optional<SpillLocationNo> Loc = isSpillInstruction(MI, MF)) {
+ if (std::optional<SpillLocationNo> Loc = isSpillInstruction(MI, MF)) {
// Un-set this location and clobber, so that earlier locations don't
// continue past this store.
for (unsigned SlotIdx = 0; SlotIdx < MTracker->NumSlotIdxes; ++SlotIdx) {
unsigned SpillID = MTracker->getSpillIDWithIdx(*Loc, SlotIdx);
- Optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID);
+ std::optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID);
if (!MLoc)
continue;
@@ -1667,7 +2079,7 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
DoTransfer(Reg, SpillID);
} else {
- Optional<SpillLocationNo> Loc = isRestoreInstruction(MI, MF, Reg);
+ std::optional<SpillLocationNo> Loc = isRestoreInstruction(MI, MF, Reg);
if (!Loc)
return false;
@@ -1711,13 +2123,6 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
const MachineOperand *DestRegOp = DestSrc->Destination;
const MachineOperand *SrcRegOp = DestSrc->Source;
- auto isCalleeSavedReg = [&](unsigned Reg) {
- for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
- if (CalleeSavedRegs.test(*RAI))
- return true;
- return false;
- };
-
Register SrcReg = SrcRegOp->getReg();
Register DestReg = DestRegOp->getReg();
@@ -1791,7 +2196,7 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
/// \param MI A previously unprocessed debug instruction to analyze for
/// fragment usage.
void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
- assert(MI.isDebugValue() || MI.isDebugRef());
+ assert(MI.isDebugValueLike());
DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt());
FragmentInfo ThisFragment = MIVar.getFragmentOrDefault();
@@ -1896,7 +2301,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
process(MI, nullptr, nullptr);
// Also accumulate fragment map.
- if (MI.isDebugValue() || MI.isDebugRef())
+ if (MI.isDebugValueLike())
accumulateFragmentMap(MI);
// Create a map from the instruction number (if present) to the
@@ -1931,7 +2336,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
Result.first->second = P;
}
- // Accumulate any bitmask operands into the clobberred reg mask for this
+ // Accumulate any bitmask operands into the clobbered reg mask for this
// block.
for (auto &P : MTracker->Masks) {
BlockMasks[CurBB].clearBitsNotInMask(P.first->getRegMask(), BVWords);
@@ -2353,33 +2758,104 @@ void InstrRefBasedLDV::BlockPHIPlacement(
IDF.calculate(PHIBlocks);
}
-Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
- const MachineBasicBlock &MBB, const DebugVariable &Var,
+bool InstrRefBasedLDV::pickVPHILoc(
+ SmallVectorImpl<DbgOpID> &OutValues, const MachineBasicBlock &MBB,
const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs,
const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) {
- // Collect a set of locations from predecessor where its live-out value can
- // be found.
- SmallVector<SmallVector<LocIdx, 4>, 8> Locs;
- SmallVector<const DbgValueProperties *, 4> Properties;
- unsigned NumLocs = MTracker->getNumLocs();
// No predecessors means no PHIs.
if (BlockOrders.empty())
- return None;
+ return false;
- for (const auto *p : BlockOrders) {
- unsigned ThisBBNum = p->getNumber();
+ // All the location operands that do not already agree need to be joined,
+ // track the indices of each such location operand here.
+ SmallDenseSet<unsigned> LocOpsToJoin;
+
+ auto FirstValueIt = LiveOuts.find(BlockOrders[0]);
+ if (FirstValueIt == LiveOuts.end())
+ return false;
+ const DbgValue &FirstValue = *FirstValueIt->second;
+
+ for (const auto p : BlockOrders) {
auto OutValIt = LiveOuts.find(p);
if (OutValIt == LiveOuts.end())
// If we have a predecessor not in scope, we'll never find a PHI position.
- return None;
+ return false;
const DbgValue &OutVal = *OutValIt->second;
- if (OutVal.Kind == DbgValue::Const || OutVal.Kind == DbgValue::NoVal)
- // Consts and no-values cannot have locations we can join on.
- return None;
+ // No-values cannot have locations we can join on.
+ if (OutVal.Kind == DbgValue::NoVal)
+ return false;
+
+ // For unjoined VPHIs where we don't know the location, we definitely
+ // can't find a join loc unless the VPHI is a backedge.
+ if (OutVal.isUnjoinedPHI() && OutVal.BlockNo != MBB.getNumber())
+ return false;
+
+ if (!FirstValue.Properties.isJoinable(OutVal.Properties))
+ return false;
+
+ for (unsigned Idx = 0; Idx < FirstValue.getLocationOpCount(); ++Idx) {
+ // An unjoined PHI has no defined locations, and so a shared location must
+ // be found for every operand.
+ if (OutVal.isUnjoinedPHI()) {
+ LocOpsToJoin.insert(Idx);
+ continue;
+ }
+ DbgOpID FirstValOp = FirstValue.getDbgOpID(Idx);
+ DbgOpID OutValOp = OutVal.getDbgOpID(Idx);
+ if (FirstValOp != OutValOp) {
+ // We can never join constant ops - the ops must either both be equal
+ // constant ops or non-const ops.
+ if (FirstValOp.isConst() || OutValOp.isConst())
+ return false;
+ else
+ LocOpsToJoin.insert(Idx);
+ }
+ }
+ }
+
+ SmallVector<DbgOpID> NewDbgOps;
- Properties.push_back(&OutVal.Properties);
+ for (unsigned Idx = 0; Idx < FirstValue.getLocationOpCount(); ++Idx) {
+ // If this op doesn't need to be joined because the values agree, use that
+ // already-agreed value.
+ if (!LocOpsToJoin.contains(Idx)) {
+ NewDbgOps.push_back(FirstValue.getDbgOpID(Idx));
+ continue;
+ }
+
+ std::optional<ValueIDNum> JoinedOpLoc =
+ pickOperandPHILoc(Idx, MBB, LiveOuts, MOutLocs, BlockOrders);
+
+ if (!JoinedOpLoc)
+ return false;
+
+ NewDbgOps.push_back(DbgOpStore.insert(*JoinedOpLoc));
+ }
+
+ OutValues.append(NewDbgOps);
+ return true;
+}
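
pickOperandPHILoc, defined next, amounts to a set intersection: each in-scope predecessor contributes the machine locations holding its live-out value, and only a location present in every predecessor can host the operand's PHI. The core step, assuming each per-predecessor list is kept sorted:

    #include <algorithm>
    #include <iterator>
    #include <vector>

    std::vector<int>
    intersectLocs(const std::vector<std::vector<int>> &PredLocs) {
      if (PredLocs.empty())
        return {};
      std::vector<int> Candidates = PredLocs[0];
      for (size_t I = 1; I < PredLocs.size(); ++I) {
        std::vector<int> Next;
        std::set_intersection(Candidates.begin(), Candidates.end(),
                              PredLocs[I].begin(), PredLocs[I].end(),
                              std::back_inserter(Next));
        Candidates = Next;  // shrinks monotonically; empty means no join loc
      }
      return Candidates;
    }
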
+
+std::optional<ValueIDNum> InstrRefBasedLDV::pickOperandPHILoc(
+ unsigned DbgOpIdx, const MachineBasicBlock &MBB, const LiveIdxT &LiveOuts,
+ FuncValueTable &MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) {
+
+ // Collect a set of locations from predecessor where its live-out value can
+ // be found.
+ SmallVector<SmallVector<LocIdx, 4>, 8> Locs;
+ unsigned NumLocs = MTracker->getNumLocs();
+
+ for (const auto p : BlockOrders) {
+ unsigned ThisBBNum = p->getNumber();
+ auto OutValIt = LiveOuts.find(p);
+ assert(OutValIt != LiveOuts.end());
+ const DbgValue &OutVal = *OutValIt->second;
+ DbgOpID OutValOpID = OutVal.getDbgOpID(DbgOpIdx);
+ DbgOp OutValOp = DbgOpStore.find(OutValOpID);
+ assert(!OutValOp.IsConst);
// Create new empty vector of locations.
Locs.resize(Locs.size() + 1);
@@ -2388,8 +2864,8 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
// present. Do the same for VPHIs where we know the VPHI value.
if (OutVal.Kind == DbgValue::Def ||
(OutVal.Kind == DbgValue::VPHI && OutVal.BlockNo != MBB.getNumber() &&
- OutVal.ID != ValueIDNum::EmptyValue)) {
- ValueIDNum ValToLookFor = OutVal.ID;
+ !OutValOp.isUndef())) {
+ ValueIDNum ValToLookFor = OutValOp.ID;
// Search the live-outs of the predecessor for the specified value.
for (unsigned int I = 0; I < NumLocs; ++I) {
if (MOutLocs[ThisBBNum][I] == ValToLookFor)
@@ -2397,11 +2873,6 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
}
} else {
assert(OutVal.Kind == DbgValue::VPHI);
- // For VPHIs where we don't know the location, we definitely can't find
- // a join loc.
- if (OutVal.BlockNo != MBB.getNumber())
- return None;
-
// Otherwise: this is a VPHI on a backedge feeding back into itself, i.e.
// a value that's live-through the whole loop. (It has to be a backedge,
// because a block can't dominate itself). We can accept as a PHI location
@@ -2415,17 +2886,9 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
}
}
}
-
// We should have found locations for all predecessors, or returned.
assert(Locs.size() == BlockOrders.size());
- // Check that all properties are the same. We can't pick a location if they're
- // not.
- const DbgValueProperties *Properties0 = Properties[0];
- for (const auto *Prop : Properties)
- if (*Prop != *Properties0)
- return None;
-
// Starting with the first set of locations, take the intersection with
// subsequent sets.
SmallVector<LocIdx, 4> CandidateLocs = Locs[0];
@@ -2437,7 +2900,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
CandidateLocs = NewCandidates;
}
if (CandidateLocs.empty())
- return None;
+ return std::nullopt;
// We now have a set of LocIdxes that contain the right output value in
// each of the predecessors. Pick the lowest; if there's a register loc,
@@ -2516,12 +2979,12 @@ bool InstrRefBasedLDV::vlocJoin(
// Scan for variable values that can never be resolved: if they have
// different DIExpressions, different indirectness, or are mixed constants /
// non-constants.
- for (auto &V : Values) {
- if (V.second->Properties != FirstVal.Properties)
+ for (const auto &V : Values) {
+ if (!V.second->Properties.isJoinable(FirstVal.Properties))
return false;
if (V.second->Kind == DbgValue::NoVal)
return false;
- if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const)
+ if (!V.second->hasJoinableLocOps(FirstVal))
return false;
}
@@ -2531,6 +2994,12 @@ bool InstrRefBasedLDV::vlocJoin(
if (*V.second == FirstVal)
continue; // No disagreement.
+ // If both values are not equal but have equal non-empty IDs then they refer
+ // to the same value from different sources (e.g. one is VPHI and the other
+ // is Def), which does not cause disagreement.
+ if (V.second->hasIdenticalValidLocOps(FirstVal))
+ continue;
+
// Eliminate if a backedge feeds a VPHI back into itself.
if (V.second->Kind == DbgValue::VPHI &&
V.second->BlockNo == MBB.getNumber() &&
@@ -2675,7 +3144,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
// Initialize all values to start as NoVals. This signifies "it's live
// through, but we don't know what it is".
- DbgValueProperties EmptyProperties(EmptyExpr, false);
+ DbgValueProperties EmptyProperties(EmptyExpr, false, false);
for (unsigned int I = 0; I < NumBlocks; ++I) {
DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
LiveIns.push_back(EmptyDbgValue);
@@ -2775,12 +3244,13 @@ void InstrRefBasedLDV::buildVLocValueMap(
// eliminated and transitions from VPHI-with-location to
// live-through-value. As a result, the selected location of any VPHI
// might change, so we need to re-compute it on each iteration.
- Optional<ValueIDNum> ValueNum =
- pickVPHILoc(*MBB, Var, LiveOutIdx, MOutLocs, Preds);
+ SmallVector<DbgOpID> JoinedOps;
- if (ValueNum) {
- InLocsChanged |= LiveIn->ID != *ValueNum;
- LiveIn->ID = *ValueNum;
+ if (pickVPHILoc(JoinedOps, *MBB, LiveOutIdx, MOutLocs, Preds)) {
+ bool NewLocPicked = !equal(LiveIn->getDbgOpIDs(), JoinedOps);
+ InLocsChanged |= NewLocPicked;
+ if (NewLocPicked)
+ LiveIn->setDbgOpIDs(JoinedOps);
}
}
@@ -2850,8 +3320,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
DbgValue *BlockLiveIn = LiveInIdx[MBB];
if (BlockLiveIn->Kind == DbgValue::NoVal)
continue;
- if (BlockLiveIn->Kind == DbgValue::VPHI &&
- BlockLiveIn->ID == ValueIDNum::EmptyValue)
+ if (BlockLiveIn->isUnjoinedPHI())
continue;
if (BlockLiveIn->Kind == DbgValue::VPHI)
BlockLiveIn->Kind = DbgValue::Def;
@@ -2933,12 +3402,17 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
// Compute mappings of block <=> RPO order.
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
unsigned int RPONumber = 0;
- for (MachineBasicBlock *MBB : RPOT) {
+ auto processMBB = [&](MachineBasicBlock *MBB) {
OrderToBB[RPONumber] = MBB;
BBToOrder[MBB] = RPONumber;
BBNumToRPO[MBB->getNumber()] = RPONumber;
++RPONumber;
- }
+ };
+ for (MachineBasicBlock *MBB : RPOT)
+ processMBB(MBB);
+ for (MachineBasicBlock &MBB : MF)
+ if (BBToOrder.find(&MBB) == BBToOrder.end())
+ processMBB(&MBB);
// Order value substitutions by their "source" operand pair, for quick lookup.
llvm::sort(MF.DebugValueSubstitutions);
@@ -3037,7 +3511,8 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
// instructions, installing transfers.
MTracker->reset();
MTracker->loadFromArray(MInLocs[BBNum], BBNum);
- TTracker->loadInlocs(MBB, MInLocs[BBNum], Output[BBNum], NumLocs);
+ TTracker->loadInlocs(MBB, MInLocs[BBNum], DbgOpStore, Output[BBNum],
+ NumLocs);
CurBB = BBNum;
CurInst = 1;
@@ -3335,6 +3810,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
OverlapFragments.clear();
SeenFragments.clear();
SeenDbgPHIs.clear();
+ DbgOpStore.clear();
return Changed;
}
@@ -3596,7 +4072,7 @@ public:
} // end namespace llvm
-Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(
+std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(
MachineFunction &MF, const ValueTable *MLiveOuts,
const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
assert(MLiveOuts && MLiveIns &&
@@ -3605,17 +4081,17 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(
// This function will be called twice per DBG_INSTR_REF, and might end up
// computing lots of SSA information: memoize it.
- auto SeenDbgPHIIt = SeenDbgPHIs.find(&Here);
+ auto SeenDbgPHIIt = SeenDbgPHIs.find(std::make_pair(&Here, InstrNum));
if (SeenDbgPHIIt != SeenDbgPHIs.end())
return SeenDbgPHIIt->second;
- Optional<ValueIDNum> Result =
+ std::optional<ValueIDNum> Result =
resolveDbgPHIsImpl(MF, MLiveOuts, MLiveIns, Here, InstrNum);
- SeenDbgPHIs.insert({&Here, Result});
+ SeenDbgPHIs.insert({std::make_pair(&Here, InstrNum), Result});
return Result;
}
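
The widened cache key matters because a single DBG_INSTR_REF can now carry several instruction references, each of which resolves independently. The memoization pattern, reduced to a standalone sketch with resolveImpl elided:

    #include <cstdint>
    #include <map>
    #include <optional>
    #include <utility>

    struct Instr; // opaque stand-in for MachineInstr

    std::map<std::pair<const Instr *, uint64_t>, std::optional<int>> Seen;

    std::optional<int> resolve(const Instr *Here, uint64_t InstrNum) {
      auto It = Seen.find({Here, InstrNum});
      if (It != Seen.end())
        return It->second;                     // cache hit
      std::optional<int> Result = std::nullopt; // resolveImpl(...) elided
      Seen.insert({{Here, InstrNum}, Result});  // memoize even failures
      return Result;
    }
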
-Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
+std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
MachineFunction &MF, const ValueTable *MLiveOuts,
const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
// Pick out records of DBG_PHI instructions that have been observed. If there
@@ -3627,7 +4103,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
// No DBG_PHI means there can be no location.
if (LowerIt == UpperIt)
- return None;
+ return std::nullopt;
// If any DBG_PHIs referred to a location we didn't understand, don't try to
// compute a value. There might be scenarios where we could recover a value
@@ -3636,7 +4112,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
auto DBGPHIRange = make_range(LowerIt, UpperIt);
for (const DebugPHIRecord &DBG_PHI : DBGPHIRange)
if (!DBG_PHI.ValueRead)
- return None;
+ return std::nullopt;
// If there's only one DBG_PHI, then that is our value number.
if (std::distance(LowerIt, UpperIt) == 1)
@@ -3720,7 +4196,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
for (auto &PHIIt : PHI->IncomingValues) {
// Any undef input means DBG_PHIs didn't dominate the use point.
if (Updater.UndefMap.find(&PHIIt.first->BB) != Updater.UndefMap.end())
- return None;
+ return std::nullopt;
ValueIDNum ValueToCheck;
const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()];
@@ -3739,7 +4215,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
}
if (BlockLiveOuts[Loc.asU64()] != ValueToCheck)
- return None;
+ return std::nullopt;
}
// Record this value as validated.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index 70aae47c8bdc..2fdc37c6dda2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include <optional>
#include "LiveDebugValues.h"
@@ -30,6 +31,7 @@ class InstrRefLDVTest;
namespace LiveDebugValues {
class MLocTracker;
+class DbgOpIDMap;
using namespace llvm;
@@ -168,6 +170,40 @@ public:
static ValueIDNum TombstoneValue;
};
+} // End namespace LiveDebugValues
+
+namespace llvm {
+using namespace LiveDebugValues;
+
+template <> struct DenseMapInfo<LocIdx> {
+ static inline LocIdx getEmptyKey() { return LocIdx::MakeIllegalLoc(); }
+ static inline LocIdx getTombstoneKey() { return LocIdx::MakeTombstoneLoc(); }
+
+ static unsigned getHashValue(const LocIdx &Loc) { return Loc.asU64(); }
+
+ static bool isEqual(const LocIdx &A, const LocIdx &B) { return A == B; }
+};
+
+template <> struct DenseMapInfo<ValueIDNum> {
+ static inline ValueIDNum getEmptyKey() { return ValueIDNum::EmptyValue; }
+ static inline ValueIDNum getTombstoneKey() {
+ return ValueIDNum::TombstoneValue;
+ }
+
+ static unsigned getHashValue(const ValueIDNum &Val) {
+ return hash_value(Val.asU64());
+ }
+
+ static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) {
+ return A == B;
+ }
+};
+
+} // end namespace llvm
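
These specializations are what allow LocIdx and ValueIDNum to key DenseMaps such as the FoundLocs map used when resolving DBG_INSTR_REFs. The contract is small but strict: two reserved sentinel keys that no live key may ever equal, a hash, and equality. The same shape for an arbitrary 32-bit key type, as a reference sketch:

    #include <cstdint>

    struct Key {
      uint32_t V;
      bool operator==(const Key &O) const { return V == O.V; }
    };

    struct KeyInfo {
      // Sentinels must never collide with real keys, which is why the
      // illegal/tombstone LocIdx encodings and the UINT_MAX-based
      // ValueIDNum sentinels exist in the first place.
      static Key getEmptyKey() { return {0xFFFFFFFFu}; }
      static Key getTombstoneKey() { return {0xFFFFFFFEu}; }
      static unsigned getHashValue(const Key &K) { return K.V; }
      static bool isEqual(const Key &A, const Key &B) { return A == B; }
    };
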
+
+namespace LiveDebugValues {
+using namespace llvm;
+
/// Type for a table of values in a block.
using ValueTable = std::unique_ptr<ValueIDNum[]>;
@@ -199,41 +235,219 @@ public:
/// the value, and Boolean of whether or not it's indirect.
class DbgValueProperties {
public:
- DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
- : DIExpr(DIExpr), Indirect(Indirect) {}
+ DbgValueProperties(const DIExpression *DIExpr, bool Indirect, bool IsVariadic)
+ : DIExpr(DIExpr), Indirect(Indirect), IsVariadic(IsVariadic) {}
/// Extract properties from an existing DBG_VALUE instruction.
DbgValueProperties(const MachineInstr &MI) {
assert(MI.isDebugValue());
+ assert(MI.getDebugExpression()->getNumLocationOperands() == 0 ||
+ MI.isDebugValueList() || MI.isUndefDebugValue());
+ IsVariadic = MI.isDebugValueList();
DIExpr = MI.getDebugExpression();
- Indirect = MI.getOperand(1).isImm();
+ Indirect = MI.isDebugOffsetImm();
+ }
+
+ bool isJoinable(const DbgValueProperties &Other) const {
+ return DIExpression::isEqualExpression(DIExpr, Indirect, Other.DIExpr,
+ Other.Indirect);
}
bool operator==(const DbgValueProperties &Other) const {
- return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect);
+ return std::tie(DIExpr, Indirect, IsVariadic) ==
+ std::tie(Other.DIExpr, Other.Indirect, Other.IsVariadic);
}
bool operator!=(const DbgValueProperties &Other) const {
return !(*this == Other);
}
+ unsigned getLocationOpCount() const {
+ return IsVariadic ? DIExpr->getNumLocationOperands() : 1;
+ }
+
const DIExpression *DIExpr;
bool Indirect;
+ bool IsVariadic;
+};
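
Note the deliberate asymmetry introduced here: operator== compares all three fields including IsVariadic, while isJoinable ignores the variadic flag and defers to an expression-level comparison, so a DBG_VALUE and a semantically identical single-operand DBG_VALUE_LIST can still merge at control-flow joins. Schematically, with simplified stand-ins rather than the real comparison:

    struct Props { const void *Expr; bool Indirect; bool IsVariadic; };

    bool strictlyEqual(const Props &A, const Props &B) {
      return A.Expr == B.Expr && A.Indirect == B.Indirect &&
             A.IsVariadic == B.IsVariadic;
    }

    // Joinability drops IsVariadic; the real code compares expression
    // semantics via DIExpression::isEqualExpression, not raw pointers.
    bool joinable(const Props &A, const Props &B) {
      return A.Expr == B.Expr && A.Indirect == B.Indirect;
    }
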
+
+/// TODO: Might pack better if we changed this to a Struct of Arrays, since
+/// MachineOperand is width 32, making this struct width 33. We could also
+/// potentially avoid storing the whole MachineOperand (sizeof=32), instead
+/// choosing to store just the contents portion (sizeof=8) and a Kind enum,
+/// since we already know it is some type of immediate value.
+/// Stores a single debug operand, which can either be a MachineOperand for
+/// directly storing immediate values, or a ValueIDNum representing some value
+/// computed at some point in the program. IsConst is used as a discriminator.
+struct DbgOp {
+ union {
+ ValueIDNum ID;
+ MachineOperand MO;
+ };
+ bool IsConst;
+
+ DbgOp() : ID(ValueIDNum::EmptyValue), IsConst(false) {}
+ DbgOp(ValueIDNum ID) : ID(ID), IsConst(false) {}
+ DbgOp(MachineOperand MO) : MO(MO), IsConst(true) {}
+
+ bool isUndef() const { return !IsConst && ID == ValueIDNum::EmptyValue; }
+
+#ifndef NDEBUG
+ void dump(const MLocTracker *MTrack) const;
+#endif
+};
+
+/// A DbgOp whose ID (if any) has resolved to an actual location, LocIdx. Used
+/// when working with concrete debug values, i.e. when joining MLocs and VLocs
+/// in the TransferTracker or emitting DBG_VALUE/DBG_VALUE_LIST instructions in
+/// the MLocTracker.
+struct ResolvedDbgOp {
+ union {
+ LocIdx Loc;
+ MachineOperand MO;
+ };
+ bool IsConst;
+
+ ResolvedDbgOp(LocIdx Loc) : Loc(Loc), IsConst(false) {}
+ ResolvedDbgOp(MachineOperand MO) : MO(MO), IsConst(true) {}
+
+ bool operator==(const ResolvedDbgOp &Other) const {
+ if (IsConst != Other.IsConst)
+ return false;
+ if (IsConst)
+ return MO.isIdenticalTo(Other.MO);
+ return Loc == Other.Loc;
+ }
+
+#ifndef NDEBUG
+ void dump(const MLocTracker *MTrack) const;
+#endif
};
-/// Class recording the (high level) _value_ of a variable. Identifies either
-/// the value of the variable as a ValueIDNum, or a constant MachineOperand.
+/// An ID used in the DbgOpIDMap (below) to look up a stored DbgOp. This is
+/// used in place of actual DbgOps inside a DbgValue to reduce its size, as
+/// DbgValue is very frequently used and passed around, and the actual DbgOp is
+/// over 8x larger than this class, due to storing a MachineOperand. This ID
+/// should be equal for all equal DbgOps, and also encodes whether the mapped
+/// DbgOp is a constant, meaning that for simple equality or const-ness checks
+/// it is not necessary to lookup this ID.
+struct DbgOpID {
+ struct IsConstIndexPair {
+ uint32_t IsConst : 1;
+ uint32_t Index : 31;
+ };
+
+ union {
+ struct IsConstIndexPair ID;
+ uint32_t RawID;
+ };
+
+ DbgOpID() : RawID(UndefID.RawID) {
+ static_assert(sizeof(DbgOpID) == 4, "DbgOpID should fit within 4 bytes.");
+ }
+ DbgOpID(uint32_t RawID) : RawID(RawID) {}
+ DbgOpID(bool IsConst, uint32_t Index) : ID({IsConst, Index}) {}
+
+ static DbgOpID UndefID;
+
+ bool operator==(const DbgOpID &Other) const { return RawID == Other.RawID; }
+ bool operator!=(const DbgOpID &Other) const { return !(*this == Other); }
+
+ uint32_t asU32() const { return RawID; }
+
+ bool isUndef() const { return *this == UndefID; }
+ bool isConst() const { return ID.IsConst && !isUndef(); }
+ uint32_t getIndex() const { return ID.Index; }
+
+#ifndef NDEBUG
+ void dump(const MLocTracker *MTrack, const DbgOpIDMap *OpStore) const;
+#endif
+};
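Packing the const flag and the table index into one 32-bit word is what keeps the ID cheap enough to store eight per DbgValue; the bitfield leaves 2^31 - 1 usable indices on each side of the discriminator. A standalone round-trip sketch of the same layout (the names mirror the struct above, but this is not the LLVM type):

#include <cassert>
#include <cstdint>

struct PackedID {
  struct {
    uint32_t IsConst : 1;
    uint32_t Index : 31;
  } ID;
};

static_assert(sizeof(PackedID) == 4, "the whole ID fits in one 32-bit word");

int main() {
  PackedID P{{/*IsConst=*/1, /*Index=*/5}};
  assert(P.ID.IsConst == 1 && P.ID.Index == 5);
  // Const and non-const operands each index their own table, so the 1-bit
  // discriminator costs half the index space rather than a separate byte.
  return 0;
}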
+
+/// Class storing the complete set of values that are observed by DbgValues
+/// within the current function. Allows 2-way lookup, with `find` returning the
+/// Op for a given ID and `insert` returning the ID for a given Op (creating one
+/// if none exists).
+class DbgOpIDMap {
+
+ SmallVector<ValueIDNum, 0> ValueOps;
+ SmallVector<MachineOperand, 0> ConstOps;
+
+ DenseMap<ValueIDNum, DbgOpID> ValueOpToID;
+ DenseMap<MachineOperand, DbgOpID> ConstOpToID;
+
+public:
+ /// If \p Op does not already exist in this map, it is inserted and the
+ /// corresponding DbgOpID is returned. If Op already exists in this map, then
+ /// no change is made and the existing ID for Op is returned.
+ /// Calling this with the undef DbgOp will always return DbgOpID::UndefID.
+ DbgOpID insert(DbgOp Op) {
+ if (Op.isUndef())
+ return DbgOpID::UndefID;
+ if (Op.IsConst)
+ return insertConstOp(Op.MO);
+ return insertValueOp(Op.ID);
+ }
+ /// Returns the DbgOp associated with \p ID. Should only be used for IDs
+ /// returned by this map's `insert`, or for DbgOpID::UndefID.
+ DbgOp find(DbgOpID ID) const {
+ if (ID == DbgOpID::UndefID)
+ return DbgOp();
+ if (ID.isConst())
+ return DbgOp(ConstOps[ID.getIndex()]);
+ return DbgOp(ValueOps[ID.getIndex()]);
+ }
+
+ void clear() {
+ ValueOps.clear();
+ ConstOps.clear();
+ ValueOpToID.clear();
+ ConstOpToID.clear();
+ }
+
+private:
+ DbgOpID insertConstOp(MachineOperand &MO) {
+ auto ExistingIt = ConstOpToID.find(MO);
+ if (ExistingIt != ConstOpToID.end())
+ return ExistingIt->second;
+ DbgOpID ID(true, ConstOps.size());
+ ConstOpToID.insert(std::make_pair(MO, ID));
+ ConstOps.push_back(MO);
+ return ID;
+ }
+ DbgOpID insertValueOp(ValueIDNum VID) {
+ auto ExistingIt = ValueOpToID.find(VID);
+ if (ExistingIt != ValueOpToID.end())
+ return ExistingIt->second;
+ DbgOpID ID(false, ValueOps.size());
+ ValueOpToID.insert(std::make_pair(VID, ID));
+ ValueOps.push_back(VID);
+ return ID;
+ }
+};
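DbgOpIDMap is an interning table: inserting the same operand twice must hand back the same small ID, and find() inverts the mapping in O(1) by indexing a side vector. The same pattern over plain integers, as a runnable sketch (InternTable is illustrative):

#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Interns 64-bit values, handing out dense 32-bit IDs; lookup is O(1) in
// both directions.
class InternTable {
  std::vector<uint64_t> Values;                // ID -> value
  std::unordered_map<uint64_t, uint32_t> IDs;  // value -> ID
public:
  uint32_t insert(uint64_t V) {
    auto It = IDs.find(V);
    if (It != IDs.end())
      return It->second;
    uint32_t ID = Values.size();
    IDs.emplace(V, ID);
    Values.push_back(V);
    return ID;
  }
  uint64_t find(uint32_t ID) const { return Values[ID]; }
};

int main() {
  InternTable T;
  uint32_t A = T.insert(100), B = T.insert(100), C = T.insert(200);
  assert(A == B && A != C);  // duplicates intern to the same ID
  assert(T.find(C) == 200);  // and the ID maps back to the operand
  return 0;
}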
+
+// We set the maximum number of operands that we will handle to keep DbgValue
+// within a reasonable size (64 bytes), as we store and pass a lot of them
+// around.
+#define MAX_DBG_OPS 8
+
+/// Class recording the (high level) _value_ of a variable. Identifies the value
+/// of the variable as a list of ValueIDNums and constant MachineOperands, or as
+/// an empty list for undef debug values or VPHI values which we have not found
+/// valid locations for.
/// This class also stores meta-information about how the value is qualified.
/// Used to reason about variable values when performing the second
/// (DebugVariable specific) dataflow analysis.
class DbgValue {
+private:
+ /// If Kind is Def or VPHI, the set of IDs corresponding to the DbgOps that
+ /// are used. VPHIs set every ID to EmptyID when we have not found a valid
+ /// machine-value for every operand, and set them to the corresponding
+ /// machine-values when we have found all of them.
+ DbgOpID DbgOps[MAX_DBG_OPS];
+ unsigned OpCount;
+
public:
- /// If Kind is Def, the value number that this value is based on. VPHIs set
- /// this field to EmptyValue if there is no machine-value for this VPHI, or
- /// the corresponding machine-value if there is one.
- ValueIDNum ID;
- /// If Kind is Const, the MachineOperand defining this value.
- Optional<MachineOperand> MO;
/// For a NoVal or VPHI DbgValue, which block it was generated in.
int BlockNo;
@@ -242,8 +456,8 @@ public:
typedef enum {
Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
- Def, // This value is defined by an inst, or is a PHI value.
- Const, // A constant value contained in the MachineOperand field.
+ Def, // This value is defined by some combination of constants,
+ // instructions, or PHI values.
VPHI, // Incoming values to BlockNo differ, those values must be joined by
// a PHI in this block.
NoVal, // Empty DbgValue indicating an unknown value. Used as initializer,
@@ -252,52 +466,113 @@ public:
/// Discriminator for whether this is a constant or an in-program value.
KindT Kind;
- DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind)
- : ID(Val), MO(None), BlockNo(0), Properties(Prop), Kind(Kind) {
- assert(Kind == Def);
+ DbgValue(ArrayRef<DbgOpID> DbgOps, const DbgValueProperties &Prop)
+ : OpCount(DbgOps.size()), BlockNo(0), Properties(Prop), Kind(Def) {
+ static_assert(sizeof(DbgValue) <= 64,
+ "DbgValue should fit within 64 bytes.");
+ assert(DbgOps.size() == Prop.getLocationOpCount());
+ if (DbgOps.size() > MAX_DBG_OPS ||
+ any_of(DbgOps, [](DbgOpID ID) { return ID.isUndef(); })) {
+ Kind = Undef;
+ OpCount = 0;
+#define DEBUG_TYPE "LiveDebugValues"
+ if (DbgOps.size() > MAX_DBG_OPS) {
+ LLVM_DEBUG(dbgs() << "Found DbgValue with more than maximum allowed "
+ "operands.\n");
+ }
+#undef DEBUG_TYPE
+ } else {
+ for (unsigned Idx = 0; Idx < DbgOps.size(); ++Idx)
+ this->DbgOps[Idx] = DbgOps[Idx];
+ }
}
DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind)
- : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(BlockNo),
- Properties(Prop), Kind(Kind) {
+ : OpCount(0), BlockNo(BlockNo), Properties(Prop), Kind(Kind) {
assert(Kind == NoVal || Kind == VPHI);
}
- DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind)
- : ID(ValueIDNum::EmptyValue), MO(MO), BlockNo(0), Properties(Prop),
- Kind(Kind) {
- assert(Kind == Const);
- }
-
DbgValue(const DbgValueProperties &Prop, KindT Kind)
- : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(0), Properties(Prop),
- Kind(Kind) {
+ : OpCount(0), BlockNo(0), Properties(Prop), Kind(Kind) {
assert(Kind == Undef &&
"Empty DbgValue constructor must pass in Undef kind");
}
#ifndef NDEBUG
- void dump(const MLocTracker *MTrack) const;
+ void dump(const MLocTracker *MTrack = nullptr,
+ const DbgOpIDMap *OpStore = nullptr) const;
#endif
bool operator==(const DbgValue &Other) const {
if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties))
return false;
- else if (Kind == Def && ID != Other.ID)
+ else if (Kind == Def && !equal(getDbgOpIDs(), Other.getDbgOpIDs()))
return false;
else if (Kind == NoVal && BlockNo != Other.BlockNo)
return false;
- else if (Kind == Const)
- return MO->isIdenticalTo(*Other.MO);
else if (Kind == VPHI && BlockNo != Other.BlockNo)
return false;
- else if (Kind == VPHI && ID != Other.ID)
+ else if (Kind == VPHI && !equal(getDbgOpIDs(), Other.getDbgOpIDs()))
return false;
return true;
}
bool operator!=(const DbgValue &Other) const { return !(*this == Other); }
+
+ // Returns an array of all the machine values used to calculate this variable
+ // value, or an empty list for an Undef or unjoined VPHI.
+ ArrayRef<DbgOpID> getDbgOpIDs() const { return {DbgOps, OpCount}; }
+
+ // Returns DbgOps[Index] if this DbgValue has debug operands, or the ID for
+ // ValueIDNum::EmptyValue otherwise (i.e. if this is an Undef, NoVal, or an
+ // unjoined VPHI).
+ DbgOpID getDbgOpID(unsigned Index) const {
+ if (!OpCount)
+ return DbgOpID::UndefID;
+ assert(Index < OpCount);
+ return DbgOps[Index];
+ }
+ // Replaces this DbgValue's existing DbgOpIDs (if any) with the contents of
+ // \p NewIDs. The number of DbgOpIDs passed must be equal to the number of
+ // arguments expected by this DbgValue's properties (the return value of
+ // `getLocationOpCount()`).
+ void setDbgOpIDs(ArrayRef<DbgOpID> NewIDs) {
+ // We can go from no ops to some ops, but not from some ops to no ops.
+ assert(NewIDs.size() == getLocationOpCount() &&
+ "Incorrect number of Debug Operands for this DbgValue.");
+ OpCount = NewIDs.size();
+ for (unsigned Idx = 0; Idx < NewIDs.size(); ++Idx)
+ DbgOps[Idx] = NewIDs[Idx];
+ }
+
+ // The number of debug operands expected by this DbgValue's expression.
+ // getDbgOpIDs() should return an array of this length, unless this is an
+ // Undef or an unjoined VPHI.
+ unsigned getLocationOpCount() const {
+ return Properties.getLocationOpCount();
+ }
+
+ // Returns true if this or Other is an unjoined PHI (which has no defined
+ // Loc Ops), or if every Loc Op of this has the same const-ness as the
+ // corresponding Loc Op of Other. Returns false if any pair differs.
+ bool hasJoinableLocOps(const DbgValue &Other) const {
+ if (isUnjoinedPHI() || Other.isUnjoinedPHI())
+ return true;
+ for (unsigned Idx = 0; Idx < getLocationOpCount(); ++Idx) {
+ if (getDbgOpID(Idx).isConst() != Other.getDbgOpID(Idx).isConst())
+ return false;
+ }
+ return true;
+ }
+
+ bool isUnjoinedPHI() const { return Kind == VPHI && OpCount == 0; }
+
+ bool hasIdenticalValidLocOps(const DbgValue &Other) const {
+ if (!OpCount)
+ return false;
+ return equal(getDbgOpIDs(), Other.getDbgOpIDs());
+ }
};
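The static_assert in the Def constructor is the budget that everything above serves: eight 4-byte DbgOpIDs, the operand count, the block number, the properties, and the kind tag must all fit in one 64-byte cache line. A rough standalone sizing sketch (the field layout is illustrative; the real padding depends on the target ABI):

#include <cstdint>

struct ToyProps { const void *Expr; bool Indirect, IsVariadic; };  // ~16B padded

struct ToyDbgValue {
  uint32_t Ops[8];   // 32 bytes: MAX_DBG_OPS packed IDs
  unsigned OpCount;  //  4 bytes
  int BlockNo;       //  4 bytes
  ToyProps Props;    // 16 bytes after padding on LP64
  int Kind;          //  4 bytes; 60 in use, padded to 64
};

static_assert(sizeof(ToyDbgValue) <= 64, "stays within one cache line");

int main() { return 0; }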
class LocIdxToIndexFunctor {
@@ -620,9 +895,9 @@ public:
void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID);
/// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked.
- /// Returns None when in scenarios where a spill slot could be tracked, but
- /// we would likely run into resource limitations.
- Optional<SpillLocationNo> getOrTrackSpillLoc(SpillLoc L);
+ /// Returns std::nullopt when in scenarios where a spill slot could be
+ /// tracked, but we would likely run into resource limitations.
+ std::optional<SpillLocationNo> getOrTrackSpillLoc(SpillLoc L);
// Get LocIdx of a spill ID.
LocIdx getSpillMLoc(unsigned SpillID) {
@@ -667,10 +942,11 @@ public:
LLVM_DUMP_METHOD void dump_mloc_map();
#endif
- /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the
+ /// Create a DBG_VALUE based on debug operands \p DbgOps. Qualify it with the
/// information in \p Properties, for variable \p Var. Don't insert it anywhere,
/// just return the builder for it.
- MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var,
+ MachineInstrBuilder emitLoc(const SmallVectorImpl<ResolvedDbgOp> &DbgOps,
+ const DebugVariable &Var,
const DbgValueProperties &Properties);
};
@@ -704,32 +980,16 @@ public:
public:
VLocTracker(const OverlapMap &O, const DIExpression *EmptyExpr)
- : OverlappingFragments(O), EmptyProperties(EmptyExpr, false) {}
+ : OverlappingFragments(O), EmptyProperties(EmptyExpr, false, false) {}
void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
- Optional<ValueIDNum> ID) {
- assert(MI.isDebugValue() || MI.isDebugRef());
+ const SmallVectorImpl<DbgOpID> &DebugOps) {
+ assert(MI.isDebugValueLike());
DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt());
- DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def)
- : DbgValue(Properties, DbgValue::Undef);
-
- // Attempt insertion; overwrite if it's already mapped.
- auto Result = Vars.insert(std::make_pair(Var, Rec));
- if (!Result.second)
- Result.first->second = Rec;
- Scopes[Var] = MI.getDebugLoc().get();
-
- considerOverlaps(Var, MI.getDebugLoc().get());
- }
-
- void defVar(const MachineInstr &MI, const MachineOperand &MO) {
- // Only DBG_VALUEs can define constant-valued variables.
- assert(MI.isDebugValue());
- DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
- MI.getDebugLoc()->getInlinedAt());
- DbgValueProperties Properties(MI);
- DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const);
+ DbgValue Rec = (DebugOps.size() > 0)
+ ? DbgValue(DebugOps, Properties)
+ : DbgValue(Properties, DbgValue::Undef);
// Attempt insertion; overwrite if it's already mapped.
auto Result = Vars.insert(std::make_pair(Var, Rec));
@@ -751,9 +1011,9 @@ public:
// The "empty" fragment is stored as DebugVariable::DefaultFragment, so
// that it overlaps with everything; however, its canonical representation
// in a DebugVariable is "None".
- Optional<DIExpression::FragmentInfo> OptFragmentInfo = FragmentInfo;
+ std::optional<DIExpression::FragmentInfo> OptFragmentInfo = FragmentInfo;
if (DebugVariable::isDefaultFragment(FragmentInfo))
- OptFragmentInfo = None;
+ OptFragmentInfo = std::nullopt;
DebugVariable Overlapped(Var.getVariable(), OptFragmentInfo,
Var.getInlinedAt());
@@ -779,7 +1039,7 @@ public:
friend class ::InstrRefLDVTest;
using FragmentInfo = DIExpression::FragmentInfo;
- using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+ using OptFragmentInfo = std::optional<DIExpression::FragmentInfo>;
// Helper while building OverlapMap, a map of all fragments seen for a given
// DILocalVariable.
@@ -872,12 +1132,12 @@ private:
uint64_t InstrNum;
/// Block where DBG_PHI occurred.
MachineBasicBlock *MBB;
- /// The value number read by the DBG_PHI -- or None if it didn't refer to
- /// a value.
- Optional<ValueIDNum> ValueRead;
- /// Register/Stack location the DBG_PHI reads -- or None if it referred to
- /// something unexpected.
- Optional<LocIdx> ReadLoc;
+ /// The value number read by the DBG_PHI -- or std::nullopt if it didn't
+ /// refer to a value.
+ std::optional<ValueIDNum> ValueRead;
+ /// Register/Stack location the DBG_PHI reads -- or std::nullopt if it
+ /// referred to something unexpected.
+ std::optional<LocIdx> ReadLoc;
operator unsigned() const { return InstrNum; }
};
@@ -896,7 +1156,10 @@ private:
/// DBG_INSTR_REFs that call resolveDbgPHIs. These variable references solve
/// a mini SSA problem caused by DBG_PHIs being cloned; this collection caches
/// the result.
- DenseMap<MachineInstr *, Optional<ValueIDNum>> SeenDbgPHIs;
+ DenseMap<std::pair<MachineInstr *, unsigned>, std::optional<ValueIDNum>>
+ SeenDbgPHIs;
+
+ DbgOpIDMap DbgOpStore;
/// True if we need to examine call instructions for stack clobbers. We
/// normally assume that they don't clobber SP, but stack probes on Windows
@@ -909,8 +1172,8 @@ private:
StringRef StackProbeSymbolName;
/// Tests whether this instruction is a spill to a stack slot.
- Optional<SpillLocationNo> isSpillInstruction(const MachineInstr &MI,
- MachineFunction *MF);
+ std::optional<SpillLocationNo> isSpillInstruction(const MachineInstr &MI,
+ MachineFunction *MF);
/// Decide if \p MI is a spill instruction and return true if it is. We use
/// two criteria to make this decision:
@@ -923,14 +1186,23 @@ private:
/// If a given instruction is identified as a spill, return the spill slot
/// and set \p Reg to the spilled register.
- Optional<SpillLocationNo> isRestoreInstruction(const MachineInstr &MI,
- MachineFunction *MF, unsigned &Reg);
+ std::optional<SpillLocationNo> isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF,
+ unsigned &Reg);
/// Given a spill instruction, extract the spill slot information, ensure it's
/// tracked, and return the spill number.
- Optional<SpillLocationNo>
+ std::optional<SpillLocationNo>
extractSpillBaseRegAndOffset(const MachineInstr &MI);
+ /// For an instruction reference given by \p InstNo and \p OpNo in instruction
+ /// \p MI, returns the value pointed to by that instruction reference if one
+ /// exists; otherwise returns std::nullopt.
+ std::optional<ValueIDNum> getValueForInstrRef(unsigned InstNo, unsigned OpNo,
+ MachineInstr &MI,
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns);
+
/// Observe a single instruction while stepping through a block.
void process(MachineInstr &MI, const ValueTable *MLiveOuts,
const ValueTable *MLiveIns);
@@ -972,17 +1244,18 @@ private:
/// forming another mini-ssa problem to solve.
/// \p Here the position of a DBG_INSTR_REF seeking a machine value number
/// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
- /// \returns The machine value number at position Here, or None.
- Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
- const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns,
- MachineInstr &Here, uint64_t InstrNum);
-
- Optional<ValueIDNum> resolveDbgPHIsImpl(MachineFunction &MF,
- const ValueTable *MLiveOuts,
- const ValueTable *MLiveIns,
- MachineInstr &Here,
- uint64_t InstrNum);
+ /// \returns The machine value number at position Here, or std::nullopt.
+ std::optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns,
+ MachineInstr &Here,
+ uint64_t InstrNum);
+
+ std::optional<ValueIDNum> resolveDbgPHIsImpl(MachineFunction &MF,
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns,
+ MachineInstr &Here,
+ uint64_t InstrNum);
/// Step through the function, recording register definitions and movements
/// in an MLocTracker. Convert the observations into a per-block transfer
@@ -1086,14 +1359,21 @@ private:
SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
DbgValue &LiveIn);
- /// For the given block and live-outs feeding into it, try to find a
- /// machine location where all the variable values join together.
- /// \returns Value ID of a machine PHI if an appropriate one is available.
- Optional<ValueIDNum>
- pickVPHILoc(const MachineBasicBlock &MBB, const DebugVariable &Var,
+ /// For the given block and live-outs feeding into it, try to find
+ /// machine locations for each debug operand where all the values feeding
+ /// into that operand join together.
+ /// \returns true if a joined location was found for every value that needed
+ /// to be joined.
+ bool
+ pickVPHILoc(SmallVectorImpl<DbgOpID> &OutValues, const MachineBasicBlock &MBB,
const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs,
const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
+ std::optional<ValueIDNum> pickOperandPHILoc(
+ unsigned DbgOpIdx, const MachineBasicBlock &MBB, const LiveIdxT &LiveOuts,
+ FuncValueTable &MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
+
/// Take collections of DBG_VALUE instructions stored in TTracker, and
/// install them into their output blocks. Preserves a stable order of
/// DBG_VALUEs produced (which would otherwise cause nondeterminism) through
@@ -1138,6 +1418,7 @@ public:
void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
bool isCalleeSaved(LocIdx L) const;
+ bool isCalleeSavedReg(Register R) const;
bool hasFoldedStackStore(const MachineInstr &MI) {
// Instruction must have a memory operand that's a stack slot, and isn't
@@ -1152,38 +1433,9 @@ public:
&& !MemOperand->getPseudoValue()->isAliased(MFI);
}
- Optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI);
+ std::optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI);
};
} // namespace LiveDebugValues
-namespace llvm {
-using namespace LiveDebugValues;
-
-template <> struct DenseMapInfo<LocIdx> {
- static inline LocIdx getEmptyKey() { return LocIdx::MakeIllegalLoc(); }
- static inline LocIdx getTombstoneKey() { return LocIdx::MakeTombstoneLoc(); }
-
- static unsigned getHashValue(const LocIdx &Loc) { return Loc.asU64(); }
-
- static bool isEqual(const LocIdx &A, const LocIdx &B) { return A == B; }
-};
-
-template <> struct DenseMapInfo<ValueIDNum> {
- static inline ValueIDNum getEmptyKey() { return ValueIDNum::EmptyValue; }
- static inline ValueIDNum getTombstoneKey() {
- return ValueIDNum::TombstoneValue;
- }
-
- static unsigned getHashValue(const ValueIDNum &Val) {
- return hash_value(Val.asU64());
- }
-
- static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) {
- return A == B;
- }
-};
-
-} // end namespace llvm
-
#endif /* LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H */
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 141008ac2296..9dba9a88f703 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -18,6 +18,7 @@
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
/// \file LiveDebugValues.cpp
///
@@ -72,11 +73,6 @@ public:
/// Calculate the liveness information for the given machine function.
bool runOnMachineFunction(MachineFunction &MF) override;
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
-
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -106,6 +102,14 @@ LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
}
bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
+ // Except for Wasm, all targets should be using only physical registers at
+ // this point. Wasm uses virtual registers throughout its pipeline, but its
+ // virtual registers don't participate in this LiveDebugValues analysis; only
+ // its target indices do.
+ assert(MF.getTarget().getTargetTriple().isWasm() ||
+ MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs));
+
bool InstrRefBased = MF.useDebugInstrRef();
// Allow the user to force selection of InstrRef LDV.
InstrRefBased |= ForceInstrRefLDV;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index 32e07eb77efe..b78757b855f4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -10,12 +10,13 @@
///
/// LiveDebugValues is an optimistic "available expressions" dataflow
/// algorithm. The set of expressions is the set of machine locations
-/// (registers, spill slots, constants) that a variable fragment might be
-/// located, qualified by a DIExpression and indirect-ness flag, while each
-/// variable is identified by a DebugVariable object. The availability of an
-/// expression begins when a DBG_VALUE instruction specifies the location of a
-/// DebugVariable, and continues until that location is clobbered or
-/// re-specified by a different DBG_VALUE for the same DebugVariable.
+/// (registers, spill slots, constants, and target indices) that a variable
+/// fragment might be located, qualified by a DIExpression and indirect-ness
+/// flag, while each variable is identified by a DebugVariable object. The
+/// availability of an expression begins when a DBG_VALUE instruction specifies
+/// the location of a DebugVariable, and continues until that location is
+/// clobbered or re-specified by a different DBG_VALUE for the same
+/// DebugVariable.
///
/// The output of LiveDebugValues is additional DBG_VALUE instructions,
/// placed to extend variable locations as far as they're available. This file
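The "available expressions" framing above reduces to a forward dataflow over sets of open variable locations: a block's live-in set is the intersection of its predecessors' live-out sets, so a location survives only if every path agrees on it. A toy runnable sketch of that join over a diamond CFG (the block graph and sets are invented for illustration):

#include <bitset>
#include <cstdio>
#include <vector>

using LocSet = std::bitset<8>;  // one bit per tracked (variable, location) pair

int main() {
  // Diamond CFG: block 0 branches to 1 and 2, which rejoin at 3.
  std::vector<std::vector<int>> Preds = {{}, {0}, {0}, {1, 2}};
  std::vector<LocSet> Out(4);
  Out[0] = LocSet("0111");  // entry makes locations 0, 1, 2 available
  Out[1] = Out[0];          // the left path preserves all three
  Out[2] = LocSet("0101");  // the right path clobbers location 1

  // Join at the confluence: a location is live-in only if it is available
  // along *every* incoming path, hence intersection, hence the "optimistic"
  // initialization to the full set before narrowing.
  LocSet In3;
  In3.set();
  for (int P : Preds[3])
    In3 &= Out[P];
  std::printf("live-in of block 3: %s\n", In3.to_string().c_str());  // 00000101
  return 0;
}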
@@ -148,6 +149,7 @@
#include <cstdint>
#include <functional>
#include <map>
+#include <optional>
#include <queue>
#include <tuple>
#include <utility>
@@ -229,6 +231,14 @@ struct LocIndex {
static constexpr u32_location_t kEntryValueBackupLocation =
kFirstInvalidRegLocation + 1;
+ /// A special location reserved for VarLocs with locations of kind
+ /// WasmLocKind.
+ /// TODO: Placing all Wasm target index locations in this single kWasmLocation
+ /// may slow down compilation of very large functions. Consider giving each
+ /// target index/offset pair its own u32_location_t if this becomes a problem.
+ static constexpr u32_location_t kWasmLocation = kFirstInvalidRegLocation + 2;
+
LocIndex(u32_location_t Location, u32_index_t Index)
: Location(Location), Index(Index) {}
@@ -237,8 +247,7 @@ struct LocIndex {
}
template<typename IntT> static LocIndex fromRawInteger(IntT ID) {
- static_assert(std::is_unsigned<IntT>::value &&
- sizeof(ID) == sizeof(uint64_t),
+ static_assert(std::is_unsigned_v<IntT> && sizeof(ID) == sizeof(uint64_t),
"Cannot convert raw integer to LocIndex");
return {static_cast<u32_location_t>(ID >> 32),
static_cast<u32_index_t>(ID)};
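fromRawInteger depends on the invariant that a raw 64-bit ID carries the location number in its high 32 bits and the per-location index in the low 32; the static_assert rejects narrower integer types at compile time. A standalone round-trip sketch of that encoding (toRaw is an illustrative helper, not LLVM's API):

#include <cassert>
#include <cstdint>

// Illustrative packing helper mirroring the layout LocIndex assumes; the
// real class exposes this via its raw-integer conversions.
static uint64_t toRaw(uint32_t Location, uint32_t Index) {
  return (uint64_t(Location) << 32) | Index;
}

int main() {
  uint64_t Raw = toRaw(/*Location=*/3, /*Index=*/41);
  // Mirrors fromRawInteger: the high word selects the location, the low word
  // indexes within it.
  assert(uint32_t(Raw >> 32) == 3);
  assert(uint32_t(Raw) == 41);
  // Because the location occupies the high bits, sorting raw IDs groups all
  // VarLocs of one location into a contiguous range, which is what the
  // per-location range queries below exploit.
  return 0;
}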
@@ -282,7 +291,7 @@ private:
enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore };
using FragmentInfo = DIExpression::FragmentInfo;
- using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+ using OptFragmentInfo = std::optional<DIExpression::FragmentInfo>;
/// A pair of debug variable and value location.
struct VarLoc {
@@ -299,6 +308,21 @@ private:
}
};
+ // Target indices used for wasm-specific locations.
+ struct WasmLoc {
+ // One of the TargetIndex values defined in WebAssembly.h. We deal with
+ // local-related TargetIndices in this analysis (TI_LOCAL and
+ // TI_LOCAL_INDIRECT). Stack operands (TI_OPERAND_STACK) are handled
+ // separately by the WebAssemblyDebugFixup pass, and we don't associate
+ // debug info with values in global operands (TI_GLOBAL_RELOC) at the moment.
+ int Index;
+ int64_t Offset;
+ bool operator==(const WasmLoc &Other) const {
+ return Index == Other.Index && Offset == Other.Offset;
+ }
+ bool operator!=(const WasmLoc &Other) const { return !(*this == Other); }
+ };
+
/// Identity of the variable at this location.
const DebugVariable Var;
@@ -313,7 +337,8 @@ private:
InvalidKind = 0,
RegisterKind,
SpillLocKind,
- ImmediateKind
+ ImmediateKind,
+ WasmLocKind
};
enum class EntryValueLocKind {
@@ -332,6 +357,7 @@ private:
int64_t Immediate;
const ConstantFP *FPImm;
const ConstantInt *CImm;
+ WasmLoc WasmLocation;
MachineLocValue() : Hash(0) {}
};
@@ -348,6 +374,8 @@ private:
switch (Kind) {
case MachineLocKind::SpillLocKind:
return Value.SpillLocation == Other.Value.SpillLocation;
+ case MachineLocKind::WasmLocKind:
+ return Value.WasmLocation == Other.Value.WasmLocation;
case MachineLocKind::RegisterKind:
case MachineLocKind::ImmediateKind:
return Value.Hash == Other.Value.Hash;
@@ -366,6 +394,11 @@ private:
Other.Kind, Other.Value.SpillLocation.SpillBase,
Other.Value.SpillLocation.SpillOffset.getFixed(),
Other.Value.SpillLocation.SpillOffset.getScalable());
+ case MachineLocKind::WasmLocKind:
+ return std::make_tuple(Kind, Value.WasmLocation.Index,
+ Value.WasmLocation.Offset) <
+ std::make_tuple(Other.Kind, Other.Value.WasmLocation.Index,
+ Other.Value.WasmLocation.Offset);
case MachineLocKind::RegisterKind:
case MachineLocKind::ImmediateKind:
return std::tie(Kind, Value.Hash) <
@@ -386,7 +419,7 @@ private:
/// emitting a debug value.
SmallVector<unsigned, 8> OrigLocMap;
- VarLoc(const MachineInstr &MI, LexicalScopes &LS)
+ VarLoc(const MachineInstr &MI)
: Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt()),
Expr(MI.getDebugExpression()), MI(MI) {
@@ -429,6 +462,9 @@ private:
} else if (Op.isCImm()) {
Kind = MachineLocKind::ImmediateKind;
Loc.CImm = Op.getCImm();
+ } else if (Op.isTargetIndex()) {
+ Kind = MachineLocKind::WasmLocKind;
+ Loc.WasmLocation = {Op.getIndex(), Op.getOffset()};
} else
llvm_unreachable("Invalid Op kind for MachineLoc.");
return {Kind, Loc};
@@ -436,9 +472,9 @@ private:
/// Take the variable and machine-location in DBG_VALUE MI, and build an
/// entry location using the given expression.
- static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS,
+ static VarLoc CreateEntryLoc(const MachineInstr &MI,
const DIExpression *EntryExpr, Register Reg) {
- VarLoc VL(MI, LS);
+ VarLoc VL(MI);
assert(VL.Locs.size() == 1 &&
VL.Locs[0].Kind == MachineLocKind::RegisterKind);
VL.EVKind = EntryValueLocKind::EntryValueKind;
@@ -452,9 +488,8 @@ private:
/// location will turn into the normal location if the backup is valid at
/// the time of the primary location clobbering.
static VarLoc CreateEntryBackupLoc(const MachineInstr &MI,
- LexicalScopes &LS,
const DIExpression *EntryExpr) {
- VarLoc VL(MI, LS);
+ VarLoc VL(MI);
assert(VL.Locs.size() == 1 &&
VL.Locs[0].Kind == MachineLocKind::RegisterKind);
VL.EVKind = EntryValueLocKind::EntryValueBackupKind;
@@ -466,10 +501,9 @@ private:
/// function entry), and build a copy of an entry value backup location by
/// setting the register location to NewReg.
static VarLoc CreateEntryCopyBackupLoc(const MachineInstr &MI,
- LexicalScopes &LS,
const DIExpression *EntryExpr,
Register NewReg) {
- VarLoc VL(MI, LS);
+ VarLoc VL(MI);
assert(VL.Locs.size() == 1 &&
VL.Locs[0].Kind == MachineLocKind::RegisterKind);
VL.EVKind = EntryValueLocKind::EntryValueCopyBackupKind;
@@ -564,6 +598,10 @@ private:
MOs.push_back(Orig);
break;
}
+ case MachineLocKind::WasmLocKind: {
+ MOs.push_back(Orig);
+ break;
+ }
case MachineLocKind::InvalidKind:
llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc");
}
@@ -649,6 +687,21 @@ private:
llvm_unreachable("Could not find given SpillLoc in Locs");
}
+ bool containsWasmLocs() const {
+ return any_of(Locs, [](VarLoc::MachineLoc ML) {
+ return ML.Kind == VarLoc::MachineLocKind::WasmLocKind;
+ });
+ }
+
+ /// If this variable is described in whole or part by \p WasmLocation,
+ /// return true.
+ bool usesWasmLoc(WasmLoc WasmLocation) const {
+ MachineLoc WasmML;
+ WasmML.Kind = MachineLocKind::WasmLocKind;
+ WasmML.Value.WasmLocation = WasmLocation;
+ return is_contained(Locs, WasmML);
+ }
+
/// Determine whether the lexical scope of this value's debug location
/// dominates MBB.
bool dominates(LexicalScopes &LS, MachineBasicBlock &MBB) const {
@@ -656,8 +709,9 @@ private:
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- // TRI can be null.
- void dump(const TargetRegisterInfo *TRI, raw_ostream &Out = dbgs()) const {
+ // TRI and TII can be null.
+ void dump(const TargetRegisterInfo *TRI, const TargetInstrInfo *TII,
+ raw_ostream &Out = dbgs()) const {
Out << "VarLoc(";
for (const MachineLoc &MLoc : Locs) {
if (Locs.begin() != &MLoc)
@@ -676,6 +730,22 @@ private:
case MachineLocKind::ImmediateKind:
Out << MLoc.Value.Immediate;
break;
+ case MachineLocKind::WasmLocKind: {
+ if (TII) {
+ auto Indices = TII->getSerializableTargetIndices();
+ auto Found =
+ find_if(Indices, [&](const std::pair<int, const char *> &I) {
+ return I.first == MLoc.Value.WasmLocation.Index;
+ });
+ assert(Found != Indices.end());
+ Out << Found->second;
+ if (MLoc.Value.WasmLocation.Offset > 0)
+ Out << " + " << MLoc.Value.WasmLocation.Offset;
+ } else {
+ Out << "WasmLoc";
+ }
+ break;
+ }
case MachineLocKind::InvalidKind:
llvm_unreachable("Invalid VarLoc in dump method");
}
@@ -743,10 +813,10 @@ private:
return RegNo < LocIndex::kFirstInvalidRegLocation;
}) &&
"Physreg out of range?");
- if (VL.containsSpillLocs()) {
- LocIndex::u32_location_t Loc = LocIndex::kSpillLocation;
- Locations.push_back(Loc);
- }
+ if (VL.containsSpillLocs())
+ Locations.push_back(LocIndex::kSpillLocation);
+ if (VL.containsWasmLocs())
+ Locations.push_back(LocIndex::kWasmLocation);
} else if (VL.EVKind != VarLoc::EntryValueLocKind::EntryValueKind) {
LocIndex::u32_location_t Loc = LocIndex::kEntryValueBackupLocation;
Locations.push_back(Loc);
@@ -859,7 +929,7 @@ private:
/// Insert a set of ranges.
void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map);
- llvm::Optional<LocIndices> getEntryValueBackup(DebugVariable Var);
+ std::optional<LocIndices> getEntryValueBackup(DebugVariable Var);
/// Empty the set.
void clear() {
@@ -899,6 +969,12 @@ private:
return LocIndex::indexRangeForLocation(
getVarLocs(), LocIndex::kEntryValueBackupLocation);
}
+
+ /// Get all set IDs for VarLocs with MLs of kind WasmLocKind.
+ auto getWasmVarLocs() const {
+ return LocIndex::indexRangeForLocation(getVarLocs(),
+ LocIndex::kWasmLocation);
+ }
};
/// Collect all VarLoc IDs from \p CollectFrom for VarLocs with MLs of kind
@@ -946,9 +1022,9 @@ private:
/// If a given instruction is identified as a spill, return the spill location
/// and set \p Reg to the spilled register.
- Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI,
- MachineFunction *MF,
- Register &Reg);
+ std::optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF,
+ Register &Reg);
/// Given a spill instruction, extract the register and offset used to
/// address the spill location in a target independent way.
VarLoc::SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
@@ -985,6 +1061,8 @@ private:
VarLocMap &VarLocIDs,
InstToEntryLocMap &EntryValTransfers,
RegDefToInstMap &RegSetInstrs);
+ void transferWasmDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs);
bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
@@ -1110,13 +1188,13 @@ void VarLocBasedLDV::OpenRangesSet::insert(LocIndices VarLocIDs,
/// Return the Loc ID of an entry value backup location, if it exists for the
/// variable.
-llvm::Optional<LocIndices>
+std::optional<LocIndices>
VarLocBasedLDV::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
auto It = EntryValuesBackupVars.find(Var);
if (It != EntryValuesBackupVars.end())
return It->second;
- return llvm::None;
+ return std::nullopt;
}
void VarLocBasedLDV::collectIDsForRegs(VarLocsInRange &Collected,
@@ -1203,7 +1281,7 @@ void VarLocBasedLDV::printVarLocInMBB(const MachineFunction &MF,
for (const VarLoc &VL : VarLocs) {
Out << " Var: " << VL.Var.getVariable()->getName();
Out << " MI: ";
- VL.dump(TRI, Out);
+ VL.dump(TRI, TII, Out);
}
}
Out << "\n";
@@ -1341,10 +1419,10 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
if (all_of(MI.debug_operands(), [](const MachineOperand &MO) {
return (MO.isReg() && MO.getReg()) || MO.isImm() || MO.isFPImm() ||
- MO.isCImm();
+ MO.isCImm() || MO.isTargetIndex();
})) {
// Use normal VarLoc constructor for registers and immediates.
- VarLoc VL(MI, LS);
+ VarLoc VL(MI);
// End all previous ranges of VL.Var.
OpenRanges.erase(VL);
@@ -1357,7 +1435,7 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
// This must be an undefined location. If it has an open range, erase it.
assert(MI.isUndefDebugValue() &&
"Unexpected non-undef DBG_VALUE encountered");
- VarLoc VL(MI, LS);
+ VarLoc VL(MI);
OpenRanges.erase(VL);
}
}
@@ -1398,7 +1476,7 @@ void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
continue;
auto DebugVar = VL.Var;
- Optional<LocIndices> EntryValBackupIDs =
+ std::optional<LocIndices> EntryValBackupIDs =
OpenRanges.getEntryValueBackup(DebugVar);
// If the parameter has the entry value backup, it means we should
@@ -1407,7 +1485,7 @@ void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
continue;
const VarLoc &EntryVL = VarLocIDs[EntryValBackupIDs->back()];
- VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr,
+ VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, EntryVL.Expr,
EntryVL.Locs[0].Value.RegNo);
LocIndices EntryValueIDs = VarLocIDs.insert(EntryLoc);
assert(EntryValueIDs.size() == 1 &&
@@ -1454,7 +1532,7 @@ void VarLocBasedLDV::insertTransferDebugPair(
ProcessVarLoc(VL);
LLVM_DEBUG({
dbgs() << "Creating VarLoc for register copy:";
- VL.dump(TRI);
+ VL.dump(TRI, TII);
});
return;
}
@@ -1467,7 +1545,7 @@ void VarLocBasedLDV::insertTransferDebugPair(
ProcessVarLoc(VL);
LLVM_DEBUG({
dbgs() << "Creating VarLoc for spill:";
- VL.dump(TRI);
+ VL.dump(TRI, TII);
});
return;
}
@@ -1480,7 +1558,7 @@ void VarLocBasedLDV::insertTransferDebugPair(
ProcessVarLoc(VL);
LLVM_DEBUG({
dbgs() << "Creating VarLoc for restore:";
- VL.dump(TRI);
+ VL.dump(TRI, TII);
});
return;
}
@@ -1509,8 +1587,7 @@ void VarLocBasedLDV::transferRegisterDef(MachineInstr &MI,
SmallVector<const uint32_t *, 4> RegMasks;
for (const MachineOperand &MO : MI.operands()) {
// Determine whether the operand is a register def.
- if (MO.isReg() && MO.isDef() && MO.getReg() &&
- Register::isPhysicalRegister(MO.getReg()) &&
+ if (MO.isReg() && MO.isDef() && MO.getReg() && MO.getReg().isPhysical() &&
!(MI.isCall() && MO.getReg() == SP)) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
@@ -1565,6 +1642,30 @@ void VarLocBasedLDV::transferRegisterDef(MachineInstr &MI,
}
}
+void VarLocBasedLDV::transferWasmDef(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs) {
+ // If this is not a Wasm local.set or local.tee, which set local values,
+ // return.
+ int Index;
+ int64_t Offset;
+ if (!TII->isExplicitTargetIndexDef(MI, Index, Offset))
+ return;
+
+ // Find the target indices killed by MI, and delete those variable locations
+ // from the open range.
+ VarLocsInRange KillSet;
+ VarLoc::WasmLoc Loc{Index, Offset};
+ for (uint64_t ID : OpenRanges.getWasmVarLocs()) {
+ LocIndex Idx = LocIndex::fromRawInteger(ID);
+ const VarLoc &VL = VarLocIDs[Idx];
+ assert(VL.containsWasmLocs() && "Broken VarLocSet?");
+ if (VL.usesWasmLoc(Loc))
+ KillSet.insert(ID);
+ }
+ OpenRanges.erase(KillSet, VarLocIDs, LocIndex::kWasmLocation);
+}
+
bool VarLocBasedLDV::isSpillInstruction(const MachineInstr &MI,
MachineFunction *MF) {
// TODO: Handle multiple stores folded into one.
@@ -1618,11 +1719,11 @@ bool VarLocBasedLDV::isLocationSpill(const MachineInstr &MI,
return false;
}
-Optional<VarLocBasedLDV::VarLoc::SpillLoc>
+std::optional<VarLocBasedLDV::VarLoc::SpillLoc>
VarLocBasedLDV::isRestoreInstruction(const MachineInstr &MI,
- MachineFunction *MF, Register &Reg) {
+ MachineFunction *MF, Register &Reg) {
if (!MI.hasOneMemOperand())
- return None;
+ return std::nullopt;
// FIXME: Handle folded restore instructions with more than one memory
// operand.
@@ -1630,7 +1731,7 @@ VarLocBasedLDV::isRestoreInstruction(const MachineInstr &MI,
Reg = MI.getOperand(0).getReg();
return extractSpillBaseRegAndOffset(MI);
}
- return None;
+ return std::nullopt;
}
/// A spilled register may indicate that we have to end the current range of
@@ -1647,7 +1748,7 @@ void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI,
MachineFunction *MF = MI.getMF();
TransferKind TKind;
Register Reg;
- Optional<VarLoc::SpillLoc> Loc;
+ std::optional<VarLoc::SpillLoc> Loc;
LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
@@ -1777,7 +1878,7 @@ void VarLocBasedLDV::transferRegisterCopy(MachineInstr &MI,
if (VL.isEntryValueBackupReg(SrcReg)) {
LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump(););
VarLoc EntryValLocCopyBackup =
- VarLoc::CreateEntryCopyBackupLoc(VL.MI, LS, VL.Expr, DestReg);
+ VarLoc::CreateEntryCopyBackupLoc(VL.MI, VL.Expr, DestReg);
// Stop tracking the original entry value.
OpenRanges.erase(VL);
@@ -1818,7 +1919,7 @@ bool VarLocBasedLDV::transferTerminator(MachineBasicBlock *CurMBB,
for (VarLoc &VL : VarLocs) {
// Copy OpenRanges to OutLocs, if not already present.
dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": ";
- VL.dump(TRI);
+ VL.dump(TRI, TII);
}
});
VarLocSet &VLS = getVarLocsInMBB(CurMBB, OutLocs);
@@ -1903,6 +2004,7 @@ void VarLocBasedLDV::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
RegSetInstrs);
transferRegisterDef(MI, OpenRanges, VarLocIDs, EntryValTransfers,
RegSetInstrs);
+ transferWasmDef(MI, OpenRanges, VarLocIDs);
transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
}
@@ -2058,10 +2160,13 @@ bool VarLocBasedLDV::isEntryValueCandidate(
/// Collect all register defines (including aliases) for the given instruction.
static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs,
const TargetRegisterInfo *TRI) {
- for (const MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isDef() && MO.getReg())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() && MO.getReg().isPhysical()) {
+ Regs.insert(MO.getReg());
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
Regs.insert(*AI);
+ }
+ }
}
/// This routine records the entry values of function parameters. The values
@@ -2090,7 +2195,7 @@ void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI,
// valid. It is valid until a parameter is not changed.
DIExpression *NewExpr =
DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue);
- VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, LS, NewExpr);
+ VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, NewExpr);
LocIndices EntryValLocIDs = VarLocIDs.insert(EntryValLocAsBackup);
OpenRanges.insert(EntryValLocIDs, EntryValLocAsBackup);
}
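The backup VarLoc built here wraps the parameter's expression in an entry-value prefix, so that if the register is later clobbered the variable can still be described in terms of its value on function entry. A hedged standalone sketch of what such a prepend produces; the opcode constants are illustrative stand-ins for the real DWARF encodings, and the real work is done by DIExpression::prepend:

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative opcode values; LLVM's live in llvm/BinaryFormat/Dwarf.h.
enum : uint64_t { DW_OP_LLVM_entry_value = 0xe1, DW_OP_plus_uconst = 0x23 };

// Toy analogue of DIExpression::prepend(Expr, DIExpression::EntryValue): wrap
// the expression in an entry-value operator whose operand says how many of
// the following location operands it covers (one register here).
std::vector<uint64_t> prependEntryValue(std::vector<uint64_t> Expr) {
  Expr.insert(Expr.begin(), {DW_OP_LLVM_entry_value, 1});
  return Expr;
}

int main() {
  std::vector<uint64_t> Expr = {DW_OP_plus_uconst, 8};  // param at reg + 8
  auto Entry = prependEntryValue(Expr);
  assert(Entry.size() == 4 && Entry.front() == DW_OP_LLVM_entry_value);
  return 0;
}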
@@ -2102,7 +2207,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF,
TargetPassConfig *TPC, unsigned InputBBLimit,
unsigned InputDbgValLimit) {
(void)DomTree;
- LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
+ LLVM_DEBUG(dbgs() << "\nDebug Range Extension: " << MF.getName() << "\n");
if (!MF.getFunction().getSubprogram())
// VarLocBaseLDV will already have removed all DBG_VALUEs.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 574c0f98161e..9603c1f01e08 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -59,6 +59,7 @@
#include <cassert>
#include <iterator>
#include <memory>
+#include <optional>
#include <utility>
using namespace llvm;
@@ -137,8 +138,7 @@ public:
// Turn this into an undef debug value list; right now, the simplest form
// of this is an expression with one arg, and an undef debug operand.
Expression =
- DIExpression::get(Expr.getContext(), {dwarf::DW_OP_LLVM_arg, 0,
- dwarf::DW_OP_stack_value});
+ DIExpression::get(Expr.getContext(), {dwarf::DW_OP_LLVM_arg, 0});
if (auto FragmentInfoOpt = Expr.getFragmentInfo())
Expression = *DIExpression::createFragmentExpression(
Expression, FragmentInfoOpt->OffsetInBits,
@@ -286,7 +286,7 @@ class LDVImpl;
class UserValue {
const DILocalVariable *Variable; ///< The debug info variable we are part of.
/// The part of the variable we describe.
- const Optional<DIExpression::FragmentInfo> Fragment;
+ const std::optional<DIExpression::FragmentInfo> Fragment;
DebugLoc dl; ///< The debug location for the variable. This is
///< used by dwarf writer to find lexical scope.
UserValue *leader; ///< Equivalence class leader.
@@ -319,7 +319,7 @@ class UserValue {
public:
/// Create a new UserValue.
UserValue(const DILocalVariable *var,
- Optional<DIExpression::FragmentInfo> Fragment, DebugLoc L,
+ std::optional<DIExpression::FragmentInfo> Fragment, DebugLoc L,
LocMap::Allocator &alloc)
: Variable(var), Fragment(Fragment), dl(std::move(L)), leader(this),
locInts(alloc) {}
@@ -440,11 +440,12 @@ public:
/// VNInfo.
/// \param [out] Kills Append end points of VNI's live range to Kills.
/// \param LIS Live intervals analysis.
- void extendDef(SlotIndex Idx, DbgVariableValue DbgValue,
- SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>>
- &LiveIntervalInfo,
- Optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
- LiveIntervals &LIS);
+ void
+ extendDef(SlotIndex Idx, DbgVariableValue DbgValue,
+ SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>>
+ &LiveIntervalInfo,
+ std::optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
+ LiveIntervals &LIS);
/// The value in LI may be copies to other registers. Determine if
/// any of the copies are available at the kill points, and add defs if
@@ -582,7 +583,7 @@ class LDVImpl {
/// Find or create a UserValue.
UserValue *getUserValue(const DILocalVariable *Var,
- Optional<DIExpression::FragmentInfo> Fragment,
+ std::optional<DIExpression::FragmentInfo> Fragment,
const DebugLoc &DL);
/// Find the EC leader for VirtReg or null.
@@ -763,14 +764,14 @@ void LDVImpl::print(raw_ostream &OS) {
void UserValue::mapVirtRegs(LDVImpl *LDV) {
for (unsigned i = 0, e = locations.size(); i != e; ++i)
- if (locations[i].isReg() &&
- Register::isVirtualRegister(locations[i].getReg()))
+ if (locations[i].isReg() && locations[i].getReg().isVirtual())
LDV->mapVirtReg(locations[i].getReg(), this);
}
-UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
- Optional<DIExpression::FragmentInfo> Fragment,
- const DebugLoc &DL) {
+UserValue *
+LDVImpl::getUserValue(const DILocalVariable *Var,
+ std::optional<DIExpression::FragmentInfo> Fragment,
+ const DebugLoc &DL) {
// FIXME: Handle partially overlapping fragments. See
// https://reviews.llvm.org/D70121#1849741.
DebugVariable ID(Var, Fragment, DL->getInlinedAt());
@@ -784,7 +785,7 @@ UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
}
void LDVImpl::mapVirtReg(Register VirtReg, UserValue *EC) {
- assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+ assert(VirtReg.isVirtual() && "Only map VirtRegs");
UserValue *&Leader = virtRegToEqClass[VirtReg];
Leader = UserValue::merge(Leader, EC);
}
@@ -820,7 +821,7 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
// will be incorrect.
bool Discard = false;
for (const MachineOperand &Op : MI.debug_operands()) {
- if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
+ if (Op.isReg() && Op.getReg().isVirtual()) {
const Register Reg = Op.getReg();
if (!LIS->hasInterval(Reg)) {
// The DBG_VALUE is described by a virtual register that does not have a
@@ -873,12 +874,16 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
MachineBasicBlock::iterator LDVImpl::handleDebugInstr(MachineInstr &MI,
SlotIndex Idx) {
- assert(MI.isDebugValue() || MI.isDebugRef() || MI.isDebugPHI());
+ assert(MI.isDebugValueLike() || MI.isDebugPHI());
// In instruction referencing mode, there should be no DBG_VALUE instructions
// that refer to virtual registers. They might still refer to constants.
- if (MI.isDebugValue())
- assert(!MI.getOperand(0).isReg() || !MI.getOperand(0).getReg().isVirtual());
+ if (MI.isDebugValueLike())
+ assert(none_of(MI.debug_operands(),
+ [](const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg().isVirtual();
+ }) &&
+ "MIs should not refer to Virtual Registers in InstrRef mode.");
// Unlink the instruction, store it in the debug instructions collection.
auto NextInst = std::next(MI.getIterator());
@@ -955,7 +960,7 @@ void UserValue::extendDef(
SlotIndex Idx, DbgVariableValue DbgValue,
SmallDenseMap<unsigned, std::pair<LiveRange *, const VNInfo *>>
&LiveIntervalInfo,
- Optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
+ std::optional<std::pair<SlotIndex, SmallVector<unsigned>>> &Kills,
LiveIntervals &LIS) {
SlotIndex Start = Idx;
MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
@@ -985,7 +990,7 @@ void UserValue::extendDef(
Start = Start.getNextSlot();
if (I.value() != DbgValue || I.stop() != Start) {
// Clear `Kills`, as we have a new def available.
- Kills = None;
+ Kills = std::nullopt;
return;
}
// This is a one-slot placeholder. Just skip it.
@@ -996,7 +1001,7 @@ void UserValue::extendDef(
if (I.valid() && I.start() < Stop) {
Stop = I.start();
// Clear `Kills`, as we have a new def available.
- Kills = None;
+ Kills = std::nullopt;
}
if (Start < Stop) {
@@ -1012,9 +1017,8 @@ void UserValue::addDefsFromCopies(
SmallVectorImpl<std::pair<SlotIndex, DbgVariableValue>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS) {
// Don't track copies from physregs, there are too many uses.
- if (any_of(LocIntervals, [](auto LocI) {
- return !Register::isVirtualRegister(LocI.second->reg());
- }))
+ if (any_of(LocIntervals,
+ [](auto LocI) { return !LocI.second->reg().isVirtual(); }))
return;
// Collect all the (vreg, valno) pairs that are copies of LI.
@@ -1035,7 +1039,7 @@ void UserValue::addDefsFromCopies(
// arguments, and the argument registers are always call clobbered. We are
// better off in the source register which could be a callee-saved
// register, or it could be spilled.
- if (!Register::isVirtualRegister(DstReg))
+ if (!DstReg.isVirtual())
continue;
// Is the value extended to reach this copy? If not, another def may be
@@ -1114,7 +1118,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
bool ShouldExtendDef = false;
for (unsigned LocNo : DbgValue.loc_nos()) {
const MachineOperand &LocMO = locations[LocNo];
- if (!LocMO.isReg() || !Register::isVirtualRegister(LocMO.getReg())) {
+ if (!LocMO.isReg() || !LocMO.getReg().isVirtual()) {
ShouldExtendDef |= !LocMO.isReg();
continue;
}
@@ -1129,7 +1133,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
LIs[LocNo] = {LI, VNI};
}
if (ShouldExtendDef) {
- Optional<std::pair<SlotIndex, SmallVector<unsigned>>> Kills;
+ std::optional<std::pair<SlotIndex, SmallVector<unsigned>>> Kills;
extendDef(Idx, DbgValue, LIs, Kills, LIS);
if (Kills) {
@@ -1522,8 +1526,7 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
unsigned SpillOffset = 0;
MachineOperand Loc = locations[I];
// Only virtual registers are rewritten.
- if (Loc.isReg() && Loc.getReg() &&
- Register::isVirtualRegister(Loc.getReg())) {
+ if (Loc.isReg() && Loc.getReg() && Loc.getReg().isVirtual()) {
Register VirtReg = Loc.getReg();
if (VRM.isAssignedReg(VirtReg) &&
Register::isPhysicalRegister(VRM.getPhys(VirtReg))) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
index 9378aaeb181c..7cd3d26cf5b3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
@@ -963,7 +963,7 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
LaneBitmask LaneMask,
const MachineRegisterInfo &MRI,
const SlotIndexes &Indexes) const {
- assert(Register::isVirtualRegister(reg()));
+ assert(reg().isVirtual());
LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg());
assert((VRegMask & LaneMask).any());
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
index 3176d73b35f6..ccc5ae98086e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
@@ -51,7 +51,7 @@ void LiveIntervalCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
// Visit all def operands. If the same instruction has multiple defs of Reg,
// createDeadDef() will deduplicate.
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
- unsigned Reg = LI.reg();
+ Register Reg = LI.reg();
for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
if (!MO.isDef() && !MO.readsReg())
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index 8a76048bb8c4..a49f6b0604c5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -180,7 +180,7 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
#endif
LiveInterval *LiveIntervals::createInterval(Register reg) {
- float Weight = Register::isPhysicalRegister(reg) ? huge_valf : 0.0F;
+ float Weight = reg.isPhysical() ? huge_valf : 0.0F;
return new LiveInterval(reg, Weight);
}
@@ -449,8 +449,7 @@ void LiveIntervals::extendSegmentsToUses(LiveRange &Segments,
bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead) {
LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n');
- assert(Register::isVirtualRegister(li->reg()) &&
- "Can only shrink virtual registers");
+ assert(li->reg().isVirtual() && "Can only shrink virtual registers");
// Shrink subregister live ranges.
bool NeedsCleanup = false;
@@ -508,7 +507,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
bool LiveIntervals::computeDeadValues(LiveInterval &LI,
SmallVectorImpl<MachineInstr*> *dead) {
bool MayHaveSplitComponents = false;
- bool HaveDeadDef = false;
for (VNInfo *VNI : LI.valnos) {
if (VNI->isUnused())
@@ -534,29 +532,25 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
VNI->markUnused();
LI.removeSegment(I);
LLVM_DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
- MayHaveSplitComponents = true;
} else {
// This is a dead def. Make sure the instruction knows.
MachineInstr *MI = getInstructionFromIndex(Def);
assert(MI && "No instruction defining live value");
MI->addRegisterDead(LI.reg(), TRI);
- if (HaveDeadDef)
- MayHaveSplitComponents = true;
- HaveDeadDef = true;
if (dead && MI->allDefsAreDead()) {
LLVM_DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
dead->push_back(MI);
}
}
+ MayHaveSplitComponents = true;
}
return MayHaveSplitComponents;
}
void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, Register Reg) {
LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n');
- assert(Register::isVirtualRegister(Reg) &&
- "Can only shrink virtual registers");
+ assert(Reg.isVirtual() && "Can only shrink virtual registers");
// Find all the values used, including PHI kills.
ShrinkToUsesWorkList WorkList;
@@ -1025,7 +1019,7 @@ public:
Register Reg = MO.getReg();
if (!Reg)
continue;
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
LiveInterval &LI = LIS.getInterval(Reg);
if (LI.hasSubRanges()) {
unsigned SubReg = MO.getSubReg();
@@ -1079,7 +1073,7 @@ private:
return;
LLVM_DEBUG({
dbgs() << " ";
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
dbgs() << printReg(Reg);
if (LaneMask.any())
dbgs() << " L" << PrintLaneMask(LaneMask);
@@ -1455,7 +1449,7 @@ private:
// Return the last use of reg between NewIdx and OldIdx.
SlotIndex findLastUseBefore(SlotIndex Before, Register Reg,
LaneBitmask LaneMask) {
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
SlotIndex LastUse = Before;
for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
if (MO.isUndef())
@@ -1499,8 +1493,7 @@ private:
// Check if MII uses Reg.
for (MIBundleOperands MO(*MII); MO.isValid(); ++MO)
- if (MO->isReg() && !MO->isUndef() &&
- Register::isPhysicalRegister(MO->getReg()) &&
+ if (MO->isReg() && !MO->isUndef() && MO->getReg().isPhysical() &&
TRI.hasRegUnit(MO->getReg(), Reg))
return Idx.getRegSlot();
}
@@ -1747,9 +1740,8 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
return;
LLVM_DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n');
Register Reg = LI.reg();
- const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
for (unsigned I = 1; I < NumComp; ++I) {
- Register NewVReg = MRI->createVirtualRegister(RegClass);
+ Register NewVReg = MRI->cloneVirtualRegister(Reg);
LiveInterval &NewLI = createEmptyInterval(NewVReg);
SplitLIs.push_back(&NewLI);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index abf36b3f4c67..d8b024fbdfea 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -24,15 +24,16 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
-STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
-STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
-STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
+STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
+STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
+STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
+STATISTIC(NumReMaterialization, "Number of instructions rematerialized");
void LiveRangeEdit::Delegate::anchor() { }
LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(Register OldReg,
bool createSubRanges) {
- Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ Register VReg = MRI.cloneVirtualRegister(OldReg);
if (VRM)
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
@@ -52,7 +53,7 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(Register OldReg,
}
Register LiveRangeEdit::createFrom(Register OldReg) {
- Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ Register VReg = MRI.cloneVirtualRegister(OldReg);
if (VRM) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
@@ -113,7 +114,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
// We can't remat physreg uses, unless the physreg is constant or the
// target wants to ignore this use.
- if (Register::isPhysicalRegister(MO.getReg())) {
+ if (MO.getReg().isPhysical()) {
if (MRI.isConstantPhysReg(MO.getReg()) || TII.isIgnorableUse(MO))
continue;
return false;
@@ -134,9 +135,11 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
return false;
// Check that subrange is live at UseIdx.
- if (MO.getSubReg()) {
+ if (li.hasSubRanges()) {
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
- LaneBitmask LM = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(MO.getReg());
for (LiveInterval::SubRange &SR : li.subranges()) {
if ((SR.LaneMask & LM).none())
continue;
@@ -181,14 +184,20 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
unsigned DestReg,
const Remat &RM,
const TargetRegisterInfo &tri,
- bool Late) {
+ bool Late,
+ unsigned SubIdx,
+ MachineInstr *ReplaceIndexMI) {
assert(RM.OrigMI && "Invalid remat");
- TII.reMaterialize(MBB, MI, DestReg, 0, *RM.OrigMI, tri);
+ TII.reMaterialize(MBB, MI, DestReg, SubIdx, *RM.OrigMI, tri);
// DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg
// to false anyway in case the isDead flag of RM.OrigMI's dest register
// is true.
(*--MI).getOperand(0).setIsDead(false);
Rematted.insert(RM.ParentVNI);
+ ++NumReMaterialization;
+
+ if (ReplaceIndexMI)
+ return LIS.ReplaceMachineInstrInMaps(*ReplaceIndexMI, *MI).getRegSlot();
return LIS.getSlotIndexes()->insertMachineInstrInMaps(*MI, Late).getRegSlot();
}
@@ -309,7 +318,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
MI->getDesc().getNumDefs() == 1) {
Dest = MI->getOperand(0).getReg();
DestSubReg = MI->getOperand(0).getSubReg();
- unsigned Original = VRM->getOriginal(Dest);
+ Register Original = VRM->getOriginal(Dest);
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
// The original live-range may have been shrunk to
@@ -327,7 +336,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg)) {
+ if (!Reg.isVirtual()) {
// Check if MI reads any unreserved physregs.
if (Reg && MO.readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
@@ -369,7 +378,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Remove all operands that aren't physregs.
for (unsigned i = MI->getNumOperands(); i; --i) {
const MachineOperand &MO = MI->getOperand(i-1);
- if (MO.isReg() && Register::isPhysicalRegister(MO.getReg()))
+ if (MO.isReg() && MO.getReg().isPhysical())
continue;
MI->removeOperand(i-1);
}
@@ -439,7 +448,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
LiveInterval *LI = ToShrink.pop_back_val();
if (foldAsLoad(LI, Dead))
continue;
- unsigned VReg = LI->reg();
+ Register VReg = LI->reg();
if (TheDelegate)
TheDelegate->LRE_WillShrinkVirtReg(VReg);
if (!LIS.shrinkToUses(LI, &Dead))
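
The allUsesAvailableAt hunk above widens the subrange check: it used to run only for uses with an explicit subregister index, but any use of an interval that has subranges must confirm that the lanes it reads are live at the use point, falling back to the register's maximal lane mask for full-register uses. A hedged sketch of that lane-mask test, with plain integers standing in for LaneBitmask:

#include <cstdint>
#include <vector>

struct SubRange { uint64_t LaneMask; bool LiveAtUse; };

// Returns false if any subrange overlapping the lanes the use reads is
// not live there -- mirroring the corrected condition: consult subranges
// whenever the interval has them, not only when a subreg index is given.
static bool lanesAvailable(const std::vector<SubRange> &Subranges,
                           uint64_t UseMask) {
  for (const SubRange &SR : Subranges) {
    if ((SR.LaneMask & UseMask) == 0)
      continue; // this subrange does not overlap the use
    if (!SR.LiveAtUse)
      return false;
  }
  return true;
}
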
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 8e56985246db..93f5314539cd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -176,7 +176,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
Register Reg = MO.getReg();
// Do not move the instruction if it def/uses a physical register,
// unless it is a constant physical register or a noreg.
- if (!Register::isVirtualRegister(Reg)) {
+ if (!Reg.isVirtual()) {
if (!Reg || MRI.isConstantPhysReg(Reg))
continue;
Insert = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
index d8d8bd5d61a2..34de09dd2944 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
@@ -22,8 +22,10 @@ using namespace llvm;
void LiveRegUnits::removeRegsNotPreserved(const uint32_t *RegMask) {
for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
- if (MachineOperand::clobbersPhysReg(RegMask, *RootReg))
+ if (MachineOperand::clobbersPhysReg(RegMask, *RootReg)) {
Units.reset(U);
+ break;
+ }
}
}
}
@@ -31,42 +33,54 @@ void LiveRegUnits::removeRegsNotPreserved(const uint32_t *RegMask) {
void LiveRegUnits::addRegsInMask(const uint32_t *RegMask) {
for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
- if (MachineOperand::clobbersPhysReg(RegMask, *RootReg))
+ if (MachineOperand::clobbersPhysReg(RegMask, *RootReg)) {
Units.set(U);
+ break;
+ }
}
}
}
void LiveRegUnits::stepBackward(const MachineInstr &MI) {
// Remove defined registers and regmask kills from the set.
- for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ for (const MachineOperand &MOP : MI.operands()) {
+ if (MOP.isReg()) {
+ if (MOP.isDef() && MOP.getReg().isPhysical())
+ removeReg(MOP.getReg());
+ continue;
+ }
+
if (MOP.isRegMask()) {
removeRegsNotPreserved(MOP.getRegMask());
continue;
}
-
- if (MOP.isDef())
- removeReg(MOP.getReg());
}
// Add uses to the set.
- for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ for (const MachineOperand &MOP : MI.operands()) {
if (!MOP.isReg() || !MOP.readsReg())
continue;
- addReg(MOP.getReg());
+
+ if (MOP.getReg().isPhysical())
+ addReg(MOP.getReg());
}
}
void LiveRegUnits::accumulate(const MachineInstr &MI) {
// Add defs, uses and regmask clobbers to the set.
- for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ for (const MachineOperand &MOP : MI.operands()) {
+ if (MOP.isReg()) {
+ if (!MOP.getReg().isPhysical())
+ continue;
+ if (MOP.isDef() || MOP.readsReg())
+ addReg(MOP.getReg());
+ continue;
+ }
+
if (MOP.isRegMask()) {
addRegsInMask(MOP.getRegMask());
continue;
}
- if (!MOP.isDef() && !MOP.readsReg())
- continue;
- addReg(MOP.getReg());
}
}
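
Open-coding the operand walk in stepBackward (and accumulate) makes the physical-register filter explicit, and removeRegsNotPreserved/addRegsInMask can now break out of the root iteration after the first clobbered root, since one clobbered root already decides the whole unit. A simplified two-pass backward step over a toy operand list — a set bit means live — assuming nothing about the real regunit encoding:

#include <bitset>
#include <vector>

struct Operand {
  bool IsReg = false, IsDef = false, ReadsReg = false, IsPhysical = false;
  unsigned Unit = 0;                        // toy stand-in for a regunit
  const std::bitset<64> *RegMask = nullptr; // set bit = preserved
};

static void stepBackward(std::bitset<64> &Live,
                         const std::vector<Operand> &Ops) {
  // First remove defs and regmask clobbers...
  for (const Operand &Op : Ops) {
    if (Op.IsReg) {
      if (Op.IsDef && Op.IsPhysical)
        Live.reset(Op.Unit);
    } else if (Op.RegMask) {
      Live &= *Op.RegMask; // keep only units the mask preserves
    }
  }
  // ...then add uses, exactly as the two-pass loop above does.
  for (const Operand &Op : Ops)
    if (Op.IsReg && Op.ReadsReg && Op.IsPhysical)
      Live.set(Op.Unit);
}
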
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
index 40250171fe1e..34c81c92707e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -518,7 +518,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
continue;
Register MOReg = MO.getReg();
if (MO.isUse()) {
- if (!(Register::isPhysicalRegister(MOReg) && MRI->isReserved(MOReg)))
+ if (!(MOReg.isPhysical() && MRI->isReserved(MOReg)))
MO.setIsKill(false);
if (MO.readsReg())
UseRegs.push_back(MOReg);
@@ -526,7 +526,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
assert(MO.isDef());
// FIXME: We should not remove any dead flags. However the MIPS RDDSP
// instruction needs it at the moment: http://llvm.org/PR27116.
- if (Register::isPhysicalRegister(MOReg) && !MRI->isReserved(MOReg))
+ if (MOReg.isPhysical() && !MRI->isReserved(MOReg))
MO.setIsDead(false);
DefRegs.push_back(MOReg);
}
@@ -762,7 +762,7 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) {
if (MO.isReg() && MO.isKill()) {
MO.setIsKill(false);
Register Reg = MO.getReg();
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
bool removed = getVarInfo(Reg).removeKill(MI);
assert(removed && "kill not in register's VarInfo?");
(void)removed;
@@ -850,7 +850,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
// Record all vreg defs and kills of all instructions in SuccBB.
for (; BBI != BBE; ++BBI) {
for (const MachineOperand &Op : BBI->operands()) {
- if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
+ if (Op.isReg() && Op.getReg().isVirtual()) {
if (Op.isDef())
Defs.insert(Op.getReg());
else if (Op.isKill())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 5f54d7cc8472..e491ed12034d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -288,7 +288,6 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// stack frame. If it wants one, re-use a suitable one we've previously
// allocated, or if there isn't one that fits the bill, allocate a new one
// and ask the target to create a defining instruction for it.
- bool UsedBaseReg = false;
MachineFrameInfo &MFI = Fn.getFrameInfo();
const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
@@ -386,7 +385,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// instruction itself will be taken into account by the target,
// so we don't have to adjust for it here when reusing a base
// register.
- if (UsedBaseReg &&
+ if (BaseReg.isValid() &&
lookupCandidateBaseReg(BaseReg, BaseOffset, FrameSizeAdjust,
LocalOffset, MI, TRI)) {
LLVM_DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
@@ -396,8 +395,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// No previously defined register was in range, so create a new one.
int64_t InstrOffset = TRI->getFrameIndexInstrOffset(&MI, idx);
- int64_t PrevBaseOffset = BaseOffset;
- BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset;
+ int64_t CandBaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset;
// We'd like to avoid creating single-use virtual base registers.
// Because the FrameRefs are in sorted order, and we've already
@@ -406,12 +404,13 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// then don't bother creating it.
if (ref + 1 >= e ||
!lookupCandidateBaseReg(
- BaseReg, BaseOffset, FrameSizeAdjust,
+ BaseReg, CandBaseOffset, FrameSizeAdjust,
FrameReferenceInsns[ref + 1].getLocalOffset(),
- *FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) {
- BaseOffset = PrevBaseOffset;
+ *FrameReferenceInsns[ref + 1].getMachineInstr(), TRI))
continue;
- }
+
+ // Save the base offset.
+ BaseOffset = CandBaseOffset;
// Tell the target to insert the instruction to initialize
// the base register.
@@ -428,7 +427,6 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
Offset = -InstrOffset;
++NumBaseRegisters;
- UsedBaseReg = true;
}
assert(BaseReg && "Unable to allocate virtual base register!");
@@ -440,5 +438,5 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
++NumReplacements;
}
- return UsedBaseReg;
+ return BaseReg.isValid();
}
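
Retiring the UsedBaseReg flag works because Register encodes "no register" as the value 0, so BaseReg.isValid() carries the same bit of state, and BaseOffset is now only committed once a second in-range reference justifies materializing the base register. A tiny model of the sentinel idiom:

#include <cassert>

// Register's no-register sentinel is 0, so validity needs no side flag.
class Reg {
  unsigned Id = 0;
public:
  Reg() = default;
  explicit Reg(unsigned Id) : Id(Id) {}
  bool isValid() const { return Id != 0; }
};

int main() {
  Reg BaseReg;              // starts out invalid -- replaces the bool
  assert(!BaseReg.isValid());
  BaseReg = Reg(42);        // set once a base register is materialized
  assert(BaseReg.isValid());
}
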
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
index efebb18c9908..5b388be27839 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MBFIWrapper.h"
+#include <optional>
using namespace llvm;
@@ -31,7 +31,7 @@ void MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
MergedBBFreq[MBB] = F;
}
-Optional<uint64_t>
+std::optional<uint64_t>
MBFIWrapper::getBlockProfileCount(const MachineBasicBlock *MBB) const {
auto I = MergedBBFreq.find(MBB);
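
The llvm::Optional to std::optional migration that runs through this import is mechanical, and this MBFIWrapper hunk is a representative instance: a map lookup that may fail maps directly onto the standard type. A self-contained sketch of the same shape (the key type is simplified to int here):

#include <cstdint>
#include <map>
#include <optional>

// Same shape as getBlockProfileCount: consult a local override first,
// returning std::nullopt when no count is recorded.
static std::optional<uint64_t>
getCount(const std::map<int, uint64_t> &MergedFreq, int Block) {
  auto I = MergedFreq.find(Block);
  if (I != MergedFreq.end())
    return I->second;
  return std::nullopt;
}
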
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 3e7b4dbc9d71..21b849244d9b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -155,7 +155,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
if (!MO.isReg())
continue;
- if (Register::isVirtualRegister(MO.getReg()))
+ if (MO.getReg().isVirtual())
continue;
if (!MO.isDef())
@@ -172,7 +172,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
continue;
MachineOperand &MO = II->getOperand(0);
- if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
if (!MO.isDef())
continue;
@@ -185,7 +185,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
}
if (II->getOperand(i).isReg()) {
- if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
+ if (!II->getOperand(i).getReg().isVirtual())
if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
continue;
}
@@ -307,9 +307,9 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
const Register Dst = MI->getOperand(0).getReg();
const Register Src = MI->getOperand(1).getReg();
- if (!Register::isVirtualRegister(Dst))
+ if (!Dst.isVirtual())
continue;
- if (!Register::isVirtualRegister(Src))
+ if (!Src.isVirtual())
continue;
// Not folding COPY instructions if regbankselect has not set the RCs.
// Why are we only considering Register Classes? Because the verifier
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index 3152102410d7..ad8a17f25ec5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -70,7 +70,7 @@ static uint64_t getCallStackHash(const MachineBasicBlock &BB,
bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
if (!EnableFSDiscriminator)
return false;
- if (!MF.getFunction().isDebugInfoForProfiling())
+ if (!MF.getFunction().shouldEmitDebugInfoForProfiling())
return false;
bool Changed = false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index b0daa20913f5..c136b08223b8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "MILexer.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -33,7 +32,7 @@ class Cursor {
const char *End = nullptr;
public:
- Cursor(NoneType) {}
+ Cursor(std::nullopt_t) {}
explicit Cursor(StringRef Str) {
Ptr = Str.data();
@@ -159,7 +158,7 @@ static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) {
ErrorCallback(
C.location(),
"end of machine instruction reached before the closing '\"'");
- return None;
+ return std::nullopt;
}
}
C.advance();
@@ -217,6 +216,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("nofpexcept", MIToken::kw_nofpexcept)
.Case("debug-location", MIToken::kw_debug_location)
.Case("debug-instr-number", MIToken::kw_debug_instr_number)
+ .Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
.Case("rel_offset", MIToken::kw_cfi_rel_offset)
@@ -258,7 +258,6 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("call-entry", MIToken::kw_call_entry)
.Case("custom", MIToken::kw_custom)
.Case("liveout", MIToken::kw_liveout)
- .Case("address-taken", MIToken::kw_address_taken)
.Case("landing-pad", MIToken::kw_landing_pad)
.Case("inlineasm-br-indirect-target",
MIToken::kw_inlineasm_br_indirect_target)
@@ -271,16 +270,22 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
.Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
.Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker)
+ .Case("pcsections", MIToken::kw_pcsections)
+ .Case("cfi-type", MIToken::kw_cfi_type)
.Case("bbsections", MIToken::kw_bbsections)
+ .Case("bb_id", MIToken::kw_bb_id)
.Case("unknown-size", MIToken::kw_unknown_size)
.Case("unknown-address", MIToken::kw_unknown_address)
.Case("distinct", MIToken::kw_distinct)
+ .Case("ir-block-address-taken", MIToken::kw_ir_block_address_taken)
+ .Case("machine-block-address-taken",
+ MIToken::kw_machine_block_address_taken)
.Default(MIToken::Identifier);
}
static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
if (!isalpha(C.peek()) && C.peek() != '_')
- return None;
+ return std::nullopt;
auto Range = C;
while (isIdentifierChar(C.peek()))
C.advance();
@@ -294,7 +299,7 @@ static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
bool IsReference = C.remaining().startswith("%bb.");
if (!IsReference && !C.remaining().startswith("bb."))
- return None;
+ return std::nullopt;
auto Range = C;
unsigned PrefixLength = IsReference ? 4 : 3;
C.advance(PrefixLength); // Skip '%bb.' or 'bb.'
@@ -328,7 +333,7 @@ static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
MIToken::TokenKind Kind) {
if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
- return None;
+ return std::nullopt;
auto Range = C;
C.advance(Rule.size());
auto NumberRange = C;
@@ -341,7 +346,7 @@ static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
MIToken::TokenKind Kind) {
if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
- return None;
+ return std::nullopt;
auto Range = C;
C.advance(Rule.size());
auto NumberRange = C;
@@ -381,7 +386,7 @@ static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
const StringRef Rule = "%subreg.";
if (!C.remaining().startswith(Rule))
- return None;
+ return std::nullopt;
return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(),
ErrorCallback);
}
@@ -390,7 +395,7 @@ static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
const StringRef Rule = "%ir-block.";
if (!C.remaining().startswith(Rule))
- return None;
+ return std::nullopt;
if (isdigit(C.peek(Rule.size())))
return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
@@ -400,7 +405,7 @@ static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
const StringRef Rule = "%ir.";
if (!C.remaining().startswith(Rule))
- return None;
+ return std::nullopt;
if (isdigit(C.peek(Rule.size())))
return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
@@ -409,7 +414,7 @@ static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
static Cursor maybeLexStringConstant(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
if (C.peek() != '"')
- return None;
+ return std::nullopt;
return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0,
ErrorCallback);
}
@@ -443,7 +448,7 @@ static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) {
static Cursor maybeLexRegister(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
if (C.peek() != '%' && C.peek() != '$')
- return None;
+ return std::nullopt;
if (C.peek() == '%') {
if (isdigit(C.peek(1)))
@@ -452,7 +457,7 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token,
if (isRegisterChar(C.peek(1)))
return lexNamedVirtualRegister(C, Token);
- return None;
+ return std::nullopt;
}
assert(C.peek() == '$');
@@ -468,7 +473,7 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token,
static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
if (C.peek() != '@')
- return None;
+ return std::nullopt;
if (!isdigit(C.peek(1)))
return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,
ErrorCallback);
@@ -485,7 +490,7 @@ static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
if (C.peek() != '&')
- return None;
+ return std::nullopt;
return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
ErrorCallback);
}
@@ -494,7 +499,7 @@ static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
const StringRef Rule = "<mcsymbol ";
if (!C.remaining().startswith(Rule))
- return None;
+ return std::nullopt;
auto Start = C;
C.advance(Rule.size());
@@ -559,7 +564,7 @@ static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X'))
- return None;
+ return std::nullopt;
Cursor Range = C;
C.advance(2);
unsigned PrefLen = 2;
@@ -571,7 +576,7 @@ static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
C.advance();
StringRef StrVal = Range.upto(C);
if (StrVal.size() <= PrefLen)
- return None;
+ return std::nullopt;
if (PrefLen == 2)
Token.reset(MIToken::HexLiteral, Range.upto(C));
else // It must be 3, which means that there was a floating-point prefix.
@@ -581,7 +586,7 @@ static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
- return None;
+ return std::nullopt;
auto Range = C;
C.advance();
while (isdigit(C.peek()))
@@ -607,7 +612,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
static Cursor maybeLexExclaim(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
if (C.peek() != '!')
- return None;
+ return std::nullopt;
auto Range = C;
C.advance(1);
if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) {
@@ -664,7 +669,7 @@ static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
} else
Kind = symbolToken(C.peek());
if (Kind == MIToken::Error)
- return None;
+ return std::nullopt;
auto Range = C;
C.advance(Length);
Token.reset(Kind, Range.upto(C));
@@ -673,7 +678,7 @@ static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
if (!isNewlineChar(C.peek()))
- return None;
+ return std::nullopt;
auto Range = C;
C.advance();
Token.reset(MIToken::Newline, Range.upto(C));
@@ -683,7 +688,7 @@ static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
if (C.peek() != '`')
- return None;
+ return std::nullopt;
auto Range = C;
C.advance();
auto StrRange = C;
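
Every `return None` to `return std::nullopt` replacement in the lexer works because Cursor keeps an implicit converting constructor from the null tag (NoneType before, std::nullopt_t now) and tests as a boolean, so the maybeLex* helpers can signal "rule did not match" without any Optional wrapper. A standalone miniature of the pattern, with a made-up maybeLexAt rule:

#include <cassert>
#include <optional>
#include <string_view>

class Cursor {
  const char *Ptr = nullptr, *End = nullptr;
public:
  Cursor(std::nullopt_t) {} // invalid cursor == "rule did not match"
  explicit Cursor(std::string_view S)
      : Ptr(S.data()), End(S.data() + S.size()) {}
  explicit operator bool() const { return Ptr != nullptr; }
  char peek() const { return Ptr == End ? 0 : *Ptr; }
};

static Cursor maybeLexAt(Cursor C) {
  if (C.peek() != '@')
    return std::nullopt; // converts to an invalid Cursor
  return C;
}

int main() {
  assert(maybeLexAt(Cursor(std::string_view("@foo"))));
  assert(!maybeLexAt(Cursor(std::string_view("bar"))));
}
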
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
index 70d17f819ce3..ac484cdfd6c8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -75,6 +75,7 @@ struct MIToken {
kw_nofpexcept,
kw_debug_location,
kw_debug_instr_number,
+ kw_dbg_instr_ref,
kw_cfi_same_value,
kw_cfi_offset,
kw_cfi_rel_offset,
@@ -114,7 +115,6 @@ struct MIToken {
kw_call_entry,
kw_custom,
kw_liveout,
- kw_address_taken,
kw_landing_pad,
kw_inlineasm_br_indirect_target,
kw_ehfunclet_entry,
@@ -126,9 +126,14 @@ struct MIToken {
kw_pre_instr_symbol,
kw_post_instr_symbol,
kw_heap_alloc_marker,
+ kw_pcsections,
+ kw_cfi_type,
kw_bbsections,
+ kw_bb_id,
kw_unknown_size,
kw_unknown_address,
+ kw_ir_block_address_taken,
+ kw_machine_block_address_taken,
// Metadata types.
kw_distinct,
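
The token list stays in lockstep with the lexer: each keyword added to getIdentifierKind's StringSwitch needs a matching MIToken kind here. A toy version of that mapping, with a plain hash map standing in for llvm::StringSwitch and only the newly added keywords shown:

#include <string>
#include <unordered_map>

enum class TokKind { kw_pcsections, kw_cfi_type, kw_bb_id, Identifier };

// Stand-in for the StringSwitch in getIdentifierKind: unknown words
// fall back to the generic Identifier kind.
static TokKind getIdentifierKind(const std::string &Word) {
  static const std::unordered_map<std::string, TokKind> Keywords = {
      {"pcsections", TokKind::kw_pcsections},
      {"cfi-type", TokKind::kw_cfi_type},
      {"bb_id", TokKind::kw_bb_id},
  };
  auto It = Keywords.find(Word);
  return It == Keywords.end() ? TokKind::Identifier : It->second;
}
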
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index e3d6b59c5077..525f49347fc4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -16,8 +16,6 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -377,10 +375,11 @@ struct ParsedMachineOperand {
MachineOperand Operand;
StringRef::iterator Begin;
StringRef::iterator End;
- Optional<unsigned> TiedDefIdx;
+ std::optional<unsigned> TiedDefIdx;
ParsedMachineOperand(const MachineOperand &Operand, StringRef::iterator Begin,
- StringRef::iterator End, Optional<unsigned> &TiedDefIdx)
+ StringRef::iterator End,
+ std::optional<unsigned> &TiedDefIdx)
: Operand(Operand), Begin(Begin), End(End), TiedDefIdx(TiedDefIdx) {
if (TiedDefIdx)
assert(Operand.isReg() && Operand.isUse() &&
@@ -449,7 +448,8 @@ public:
bool parseSubRegisterIndex(unsigned &SubReg);
bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
bool parseRegisterOperand(MachineOperand &Dest,
- Optional<unsigned> &TiedDefIdx, bool IsDef = false);
+ std::optional<unsigned> &TiedDefIdx,
+ bool IsDef = false);
bool parseImmediateOperand(MachineOperand &Dest);
bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
const Constant *&C);
@@ -485,19 +485,22 @@ public:
bool parsePredicateOperand(MachineOperand &Dest);
bool parseShuffleMaskOperand(MachineOperand &Dest);
bool parseTargetIndexOperand(MachineOperand &Dest);
+ bool parseDbgInstrRefOperand(MachineOperand &Dest);
bool parseCustomRegisterMaskOperand(MachineOperand &Dest);
bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
bool parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
MachineOperand &Dest,
- Optional<unsigned> &TiedDefIdx);
+ std::optional<unsigned> &TiedDefIdx);
bool parseMachineOperandAndTargetFlags(const unsigned OpCode,
const unsigned OpIdx,
MachineOperand &Dest,
- Optional<unsigned> &TiedDefIdx);
+ std::optional<unsigned> &TiedDefIdx);
bool parseOffset(int64_t &Offset);
+ bool parseIRBlockAddressTaken(BasicBlock *&BB);
bool parseAlignment(uint64_t &Alignment);
bool parseAddrspace(unsigned &Addrspace);
- bool parseSectionID(Optional<MBBSectionID> &SID);
+ bool parseSectionID(std::optional<MBBSectionID> &SID);
+ bool parseBBID(std::optional<unsigned> &BBID);
bool parseOperandsOffset(MachineOperand &Op);
bool parseIRValue(const Value *&V);
bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
@@ -508,6 +511,7 @@ public:
bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol);
bool parseHeapAllocMarker(MDNode *&Node);
+ bool parsePCSections(MDNode *&Node);
bool parseTargetImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
MachineOperand &Dest, const MIRFormatter &MF);
@@ -593,7 +597,7 @@ bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
// Create a diagnostic for a YAML string literal.
Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
Loc - Source.data(), SourceMgr::DK_Error, Msg.str(),
- Source, None, None);
+ Source, std::nullopt, std::nullopt);
return true;
}
@@ -639,7 +643,7 @@ bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) {
}
// Parse Machine Basic Block Section ID.
-bool MIParser::parseSectionID(Optional<MBBSectionID> &SID) {
+bool MIParser::parseSectionID(std::optional<MBBSectionID> &SID) {
assert(Token.is(MIToken::kw_bbsections));
lex();
if (Token.is(MIToken::IntegerLiteral)) {
@@ -660,6 +664,18 @@ bool MIParser::parseSectionID(Optional<MBBSectionID> &SID) {
return false;
}
+// Parse Machine Basic Block ID.
+bool MIParser::parseBBID(std::optional<unsigned> &BBID) {
+ assert(Token.is(MIToken::kw_bb_id));
+ lex();
+ unsigned Value = 0;
+ if (getUnsigned(Value))
+ return error("Unknown BB ID");
+ BBID = Value;
+ lex();
+ return false;
+}
+
bool MIParser::parseBasicBlockDefinition(
DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
assert(Token.is(MIToken::MachineBasicBlockLabel));
@@ -669,21 +685,27 @@ bool MIParser::parseBasicBlockDefinition(
auto Loc = Token.location();
auto Name = Token.stringValue();
lex();
- bool HasAddressTaken = false;
+ bool MachineBlockAddressTaken = false;
+ BasicBlock *AddressTakenIRBlock = nullptr;
bool IsLandingPad = false;
bool IsInlineAsmBrIndirectTarget = false;
bool IsEHFuncletEntry = false;
- Optional<MBBSectionID> SectionID;
+ std::optional<MBBSectionID> SectionID;
uint64_t Alignment = 0;
+ std::optional<unsigned> BBID;
BasicBlock *BB = nullptr;
if (consumeIfPresent(MIToken::lparen)) {
do {
// TODO: Report an error when multiple same attributes are specified.
switch (Token.kind()) {
- case MIToken::kw_address_taken:
- HasAddressTaken = true;
+ case MIToken::kw_machine_block_address_taken:
+ MachineBlockAddressTaken = true;
lex();
break;
+ case MIToken::kw_ir_block_address_taken:
+ if (parseIRBlockAddressTaken(AddressTakenIRBlock))
+ return true;
+ break;
case MIToken::kw_landing_pad:
IsLandingPad = true;
lex();
@@ -701,6 +723,7 @@ bool MIParser::parseBasicBlockDefinition(
return true;
break;
case MIToken::IRBlock:
+ case MIToken::NamedIRBlock:
// TODO: Report an error when both name and ir block are specified.
if (parseIRBlock(BB, MF.getFunction()))
return true;
@@ -710,6 +733,10 @@ bool MIParser::parseBasicBlockDefinition(
if (parseSectionID(SectionID))
return true;
break;
+ case MIToken::kw_bb_id:
+ if (parseBBID(BBID))
+ return true;
+ break;
default:
break;
}
@@ -736,15 +763,24 @@ bool MIParser::parseBasicBlockDefinition(
Twine(ID));
if (Alignment)
MBB->setAlignment(Align(Alignment));
- if (HasAddressTaken)
- MBB->setHasAddressTaken();
+ if (MachineBlockAddressTaken)
+ MBB->setMachineBlockAddressTaken();
+ if (AddressTakenIRBlock)
+ MBB->setAddressTakenIRBlock(AddressTakenIRBlock);
MBB->setIsEHPad(IsLandingPad);
MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
if (SectionID) {
- MBB->setSectionID(SectionID.value());
+ MBB->setSectionID(*SectionID);
MF.setBBSectionsType(BasicBlockSection::List);
}
+ if (BBID.has_value()) {
+ // BBSectionsType is set to `List` if any basic block has a `SectionID`.
+ // Here, we set it to `Labels` if it hasn't been set above.
+ if (!MF.hasBBSections())
+ MF.setBBSectionsType(BasicBlockSection::Labels);
+ MBB->setBBID(BBID.value());
+ }
return false;
}
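
parseBasicBlockDefinition now understands machine-block-address-taken, ir-block-address-taken <block>, and bb_id <n> among the parenthesized block attributes, and a parsed bb_id switches the function to Labels mode only when no bbsections attribute already selected List. A condensed model of that precedence rule (the names are illustrative, not the MachineFunction API):

#include <optional>

enum class BBSections { None, List, Labels };

// Mirrors the ordering above: an explicit SectionID wins (List), and a
// BBID only sets Labels when no sections mode was chosen yet.
static BBSections resolveSectionsMode(std::optional<int> SectionID,
                                      std::optional<unsigned> BBID,
                                      BBSections Current) {
  if (SectionID)
    return BBSections::List;
  if (BBID && Current == BBSections::None)
    return BBSections::Labels;
  return Current;
}
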
@@ -987,7 +1023,7 @@ bool MIParser::parse(MachineInstr *&MI) {
SmallVector<ParsedMachineOperand, 8> Operands;
while (Token.isRegister() || Token.isRegisterFlag()) {
auto Loc = Token.location();
- Optional<unsigned> TiedDefIdx;
+ std::optional<unsigned> TiedDefIdx;
if (parseRegisterOperand(MO, TiedDefIdx, /*IsDef=*/true))
return true;
Operands.push_back(
@@ -1007,11 +1043,13 @@ bool MIParser::parse(MachineInstr *&MI) {
while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) &&
Token.isNot(MIToken::kw_post_instr_symbol) &&
Token.isNot(MIToken::kw_heap_alloc_marker) &&
+ Token.isNot(MIToken::kw_pcsections) &&
+ Token.isNot(MIToken::kw_cfi_type) &&
Token.isNot(MIToken::kw_debug_location) &&
Token.isNot(MIToken::kw_debug_instr_number) &&
Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
auto Loc = Token.location();
- Optional<unsigned> TiedDefIdx;
+ std::optional<unsigned> TiedDefIdx;
if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx))
return true;
Operands.push_back(
@@ -1036,6 +1074,24 @@ bool MIParser::parse(MachineInstr *&MI) {
if (Token.is(MIToken::kw_heap_alloc_marker))
if (parseHeapAllocMarker(HeapAllocMarker))
return true;
+ MDNode *PCSections = nullptr;
+ if (Token.is(MIToken::kw_pcsections))
+ if (parsePCSections(PCSections))
+ return true;
+
+ unsigned CFIType = 0;
+ if (Token.is(MIToken::kw_cfi_type)) {
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after 'cfi-type'");
+ // getUnsigned is sufficient for 32-bit integers.
+ if (getUnsigned(CFIType))
+ return true;
+ lex();
+ // Lex past trailing comma if present.
+ if (Token.is(MIToken::comma))
+ lex();
+ }
unsigned InstrNum = 0;
if (Token.is(MIToken::kw_debug_instr_number)) {
@@ -1116,6 +1172,10 @@ bool MIParser::parse(MachineInstr *&MI) {
MI->setPostInstrSymbol(MF, PostInstrSymbol);
if (HeapAllocMarker)
MI->setHeapAllocMarker(MF, HeapAllocMarker);
+ if (PCSections)
+ MI->setPCSections(MF, PCSections);
+ if (CFIType)
+ MI->setCFIType(MF, CFIType);
if (!MemOperands.empty())
MI->setMemRefs(MF, MemOperands);
if (InstrNum)
@@ -1322,7 +1382,7 @@ bool MIParser::parseMetadata(Metadata *&MD) {
// Forward reference.
auto &FwdRef = PFS.MachineForwardRefMDNodes[ID];
FwdRef = std::make_pair(
- MDTuple::getTemporary(MF.getFunction().getContext(), None), Loc);
+ MDTuple::getTemporary(MF.getFunction().getContext(), std::nullopt), Loc);
PFS.MachineMetadataNodes[ID].reset(FwdRef.first.get());
MD = FwdRef.first.get();
@@ -1336,7 +1396,7 @@ static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
static std::string getRegisterName(const TargetRegisterInfo *TRI,
Register Reg) {
- assert(Register::isPhysicalRegister(Reg) && "expected phys reg");
+ assert(Reg.isPhysical() && "expected phys reg");
return StringRef(TRI->getName(Reg)).lower();
}
@@ -1359,14 +1419,10 @@ bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
// Gather all the expected implicit operands.
SmallVector<MachineOperand, 4> ImplicitOperands;
- if (MCID.ImplicitDefs)
- for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
- ImplicitOperands.push_back(
- MachineOperand::CreateReg(*ImpDefs, true, true));
- if (MCID.ImplicitUses)
- for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
- ImplicitOperands.push_back(
- MachineOperand::CreateReg(*ImpUses, false, true));
+ for (MCPhysReg ImpDef : MCID.implicit_defs())
+ ImplicitOperands.push_back(MachineOperand::CreateReg(ImpDef, true, true));
+ for (MCPhysReg ImpUse : MCID.implicit_uses())
+ ImplicitOperands.push_back(MachineOperand::CreateReg(ImpUse, false, true));
const auto *TRI = MF.getSubtarget().getRegisterInfo();
assert(TRI && "Expected target register info");
@@ -1648,7 +1704,7 @@ bool MIParser::assignRegisterTies(MachineInstr &MI,
}
bool MIParser::parseRegisterOperand(MachineOperand &Dest,
- Optional<unsigned> &TiedDefIdx,
+ std::optional<unsigned> &TiedDefIdx,
bool IsDef) {
unsigned Flags = IsDef ? RegState::Define : 0;
while (Token.isRegisterFlag()) {
@@ -1666,11 +1722,11 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
if (Token.is(MIToken::dot)) {
if (parseSubRegisterIndex(SubReg))
return true;
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
return error("subregister index expects a virtual register");
}
if (Token.is(MIToken::colon)) {
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
return error("register class specification expects a virtual register");
lex();
if (parseRegisterClassOrBank(*RegInfo))
@@ -1700,7 +1756,7 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
}
} else if (consumeIfPresent(MIToken::lparen)) {
// Virtual registers may have a type with GlobalISel.
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
return error("unexpected type on physical register");
LLT Ty;
@@ -1715,7 +1771,7 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr));
MRI.setType(Reg, Ty);
- } else if (Register::isVirtualRegister(Reg)) {
+ } else if (Reg.isVirtual()) {
// Generic virtual registers must have a type.
// If we end up here this means the type hasn't been specified and
// this is bad!
@@ -1744,9 +1800,12 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::IntegerLiteral));
const APSInt &Int = Token.integerValue();
- if (Int.getMinSignedBits() > 64)
+ if (auto SImm = Int.trySExtValue(); Int.isSigned() && SImm.has_value())
+ Dest = MachineOperand::CreateImm(*SImm);
+ else if (auto UImm = Int.tryZExtValue(); !Int.isSigned() && UImm.has_value())
+ Dest = MachineOperand::CreateImm(*UImm);
+ else
return error("integer literal is too large to be an immediate operand");
- Dest = MachineOperand::CreateImm(Int.getExtValue());
lex();
return false;
}
@@ -1813,7 +1872,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
return false;
}
-// See LLT implemntation for bit size limits.
+// See LLT implementation for bit size limits.
static bool verifyScalarSize(uint64_t Size) {
return Size != 0 && isUInt<16>(Size);
}
@@ -2681,6 +2740,37 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseDbgInstrRefOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_dbg_instr_ref));
+
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected syntax dbg-instr-ref(<unsigned>, <unsigned>)");
+
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isNegative())
+ return error("expected unsigned integer for instruction index");
+ uint64_t InstrIdx = Token.integerValue().getZExtValue();
+ assert(InstrIdx <= std::numeric_limits<unsigned>::max() &&
+ "Instruction reference's instruction index is too large");
+ lex();
+
+ if (expectAndConsume(MIToken::comma))
+ return error("expected syntax dbg-instr-ref(<unsigned>, <unsigned>)");
+
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isNegative())
+ return error("expected unsigned integer for operand index");
+ uint64_t OpIdx = Token.integerValue().getZExtValue();
+ assert(OpIdx <= std::numeric_limits<unsigned>::max() &&
+ "Instruction reference's operand index is too large");
+ lex();
+
+ if (expectAndConsume(MIToken::rparen))
+ return error("expected syntax dbg-instr-ref(<unsigned>, <unsigned>)");
+
+ Dest = MachineOperand::CreateDbgInstrRef(InstrIdx, OpIdx);
+ return false;
+}
+
bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::kw_target_index));
lex();
@@ -2754,7 +2844,7 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
MachineOperand &Dest,
- Optional<unsigned> &TiedDefIdx) {
+ std::optional<unsigned> &TiedDefIdx) {
switch (Token.kind()) {
case MIToken::kw_implicit:
case MIToken::kw_implicit_define:
@@ -2832,6 +2922,8 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
return parsePredicateOperand(Dest);
case MIToken::kw_shufflemask:
return parseShuffleMaskOperand(Dest);
+ case MIToken::kw_dbg_instr_ref:
+ return parseDbgInstrRefOperand(Dest);
case MIToken::Error:
return true;
case MIToken::Identifier:
@@ -2848,7 +2940,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
if (const auto *Formatter = TII->getMIRFormatter()) {
return parseTargetImmMnemonic(OpCode, OpIdx, Dest, *Formatter);
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
default:
// FIXME: Parse the MCSymbol machine operand.
@@ -2859,7 +2951,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
bool MIParser::parseMachineOperandAndTargetFlags(
const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest,
- Optional<unsigned> &TiedDefIdx) {
+ std::optional<unsigned> &TiedDefIdx) {
unsigned TF = 0;
bool HasTargetFlags = false;
if (Token.is(MIToken::kw_target_flags)) {
@@ -2918,6 +3010,19 @@ bool MIParser::parseOffset(int64_t &Offset) {
return false;
}
+bool MIParser::parseIRBlockAddressTaken(BasicBlock *&BB) {
+ assert(Token.is(MIToken::kw_ir_block_address_taken));
+ lex();
+ if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock))
+ return error("expected basic block after 'ir-block-address-taken'");
+
+ if (parseIRBlock(BB, MF.getFunction()))
+ return true;
+
+ lex();
+ return false;
+}
+
bool MIParser::parseAlignment(uint64_t &Alignment) {
assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign));
lex();
@@ -3378,6 +3483,22 @@ bool MIParser::parseHeapAllocMarker(MDNode *&Node) {
return false;
}
+bool MIParser::parsePCSections(MDNode *&Node) {
+ assert(Token.is(MIToken::kw_pcsections) &&
+ "Invalid token for a PC sections!");
+ lex();
+ parseMDNode(Node);
+ if (!Node)
+ return error("expected an MDNode after 'pcsections'");
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
+ return false;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine operand");
+ lex();
+ return false;
+}
+
static void initSlots2BasicBlocks(
const Function &F,
DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
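
parseDbgInstrRefOperand accepts the textual form dbg-instr-ref(<instruction-number>, <operand-index>). To make the accepted shape concrete, here is a hedged standalone parser for just that grammar — the helper is illustrative, not part of MIParser:

#include <cstdio>
#include <optional>
#include <utility>

// Parses "dbg-instr-ref(<unsigned>, <unsigned>)"; returns std::nullopt
// on any syntax error, mirroring the error paths above.
static std::optional<std::pair<unsigned, unsigned>>
parseDbgInstrRef(const char *S) {
  unsigned InstrIdx = 0, OpIdx = 0;
  int Consumed = 0;
  if (std::sscanf(S, "dbg-instr-ref(%u , %u )%n", &InstrIdx, &OpIdx,
                  &Consumed) != 2 ||
      S[Consumed] != '\0')
    return std::nullopt;
  return std::make_pair(InstrIdx, OpIdx);
}

int main() {
  auto R = parseDbgInstrRef("dbg-instr-ref(4, 0)");
  return R && R->first == 4 && R->second == 0 ? 0 : 1;
}
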
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index aa9522bc3459..a20c2bfe6c0f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -234,7 +234,8 @@ MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
// Create an empty module when the MIR file is empty.
NoMIRDocuments = true;
auto M = std::make_unique<Module>(Filename, Context);
- if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple()))
+ if (auto LayoutOverride =
+ DataLayoutCallback(M->getTargetTriple(), M->getDataLayoutStr()))
M->setDataLayout(*LayoutOverride);
return M;
}
@@ -257,7 +258,8 @@ MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
} else {
// Create an new, empty module.
M = std::make_unique<Module>(Filename, Context);
- if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple()))
+ if (auto LayoutOverride =
+ DataLayoutCallback(M->getTargetTriple(), M->getDataLayoutStr()))
M->setDataLayout(*LayoutOverride);
NoLLVMIR = true;
}
@@ -441,6 +443,9 @@ void MIRParserImpl::setupDebugValueTracking(
MF.makeDebugValueSubstitution({Sub.SrcInst, Sub.SrcOp},
{Sub.DstInst, Sub.DstOp}, Sub.Subreg);
}
+
+ // Flag for whether we're supposed to be using DBG_INSTR_REF.
+ MF.setUseDebugInstrRef(YamlMF.UseDebugInstrRef);
}
bool
@@ -659,9 +664,11 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF) {
MachineFunction &MF = PFS.MF;
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
bool Error = false;
// Create VRegs
- auto populateVRegInfo = [&] (const VRegInfo &Info, Twine Name) {
+ auto populateVRegInfo = [&](const VRegInfo &Info, Twine Name) {
Register Reg = Info.VReg;
switch (Info.Kind) {
case VRegInfo::UNKNOWN:
@@ -670,6 +677,14 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
Error = true;
break;
case VRegInfo::NORMAL:
+ if (!Info.D.RC->isAllocatable()) {
+ error(Twine("Cannot use non-allocatable class '") +
+ TRI->getRegClassName(Info.D.RC) + "' for virtual register " +
+ Name + " in function '" + MF.getName() + "'");
+ Error = true;
+ break;
+ }
+
MRI.setRegClass(Reg, Info.D.RC);
if (Info.PreferredReg != 0)
MRI.setSimpleHint(Reg, Info.PreferredReg);
@@ -695,7 +710,6 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
// Compute MachineRegisterInfo::UsedPhysRegMask
for (const MachineBasicBlock &MBB : MF) {
// Make sure MRI knows about registers clobbered by unwinder.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (MBB.isEHPad())
if (auto *RegMask = TRI->getCustomEHPadPreservedMask(MF))
MRI.addPhysRegsUsedFromRegMask(RegMask);
@@ -999,7 +1013,7 @@ SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error,
(HasQuote ? 1 : 0));
// TODO: Translate any source ranges as well.
- return SM.GetMessage(Loc, Error.getKind(), Error.getMessage(), None,
+ return SM.GetMessage(Loc, Error.getKind(), Error.getMessage(), std::nullopt,
Error.getFixIts());
}
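
The DataLayoutCallback now receives the module's current data-layout string alongside the target triple, so an override can depend on what the module already declares. A sketch of a conforming callback, with the alias restated locally (the real typedef uses StringRef parameters):

#include <functional>
#include <optional>
#include <string>

// Local stand-in for the parser's DataLayoutCallbackTy after this change:
// callers now see both the triple and the module's current layout string.
using DataLayoutCallbackTy = std::function<std::optional<std::string>(
    const std::string & /*TargetTriple*/, const std::string & /*OldDL*/)>;

static const DataLayoutCallbackTy KeepExistingLayout =
    [](const std::string &Triple,
       const std::string &OldDL) -> std::optional<std::string> {
  // Only override when the module carries no layout of its own.
  if (OldDL.empty() && Triple.rfind("x86_64", 0) == 0)
    return std::string("e-m:e-i64:64-n8:16:32:64-S128");
  return std::nullopt; // keep whatever the module already has
};
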
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
index 25823b1567f7..0a4b28ac79a7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -200,6 +200,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.HasEHCatchret = MF.hasEHCatchret();
YamlMF.HasEHScopes = MF.hasEHScopes();
YamlMF.HasEHFunclets = MF.hasEHFunclets();
+ YamlMF.UseDebugInstrRef = MF.useDebugInstrRef();
YamlMF.Legalized = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Legalized);
@@ -306,13 +307,13 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
// Print the virtual register definitions.
for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
yaml::VirtualRegisterDefinition VReg;
VReg.ID = I;
if (RegInfo.getVRegName(Reg) != "")
continue;
::printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI);
- unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
+ Register PreferredReg = RegInfo.getSimpleHint(Reg);
if (PreferredReg)
printRegMIR(PreferredReg, VReg.PreferredRegister, TRI);
MF.VirtualRegisters.push_back(VReg);
@@ -819,6 +820,19 @@ void MIPrinter::print(const MachineInstr &MI) {
HeapAllocMarker->printAsOperand(OS, MST);
NeedComma = true;
}
+ if (MDNode *PCSections = MI.getPCSections()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " pcsections ";
+ PCSections->printAsOperand(OS, MST);
+ NeedComma = true;
+ }
+ if (uint32_t CFIType = MI.getCFIType()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " cfi-type " << CFIType;
+ NeedComma = true;
+ }
if (auto Num = MI.peekDebugInstrNum()) {
if (NeedComma)
@@ -880,7 +894,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
MachineOperand::printSubRegIdx(OS, Op.getImm(), TRI);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case MachineOperand::MO_Register:
case MachineOperand::MO_CImmediate:
case MachineOperand::MO_FPImmediate:
@@ -897,6 +911,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
case MachineOperand::MO_IntrinsicID:
case MachineOperand::MO_Predicate:
case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_DbgInstrRef:
case MachineOperand::MO_ShuffleMask: {
unsigned TiedOperandIdx = 0;
if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index a2abe71a6bd7..e634a2b284c3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -62,7 +62,8 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
/* HashConstantPoolIndices */ true,
/* HashMemOperands */ true);
assert(Hash && "Expected non-zero Hash");
- return std::to_string(Hash).substr(0, 5);
+ OS << format_hex_no_prefix(Hash, 16, true);
+ return OS.str();
}
// Gets a hashable artifact from a given MachineOperand (ie an unsigned).
@@ -76,7 +77,7 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
MO.getType(), MO.getTargetFlags(),
MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
case MachineOperand::MO_Register:
- if (Register::isVirtualRegister(MO.getReg()))
+ if (MO.getReg().isVirtual())
return MRI.getVRegDef(MO.getReg())->getOpcode();
return MO.getReg();
case MachineOperand::MO_Immediate:
@@ -112,6 +113,7 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
case MachineOperand::MO_Metadata:
case MachineOperand::MO_MCSymbol:
case MachineOperand::MO_ShuffleMask:
+ case MachineOperand::MO_DbgInstrRef:
return 0;
}
llvm_unreachable("Unexpected MachineOperandType.");
@@ -132,7 +134,8 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
}
auto HashMI = hash_combine_range(MIOperands.begin(), MIOperands.end());
- return std::to_string(HashMI).substr(0, 5);
+ OS << format_hex_no_prefix(HashMI, 16, true);
+ return OS.str();
}
unsigned VRegRenamer::createVirtualRegister(unsigned VReg) {
@@ -153,7 +156,7 @@ bool VRegRenamer::renameInstsInMBB(MachineBasicBlock *MBB) {
// Look for instructions that define VRegs in operand 0.
MachineOperand &MO = Candidate.getOperand(0);
// Avoid non regs, instructions defining physical regs.
- if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
VRegs.push_back(
NamedVReg(MO.getReg(), Prefix + getInstructionOpcodeHash(Candidate)));
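
Both hash-to-name helpers stop truncating a decimal rendering to five characters and print the full 64-bit hash as 16 hex digits, removing an easy source of name collisions. The effect in standalone form, with iostream manipulators standing in for LLVM's format_hex_no_prefix:

#include <cstdint>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>

static std::string hashName(uint64_t Hash) {
  // Comparable rendering to format_hex_no_prefix(Hash, 16, true):
  // zero-padded, 16 digits, upper-case, no "0x" prefix.
  std::ostringstream OS;
  OS << std::hex << std::uppercase << std::setfill('0') << std::setw(16)
     << Hash;
  return OS.str();
}

int main() {
  // The old std::to_string(Hash).substr(0, 5) kept only the first five
  // decimal digits, so these two distinct hashes used to collide:
  std::cout << hashName(1234567890123ull) << '\n'
            << hashName(1234569999999ull) << '\n';
}
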
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index d21d552227cf..5cc8ad3d609e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -15,10 +15,12 @@
#include "RegAllocGreedy.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
-#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API)
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TFLITE)
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
+#include "llvm/Analysis/Utils/TrainingLogger.h"
#endif
+#include "MLRegallocEvictAdvisor.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
@@ -51,7 +53,7 @@ using CompiledModelType = NoopSavedModelImpl;
#endif
// Options that only make sense in development mode
-#ifdef LLVM_HAVE_TF_API
+#ifdef LLVM_HAVE_TFLITE
#include "RegAllocScore.h"
#include "llvm/Analysis/Utils/TFUtils.h"
@@ -63,7 +65,14 @@ static cl::opt<std::string> ModelUnderTraining(
"regalloc-model", cl::Hidden,
cl::desc("The model being trained for register allocation eviction"));
-#endif // #ifdef LLVM_HAVE_TF_API
+static cl::opt<bool> EnableDevelopmentFeatures(
+ "regalloc-enable-development-features", cl::Hidden,
+ cl::desc("Whether or not to enable features under development for the ML "
+ "regalloc advisor"));
+
+#else
+static const bool EnableDevelopmentFeatures = false;
+#endif // #ifdef LLVM_HAVE_TFLITE
extern cl::opt<unsigned> EvictInterferenceCutoff;
@@ -89,6 +98,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
+ AU.addRequired<RegAllocPriorityAdvisorAnalysis>();
AU.addRequired<MachineBlockFrequencyInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -109,20 +119,9 @@ INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass",
// Common ML Advisor declarations
// ===================================
namespace {
-// This is the maximum number of interfererring ranges. That's the number of
-// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
-// For X86, that's 32.
-// TODO: find a way to get this, statically, in a programmatic way.
-static const int64_t MaxInterferences = 32;
-
-// Logically, we can think of the feature set given to the evaluator as a 2D
-// matrix. The rows are the features (see next). The columns correspond to the
-// interferences. We treat the candidate virt reg as an 'interference', too, as
-// its feature set is the same as that of the interferring ranges. So we'll have
-// MaxInterferences + 1 columns and by convention, we will use the last column
-// for the virt reg seeking allocation.
-static const int64_t CandidateVirtRegPos = MaxInterferences;
-static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1;
+// The model can only accept a specified number of opcodes and will error out if
+// fed an opcode it hasn't seen before. This constant sets the current cutoff.
+static const int OpcodeValueCutoff = 17716;
// Most features are as described above, so we'll reuse this vector in defining
// them.
@@ -192,25 +191,48 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
"lowest stage of an interval in this LR") \
M(float, progress, {1}, "ratio of current queue size to initial size")
-// The model learns to pick one of the mask == 1 interferences. This is the name
-// of the output tensor.
-// The contract with the model is that the output will be guaranteed to be to a
-// mask == 1 position.
-// Using a macro here to avoid 'not used' warnings (and keep cond compilation to
-// a minimum)
+#ifdef LLVM_HAVE_TFLITE
+#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) \
+ M(int64_t, instructions, InstructionsShape, \
+ "Opcodes of the instructions covered by the eviction problem")
+
+#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) \
+ M(int64_t, instructions_mapping, InstructionsMappingShape, \
+ "A binary matrix mapping LRs to instruction opcodes") \
+ M(float, mbb_frequencies, MBBFrequencyShape, \
+ "A vector of machine basic block frequencies") \
+ M(int64_t, mbb_mapping, InstructionsShape, \
+ "A vector of indicies mapping instructions to MBBs")
+#else
+#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)
+#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)
+#endif
+
+// The model learns to pick one of the mask == 1 interferences. This is the
+// name of the output tensor. The contract with the model is that the output
+// will be guaranteed to be to a mask == 1 position. Using a macro here to
+// avoid 'not used' warnings (and keep cond compilation to a minimum)
#define DecisionName "index_to_evict"
// Named features index.
enum FeatureIDs {
-#define _FEATURE_IDX(_, name, __, ___) name,
- RA_EVICT_FEATURES_LIST(_FEATURE_IDX)
+#define _FEATURE_IDX_SIMPLE(_, name, __, ___) name
+#define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D),
+ RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount,
+#ifdef LLVM_HAVE_TFLITE
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX_SIMPLE) = FeatureCount,
+#else
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX)
+#endif // #ifdef LLVM_HAVE_TFLITE
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_FEATURE_IDX) FeaturesWithDevelopmentCount
#undef _FEATURE_IDX
- FeatureCount
+#undef _FEATURE_IDX_SIMPLE
};
// The ML advisor will typically have a sparse input to the evaluator, because
// various phys regs won't be available. It's easier (maintenance-wise) to
-// bulk-reset the state of the evaluator each time we are about to use it again.
+// bulk-reset the state of the evaluator each time we are about to use it
+// again.
template <typename T> size_t getTotalSize(const std::vector<int64_t> &Shape) {
size_t Ret = sizeof(T);
for (const auto V : Shape)
@@ -223,11 +245,15 @@ void resetInputs(MLModelRunner &Runner) {
std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0, \
getTotalSize<TYPE>(SHAPE));
RA_EVICT_FEATURES_LIST(_RESET)
+ if (EnableDevelopmentFeatures) {
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_RESET)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_RESET)
#undef _RESET
+ }
}
-// Per-live interval components that get aggregated into the feature values that
-// will be passed to the evaluator.
+// Per-live interval components that get aggregated into the feature values
+// that will be passed to the evaluator.
struct LIFeatureComponents {
double R = 0;
double W = 0;
@@ -241,7 +267,8 @@ struct LIFeatureComponents {
using CandidateRegList =
std::array<std::pair<MCRegister, bool>, NumberOfInterferences>;
-using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>;
+using FeaturesListNormalizer =
+ llvm::SmallVector<float, FeatureIDs::FeatureCount>;
/// The ML evictor (commonalities between release and development mode)
class MLEvictAdvisor : public RegAllocEvictionAdvisor {
@@ -259,10 +286,10 @@ protected:
// error, and we shouldn't be asking for it here.
const MLModelRunner &getRunner() const { return *Runner; }
- /// This just calls Evaluate on the Runner, but in the development mode case,
- /// if we're just capturing the log of the default advisor, it needs to call
- /// the latter instead, so we need to pass all the necessary parameters for
- /// it. In the development case, it will also log.
+ /// This just calls Evaluate on the Runner, but in the development mode
+ /// case, if we're just capturing the log of the default advisor, it needs
+ /// to call the latter instead, so we need to pass all the necessary
+ /// parameters for it. In the development case, it will also log.
virtual int64_t
tryFindEvictionCandidatePosition(const LiveInterval &VirtReg,
const AllocationOrder &Order,
@@ -274,8 +301,8 @@ protected:
bool
loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
bool IsHint, const SmallVirtRegSet &FixedRegisters,
- std::array<float, FeatureIDs::FeatureCount> &Largest,
- size_t Pos) const;
+ llvm::SmallVectorImpl<float> &Largest, size_t Pos,
+ SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;
private:
static float getInitialQueueSize(const MachineFunction &MF);
@@ -286,11 +313,12 @@ private:
const SmallVirtRegSet &FixedRegisters) const override;
void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals,
- std::array<float, FeatureIDs::FeatureCount> &Largest,
- size_t Pos, int64_t IsHint, int64_t LocalIntfsCount,
- float NrUrgent) const;
+ llvm::SmallVectorImpl<float> &Largest, size_t Pos,
+ int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent,
+ SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;
- // Point-in-time: we didn't learn this, so we always delegate to the default.
+ // Point-in-time: we didn't learn this, so we always delegate to the
+ // default.
bool canEvictHintInterference(
const LiveInterval &VirtReg, MCRegister PhysReg,
const SmallVirtRegSet &FixedRegisters) const override {
@@ -302,9 +330,9 @@ private:
getLIFeatureComponents(const LiveInterval &LI) const;
// Hold on to a default advisor for:
- // 1) the implementation of canEvictHintInterference, because we didn't learn
- // that nuance yet;
- // 2) for bootstrapping (logging) in the development mode case.
+ // 1) the implementation of canEvictHintInterference, because we didn't
+ // learn that nuance yet; 2) for bootstrapping (logging) in the development
+ // mode case.
const DefaultEvictionAdvisor DefaultAdvisor;
MLModelRunner *const Runner;
const MachineBlockFrequencyInfo &MBFI;
@@ -322,10 +350,6 @@ private:
#define _DECL_FEATURES(type, name, shape, _) \
TensorSpec::createSpec<type>(#name, shape),
-static const std::vector<TensorSpec> InputFeatures{
- {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)},
-};
-#undef _DECL_FEATURES
// ===================================
// Release (AOT) - specifics
// ===================================
@@ -333,13 +357,23 @@ class ReleaseModeEvictionAdvisorAnalysis final
: public RegAllocEvictionAdvisorAnalysis {
public:
ReleaseModeEvictionAdvisorAnalysis()
- : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {}
+ : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {
+ if (EnableDevelopmentFeatures) {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(
+ _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
+ } else {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
+ }
+ }
// support for isa<> and dyn_cast.
static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
return R->getAdvisorMode() == AdvisorMode::Release;
}
private:
+ std::vector<TensorSpec> InputFeatures;
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineLoopInfo>();
@@ -363,25 +397,18 @@ private:
// ===================================
//
// Features we log
-#ifdef LLVM_HAVE_TF_API
+#ifdef LLVM_HAVE_TFLITE
static const TensorSpec Output =
TensorSpec::createSpec<int64_t>(DecisionName, {1});
static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
// Features we bind on the model. The tensor names have a prefix, and we also
-// need to include some tensors that are expected to be present by the training
-// algo.
+// need to include some tensors that are expected to be present by the
+// training algo.
// TODO: can we just get rid of these?
#define _DECL_TRAIN_FEATURES(type, name, shape, _) \
TensorSpec::createSpec<type>(std::string("action_") + #name, shape),
-static const std::vector<TensorSpec> TrainingInputFeatures{
- {RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
- TensorSpec::createSpec<float>("action_discount", {1}),
- TensorSpec::createSpec<int32_t>("action_step_type", {1}),
- TensorSpec::createSpec<float>("action_reward", {1})}};
-#undef _DECL_TRAIN_FEATURES
-
class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
public:
DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
@@ -403,30 +430,74 @@ class DevelopmentModeEvictionAdvisorAnalysis final
: public RegAllocEvictionAdvisorAnalysis {
public:
DevelopmentModeEvictionAdvisorAnalysis()
- : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {}
+ : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {
+ if (EnableDevelopmentFeatures) {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(
+ _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
+ TrainingInputFeatures = {
+ RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
+ RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES)
+ RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES)
+ TensorSpec::createSpec<float>("action_discount", {1}),
+ TensorSpec::createSpec<int32_t>("action_step_type", {1}),
+ TensorSpec::createSpec<float>("action_reward", {1})};
+ } else {
+ InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
+ TrainingInputFeatures = {
+ RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
+ TensorSpec::createSpec<float>("action_discount", {1}),
+ TensorSpec::createSpec<int32_t>("action_step_type", {1}),
+ TensorSpec::createSpec<float>("action_reward", {1})};
+ }
+ }
// support for isa<> and dyn_cast.
static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
return R->getAdvisorMode() == AdvisorMode::Development;
}
- /// get the logger for the given function, or nullptr if we didn't collect
- /// one. This is used to inject the score by the RegAllocScoring pass.
- Logger *getLogger(const MachineFunction &MF) const {
- auto I = LogMap.find(MF.getName());
- if (I == LogMap.end())
- return nullptr;
- return I->second.get();
+ void logRewardIfNeeded(const MachineFunction &MF,
+ llvm::function_ref<float()> GetReward) override {
+ if (!Log)
+ return;
+ // The function pass manager would run all the function passes for a
+ // function, so we assume the last context belongs to this function. If
+ // this invariant ever changes, we can implement switching contexts at
+ // that time; as things stand, a mismatch is an error.
+ if (Log->currentContext() != MF.getName()) {
+ MF.getFunction().getContext().emitError(
+ "The training log context shouldn't have changed.");
+ }
+ if (Log->hasObservationInProgress())
+ Log->logReward<float>(GetReward());
}
private:
+ std::vector<TensorSpec> InputFeatures;
+ std::vector<TensorSpec> TrainingInputFeatures;
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineLoopInfo>();
RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
}
- // Save all the logs (when requested).
- bool doFinalization(Module &M) override {
+ bool doInitialization(Module &M) override {
+ LLVMContext &Ctx = M.getContext();
+ if (ModelUnderTraining.empty() && TrainingLog.empty()) {
+ Ctx.emitError("Regalloc development mode should be requested with at "
+ "least logging enabled and/or a training model");
+ return false;
+ }
+ if (ModelUnderTraining.empty())
+ Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures);
+ else
+ Runner = ModelUnderTrainingRunner::createAndEnsureValid(
+ Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures);
+ if (!Runner) {
+ Ctx.emitError("Regalloc: could not set up the model runner");
+ return false;
+ }
if (TrainingLog.empty())
return false;
std::error_code EC;
@@ -435,57 +506,35 @@ private:
M.getContext().emitError(EC.message() + ":" + TrainingLog);
return false;
}
- Logger::flushLogs(*OS, LogMap);
+ std::vector<TensorSpec> LFS = InputFeatures;
+ if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get()))
+ append_range(LFS, MUTR->extraOutputsForLoggingSpecs());
+ // We always log the output; in particular, if we're not evaluating, we
+ // don't have an output spec json file. That's why we handle the
+ // 'normal' output separately.
+ LFS.push_back(Output);
+
+ Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
+ /*IncludeReward*/ true);
return false;
}
std::unique_ptr<RegAllocEvictionAdvisor>
getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
- LLVMContext &Ctx = MF.getFunction().getContext();
- if (ModelUnderTraining.empty() && TrainingLog.empty()) {
- Ctx.emitError("Regalloc development mode should be requested with at "
- "least logging enabled and/or a training model");
+ if (!Runner)
return nullptr;
- }
- if (!Runner) {
- if (ModelUnderTraining.empty())
- Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures);
- else
- Runner = ModelUnderTrainingRunner::createAndEnsureValid(
- Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures);
- if (!Runner) {
- Ctx.emitError("Regalloc: could not set up the model runner");
- return nullptr;
- }
- }
-
- Logger *Log = nullptr;
- if (!TrainingLog.empty()) {
- std::vector<LoggedFeatureSpec> LFS;
- for (const auto &FS : InputFeatures)
- LFS.push_back({FS, None});
- if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get()))
- if (MUTR->outputLoggedFeatureSpecs().size() > 1)
- append_range(LFS, drop_begin(MUTR->outputLoggedFeatureSpecs()));
- // We always log the output; in particular, if we're not evaluating, we
- // don't have an output spec json file. That's why we handle the
- // 'normal' output separately.
- LFS.push_back({Output, None});
- auto I = LogMap.insert(std::make_pair(
- MF.getFunction().getName(),
- std::make_unique<Logger>(LFS, Reward, /*IncludeReward*/ true)));
- assert(I.second);
- Log = I.first->second.get();
- }
+ if (Log)
+ Log->switchContext(MF.getName());
return std::make_unique<DevelopmentModeEvictAdvisor>(
MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
- getAnalysis<MachineLoopInfo>(), Log);
+ getAnalysis<MachineLoopInfo>(), Log.get());
}
std::unique_ptr<MLModelRunner> Runner;
- StringMap<std::unique_ptr<Logger>> LogMap;
+ std::unique_ptr<Logger> Log;
};
-#endif //#ifdef LLVM_HAVE_TF_API
+
+#endif //#ifdef LLVM_HAVE_TFLITE
} // namespace
float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) {
@@ -528,8 +577,9 @@ int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
bool MLEvictAdvisor::loadInterferenceFeatures(
const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
- const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest,
- size_t Pos) const {
+ const SmallVirtRegSet &FixedRegisters,
+ llvm::SmallVectorImpl<float> &Largest, size_t Pos,
+ llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
// It is only possible to evict virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) {
// leave unavailable
@@ -546,8 +596,8 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
SmallVector<const LiveInterval *, MaxInterferences> InterferingIntervals;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- // Different from the default heuristic, we don't make any assumptions about
- // what having more than 10 results in the query may mean.
+ // Unlike the default heuristic, we don't make any assumptions about what
+ // having more than 10 results in the query may mean.
const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
if (IFIntervals.empty() && InterferingIntervals.empty())
continue;
@@ -555,7 +605,7 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
return false;
InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end());
for (const LiveInterval *Intf : reverse(IFIntervals)) {
- assert(Register::isVirtualRegister(Intf->reg()) &&
+ assert(Intf->reg().isVirtual() &&
"Only expecting virtual register interference from query");
// This is the same set of legality checks as in the default case: don't
// try to evict fixed regs or 'done' ones. Also don't break cascades,
@@ -588,7 +638,7 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
// OK, so if we made it this far, this LR is an eviction candidate, load its
// features.
extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs,
- NrUrgent);
+ NrUrgent, LRPosInfo);
return true;
}
@@ -604,14 +654,14 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
// max<uint8_t>, then any of the costs of the legally-evictable intervals
// would be lower. When that happens, one of those will be selected.
// Therefore, we allow the candidate to be selected, unless the candidate is
- // unspillable, in which case it would be incorrect to not find a register for
- // it.
+ // unspillable, in which case it would be incorrect to not find a register
+ // for it.
const bool MustFindEviction =
(!VirtReg.isSpillable() && CostPerUseLimit == static_cast<uint8_t>(~0u));
// Number of available candidates - if 0, no need to continue.
size_t Available = 0;
- // Make sure we don't have leftover partial state from an attempt where we had
- // no available candidates and bailed out early.
+ // Make sure we don't have leftover partial state from an attempt where we
+ // had no available candidates and bailed out early.
resetInputs(*Runner);
// Track the index->register mapping because AllocationOrder doesn't do that
@@ -624,16 +674,15 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
// only normalize (some of) the float features, but it's just simpler to
// dimension 'Largest' to all the features, especially since we have the
// 'DoNotNormalize' list.
- FeaturesListNormalizer Largest;
- Largest.fill(0.0);
-
- // Same overal idea as in the default eviction policy - we visit the values of
- // AllocationOrder one at a time. If it's not legally available, we mask off
- // the corresponding feature column (==do nothing because we already reset all
- // the features to 0)
- // Use Pos to capture the column we load features at - in AllocationOrder
- // order.
+ FeaturesListNormalizer Largest(FeatureIDs::FeatureCount, 0.0);
+
+ // Same overall idea as in the default eviction policy - we visit the values
+ // of AllocationOrder one at a time. If it's not legally available, we mask
+ // off the corresponding feature column (== do nothing, because we already
+ // reset all the features to 0). Use Pos to capture the column we load
+ // features at - in AllocationOrder order.
size_t Pos = 0;
+ SmallVector<LRStartEndInfo, NumberOfInterferences> LRPosInfo;
for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
++I, ++Pos) {
MCRegister PhysReg = *I;
@@ -643,7 +692,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
continue;
}
if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
- Largest, Pos)) {
+ Largest, Pos, LRPosInfo)) {
++Available;
Regs[Pos] = std::make_pair(PhysReg, true);
}
@@ -659,10 +708,39 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
Regs[CandidateVirtRegPos].second = !MustFindEviction;
if (!MustFindEviction)
extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest,
- CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0,
- /*NrUrgent*/ 0.0);
+ CandidateVirtRegPos, /*IsHint*/ 0,
+ /*LocalIntfsCount*/ 0,
+ /*NrUrgent*/ 0.0, LRPosInfo);
assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
"nothing to allocate initially.");
+#ifdef LLVM_HAVE_TFLITE
+ if (EnableDevelopmentFeatures) {
+ extractInstructionFeatures(
+ LRPosInfo, Runner,
+ [this](SlotIndex InputIndex) -> int {
+ auto *CurrentMachineInstruction =
+ LIS->getInstructionFromIndex(InputIndex);
+ if (!CurrentMachineInstruction) {
+ return -1;
+ }
+ return CurrentMachineInstruction->getOpcode();
+ },
+ [this](SlotIndex InputIndex) -> float {
+ auto *CurrentMachineInstruction =
+ LIS->getInstructionFromIndex(InputIndex);
+ return MBFI.getBlockFreqRelativeToEntryBlock(
+ CurrentMachineInstruction->getParent());
+ },
+ [this](SlotIndex InputIndex) -> MachineBasicBlock * {
+ auto *CurrentMachineInstruction =
+ LIS->getInstructionFromIndex(InputIndex);
+ return CurrentMachineInstruction->getParent();
+ },
+ FeatureIDs::instructions, FeatureIDs::instructions_mapping,
+ FeatureIDs::mbb_frequencies, FeatureIDs::mbb_mapping,
+ LIS->getSlotIndexes()->getLastIndex());
+ }
+#endif // #ifdef LLVM_HAVE_TFLITE
// Normalize the features.
for (auto &V : Largest)
V = V ? V : 1.0;
@@ -746,8 +824,9 @@ MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
// of accumulating the various features, we keep them separate.
void MLEvictAdvisor::extractFeatures(
const SmallVectorImpl<const LiveInterval *> &Intervals,
- std::array<float, FeatureIDs::FeatureCount> &Largest, size_t Pos,
- int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const {
+ llvm::SmallVectorImpl<float> &Largest, size_t Pos, int64_t IsHint,
+ int64_t LocalIntfsCount, float NrUrgent,
+ SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
int64_t NrDefsAndUses = 0;
int64_t NrBrokenHints = 0;
double R = 0.0;
@@ -794,6 +873,13 @@ void MLEvictAdvisor::extractFeatures(
HintWeights += LIFC.HintWeights;
NrRematerializable += LIFC.IsRemat;
+
+ if (EnableDevelopmentFeatures) {
+ for (auto CurrentSegment : LI) {
+ LRPosInfo.push_back(
+ LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos});
+ }
+ }
}
size_t Size = 0;
if (!Intervals.empty()) {
@@ -836,8 +922,143 @@ void MLEvictAdvisor::extractFeatures(
#undef SET
}
+void extractInstructionFeatures(
+ SmallVectorImpl<LRStartEndInfo> &LRPosInfo, MLModelRunner *RegallocRunner,
+ function_ref<int(SlotIndex)> GetOpcode,
+ function_ref<float(SlotIndex)> GetMBBFreq,
+ function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference,
+ const int InstructionsIndex, const int InstructionsMappingIndex,
+ const int MBBFreqIndex, const int MBBMappingIndex,
+ const SlotIndex LastIndex) {
+ // This function extracts instruction-based features relevant to the
+ // eviction problem currently being solved. It ends up extracting four
+ // tensors:
+ // 1 - A vector of size max instruction count. It contains the opcodes of
+ // the instructions spanned by all the intervals in the current instance of
+ // the eviction problem.
+ // 2 - A binary mapping matrix of size (LR count * max instruction count)
+ // which maps where the LRs are live to the actual opcodes for which they
+ // are live.
+ // 3 - A vector of size max supported MBB count storing MBB frequencies,
+ // encompassing all of the MBBs covered by the eviction problem.
+ // 4 - A vector of size max instruction count of indices to members of the
+ // MBB frequency vector, mapping each instruction to its associated MBB.
+
+ // Start off by sorting the segments based on the beginning slot index.
+ std::sort(
+ LRPosInfo.begin(), LRPosInfo.end(),
+ [](LRStartEndInfo A, LRStartEndInfo B) { return A.Begin < B.Begin; });
+ size_t InstructionIndex = 0;
+ size_t CurrentSegmentIndex = 0;
+ SlotIndex CurrentIndex = LRPosInfo[0].Begin;
+ std::map<MachineBasicBlock *, size_t> VisitedMBBs;
+ size_t CurrentMBBIndex = 0;
+ // This loop processes all the segments sequentially, starting at the
+ // beginning slot index of the first segment and iterating through all the
+ // slot indices up to that segment's end index (while checking for overlaps
+ // with segments that start at greater slot indices). After hitting the end
+ // index, the current segment index is bumped; this repeats until either all
+ // segments are processed or the max instruction count is hit, at which
+ // point everything is simply truncated.
+ while (true) {
+ // If the index that we are currently at is within the current segment and
+ // we haven't hit the max instruction count, continue processing the current
+ // segment.
+ while (CurrentIndex <= LRPosInfo[CurrentSegmentIndex].End &&
+ InstructionIndex < ModelMaxSupportedInstructionCount) {
+ int CurrentOpcode = GetOpcode(CurrentIndex);
+ // If the current machine instruction is null, skip it.
+ if (CurrentOpcode == -1) {
+ // If we're currently at the last index in the SlotIndex analysis,
+ // we can't go any further, so return from the function.
+ if (CurrentIndex >= LastIndex) {
+ return;
+ }
+ CurrentIndex = CurrentIndex.getNextIndex();
+ continue;
+ }
+ MachineBasicBlock *CurrentMBBReference = GetMBBReference(CurrentIndex);
+ if (VisitedMBBs.count(CurrentMBBReference) == 0) {
+ VisitedMBBs[CurrentMBBReference] = CurrentMBBIndex;
+ ++CurrentMBBIndex;
+ }
+ extractMBBFrequency(CurrentIndex, InstructionIndex, VisitedMBBs,
+ GetMBBFreq, CurrentMBBReference, RegallocRunner,
+ MBBFreqIndex, MBBMappingIndex);
+ // Current code assumes we're not going to get any disjoint segments.
+ assert(LRPosInfo[CurrentSegmentIndex].Begin <= CurrentIndex);
+ RegallocRunner->getTensor<int64_t>(InstructionsIndex)[InstructionIndex] =
+ CurrentOpcode < OpcodeValueCutoff ? CurrentOpcode : 0;
+ // Set the value in the binary mapping matrix for the current instruction.
+ auto CurrentSegmentPosition = LRPosInfo[CurrentSegmentIndex].Pos;
+ RegallocRunner->getTensor<int64_t>(
+ InstructionsMappingIndex)[CurrentSegmentPosition *
+ ModelMaxSupportedInstructionCount +
+ InstructionIndex] = 1;
+ // All of the segments are sorted based on the beginning slot index, but
+ // this doesn't mean that the beginning slot index of the next segment is
+ // after the end segment of the one being currently processed. This while
+ // loop checks for overlapping segments and modifies the portion of the
+ // column in the mapping matrix for the currently processed instruction
+ // for the LR it is checking. Also make sure that the beginning of the
+ // current segment we're checking for overlap in is less than the current
+ // index, otherwise we're done checking overlaps.
+ size_t OverlapCheckCurrentSegment = CurrentSegmentIndex + 1;
+ while (OverlapCheckCurrentSegment < LRPosInfo.size() &&
+ LRPosInfo[OverlapCheckCurrentSegment].Begin <= CurrentIndex) {
+ auto OverlapCurrentSegmentPosition =
+ LRPosInfo[OverlapCheckCurrentSegment].Pos;
+ if (LRPosInfo[OverlapCheckCurrentSegment].End >= CurrentIndex) {
+ RegallocRunner->getTensor<int64_t>(
+ InstructionsMappingIndex)[OverlapCurrentSegmentPosition *
+ ModelMaxSupportedInstructionCount +
+ InstructionIndex] = 1;
+ }
+ ++OverlapCheckCurrentSegment;
+ }
+ ++InstructionIndex;
+ if (CurrentIndex >= LastIndex) {
+ return;
+ }
+ CurrentIndex = CurrentIndex.getNextIndex();
+ }
+ // If we've just finished processing the last segment, or if we've hit
+ // the maximum number of instructions, break out of the loop.
+ if (CurrentSegmentIndex == LRPosInfo.size() - 1 ||
+ InstructionIndex >= ModelMaxSupportedInstructionCount) {
+ break;
+ }
+ // If the segments are not overlapping, we need to move to the beginning
+ // index of the next segment to avoid having instructions not attached to
+ // any register.
+ if (LRPosInfo[CurrentSegmentIndex + 1].Begin >
+ LRPosInfo[CurrentSegmentIndex].End) {
+ CurrentIndex = LRPosInfo[CurrentSegmentIndex + 1].Begin;
+ }
+ ++CurrentSegmentIndex;
+ }
+}
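To make the sweep concrete, here is a small hand-worked trace (hypothetical slot indices and opcodes, not real SlotIndex values): two sorted segments, the LR at Pos 0 live over instructions 0..2 and the LR at Pos 1 live over instructions 2..3, overlapping at instruction 2.

// Resulting tensor contents (zero-padded out to the max sizes):
//   instructions:         [op0, op1, op2, op3, 0, ...]
//   instructions_mapping: row Pos 0 -> [1, 1, 1, 0, 0, ...]
//                         row Pos 1 -> [0, 0, 1, 1, 0, ...]
// The 1 at row Pos 1, column 2 is written by the inner
// OverlapCheckCurrentSegment loop while the outer loop is still walking
// segment 0; the outer loop then advances into segment 1's remaining indices.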
+
+void extractMBBFrequency(const SlotIndex CurrentIndex,
+ const size_t CurrentInstructionIndex,
+ std::map<MachineBasicBlock *, size_t> &VisitedMBBs,
+ function_ref<float(SlotIndex)> GetMBBFreq,
+ MachineBasicBlock *CurrentMBBReference,
+ MLModelRunner *RegallocRunner, const int MBBFreqIndex,
+ const int MBBMappingIndex) {
+ size_t CurrentMBBIndex = VisitedMBBs[CurrentMBBReference];
+ float CurrentMBBFreq = GetMBBFreq(CurrentIndex);
+ if (CurrentMBBIndex < ModelMaxSupportedMBBCount) {
+ RegallocRunner->getTensor<float>(MBBFreqIndex)[CurrentMBBIndex] =
+ CurrentMBBFreq;
+ RegallocRunner->getTensor<int64_t>(
+ MBBMappingIndex)[CurrentInstructionIndex] = CurrentMBBIndex;
+ }
+}
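A matching trace for the MBB tensors (hypothetical blocks A and B): the caller assigns each newly seen MBB the next index in VisitedMBBs, and this helper records the block's frequency once and points each instruction at it.

// VisitedMBBs:     {A -> 0, B -> 1}            (indices in visit order)
// mbb_frequencies: [freq(A), freq(B), 0, ...]  (capped at 100 entries)
// mbb_mapping:     instruction i -> 0 or 1     (index of i's MBB)
// Blocks past ModelMaxSupportedMBBCount are skipped by the guard above, so
// their instructions keep the reset value 0 in mbb_mapping.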
+
// Development mode-specific implementations
-#ifdef LLVM_HAVE_TF_API
+#ifdef LLVM_HAVE_TFLITE
+
RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {
return new DevelopmentModeEvictionAdvisorAnalysis();
}
@@ -853,9 +1074,9 @@ int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
} else {
MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate(
VirtReg, Order, CostPerUseLimit, FixedRegisters);
- // Find the index of the selected PhysReg. We need it for logging, otherwise
- // this is wasted cycles (but so would starting development mode without a
- // model nor logging)
+ // Find the index of the selected PhysReg. We need it for logging,
+ // otherwise this is wasted cycles (but so would starting development mode
+ // without a model nor logging)
if (!PhysReg)
Ret = CandidateVirtRegPos;
else
@@ -866,41 +1087,57 @@ int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
}
if (TrainingLog.empty())
return Ret;
+ // TODO(mtrofin): when we support optional rewards, this can go away. In the
+ // meantime, we log the "pretend" reward (0) for the previous observation
+ // before starting a new one.
+ if (Log->hasObservationInProgress())
+ Log->logReward<float>(0.0);
+
+ Log->startObservation();
size_t CurrentFeature = 0;
- for (; CurrentFeature < FeatureIDs::FeatureCount; ++CurrentFeature) {
- Log->logSpecifiedTensorValue(
- CurrentFeature, reinterpret_cast<const char *>(
+ size_t FeatureCount = EnableDevelopmentFeatures
+ ? FeatureIDs::FeaturesWithDevelopmentCount
+ : FeatureIDs::FeatureCount;
+ for (; CurrentFeature < FeatureCount; ++CurrentFeature) {
+ Log->logTensorValue(CurrentFeature,
+ reinterpret_cast<const char *>(
getRunner().getTensorUntyped(CurrentFeature)));
}
if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner()))
- for (size_t I = 1; I < MUTR->outputLoggedFeatureSpecs().size();
+ for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size();
++I, ++CurrentFeature)
- Log->logSpecifiedTensorValue(
+ Log->logTensorValue(
CurrentFeature,
- reinterpret_cast<const char *>(
- MUTR->lastEvaluationResult()->getUntypedTensorValue(I)));
+ reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)));
// The output is right after the features and the extra outputs
- Log->logInt64Value(CurrentFeature, &Ret);
+ Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>(&Ret));
+ Log->endObservation();
return Ret;
}
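Pulling the pieces together, the Logger calls above and in logRewardIfNeeded follow a fixed per-decision order. A skeleton sketch (Buf, OutputId, Decision and Score are placeholders), relying on the apparent invariant that an observation stays 'in progress' until a reward is logged for it:

// Per decision:
Log->startObservation();
Log->logTensorValue(/*TensorId=*/0, Buf); // repeated once per feature tensor
Log->logTensorValue(OutputId, reinterpret_cast<const char *>(&Decision));
Log->endObservation();
// Per function, once the allocation score is known (RegAllocScoring pass):
if (Log->hasObservationInProgress())
  Log->logReward<float>(Score);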
bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
- if (auto *DevModeAnalysis = dyn_cast<DevelopmentModeEvictionAdvisorAnalysis>(
- &getAnalysis<RegAllocEvictionAdvisorAnalysis>()))
- if (auto *Log = DevModeAnalysis->getLogger(MF))
- Log->logFloatFinalReward(static_cast<float>(
+ std::optional<float> CachedReward;
+ auto GetReward = [&]() {
+ if (!CachedReward)
+ CachedReward = static_cast<float>(
calculateRegAllocScore(MF, getAnalysis<MachineBlockFrequencyInfo>())
- .getScore()));
-
+ .getScore());
+ return *CachedReward;
+ };
+
+ getAnalysis<RegAllocEvictionAdvisorAnalysis>().logRewardIfNeeded(MF,
+ GetReward);
+ getAnalysis<RegAllocPriorityAdvisorAnalysis>().logRewardIfNeeded(MF,
+ GetReward);
return false;
}
-#endif // #ifdef LLVM_HAVE_TF_API
+#endif // #ifdef LLVM_HAVE_TFLITE
RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
return new ReleaseModeEvictionAdvisorAnalysis();
}
// In all cases except development mode, we don't need scoring.
-#if !defined(LLVM_HAVE_TF_API)
+#if !defined(LLVM_HAVE_TFLITE)
bool RegAllocScoring::runOnMachineFunction(MachineFunction &) { return false; }
#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h
new file mode 100644
index 000000000000..e36a41154096
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.h
@@ -0,0 +1,93 @@
+//===- MLRegallocEvictAdvisor.h - ML eviction advisor ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Function declarations of utilities related to feature extraction for unit
+// testing.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
+#define LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
+
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+using namespace llvm;
+
+// LRStartEndInfo contains the start and end of a specific live range as
+// slot indices, as well as the index of the physical register it is
+// assigned to (or 1 above the phys reg count if it is the candidate).
+// Used when extracting per-instruction features in the context of a
+// specific eviction problem.
+struct LRStartEndInfo {
+ SlotIndex Begin;
+ SlotIndex End;
+ size_t Pos = 0;
+};
+
+void extractInstructionFeatures(
+ llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo,
+ MLModelRunner *RegallocRunner, function_ref<int(SlotIndex)> GetOpcode,
+ function_ref<float(SlotIndex)> GetMBBFreq,
+ function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference,
+ const int InstructionsIndex, const int InstructionsMappingIndex,
+ const int MBBFreqIndex, const int MBBMappingIndex,
+ const SlotIndex LastIndex);
+
+void extractMBBFrequency(const SlotIndex CurrentIndex,
+ const size_t CurrentInstructionIndex,
+ std::map<MachineBasicBlock *, size_t> &VisitedMBBs,
+ function_ref<float(SlotIndex)> GetMBBFreq,
+ MachineBasicBlock *CurrentMBBReference,
+ MLModelRunner *RegallocRunner, const int MBBFreqIndex,
+ const int MBBMappingIndex);
+
+// This is the maximum number of interfering ranges. That's the number of
+// distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
+// For X86, that's 32.
+// TODO: find a way to get this, statically, in a programmatic way.
+static const int64_t MaxInterferences = 32;
+
+// Logically, we can think of the feature set given to the evaluator as a 2D
+// matrix. The rows are the features (see next). The columns correspond to the
+// interferences. We treat the candidate virt reg as an 'interference', too, as
+// its feature set is the same as that of the interfering ranges. So we'll have
+// MaxInterferences + 1 columns and, by convention, we will use the last column
+// for the virt reg seeking allocation.
+static const int64_t CandidateVirtRegPos = MaxInterferences;
+static const int64_t NumberOfInterferences = CandidateVirtRegPos + 1;
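Spelled out with the X86-flavored numbers above, the column convention is as follows; the static_asserts are an illustrative guard, not part of the patch:

// columns 0..31 : interfering candidates, in AllocationOrder order
// column  32    : the virt reg seeking allocation (CandidateVirtRegPos)
// per-row total : NumberOfInterferences == 33
static_assert(CandidateVirtRegPos == MaxInterferences, "candidate is last");
static_assert(NumberOfInterferences == MaxInterferences + 1,
              "candidate adds one column");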
+
+// The number of instructions that a specific live range might have is variable,
+// but we're passing in a single matrix of instructions, and TensorFlow saved
+// models only support a fixed input size, so we have to cap the number of
+// instructions that can be passed along. The specific value was derived from
+// experimentation such that the majority of eviction problems would be
+// completely covered.
+static const int ModelMaxSupportedInstructionCount = 300;
+
+// When extracting per-instruction features, the advisor will currently create
+// a vector of size ModelMaxSupportedInstructionCount to hold the opcodes of the
+// instructions relevant to the eviction problem, and a NumberOfInterferences *
+// ModelMaxSupportedInstructionCount matrix that maps LRs to the instructions
+// that they span.
+static const std::vector<int64_t> InstructionsShape{
+ 1, ModelMaxSupportedInstructionCount};
+static const std::vector<int64_t> InstructionsMappingShape{
+ 1, NumberOfInterferences, ModelMaxSupportedInstructionCount};
+
+// When extracting mappings between MBBs and individual instructions, we create
+// a vector of MBB frequencies, currently of size 100, which was a value
+// determined through experimentation to encompass the vast majority of eviction
+// problems. The actual mapping is the same shape as the instruction opcodes
+// vector.
+static const int64_t ModelMaxSupportedMBBCount = 100;
+static const std::vector<int64_t> MBBFrequencyShape{1,
+ ModelMaxSupportedMBBCount};
+
+#endif // LLVM_CODEGEN_MLREGALLOCEVICTIONADVISOR_H
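A back-of-the-envelope size check for the shapes declared in this header (derived arithmetic, not text from the patch):

// instructions:         1 x 300      ->   300 x int64_t =  2,400 bytes
// instructions_mapping: 1 x 33 x 300 -> 9,900 x int64_t = 79,200 bytes
// mbb_frequencies:      1 x 100      ->   100 x float   =    400 bytes
// mbb_mapping:          1 x 300      ->   300 x int64_t =  2,400 bytes
// So the per-problem instruction tensors stay well under 100 KiB.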
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp
new file mode 100644
index 000000000000..320a184bdcc5
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp
@@ -0,0 +1,335 @@
+//===- MLRegallocPriorityAdvisor.cpp - ML priority advisor ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the ML priority advisor and reward injection pass
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "RegAllocGreedy.h"
+#include "RegAllocPriorityAdvisor.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/ReleaseModeModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CommandLine.h"
+
+#if defined(LLVM_HAVE_TFLITE)
+#include "llvm/Analysis/ModelUnderTrainingRunner.h"
+#include "llvm/Analysis/NoInferenceModelRunner.h"
+#include "llvm/Analysis/Utils/TrainingLogger.h"
+#endif
+
+using namespace llvm;
+
+// Options that only make sense in development mode
+#ifdef LLVM_HAVE_TFLITE
+#include "RegAllocScore.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+
+static cl::opt<std::string> TrainingLog(
+ "regalloc-priority-training-log", cl::Hidden,
+ cl::desc("Training log for the register allocator priority model"));
+
+static cl::opt<std::string> ModelUnderTraining(
+ "regalloc-priority-model", cl::Hidden,
+ cl::desc("The model being trained for register allocation priority"));
+
+#endif // #ifdef LLVM_HAVE_TFLITE
+
+namespace llvm {
+
+static const std::vector<int64_t> PerLiveRangeShape{1};
+
+#define RA_PRIORITY_FEATURES_LIST(M) \
+ M(int64_t, li_size, PerLiveRangeShape, "size") \
+ M(int64_t, stage, PerLiveRangeShape, "stage") \
+ M(float, weight, PerLiveRangeShape, "weight")
+
+#define DecisionName "priority"
+
+// Named features index.
+enum FeatureIDs {
+#define _FEATURE_IDX(_, name, __, ___) name,
+ RA_PRIORITY_FEATURES_LIST(_FEATURE_IDX)
+#undef _FEATURE_IDX
+ FeatureCount
+};
+
+class MLPriorityAdvisor : public RegAllocPriorityAdvisor {
+public:
+ MLPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *const Indexes, MLModelRunner *Runner);
+
+protected:
+ const RegAllocPriorityAdvisor &getDefaultAdvisor() const {
+ return static_cast<const RegAllocPriorityAdvisor &>(DefaultAdvisor);
+ }
+
+ // The assumption is that if the Runner could not be constructed, we
+ // emitted an error, and we shouldn't be asking for it here.
+ const MLModelRunner &getRunner() const { return *Runner; }
+ float getPriorityImpl(const LiveInterval &LI) const;
+ unsigned getPriority(const LiveInterval &LI) const override;
+
+private:
+ const DefaultPriorityAdvisor DefaultAdvisor;
+ MLModelRunner *const Runner;
+};
+
+#define _DECL_FEATURES(type, name, shape, _) \
+ TensorSpec::createSpec<type>(#name, shape),
+
+static const std::vector<TensorSpec> InputFeatures{
+ {RA_PRIORITY_FEATURES_LIST(_DECL_FEATURES)},
+};
+#undef _DECL_FEATURES
+
+// ===================================
+// Release (AOT) - specifics
+// ===================================
+class ReleaseModePriorityAdvisorAnalysis final
+ : public RegAllocPriorityAdvisorAnalysis {
+public:
+ ReleaseModePriorityAdvisorAnalysis()
+ : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Release) {}
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Release;
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<SlotIndexes>();
+ RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
+ }
+
+ std::unique_ptr<RegAllocPriorityAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ if (!Runner)
+ Runner = std::make_unique<ReleaseModeModelRunner<NoopSavedModelImpl>>(
+ MF.getFunction().getContext(), InputFeatures, DecisionName);
+ return std::make_unique<MLPriorityAdvisor>(
+ MF, RA, &getAnalysis<SlotIndexes>(), Runner.get());
+ }
+ std::unique_ptr<ReleaseModeModelRunner<NoopSavedModelImpl>> Runner;
+};
+
+// ===================================
+// Development mode-specifics
+// ===================================
+//
+// Features we log
+#ifdef LLVM_HAVE_TFLITE
+
+static const TensorSpec Output =
+ TensorSpec::createSpec<float>(DecisionName, {1});
+static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
+
+#define _DECL_TRAIN_FEATURES(type, name, shape, _) \
+ TensorSpec::createSpec<type>(std::string("action_") + #name, shape),
+
+static const std::vector<TensorSpec> TrainingInputFeatures{
+ {RA_PRIORITY_FEATURES_LIST(_DECL_TRAIN_FEATURES)
+ TensorSpec::createSpec<float>("action_discount", {1}),
+ TensorSpec::createSpec<int32_t>("action_step_type", {1}),
+ TensorSpec::createSpec<float>("action_reward", {1})}};
+#undef _DECL_TRAIN_FEATURES
+
+class DevelopmentModePriorityAdvisor : public MLPriorityAdvisor {
+public:
+ DevelopmentModePriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *const Indexes,
+ MLModelRunner *Runner, Logger *Log)
+ : MLPriorityAdvisor(MF, RA, Indexes, Runner), Log(Log) {}
+
+private:
+ unsigned getPriority(const LiveInterval &LI) const override;
+ Logger *const Log;
+};
+
+class DevelopmentModePriorityAdvisorAnalysis final
+ : public RegAllocPriorityAdvisorAnalysis {
+public:
+ DevelopmentModePriorityAdvisorAnalysis()
+ : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Development) {}
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Development;
+ }
+
+ void logRewardIfNeeded(const MachineFunction &MF,
+ llvm::function_ref<float()> GetReward) override {
+ if (!Log)
+ return;
+ // The function pass manager would run all the function passes for a
+ // function, so we assume the last context belongs to this function. If
+ // this invariant ever changes, we can implement switching contexts at
+ // that time; as things stand, a mismatch is an error.
+ if (Log->currentContext() != MF.getName()) {
+ MF.getFunction().getContext().emitError(
+ "The training log context shouldn't have changed.");
+ }
+ if (Log->hasObservationInProgress())
+ Log->logReward<float>(GetReward());
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<SlotIndexes>();
+ RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
+ }
+
+ // Set up the model runner and, when logging is requested, the training log.
+ bool doInitialization(Module &M) override {
+ LLVMContext &Ctx = M.getContext();
+ if (ModelUnderTraining.empty() && TrainingLog.empty()) {
+ Ctx.emitError("Regalloc development mode should be requested with at "
+ "least logging enabled and/or a training model");
+ return false;
+ }
+ if (ModelUnderTraining.empty())
+ Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures);
+ else
+ Runner = ModelUnderTrainingRunner::createAndEnsureValid(
+ Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures);
+ if (!Runner) {
+ Ctx.emitError("Regalloc: could not set up the model runner");
+ return false;
+ }
+ if (TrainingLog.empty())
+ return false;
+ std::error_code EC;
+ auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
+ if (EC) {
+ M.getContext().emitError(EC.message() + ":" + TrainingLog);
+ return false;
+ }
+ std::vector<TensorSpec> LFS = InputFeatures;
+ if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get()))
+ append_range(LFS, MUTR->extraOutputsForLoggingSpecs());
+ // We always log the output; in particular, if we're not evaluating, we
+ // don't have an output spec json file. That's why we handle the
+ // 'normal' output separately.
+ LFS.push_back(Output);
+
+ Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
+ /*IncludeReward*/ true);
+ return false;
+ }
+
+ std::unique_ptr<RegAllocPriorityAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ if (!Runner)
+ return nullptr;
+ if (Log) {
+ Log->switchContext(MF.getName());
+ }
+
+ return std::make_unique<DevelopmentModePriorityAdvisor>(
+ MF, RA, &getAnalysis<SlotIndexes>(), Runner.get(), Log.get());
+ }
+
+ std::unique_ptr<MLModelRunner> Runner;
+ std::unique_ptr<Logger> Log;
+};
+#endif //#ifdef LLVM_HAVE_TFLITE
+
+} // namespace llvm
+
+RegAllocPriorityAdvisorAnalysis *llvm::createReleaseModePriorityAdvisor() {
+ return new ReleaseModePriorityAdvisorAnalysis();
+}
+
+MLPriorityAdvisor::MLPriorityAdvisor(const MachineFunction &MF,
+ const RAGreedy &RA,
+ SlotIndexes *const Indexes,
+ MLModelRunner *Runner)
+ : RegAllocPriorityAdvisor(MF, RA, Indexes), DefaultAdvisor(MF, RA, Indexes),
+ Runner(std::move(Runner)) {
+ assert(this->Runner);
+}
+
+float MLPriorityAdvisor::getPriorityImpl(const LiveInterval &LI) const {
+ const unsigned Size = LI.getSize();
+ LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);
+
+ *Runner->getTensor<int64_t>(0) = static_cast<int64_t>(Size);
+ *Runner->getTensor<int64_t>(1) = static_cast<int64_t>(Stage);
+ *Runner->getTensor<float>(2) = static_cast<float>(LI.weight());
+
+ return Runner->evaluate<float>();
+}
+
+unsigned MLPriorityAdvisor::getPriority(const LiveInterval &LI) const {
+ return static_cast<unsigned>(getPriorityImpl(LI));
+}
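getPriorityImpl binds its inputs by raw tensor index, so 0/1/2 must track the declaration order in RA_PRIORITY_FEATURES_LIST (li_size, stage, weight). A hypothetical compile-time guard one could keep next to it, shown purely as a sketch:

static_assert(FeatureIDs::li_size == 0 && FeatureIDs::stage == 1 &&
                  FeatureIDs::weight == 2,
              "getPriorityImpl's tensor indices assume this feature order");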
+
+#ifdef LLVM_HAVE_TFLITE
+RegAllocPriorityAdvisorAnalysis *llvm::createDevelopmentModePriorityAdvisor() {
+ return new DevelopmentModePriorityAdvisorAnalysis();
+}
+
+unsigned
+DevelopmentModePriorityAdvisor::getPriority(const LiveInterval &LI) const {
+ double Prio = 0;
+
+ if (isa<ModelUnderTrainingRunner>(getRunner())) {
+ Prio = MLPriorityAdvisor::getPriorityImpl(LI);
+ } else {
+ Prio = getDefaultAdvisor().getPriority(LI);
+ }
+
+ if (TrainingLog.empty())
+ return Prio;
+
+ // TODO(mtrofin): when we support optional rewards, this can go away. In the
+ // meantime, we log the "pretend" reward (0) for the previous observation
+ // before starting a new one.
+ if (Log->hasObservationInProgress())
+ Log->logReward<float>(0.0);
+
+ Log->startObservation();
+ size_t CurrentFeature = 0;
+ for (; CurrentFeature < InputFeatures.size(); ++CurrentFeature) {
+ Log->logTensorValue(CurrentFeature,
+ reinterpret_cast<const char *>(
+ getRunner().getTensorUntyped(CurrentFeature)));
+ }
+
+ if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner())) {
+ for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size();
+ ++I, ++CurrentFeature)
+ Log->logTensorValue(
+ CurrentFeature,
+ reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)));
+ }
+
+ float Ret = static_cast<float>(Prio);
+ Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>(&Ret));
+ Log->endObservation();
+
+ return static_cast<unsigned>(Prio);
+}
+
+#endif // #ifdef LLVM_HAVE_TFLITE
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 7381c7e6b09c..5ef377f2a1c0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveVariables.h"
@@ -34,6 +35,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
+#include <cmath>
using namespace llvm;
#define DEBUG_TYPE "codegen"
@@ -253,6 +255,10 @@ MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
return I;
}
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminatorForward() {
+ return find_if(instrs(), [](auto &II) { return II.isTerminator(); });
+}
+
MachineBasicBlock::iterator
MachineBasicBlock::getFirstNonDebugInstr(bool SkipPseudoOp) {
// Skip over begin-of-block dbg_value instructions.
@@ -450,8 +456,8 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (IrrLoopHeaderWeight && IsStandalone) {
if (Indexes) OS << '\t';
- OS.indent(2) << "; Irreducible loop header weight: "
- << IrrLoopHeaderWeight.value() << '\n';
+ OS.indent(2) << "; Irreducible loop header weight: " << *IrrLoopHeaderWeight
+ << '\n';
}
}
@@ -476,6 +482,28 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
os << "bb." << getNumber();
bool hasAttributes = false;
+ auto PrintBBRef = [&](const BasicBlock *bb) {
+ os << "%ir-block.";
+ if (bb->hasName()) {
+ os << bb->getName();
+ } else {
+ int slot = -1;
+
+ if (moduleSlotTracker) {
+ slot = moduleSlotTracker->getLocalSlot(bb);
+ } else if (bb->getParent()) {
+ ModuleSlotTracker tmpTracker(bb->getModule(), false);
+ tmpTracker.incorporateFunction(*bb->getParent());
+ slot = tmpTracker.getLocalSlot(bb);
+ }
+
+ if (slot == -1)
+ os << "<ir-block badref>";
+ else
+ os << slot;
+ }
+ };
+
if (printNameFlags & PrintNameIr) {
if (const auto *bb = getBasicBlock()) {
if (bb->hasName()) {
@@ -483,29 +511,21 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
} else {
hasAttributes = true;
os << " (";
-
- int slot = -1;
-
- if (moduleSlotTracker) {
- slot = moduleSlotTracker->getLocalSlot(bb);
- } else if (bb->getParent()) {
- ModuleSlotTracker tmpTracker(bb->getModule(), false);
- tmpTracker.incorporateFunction(*bb->getParent());
- slot = tmpTracker.getLocalSlot(bb);
- }
-
- if (slot == -1)
- os << "<ir-block badref>";
- else
- os << (Twine("%ir-block.") + Twine(slot)).str();
+ PrintBBRef(bb);
}
}
}
if (printNameFlags & PrintNameAttributes) {
- if (hasAddressTaken()) {
+ if (isMachineBlockAddressTaken()) {
os << (hasAttributes ? ", " : " (");
- os << "address-taken";
+ os << "machine-block-address-taken";
+ hasAttributes = true;
+ }
+ if (isIRBlockAddressTaken()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "ir-block-address-taken ";
+ PrintBBRef(getAddressTakenIRBlock());
hasAttributes = true;
}
if (isEHPad()) {
@@ -543,6 +563,11 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
}
hasAttributes = true;
}
+ if (getBBID().has_value()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "bb_id " << *getBBID();
+ hasAttributes = true;
+ }
}
if (hasAttributes)
@@ -919,7 +944,7 @@ const MachineBasicBlock *MachineBasicBlock::getSingleSuccessor() const {
return Successors.size() == 1 ? Successors[0] : nullptr;
}
-MachineBasicBlock *MachineBasicBlock::getFallThrough() {
+MachineBasicBlock *MachineBasicBlock::getFallThrough(bool JumpToFallThrough) {
MachineFunction::iterator Fallthrough = getIterator();
++Fallthrough;
// If FallthroughBlock is off the end of the function, it can't fall through.
@@ -950,8 +975,8 @@ MachineBasicBlock *MachineBasicBlock::getFallThrough() {
// If there is some explicit branch to the fallthrough block, it can obviously
// reach, even though the branch should get folded to fall through implicitly.
- if (MachineFunction::iterator(TBB) == Fallthrough ||
- MachineFunction::iterator(FBB) == Fallthrough)
+ if (!JumpToFallThrough && (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough))
return &*Fallthrough;
// If it's an unconditional branch to some block not the fall through, it
@@ -1046,8 +1071,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MO.isUndef())
continue;
Register Reg = MO.getReg();
- if (Register::isPhysicalRegister(Reg) ||
- LV->getVarInfo(Reg).removeKill(MI)) {
+ if (Reg.isPhysical() || LV->getVarInfo(Reg).removeKill(MI)) {
KilledRegs.push_back(Reg);
LLVM_DEBUG(dbgs() << "Removing terminator kill: " << MI);
MO.setIsKill(false);
@@ -1133,7 +1157,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false))
continue;
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
LV->getVarInfo(Reg).Kills.push_back(&*I);
LLVM_DEBUG(dbgs() << "Restored terminator kill: " << *I);
break;
@@ -1631,6 +1655,11 @@ bool MachineBasicBlock::sizeWithoutDebugLargerThan(unsigned Limit) const {
return false;
}
+unsigned MachineBasicBlock::getBBIDOrNumber() const {
+ uint8_t BBAddrMapVersion = getParent()->getContext().getBBAddrMapVersion();
+ return BBAddrMapVersion < 2 ? getNumber() : *getBBID();
+}
+
const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold);
const MBBSectionID
MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index c569f0350366..b1cbe525d7e6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -12,7 +12,6 @@
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -23,6 +22,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
+#include <optional>
#include <string>
using namespace llvm;
@@ -231,19 +231,19 @@ MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const {
return MBFI ? MBFI->getBlockFreq(MBB) : 0;
}
-Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
+std::optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
const MachineBasicBlock *MBB) const {
if (!MBFI)
- return None;
+ return std::nullopt;
const Function &F = MBFI->getFunction()->getFunction();
return MBFI->getBlockProfileCount(F, MBB);
}
-Optional<uint64_t>
+std::optional<uint64_t>
MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
if (!MBFI)
- return None;
+ return std::nullopt;
const Function &F = MBFI->getFunction()->getFunction();
return MBFI->getProfileCountFromFreq(F, Freq);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 9ff5c37627b4..7bbc347a8cf8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -201,6 +201,18 @@ static cl::opt<unsigned> TriangleChainCount(
cl::init(2),
cl::Hidden);
+// Use case: When block layout is visualized after MBP pass, the basic blocks
+// are labeled in layout order; meanwhile blocks could be numbered in a
+// different order. It's hard to map between the graph and pass output.
+// With this option on, the basic blocks are renumbered in function layout
+// order. For debugging only.
+static cl::opt<bool> RenumberBlocksBeforeView(
+ "renumber-blocks-before-view",
+ cl::desc(
+ "If true, basic blocks are re-numbered before MBP layout is printed "
+ "into a dot graph. Only used when a function is being printed."),
+ cl::init(false), cl::Hidden);
+
extern cl::opt<bool> EnableExtTspBlockPlacement;
extern cl::opt<bool> ApplyExtTspWithoutProfile;
@@ -3466,6 +3478,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (ViewBlockLayoutWithBFI != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
F->getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ if (RenumberBlocksBeforeView)
+ MF.RenumberBlocks();
MBFI->view("MBP." + MF.getName(), false);
}
@@ -3488,7 +3502,7 @@ void MachineBlockPlacement::applyExtTsp() {
auto BlockSizes = std::vector<uint64_t>(F->size());
auto BlockCounts = std::vector<uint64_t>(F->size());
- DenseMap<std::pair<uint64_t, uint64_t>, uint64_t> JumpCounts;
+ std::vector<EdgeCountT> JumpCounts;
for (MachineBasicBlock &MBB : *F) {
// Getting the block frequency.
BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
@@ -3506,9 +3520,9 @@ void MachineBlockPlacement::applyExtTsp() {
// Getting jump frequencies.
for (MachineBasicBlock *Succ : MBB.successors()) {
auto EP = MBPI->getEdgeProbability(&MBB, Succ);
- BlockFrequency EdgeFreq = BlockFreq * EP;
- auto Edge = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]);
- JumpCounts[Edge] = EdgeFreq.getFrequency();
+ BlockFrequency JumpFreq = BlockFreq * EP;
+ auto Jump = std::make_pair(BlockIndex[&MBB], BlockIndex[Succ]);
+ JumpCounts.push_back(std::make_pair(Jump, JumpFreq.getFrequency()));
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCFGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCFGPrinter.cpp
new file mode 100644
index 000000000000..7bfb81771380
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCFGPrinter.cpp
@@ -0,0 +1,95 @@
+//===- MachineCFGPrinter.cpp - DOT Printer for Machine Functions ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the `-dot-machine-cfg` analysis pass, which emits the
+// Machine Function in DOT format to a file titled
+// `<prefix>.<function-name>.dot`.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineCFGPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dot-machine-cfg"
+
+static cl::opt<std::string>
+ MCFGFuncName("mcfg-func-name", cl::Hidden,
+ cl::desc("The name of a function (or its substring)"
+ " whose CFG is viewed/printed."));
+
+static cl::opt<std::string> MCFGDotFilenamePrefix(
+ "mcfg-dot-filename-prefix", cl::Hidden,
+ cl::desc("The prefix used for the Machine CFG dot file names."));
+
+static cl::opt<bool>
+ CFGOnly("dot-mcfg-only", cl::init(false), cl::Hidden,
+ cl::desc("Print only the CFG, without block bodies"));
+
+static void writeMCFGToDotFile(MachineFunction &MF) {
+ std::string Filename =
+ (MCFGDotFilenamePrefix + "." + MF.getName() + ".dot").str();
+ errs() << "Writing '" << Filename << "'...";
+
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
+
+ DOTMachineFuncInfo MCFGInfo(&MF);
+
+ if (!EC)
+ WriteGraph(File, &MCFGInfo, CFGOnly);
+ else
+ errs() << " error opening file for writing!";
+ errs() << '\n';
+}
+
+namespace {
+
+class MachineCFGPrinter : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MachineCFGPrinter();
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // namespace
+
+char MachineCFGPrinter::ID = 0;
+
+char &llvm::MachineCFGPrinterID = MachineCFGPrinter::ID;
+
+INITIALIZE_PASS(MachineCFGPrinter, DEBUG_TYPE, "Machine CFG Printer Pass",
+ false, true)
+
+/// Default construct and initialize the pass.
+MachineCFGPrinter::MachineCFGPrinter() : MachineFunctionPass(ID) {
+ initializeMachineCFGPrinterPass(*PassRegistry::getPassRegistry());
+}
+
+bool MachineCFGPrinter::runOnMachineFunction(MachineFunction &MF) {
+ if (!MCFGFuncName.empty() && !MF.getName().contains(MCFGFuncName))
+ return false;
+ errs() << "Writing Machine CFG for function ";
+ errs().write_escaped(MF.getName()) << '\n';
+
+ writeMCFGToDotFile(MF);
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
index c6756b1d3737..cd8644029530 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
@@ -60,6 +60,11 @@ STATISTIC(NumCrossBBCSEs,
"Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
+// Threshold to avoid excessive cost when computing isProfitableToCSE.
+static cl::opt<int>
+ CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
+ cl::desc("Threshold for the size of CSUses"));
+
namespace {
class MachineCSE : public MachineFunctionPass {
@@ -140,7 +145,7 @@ namespace {
DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
bool PerformCSE(MachineDomTreeNode *Node);
- bool isPRECandidate(MachineInstr *MI);
+ bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
bool PerformSimplePRE(MachineDominatorTree *DT);
/// Heuristics to see if it's profitable to move common computations of MBB
@@ -174,14 +179,14 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
MachineInstr *DefMI = MRI->getVRegDef(Reg);
if (!DefMI->isCopy())
continue;
Register SrcReg = DefMI->getOperand(1).getReg();
- if (!Register::isVirtualRegister(SrcReg))
+ if (!SrcReg.isVirtual())
continue;
if (DefMI->getOperand(0).getSubReg())
continue;
@@ -260,8 +265,10 @@ bool MachineCSE::isPhysDefTriviallyDead(
}
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
+ const MachineOperand &MO,
const MachineFunction &MF,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
// MachineRegisterInfo::isConstantPhysReg directly called by
// MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
// reserved registers to be frozen. That doesn't cause a problem post-ISel as
@@ -270,7 +277,7 @@ static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
// It does cause issues mid-GlobalISel, however, hence the additional
// reservedRegsFrozen check.
const MachineRegisterInfo &MRI = MF.getRegInfo();
- return TRI.isCallerPreservedPhysReg(Reg, MF) ||
+ return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
(MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
}
@@ -290,10 +297,11 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
Register Reg = MO.getReg();
if (!Reg)
continue;
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
continue;
// Reading either caller preserved or constant physregs is ok.
- if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), *MI->getMF(), *TRI))
+ if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
+ *TII))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
}
@@ -309,7 +317,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
Register Reg = MO.getReg();
if (!Reg)
continue;
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
continue;
// Check against PhysRefs even if the def is "dead".
if (PhysRefs.count(Reg.asMCReg()))
@@ -384,7 +392,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
if (!MO.isReg() || !MO.isDef())
continue;
Register MOReg = MO.getReg();
- if (Register::isVirtualRegister(MOReg))
+ if (MOReg.isVirtual())
continue;
if (PhysRefs.count(MOReg.asMCReg()))
return false;
@@ -440,18 +448,26 @@ bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
// If CSReg is used at all uses of Reg, CSE should not increase register
// pressure of CSReg.
bool MayIncreasePressure = true;
- if (Register::isVirtualRegister(CSReg) && Register::isVirtualRegister(Reg)) {
+ if (CSReg.isVirtual() && Reg.isVirtual()) {
MayIncreasePressure = false;
SmallPtrSet<MachineInstr*, 8> CSUses;
+ int NumOfUses = 0;
for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
CSUses.insert(&MI);
- }
- for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
- if (!CSUses.count(&MI)) {
+ // Too costly to compute if NumOfUses is very large. Conservatively assume
+ // MayIncreasePressure to avoid spending too much time here.
+ if (++NumOfUses > CSUsesThreshold) {
MayIncreasePressure = true;
break;
}
}
+ if (!MayIncreasePressure)
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
+ if (!CSUses.count(&MI)) {
+ MayIncreasePressure = true;
+ break;
+ }
+ }
}
if (!MayIncreasePressure) return true;
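The CSUsesThreshold hunk above bounds how long isProfitableToCSE may spend collecting the uses of CSReg. A self-contained sketch of the same cap-then-compare pattern, with toy types standing in for machine instructions:

// Hedged sketch of the capped use-set comparison added above.
#include <unordered_set>
#include <vector>

using Use = const void *; // stand-in for a MachineInstr pointer

// Returns true ("may increase pressure") either conservatively, once the
// number of CSReg uses exceeds Threshold, or precisely, when Reg has a use
// that is not also a use of CSReg.
bool mayIncreasePressure(const std::vector<Use> &CSRegUses,
                         const std::vector<Use> &RegUses,
                         unsigned Threshold = 1024) {
  std::unordered_set<Use> CSUses;
  unsigned NumOfUses = 0;
  for (Use U : CSRegUses) {
    CSUses.insert(U);
    if (++NumOfUses > Threshold)
      return true; // Too costly to compute; assume the worst.
  }
  for (Use U : RegUses)
    if (!CSUses.count(U))
      return true; // Reg has a use not covered by CSReg's uses.
  return false;
}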
@@ -468,7 +484,7 @@ bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
// of the redundant computation are copies, do not cse.
bool HasVRegUse = false;
for (const MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) {
+ if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual()) {
HasVRegUse = true;
break;
}
@@ -632,8 +648,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
continue;
}
- assert(Register::isVirtualRegister(OldReg) &&
- Register::isVirtualRegister(NewReg) &&
+ assert(OldReg.isVirtual() && NewReg.isVirtual() &&
"Do not CSE physical register defs!");
if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
@@ -785,22 +800,24 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
// We use stronger checks for PRE candidate rather than for CSE ones to embrace
// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
// to exclude instrs created by PRE that won't be CSEed later.
-bool MachineCSE::isPRECandidate(MachineInstr *MI) {
+bool MachineCSE::isPRECandidate(MachineInstr *MI,
+ SmallSet<MCRegister, 8> &PhysRefs) {
if (!isCSECandidate(MI) ||
MI->isNotDuplicable() ||
MI->mayLoad() ||
- MI->isAsCheapAsAMove() ||
+ TII->isAsCheapAsAMove(*MI) ||
MI->getNumDefs() != 1 ||
MI->getNumExplicitDefs() != 1)
return false;
- for (const auto &def : MI->defs())
- if (!Register::isVirtualRegister(def.getReg()))
- return false;
-
- for (const auto &use : MI->uses())
- if (use.isReg() && !Register::isVirtualRegister(use.getReg()))
- return false;
+ for (const MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && !MO.getReg().isVirtual()) {
+ if (MO.isDef())
+ return false;
+ else
+ PhysRefs.insert(MO.getReg());
+ }
+ }
return true;
}
@@ -809,7 +826,8 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
MachineBasicBlock *MBB) {
bool Changed = false;
for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
- if (!isPRECandidate(&MI))
+ SmallSet<MCRegister, 8> PhysRefs;
+ if (!isPRECandidate(&MI, PhysRefs))
continue;
if (!PREMap.count(&MI)) {
@@ -845,6 +863,15 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
if (MI.isConvergent() && CMBB != MBB)
continue;
+ // If this instruction uses physical registers then we can only do PRE
+ // if it's using the value that is live at the place we're hoisting to.
+ bool NonLocal;
+ PhysDefVector PhysDefs;
+ if (!PhysRefs.empty() &&
+ !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
+ PhysDefs, NonLocal))
+ continue;
+
assert(MI.getOperand(0).isDef() &&
"First operand of instr with one explicit def must be this def");
Register VReg = MI.getOperand(0).getReg();
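The reworked isPRECandidate above replaces the separate def/use scans with a single pass over all operands: a physical-register def disqualifies the instruction, while physical-register uses are collected into PhysRefs so that ProcessBlockPRE can later verify, via PhysRegDefsReach, that those registers hold the same values at the hoist point. A toy sketch of that scan (the operand type and function name are invented here):

// Hedged sketch of the single-pass operand scan in isPRECandidate.
#include <set>
#include <vector>

struct Operand {
  bool IsReg = false;
  bool IsDef = false;
  bool IsPhysical = false;
  unsigned Reg = 0;
};

// Returns false if the instruction defines a physical register (not a PRE
// candidate); otherwise records every physical-register use in PhysRefs.
bool scanOperandsForPRE(const std::vector<Operand> &Ops,
                        std::set<unsigned> &PhysRefs) {
  for (const Operand &MO : Ops) {
    if (!MO.IsReg || !MO.IsPhysical)
      continue;
    if (MO.IsDef)
      return false;
    PhysRefs.insert(MO.Reg); // Must still be live at the hoist point.
  }
  return true;
}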
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
index 57e2cd20bdd0..974d570ece51 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -89,7 +90,6 @@ public:
StringRef getPassName() const override { return "Machine InstCombiner"; }
private:
- bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize);
bool combineInstructions(MachineBasicBlock *);
MachineInstr *getOperandDef(const MachineOperand &MO);
bool isTransientMI(const MachineInstr *MI);
@@ -151,7 +151,7 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
MachineInstr *DefInstr = nullptr;
// We need a virtual register definition.
- if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && MO.getReg().isVirtual())
DefInstr = MRI->getUniqueVRegDef(MO.getReg());
// PHI's have no depth etc.
if (DefInstr && DefInstr->isPHI())
@@ -209,9 +209,6 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
MachineTraceMetrics::Trace BlockTrace) {
SmallVector<unsigned, 16> InstrDepth;
- assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
- "Missing machine model\n");
-
// For each instruction in the new sequence compute the depth based on the
// operands. Use the trace information when possible. For new operands which
// are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
@@ -219,7 +216,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
unsigned IDepth = 0;
for (const MachineOperand &MO : InstrPtr->operands()) {
// Check for virtual register operand.
- if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
+ if (!(MO.isReg() && MO.getReg().isVirtual()))
continue;
if (!MO.isUse())
continue;
@@ -267,15 +264,12 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
/// \returns Latency of \p NewRoot
unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
MachineTraceMetrics::Trace BlockTrace) {
- assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
- "Missing machine model\n");
-
// Check each definition in NewRoot and compute the latency
unsigned NewRootLatency = 0;
for (const MachineOperand &MO : NewRoot->operands()) {
// Check for virtual register operand.
- if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
+ if (!(MO.isReg() && MO.getReg().isVirtual()))
continue;
if (!MO.isDef())
continue;
@@ -318,6 +312,10 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
case MachineCombinerPattern::SUBADD_OP1:
case MachineCombinerPattern::SUBADD_OP2:
+ case MachineCombinerPattern::FMADD_AX:
+ case MachineCombinerPattern::FMADD_XA:
+ case MachineCombinerPattern::FMSUB:
+ case MachineCombinerPattern::FNMSUB:
return CombinerObjective::MustReduceDepth;
case MachineCombinerPattern::REASSOC_XY_BCA:
case MachineCombinerPattern::REASSOC_XY_BAC:
@@ -375,8 +373,6 @@ bool MachineCombiner::improvesCriticalPathLen(
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
MachineCombinerPattern Pattern,
bool SlackIsAccurate) {
- assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
- "Missing machine model\n");
// Get depth and latency of NewRoot and Root.
unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
@@ -459,8 +455,8 @@ bool MachineCombiner::preservesResourceLen(
instr2instrSC(InsInstrs, InsInstrsSC);
instr2instrSC(DelInstrs, DelInstrsSC);
- ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
- ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
+ ArrayRef<const MCSchedClassDesc *> MSCInsArr{InsInstrsSC};
+ ArrayRef<const MCSchedClassDesc *> MSCDelArr{DelInstrsSC};
// Compute new resource length.
unsigned ResLenAfterCombine =
@@ -480,17 +476,6 @@ bool MachineCombiner::preservesResourceLen(
ResLenBeforeCombine + TII->getExtendResourceLenLimit();
}
-/// \returns true when new instruction sequence should be generated
-/// independent if it lengthens critical path or not
-bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize,
- bool OptForSize) {
- if (OptForSize && (NewSize < OldSize))
- return true;
- if (!TSchedModel.hasInstrSchedModelOrItineraries())
- return true;
- return false;
-}
-
/// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
/// depths if requested.
///
@@ -636,18 +621,16 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (VerifyPatternOrder)
verifyPatternOrder(MBB, MI, Patterns);
- for (auto P : Patterns) {
+ for (const auto P : Patterns) {
SmallVector<MachineInstr *, 16> InsInstrs;
SmallVector<MachineInstr *, 16> DelInstrs;
DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
InstrIdxForVirtReg);
- unsigned NewInstCount = InsInstrs.size();
- unsigned OldInstCount = DelInstrs.size();
// Found pattern, but did not generate alternative sequence.
// This can happen e.g. when an immediate could not be materialized
// in a single instruction.
- if (!NewInstCount)
+ if (InsInstrs.empty())
continue;
LLVM_DEBUG(if (dump_intrs) {
@@ -662,10 +645,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
/*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
});
- bool SubstituteAlways = false;
- if (ML && TII->isThroughputPattern(P))
- SubstituteAlways = true;
-
if (IncrementalUpdate && LastUpdate != BlockIter) {
// Update depths since the last incremental update.
MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
@@ -693,12 +672,17 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
}
- // Substitute when we optimize for codesize and the new sequence has
- // fewer instructions OR
- // the new sequence neither lengthens the critical path nor increases
- // resource pressure.
- if (SubstituteAlways ||
- doSubstitute(NewInstCount, OldInstCount, OptForSize)) {
+ if (ML && TII->isThroughputPattern(P)) {
+ LLVM_DEBUG(dbgs() << "\t Replacing due to throughput pattern in loop\n");
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ RegUnits, TII, P, IncrementalUpdate);
+ // Eagerly stop after the first pattern fires.
+ Changed = true;
+ break;
+ } else if (OptForSize && InsInstrs.size() < DelInstrs.size()) {
+ LLVM_DEBUG(dbgs() << "\t Replacing due to OptForSize ("
+ << InsInstrs.size() << " < "
+ << DelInstrs.size() << ")\n");
insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
RegUnits, TII, P, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
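The rewritten loop above folds the old doSubstitute/SubstituteAlways logic into two explicit cases: a throughput pattern inside a loop always fires, and a size-reducing sequence fires when optimizing for size; the latency/resource-length case is handled by surrounding code not shown in this hunk. A compact restatement of just the two visible branches, as a hypothetical free function:

// Hedged restatement of the two substitution cases visible above.
bool shouldSubstituteEagerly(bool InLoop, bool IsThroughputPattern,
                             bool OptForSize, unsigned NewInstCount,
                             unsigned OldInstCount) {
  if (InLoop && IsThroughputPattern)
    return true; // Replace due to throughput pattern in loop.
  // Replace when optimizing for size and the new sequence is shorter.
  return OptForSize && NewInstCount < OldInstCount;
}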
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 66f0eb83e57c..871824553aa4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -88,17 +88,17 @@ static cl::opt<bool> MCPUseCopyInstr("mcp-use-is-copy-instr", cl::init(false),
namespace {
-static Optional<DestSourcePair> isCopyInstr(const MachineInstr &MI,
- const TargetInstrInfo &TII,
- bool UseCopyInstr) {
+static std::optional<DestSourcePair> isCopyInstr(const MachineInstr &MI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
if (UseCopyInstr)
return TII.isCopyInstr(MI);
if (MI.isCopy())
- return Optional<DestSourcePair>(
+ return std::optional<DestSourcePair>(
DestSourcePair{MI.getOperand(0), MI.getOperand(1)});
- return None;
+ return std::nullopt;
}
class CopyTracker {
@@ -137,7 +137,7 @@ public:
auto I = Copies.find(*RUI);
if (I != Copies.end()) {
if (MachineInstr *MI = I->second.MI) {
- Optional<DestSourcePair> CopyOperands =
+ std::optional<DestSourcePair> CopyOperands =
isCopyInstr(*MI, TII, UseCopyInstr);
assert(CopyOperands && "Expect copy");
@@ -166,7 +166,7 @@ public:
// When we clobber the destination of a copy, we need to clobber the
// whole register it defined.
if (MachineInstr *MI = I->second.MI) {
- Optional<DestSourcePair> CopyOperands =
+ std::optional<DestSourcePair> CopyOperands =
isCopyInstr(*MI, TII, UseCopyInstr);
markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()},
TRI);
@@ -180,7 +180,8 @@ public:
/// Add this copy's registers into the tracker's copy maps.
void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII, bool UseCopyInstr) {
- Optional<DestSourcePair> CopyOperands = isCopyInstr(*MI, TII, UseCopyInstr);
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MI, TII, UseCopyInstr);
assert(CopyOperands && "Tracking non-copy?");
MCRegister Src = CopyOperands->Source->getReg().asMCReg();
@@ -236,7 +237,7 @@ public:
if (!AvailCopy)
return nullptr;
- Optional<DestSourcePair> CopyOperands =
+ std::optional<DestSourcePair> CopyOperands =
isCopyInstr(*AvailCopy, TII, UseCopyInstr);
Register AvailSrc = CopyOperands->Source->getReg();
Register AvailDef = CopyOperands->Destination->getReg();
@@ -266,7 +267,7 @@ public:
if (!AvailCopy)
return nullptr;
- Optional<DestSourcePair> CopyOperands =
+ std::optional<DestSourcePair> CopyOperands =
isCopyInstr(*AvailCopy, TII, UseCopyInstr);
Register AvailSrc = CopyOperands->Source->getReg();
Register AvailDef = CopyOperands->Destination->getReg();
@@ -383,7 +384,7 @@ static bool isNopCopy(const MachineInstr &PreviousCopy, MCRegister Src,
MCRegister Def, const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII, bool UseCopyInstr) {
- Optional<DestSourcePair> CopyOperands =
+ std::optional<DestSourcePair> CopyOperands =
isCopyInstr(PreviousCopy, *TII, UseCopyInstr);
MCRegister PreviousSrc = CopyOperands->Source->getReg().asMCReg();
MCRegister PreviousDef = CopyOperands->Destination->getReg().asMCReg();
@@ -422,7 +423,8 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy,
// Copy was redundantly redefining either Src or Def. Remove earlier kill
// flags between Copy and PrevCopy because the value will be reused now.
- Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr);
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(Copy, *TII, UseCopyInstr);
assert(CopyOperands);
Register CopyDef = CopyOperands->Destination->getReg();
@@ -439,8 +441,8 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy,
bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy(
const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) {
-
- Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr);
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(Copy, *TII, UseCopyInstr);
Register Def = CopyOperands->Destination->getReg();
if (const TargetRegisterClass *URC =
@@ -458,8 +460,8 @@ bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy(
bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
const MachineInstr &UseI,
unsigned UseIdx) {
-
- Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr);
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(Copy, *TII, UseCopyInstr);
Register CopySrcReg = CopyOperands->Source->getReg();
// If the new register meets the opcode register constraints, then allow
@@ -587,7 +589,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (!Copy)
continue;
- Optional<DestSourcePair> CopyOperands =
+ std::optional<DestSourcePair> CopyOperands =
isCopyInstr(*Copy, *TII, UseCopyInstr);
Register CopyDstReg = CopyOperands->Destination->getReg();
const MachineOperand &CopySrc = *CopyOperands->Source;
@@ -654,7 +656,8 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// Analyze copies (which don't overlap themselves).
- Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr);
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(MI, *TII, UseCopyInstr);
if (CopyOperands) {
Register RegSrc = CopyOperands->Source->getReg();
@@ -777,7 +780,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
MaybeDeadCopies.begin();
DI != MaybeDeadCopies.end();) {
MachineInstr *MaybeDead = *DI;
- Optional<DestSourcePair> CopyOperands =
+ std::optional<DestSourcePair> CopyOperands =
isCopyInstr(*MaybeDead, *TII, UseCopyInstr);
MCRegister Reg = CopyOperands->Destination->getReg().asMCReg();
assert(!MRI->isReserved(Reg));
@@ -816,7 +819,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
MaybeDead->dump());
- Optional<DestSourcePair> CopyOperands =
+ std::optional<DestSourcePair> CopyOperands =
isCopyInstr(*MaybeDead, *TII, UseCopyInstr);
assert(CopyOperands);
@@ -845,7 +848,8 @@ static bool isBackwardPropagatableCopy(MachineInstr &MI,
const MachineRegisterInfo &MRI,
const TargetInstrInfo &TII,
bool UseCopyInstr) {
- Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, TII, UseCopyInstr);
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(MI, TII, UseCopyInstr);
assert(CopyOperands && "MI is expected to be a COPY");
Register Def = CopyOperands->Destination->getReg();
@@ -887,7 +891,7 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
if (!Copy)
continue;
- Optional<DestSourcePair> CopyOperands =
+ std::optional<DestSourcePair> CopyOperands =
isCopyInstr(*Copy, *TII, UseCopyInstr);
Register Def = CopyOperands->Destination->getReg();
Register Src = CopyOperands->Source->getReg();
@@ -925,7 +929,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
// Ignore non-trivial COPYs.
- Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr);
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(MI, *TII, UseCopyInstr);
if (CopyOperands && MI.getNumOperands() == 2) {
Register DefReg = CopyOperands->Destination->getReg();
Register SrcReg = CopyOperands->Source->getReg();
@@ -986,8 +991,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
}
for (auto *Copy : MaybeDeadCopies) {
-
- Optional<DestSourcePair> CopyOperands =
+ std::optional<DestSourcePair> CopyOperands =
isCopyInstr(*Copy, *TII, UseCopyInstr);
Register Src = CopyOperands->Source->getReg();
Register Def = CopyOperands->Destination->getReg();
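Every change in this file follows the same llvm::Optional to std::optional migration: std::nullopt replaces None, and the wrapped DestSourcePair construction is otherwise unchanged. A self-contained C++17 sketch of the idiom, with toy types replacing the LLVM ones:

// Hedged sketch of the Optional -> std::optional idiom used above.
#include <optional>

struct Operand { unsigned Reg; };
struct DestSourcePair { Operand Destination, Source; };
struct Instr { bool IsCopy; Operand Ops[2]; };

std::optional<DestSourcePair> isCopyInstr(const Instr &MI) {
  if (MI.IsCopy)
    return DestSourcePair{MI.Ops[0], MI.Ops[1]};
  return std::nullopt; // was: return None;
}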
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
index 6871ac35b300..57f7a098ac17 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
@@ -9,8 +9,10 @@
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/ADT/GenericCycleImpl.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAContext.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -52,6 +54,7 @@ void MachineCycleInfoWrapperPass::releaseMemory() {
F = nullptr;
}
+namespace {
class MachineCycleInfoPrinterPass : public MachineFunctionPass {
public:
static char ID;
@@ -61,6 +64,7 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
+} // namespace
char MachineCycleInfoPrinterPass::ID = 0;
@@ -105,7 +109,7 @@ bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) {
// An instruction that uses or defines a physical register can't e.g. be
// hoisted, so mark this as not invariant.
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
index b726a032ca18..adf1b51a950d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -153,10 +153,15 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
NMD->setOperand(Idx, MDNode::get(Ctx, ValueAsMetadata::getConstant(
ConstantInt::get(Int32Ty, N))));
};
+ auto getDebugifyOperand = [&](unsigned Idx) {
+ return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
+ ->getZExtValue();
+ };
// Set number of lines.
setDebugifyOperand(0, NextLine - 1);
// Set number of variables.
- setDebugifyOperand(1, VarSet.size());
+ auto OldNumVars = getDebugifyOperand(1);
+ setDebugifyOperand(1, OldNumVars + VarSet.size());
}
return true;
@@ -166,6 +171,9 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
/// legacy module pass manager.
struct DebugifyMachineModule : public ModulePass {
bool runOnModule(Module &M) override {
+    // New debugify metadata is about to be inserted; any existing metadata must have been stripped first.
+ assert(!M.getNamedMetadata("llvm.mir.debugify") &&
+ "llvm.mir.debugify metadata already exists! Strip it first");
MachineModuleInfo &MMI =
getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
return applyDebugifyMetadata(
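The helper pair above turns the variable count from a plain overwrite into a read-modify-write, so running debugify over several machine functions accumulates the total. A toy sketch of that accumulation against a simplified metadata store (types invented here):

// Hedged sketch of the accumulate-instead-of-overwrite fix above.
#include <cassert>
#include <vector>

struct NamedMD {
  std::vector<unsigned> Operands; // [0] = line count, [1] = variable count
};

void addVariables(NamedMD &NMD, unsigned NewVars) {
  assert(NMD.Operands.size() > 1 && "expected line and variable operands");
  unsigned OldNumVars = NMD.Operands[1];  // getDebugifyOperand(1)
  NMD.Operands[1] = OldNumVars + NewVars; // setDebugifyOperand(1, ...)
}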
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
index f0190812389f..daf6a218165d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -58,7 +58,7 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, Align Alignment,
!IsSpillSlot, StackID));
int Index = (int)Objects.size() - NumFixedObjects - 1;
assert(Index >= 0 && "Bad frame index!");
- if (StackID == 0)
+ if (contributesToMaxAlignment(StackID))
ensureMaxAlignment(Alignment);
return Index;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 6b481a374382..59e6647fa643 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -187,6 +187,7 @@ void MachineFunction::init() {
RegInfo = nullptr;
MFInfo = nullptr;
+
// We can realign the stack if the target supports it and the user hasn't
// explicitly asked us not to.
bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() &&
@@ -232,6 +233,12 @@ void MachineFunction::init() {
PSVManager = std::make_unique<PseudoSourceValueManager>(getTarget());
}
+void MachineFunction::initTargetMachineFunctionInfo(
+ const TargetSubtargetInfo &STI) {
+ assert(!MFInfo && "MachineFunctionInfo already set");
+ MFInfo = Target.createMachineFunctionInfo(Allocator, F, &STI);
+}
+
MachineFunction::~MachineFunction() {
clear();
}
@@ -306,7 +313,7 @@ bool MachineFunction::shouldSplitStack() const {
return getFunction().hasFnAttribute("split-stack");
}
-LLVM_NODISCARD unsigned
+[[nodiscard]] unsigned
MachineFunction::addFrameInst(const MCCFIInstruction &Inst) {
FrameInstructions.push_back(Inst);
return FrameInstructions.size() - 1;
@@ -437,8 +444,16 @@ void MachineFunction::deleteMachineInstr(MachineInstr *MI) {
/// `new MachineBasicBlock'.
MachineBasicBlock *
MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
- return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
- MachineBasicBlock(*this, bb);
+ MachineBasicBlock *MBB =
+ new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+ MachineBasicBlock(*this, bb);
+  // Set BBID for `-basic-block-sections=labels` and
+ // `-basic-block-sections=list` to allow robust mapping of profiles to basic
+ // blocks.
+ if (Target.getBBSectionsType() == BasicBlockSection::Labels ||
+ Target.getBBSectionsType() == BasicBlockSection::List)
+ MBB->setBBID(NextBBID++);
+ return MBB;
}
/// Delete the given MachineBasicBlock.
@@ -530,9 +545,11 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfo(
ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol,
- MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker) {
+ MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker, MDNode *PCSections,
+ uint32_t CFIType) {
return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol,
- PostInstrSymbol, HeapAllocMarker);
+ PostInstrSymbol, HeapAllocMarker,
+ PCSections, CFIType);
}
const char *MachineFunction::createExternalSymbolName(StringRef Name) {
@@ -750,12 +767,10 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
const Instruction *FirstI = LandingPad->getBasicBlock()->getFirstNonPHI();
if (const auto *LPI = dyn_cast<LandingPadInst>(FirstI)) {
- if (const auto *PF =
- dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()))
- getMMI().addPersonality(PF);
-
- if (LPI->isCleanup())
- addCleanup(LandingPad);
+ // If there's no typeid list specified, then "cleanup" is implicit.
+ // Otherwise, id 0 is reserved for the cleanup action.
+ if (LPI->isCleanup() && LPI->getNumClauses() != 0)
+ LP.TypeIds.push_back(0);
// FIXME: New EH - Add the clauses in reverse order. This isn't 100%
// correct, but we need to do it this way because of how the DWARF EH
@@ -763,23 +778,25 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
for (unsigned I = LPI->getNumClauses(); I != 0; --I) {
Value *Val = LPI->getClause(I - 1);
if (LPI->isCatch(I - 1)) {
- addCatchTypeInfo(LandingPad,
- dyn_cast<GlobalValue>(Val->stripPointerCasts()));
+ LP.TypeIds.push_back(
+ getTypeIDFor(dyn_cast<GlobalValue>(Val->stripPointerCasts())));
} else {
// Add filters in a list.
auto *CVal = cast<Constant>(Val);
- SmallVector<const GlobalValue *, 4> FilterList;
+ SmallVector<unsigned, 4> FilterList;
for (const Use &U : CVal->operands())
- FilterList.push_back(cast<GlobalValue>(U->stripPointerCasts()));
+ FilterList.push_back(
+ getTypeIDFor(cast<GlobalValue>(U->stripPointerCasts())));
- addFilterTypeInfo(LandingPad, FilterList);
+ LP.TypeIds.push_back(getFilterIDFor(FilterList));
}
}
} else if (const auto *CPI = dyn_cast<CatchPadInst>(FirstI)) {
- for (unsigned I = CPI->getNumArgOperands(); I != 0; --I) {
- Value *TypeInfo = CPI->getArgOperand(I - 1)->stripPointerCasts();
- addCatchTypeInfo(LandingPad, dyn_cast<GlobalValue>(TypeInfo));
+ for (unsigned I = CPI->arg_size(); I != 0; --I) {
+ auto *TypeInfo =
+ dyn_cast<GlobalValue>(CPI->getArgOperand(I - 1)->stripPointerCasts());
+ LP.TypeIds.push_back(getTypeIDFor(TypeInfo));
}
} else {
@@ -789,73 +806,6 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
return LandingPadLabel;
}
-void MachineFunction::addCatchTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalValue *> TyInfo) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- for (const GlobalValue *GV : llvm::reverse(TyInfo))
- LP.TypeIds.push_back(getTypeIDFor(GV));
-}
-
-void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalValue *> TyInfo) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- std::vector<unsigned> IdsInFilter(TyInfo.size());
- for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
- IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
- LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
-}
-
-void MachineFunction::tidyLandingPads(DenseMap<MCSymbol *, uintptr_t> *LPMap,
- bool TidyIfNoBeginLabels) {
- for (unsigned i = 0; i != LandingPads.size(); ) {
- LandingPadInfo &LandingPad = LandingPads[i];
- if (LandingPad.LandingPadLabel &&
- !LandingPad.LandingPadLabel->isDefined() &&
- (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
- LandingPad.LandingPadLabel = nullptr;
-
- // Special case: we *should* emit LPs with null LP MBB. This indicates
- // "nounwind" case.
- if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
- LandingPads.erase(LandingPads.begin() + i);
- continue;
- }
-
- if (TidyIfNoBeginLabels) {
- for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
- MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
- MCSymbol *EndLabel = LandingPad.EndLabels[j];
- if ((BeginLabel->isDefined() || (LPMap && (*LPMap)[BeginLabel] != 0)) &&
- (EndLabel->isDefined() || (LPMap && (*LPMap)[EndLabel] != 0)))
- continue;
-
- LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
- LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
- --j;
- --e;
- }
-
- // Remove landing pads with no try-ranges.
- if (LandingPads[i].BeginLabels.empty()) {
- LandingPads.erase(LandingPads.begin() + i);
- continue;
- }
- }
-
- // If there is no landing pad, ensure that the list of typeids is empty.
- // If the only typeid is a cleanup, this is the same as having no typeids.
- if (!LandingPad.LandingPadBlock ||
- (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
- LandingPad.TypeIds.clear();
- ++i;
- }
-}
-
-void MachineFunction::addCleanup(MachineBasicBlock *LandingPad) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.TypeIds.push_back(0);
-}
-
void MachineFunction::setCallSiteLandingPad(MCSymbol *Sym,
ArrayRef<unsigned> Sites) {
LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
@@ -869,7 +819,7 @@ unsigned MachineFunction::getTypeIDFor(const GlobalValue *TI) {
return TypeInfos.size();
}
-int MachineFunction::getFilterIDFor(std::vector<unsigned> &TyIds) {
+int MachineFunction::getFilterIDFor(ArrayRef<unsigned> TyIds) {
// If the new filter coincides with the tail of an existing filter, then
// re-use the existing filter. Folding filters more than this requires
// re-ordering filters and/or their elements - probably not worth it.
@@ -1187,58 +1137,65 @@ void MachineFunction::finalizeDebugInstrRefs() {
auto *TII = getSubtarget().getInstrInfo();
auto MakeUndefDbgValue = [&](MachineInstr &MI) {
- const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE);
+ const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE_LIST);
MI.setDesc(RefII);
- MI.getOperand(0).setReg(0);
- MI.getOperand(1).ChangeToRegister(0, false);
+ MI.setDebugValueUndef();
};
DenseMap<Register, DebugInstrOperandPair> ArgDbgPHIs;
for (auto &MBB : *this) {
for (auto &MI : MBB) {
- if (!MI.isDebugRef() || !MI.getOperand(0).isReg())
+ if (!MI.isDebugRef())
continue;
- Register Reg = MI.getOperand(0).getReg();
+ bool IsValidRef = true;
- // Some vregs can be deleted as redundant in the meantime. Mark those
- // as DBG_VALUE $noreg. Additionally, some normal instructions are
- // quickly deleted, leaving dangling references to vregs with no def.
- if (Reg == 0 || !RegInfo->hasOneDef(Reg)) {
- MakeUndefDbgValue(MI);
- continue;
- }
+ for (MachineOperand &MO : MI.debug_operands()) {
+ if (!MO.isReg())
+ continue;
- assert(Reg.isVirtual());
- MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg);
+ Register Reg = MO.getReg();
- // If we've found a copy-like instruction, follow it back to the
- // instruction that defines the source value, see salvageCopySSA docs
- // for why this is important.
- if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) {
- auto Result = salvageCopySSA(DefMI, ArgDbgPHIs);
- MI.getOperand(0).ChangeToImmediate(Result.first);
- MI.getOperand(1).setImm(Result.second);
- } else {
- // Otherwise, identify the operand number that the VReg refers to.
- unsigned OperandIdx = 0;
- for (const auto &MO : DefMI.operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
- break;
- ++OperandIdx;
+ // Some vregs can be deleted as redundant in the meantime. Mark those
+ // as DBG_VALUE $noreg. Additionally, some normal instructions are
+ // quickly deleted, leaving dangling references to vregs with no def.
+ if (Reg == 0 || !RegInfo->hasOneDef(Reg)) {
+ IsValidRef = false;
+ break;
}
- assert(OperandIdx < DefMI.getNumOperands());
- // Morph this instr ref to point at the given instruction and operand.
- unsigned ID = DefMI.getDebugInstrNum();
- MI.getOperand(0).ChangeToImmediate(ID);
- MI.getOperand(1).setImm(OperandIdx);
+ assert(Reg.isVirtual());
+ MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg);
+
+ // If we've found a copy-like instruction, follow it back to the
+ // instruction that defines the source value, see salvageCopySSA docs
+ // for why this is important.
+ if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) {
+ auto Result = salvageCopySSA(DefMI, ArgDbgPHIs);
+ MO.ChangeToDbgInstrRef(Result.first, Result.second);
+ } else {
+ // Otherwise, identify the operand number that the VReg refers to.
+ unsigned OperandIdx = 0;
+ for (const auto &DefMO : DefMI.operands()) {
+ if (DefMO.isReg() && DefMO.isDef() && DefMO.getReg() == Reg)
+ break;
+ ++OperandIdx;
+ }
+ assert(OperandIdx < DefMI.getNumOperands());
+
+ // Morph this instr ref to point at the given instruction and operand.
+ unsigned ID = DefMI.getDebugInstrNum();
+ MO.ChangeToDbgInstrRef(ID, OperandIdx);
+ }
}
+
+ if (!IsValidRef)
+ MakeUndefDbgValue(MI);
}
}
}
-bool MachineFunction::useDebugInstrRef() const {
+bool MachineFunction::shouldUseDebugInstrRef() const {
// Disable instr-ref at -O0: it's very slow (in compile time). We can still
// have optimized code inlined into this unoptimized code, however with
// fewer and less aggressive optimizations happening, coverage and accuracy
@@ -1256,6 +1213,14 @@ bool MachineFunction::useDebugInstrRef() const {
return false;
}
+bool MachineFunction::useDebugInstrRef() const {
+ return UseDebugInstrRef;
+}
+
+void MachineFunction::setUseDebugInstrRef(bool Use) {
+ UseDebugInstrRef = Use;
+}
+
// Use one million as a high / reserved number.
const unsigned MachineFunction::DebugOperandMemNumber = 1000000;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 477310f59112..3a1e1720be9c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -73,10 +73,16 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
// For --print-changed, if the function name is a candidate, save the
// serialized MF to be compared later.
- // TODO Implement --filter-passes.
SmallString<0> BeforeStr, AfterStr;
- bool ShouldPrintChanged = PrintChanged != ChangePrinter::None &&
- isFunctionInPrintList(MF.getName());
+ StringRef PassID;
+ if (PrintChanged != ChangePrinter::None) {
+ if (const PassInfo *PI = Pass::lookupPassInfo(getPassID()))
+ PassID = PI->getPassArgument();
+ }
+ const bool IsInterestingPass = isPassInPrintList(PassID);
+ const bool ShouldPrintChanged = PrintChanged != ChangePrinter::None &&
+ IsInterestingPass &&
+ isFunctionInPrintList(MF.getName());
if (ShouldPrintChanged) {
raw_svector_ostream OS(BeforeStr);
MF.print(OS);
@@ -112,18 +118,47 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
// For --print-changed, print if the serialized MF has changed. Modes other
// than quiet/verbose are unimplemented and treated the same as 'quiet'.
- if (ShouldPrintChanged) {
- raw_svector_ostream OS(AfterStr);
- MF.print(OS);
- if (BeforeStr != AfterStr) {
- StringRef Arg;
- if (const PassInfo *PI = Pass::lookupPassInfo(getPassID()))
- Arg = PI->getPassArgument();
- errs() << ("*** IR Dump After " + getPassName() + " (" + Arg + ") on " +
- MF.getName() + " ***\n" + AfterStr);
- } else if (PrintChanged == ChangePrinter::Verbose) {
- errs() << ("*** IR Dump After " + getPassName() + " on " + MF.getName() +
- " omitted because no change ***\n");
+ if (ShouldPrintChanged || !IsInterestingPass) {
+ if (ShouldPrintChanged) {
+ raw_svector_ostream OS(AfterStr);
+ MF.print(OS);
+ }
+ if (IsInterestingPass && BeforeStr != AfterStr) {
+ errs() << ("*** IR Dump After " + getPassName() + " (" + PassID +
+ ") on " + MF.getName() + " ***\n");
+ switch (PrintChanged) {
+ case ChangePrinter::None:
+ llvm_unreachable("");
+ case ChangePrinter::Quiet:
+ case ChangePrinter::Verbose:
+ case ChangePrinter::DotCfgQuiet: // unimplemented
+ case ChangePrinter::DotCfgVerbose: // unimplemented
+ errs() << AfterStr;
+ break;
+ case ChangePrinter::DiffQuiet:
+ case ChangePrinter::DiffVerbose:
+ case ChangePrinter::ColourDiffQuiet:
+ case ChangePrinter::ColourDiffVerbose: {
+ bool Color = llvm::is_contained(
+ {ChangePrinter::ColourDiffQuiet, ChangePrinter::ColourDiffVerbose},
+ PrintChanged.getValue());
+ StringRef Removed = Color ? "\033[31m-%l\033[0m\n" : "-%l\n";
+ StringRef Added = Color ? "\033[32m+%l\033[0m\n" : "+%l\n";
+ StringRef NoChange = " %l\n";
+ errs() << doSystemDiff(BeforeStr, AfterStr, Removed, Added, NoChange);
+ break;
+ }
+ }
+ } else if (llvm::is_contained({ChangePrinter::Verbose,
+ ChangePrinter::DiffVerbose,
+ ChangePrinter::ColourDiffVerbose},
+ PrintChanged.getValue())) {
+ const char *Reason =
+ IsInterestingPass ? " omitted because no change" : " filtered out";
+ errs() << "*** IR Dump After " << getPassName();
+ if (!PassID.empty())
+ errs() << " (" << PassID << ")";
+ errs() << " on " << MF.getName() + Reason + " ***\n";
}
}
return RV;
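The diff modes above drive doSystemDiff with per-line templates, where the colour variants simply wrap the - and + templates in ANSI escape sequences. A minimal stand-alone illustration of the same escape-code selection (names invented; the real code hands the templates to an external diff):

// Hedged sketch of the ANSI-coloured diff-line templates above.
#include <iostream>
#include <string>

void printDiffLine(char Kind, const std::string &Line, bool Colour) {
  const char *Red = Colour ? "\033[31m" : "";
  const char *Green = Colour ? "\033[32m" : "";
  const char *Reset = Colour ? "\033[0m" : "";
  if (Kind == '-')
    std::cout << Red << '-' << Line << Reset << '\n';
  else if (Kind == '+')
    std::cout << Green << '+' << Line << Reset << '\n';
  else
    std::cout << ' ' << Line << '\n'; // unchanged line
}

int main() {
  printDiffLine('-', "old instruction", true);
  printDiffLine('+', "new instruction", true);
}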
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 3e1aace855a5..613c52900331 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
+#include <optional>
using namespace llvm;
@@ -57,6 +58,11 @@ static cl::opt<unsigned> ColdCountThreshold(
"Minimum number of times a block must be executed to be retained."),
cl::init(1), cl::Hidden);
+static cl::opt<bool> SplitAllEHCode(
+ "mfs-split-ehcode",
+    cl::desc("Splits all EH code and its descendants by default."),
+ cl::init(false), cl::Hidden);
+
namespace {
class MachineFunctionSplitter : public MachineFunctionPass {
@@ -76,10 +82,83 @@ public:
};
} // end anonymous namespace
+/// setDescendantEHBlocksCold - Mark all EH pads, and the blocks reachable
+/// only through EH pads, as cold. This helps mark EH pads statically cold
+/// instead of relying on profile data.
+static void
+setDescendantEHBlocksCold(SmallVectorImpl<MachineBasicBlock *> &EHBlocks,
+ MachineFunction &MF) {
+ MachineBasicBlock *StartBlock = &MF.front();
+  // A block is Unknown if it is not reachable from anywhere,
+  // EH if it is only reachable from the start block via paths through EH
+  // pads, and NonEH if it is reachable from non-EH blocks as well.
+ enum Status { Unknown = 0, EH = 1, NonEH = 2 };
+ DenseSet<MachineBasicBlock *> WorkList;
+ DenseMap<MachineBasicBlock *, Status> Statuses;
+
+ auto getStatus = [&](MachineBasicBlock *MBB) {
+ if (Statuses.find(MBB) != Statuses.end())
+ return Statuses[MBB];
+ else
+ return Unknown;
+ };
+
+ auto checkPredecessors = [&](MachineBasicBlock *MBB, Status Stat) {
+ for (auto *PredMBB : MBB->predecessors()) {
+ Status PredStatus = getStatus(PredMBB);
+      // If a predecessor's status is stronger than the current block's,
+      // upgrade the current block's status.
+ if (PredStatus > Stat)
+ Stat = PredStatus;
+ }
+ return Stat;
+ };
+
+ auto addSuccesors = [&](MachineBasicBlock *MBB) {
+ for (auto *SuccMBB : MBB->successors()) {
+ if (!SuccMBB->isEHPad())
+ WorkList.insert(SuccMBB);
+ }
+ };
+
+  // Seed the worklist with the successors of the start block
+  // and of the landing pads.
+ Statuses[StartBlock] = NonEH;
+ addSuccesors(StartBlock);
+ for (auto *LP : EHBlocks) {
+ addSuccesors(LP);
+ Statuses[LP] = EH;
+ }
+
+ // Worklist iterative algorithm.
+ while (!WorkList.empty()) {
+ auto *MBB = *WorkList.begin();
+ WorkList.erase(MBB);
+
+ Status OldStatus = getStatus(MBB);
+
+    // Recompute this block's status from the statuses
+    // of its predecessors.
+ Status NewStatus = checkPredecessors(MBB, OldStatus);
+
+ // Did the block status change?
+ bool changed = OldStatus != NewStatus;
+ if (changed) {
+ addSuccesors(MBB);
+ Statuses[MBB] = NewStatus;
+ }
+ }
+
+ for (auto Entry : Statuses) {
+ if (Entry.second == EH)
+ Entry.first->setSectionID(MBBSectionID::ColdSectionID);
+ }
+}
+
static bool isColdBlock(const MachineBasicBlock &MBB,
const MachineBlockFrequencyInfo *MBFI,
ProfileSummaryInfo *PSI) {
- Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
+ std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
if (!Count)
return true;
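setDescendantEHBlocksCold above is a small forward dataflow over the CFG: statuses only ever increase (Unknown < EH < NonEH), each block takes the maximum status of its predecessors, and successors are re-queued on change, so the loop terminates. A stand-alone sketch of the same worklist algorithm on a toy integer-indexed graph (all names invented here):

// Hedged sketch of the worklist propagation in setDescendantEHBlocksCold.
#include <map>
#include <set>
#include <vector>

enum Status { Unknown = 0, EH = 1, NonEH = 2 };

// Succs[B] lists the successors of block B; Start is the entry block;
// EHPads are the landing pads. Returns the blocks to mark cold.
std::set<int> ehOnlyBlocks(const std::vector<std::vector<int>> &Succs,
                           int Start, const std::set<int> &EHPads) {
  std::map<int, Status> Statuses;
  std::set<int> WorkList;

  // Build the predecessor lists needed to meet over predecessors.
  std::vector<std::vector<int>> Preds(Succs.size());
  for (int B = 0; B < (int)Succs.size(); ++B)
    for (int S : Succs[B])
      Preds[S].push_back(B);

  auto getStatus = [&](int B) {
    auto It = Statuses.find(B);
    return It == Statuses.end() ? Unknown : It->second;
  };
  auto addSuccessors = [&](int B) {
    for (int S : Succs[B])
      if (!EHPads.count(S)) // EH pads keep their fixed EH status.
        WorkList.insert(S);
  };

  Statuses[Start] = NonEH;
  addSuccessors(Start);
  for (int LP : EHPads) {
    Statuses[LP] = EH;
    addSuccessors(LP);
  }

  while (!WorkList.empty()) {
    int B = *WorkList.begin();
    WorkList.erase(WorkList.begin());
    Status New = getStatus(B);
    for (int P : Preds[B])
      if (getStatus(P) > New)
        New = getStatus(P); // Take the strongest predecessor status.
    if (New != getStatus(B)) {
      Statuses[B] = New;
      addSuccessors(B); // Status changed; re-propagate.
    }
  }

  std::set<int> Cold;
  for (const auto &E : Statuses)
    if (E.second == EH)
      Cold.insert(E.first);
  return Cold;
}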
@@ -90,9 +169,11 @@ static bool isColdBlock(const MachineBasicBlock &MBB,
}
bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
- // TODO: We only target functions with profile data. Static information may
- // also be considered but we don't see performance improvements yet.
- if (!MF.getFunction().hasProfileData())
+  // We target functions with profile data. Statically identified exception
+  // handling code may additionally be split out as cold if the user passes
+  // the mfs-split-ehcode flag.
+ bool UseProfileData = MF.getFunction().hasProfileData();
+ if (!UseProfileData && !SplitAllEHCode)
return false;
// TODO: We don't split functions where a section attribute has been set
@@ -105,9 +186,9 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
// We don't want to proceed further for cold functions
// or functions of unknown hotness. Lukewarm functions have no prefix.
- Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
- if (SectionPrefix && (SectionPrefix.value().equals("unlikely") ||
- SectionPrefix.value().equals("unknown"))) {
+ std::optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
+ if (SectionPrefix &&
+ (*SectionPrefix == "unlikely" || *SectionPrefix == "unknown")) {
return false;
}
@@ -117,8 +198,13 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
// made by prior passes such as MachineBlockPlacement.
MF.RenumberBlocks();
MF.setBBSectionsType(BasicBlockSection::Preset);
- auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
- auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+ ProfileSummaryInfo *PSI = nullptr;
+ if (UseProfileData) {
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ }
SmallVector<MachineBasicBlock *, 2> LandingPads;
for (auto &MBB : MF) {
@@ -127,21 +213,25 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
if (MBB.isEHPad())
LandingPads.push_back(&MBB);
- else if (isColdBlock(MBB, MBFI, PSI))
+ else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
MBB.setSectionID(MBBSectionID::ColdSectionID);
}
+  // Split all EH code and its descendants statically by default.
+ if (SplitAllEHCode)
+ setDescendantEHBlocksCold(LandingPads, MF);
// We only split out eh pads if all of them are cold.
- bool HasHotLandingPads = false;
- for (const MachineBasicBlock *LP : LandingPads) {
- if (!isColdBlock(*LP, MBFI, PSI))
- HasHotLandingPads = true;
+ else {
+ bool HasHotLandingPads = false;
+ for (const MachineBasicBlock *LP : LandingPads) {
+ if (!isColdBlock(*LP, MBFI, PSI))
+ HasHotLandingPads = true;
+ }
+ if (!HasHotLandingPads) {
+ for (MachineBasicBlock *LP : LandingPads)
+ LP->setSectionID(MBBSectionID::ColdSectionID);
+ }
}
- if (!HasHotLandingPads) {
- for (MachineBasicBlock *LP : LandingPads)
- LP->setSectionID(MBBSectionID::ColdSectionID);
- }
-
auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
return X.getSectionID().Type < Y.getSectionID().Type;
};
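The comparator above orders blocks by section type and, judging by its strict-weak-ordering shape, presumably feeds a stable sort in the elided caller, so cold blocks cluster together while the original relative order within each section is preserved. A toy illustration of that grouping assumption:

// Hedged sketch: grouping blocks by section with a stable sort.
#include <algorithm>
#include <vector>

struct ToyBlock {
  int SectionType; // e.g. 0 = hot/default, 1 = cold
  int Id;          // original layout position
};

void groupBySection(std::vector<ToyBlock> &Blocks) {
  // A stable sort keeps the original relative order inside each section.
  std::stable_sort(Blocks.begin(), Blocks.end(),
                   [](const ToyBlock &X, const ToyBlock &Y) {
                     return X.SectionType < Y.SectionType;
                   });
}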
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index e92dec5bea48..8e0777f8438a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -13,7 +13,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -85,14 +84,10 @@ static void tryToGetTargetInfo(const MachineInstr &MI,
}
void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
- if (MCID->ImplicitDefs)
- for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs;
- ++ImpDefs)
- addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true));
- if (MCID->ImplicitUses)
- for (const MCPhysReg *ImpUses = MCID->getImplicitUses(); *ImpUses;
- ++ImpUses)
- addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true));
+ for (MCPhysReg ImpDef : MCID->implicit_defs())
+ addOperand(MF, MachineOperand::CreateReg(ImpDef, true, true));
+ for (MCPhysReg ImpUse : MCID->implicit_uses())
+ addOperand(MF, MachineOperand::CreateReg(ImpUse, false, true));
}
/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
@@ -104,8 +99,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID,
assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
- if (unsigned NumOps = MCID->getNumOperands() +
- MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) {
+ if (unsigned NumOps = MCID->getNumOperands() + MCID->implicit_defs().size() +
+ MCID->implicit_uses().size()) {
CapOperands = OperandCapacity::get(NumOps);
Operands = MF.allocateOperandArray(CapOperands);
}
@@ -129,6 +124,14 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
for (const MachineOperand &MO : MI.operands())
addOperand(MF, MO);
+ // Replicate ties between the operands, which addOperand was not
+ // able to do reliably.
+ for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
+ MachineOperand &NewMO = getOperand(i);
+ const MachineOperand &OrigMO = MI.getOperand(i);
+ NewMO.TiedTo = OrigMO.TiedTo;
+ }
+
// Copy all the sensible flags.
setFlags(MI.Flags);
}
@@ -301,12 +304,15 @@ void MachineInstr::setExtraInfo(MachineFunction &MF,
ArrayRef<MachineMemOperand *> MMOs,
MCSymbol *PreInstrSymbol,
MCSymbol *PostInstrSymbol,
- MDNode *HeapAllocMarker) {
+ MDNode *HeapAllocMarker, MDNode *PCSections,
+ uint32_t CFIType) {
bool HasPreInstrSymbol = PreInstrSymbol != nullptr;
bool HasPostInstrSymbol = PostInstrSymbol != nullptr;
bool HasHeapAllocMarker = HeapAllocMarker != nullptr;
- int NumPointers =
- MMOs.size() + HasPreInstrSymbol + HasPostInstrSymbol + HasHeapAllocMarker;
+ bool HasPCSections = PCSections != nullptr;
+ bool HasCFIType = CFIType != 0;
+ int NumPointers = MMOs.size() + HasPreInstrSymbol + HasPostInstrSymbol +
+ HasHeapAllocMarker + HasPCSections + HasCFIType;
// Drop all extra info if there is none.
if (NumPointers <= 0) {
@@ -318,9 +324,11 @@ void MachineInstr::setExtraInfo(MachineFunction &MF,
// out of line because PointerSumType cannot hold more than 4 tag types with
// 32-bit pointers.
// FIXME: Maybe we should make the symbols in the extra info mutable?
- else if (NumPointers > 1 || HasHeapAllocMarker) {
- Info.set<EIIK_OutOfLine>(MF.createMIExtraInfo(
- MMOs, PreInstrSymbol, PostInstrSymbol, HeapAllocMarker));
+ else if (NumPointers > 1 || HasHeapAllocMarker || HasPCSections ||
+ HasCFIType) {
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo(MMOs, PreInstrSymbol, PostInstrSymbol,
+ HeapAllocMarker, PCSections, CFIType));
return;
}
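The branch above decides when an instruction's extra info must move out of line: more than one pointer-sized item, or any of the kinds the inline PointerSumType cannot carry (heap-alloc marker, PC sections, CFI type). Restated as a hypothetical helper:

// Hedged restatement of the inline-vs-out-of-line decision above.
bool needsOutOfLineExtraInfo(int NumPointers, bool HasHeapAllocMarker,
                             bool HasPCSections, bool HasCFIType) {
  return NumPointers > 1 || HasHeapAllocMarker || HasPCSections || HasCFIType;
}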
@@ -338,7 +346,7 @@ void MachineInstr::dropMemRefs(MachineFunction &MF) {
return;
setExtraInfo(MF, {}, getPreInstrSymbol(), getPostInstrSymbol(),
- getHeapAllocMarker());
+ getHeapAllocMarker(), getPCSections(), getCFIType());
}
void MachineInstr::setMemRefs(MachineFunction &MF,
@@ -349,7 +357,7 @@ void MachineInstr::setMemRefs(MachineFunction &MF,
}
setExtraInfo(MF, MMOs, getPreInstrSymbol(), getPostInstrSymbol(),
- getHeapAllocMarker());
+ getHeapAllocMarker(), getPCSections(), getCFIType());
}
void MachineInstr::addMemOperand(MachineFunction &MF,
@@ -372,7 +380,8 @@ void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) {
// are the same (including null).
if (getPreInstrSymbol() == MI.getPreInstrSymbol() &&
getPostInstrSymbol() == MI.getPostInstrSymbol() &&
- getHeapAllocMarker() == MI.getHeapAllocMarker()) {
+ getHeapAllocMarker() == MI.getHeapAllocMarker() &&
+ getPCSections() == MI.getPCSections()) {
Info = MI.Info;
return;
}
@@ -457,7 +466,7 @@ void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
}
setExtraInfo(MF, memoperands(), Symbol, getPostInstrSymbol(),
- getHeapAllocMarker());
+ getHeapAllocMarker(), getPCSections(), getCFIType());
}
void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
@@ -472,7 +481,7 @@ void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
}
setExtraInfo(MF, memoperands(), getPreInstrSymbol(), Symbol,
- getHeapAllocMarker());
+ getHeapAllocMarker(), getPCSections(), getCFIType());
}
void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) {
@@ -481,7 +490,25 @@ void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) {
return;
setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(),
- Marker);
+ Marker, getPCSections(), getCFIType());
+}
+
+void MachineInstr::setPCSections(MachineFunction &MF, MDNode *PCSections) {
+ // Do nothing if old and new symbols are the same.
+ if (PCSections == getPCSections())
+ return;
+
+ setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(),
+ getHeapAllocMarker(), PCSections, getCFIType());
+}
+
+void MachineInstr::setCFIType(MachineFunction &MF, uint32_t Type) {
+ // Do nothing if old and new types are the same.
+ if (Type == getCFIType())
+ return;
+
+ setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(),
+ getHeapAllocMarker(), getPCSections(), Type);
}
void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
@@ -496,6 +523,7 @@ void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
setPreInstrSymbol(MF, MI.getPreInstrSymbol());
setPostInstrSymbol(MF, MI.getPostInstrSymbol());
setHeapAllocMarker(MF, MI.getHeapAllocMarker());
+ setPCSections(MF, MI.getPCSections());
}
uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
@@ -608,8 +636,7 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
if (Check == IgnoreDefs)
continue;
else if (Check == IgnoreVRegDefs) {
- if (!Register::isVirtualRegister(MO.getReg()) ||
- !Register::isVirtualRegister(OMO.getReg()))
+ if (!MO.getReg().isVirtual() || !OMO.getReg().isVirtual())
if (!MO.isIdenticalTo(OMO))
return false;
} else {
@@ -630,6 +657,34 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
if (getDebugLoc() && Other.getDebugLoc() &&
getDebugLoc() != Other.getDebugLoc())
return false;
+ // If pre- or post-instruction symbols do not match then the two instructions
+ // are not identical.
+ if (getPreInstrSymbol() != Other.getPreInstrSymbol() ||
+ getPostInstrSymbol() != Other.getPostInstrSymbol())
+ return false;
+ // Call instructions with different CFI types are not identical.
+ if (isCall() && getCFIType() != Other.getCFIType())
+ return false;
+
+ return true;
+}
+
+bool MachineInstr::isEquivalentDbgInstr(const MachineInstr &Other) const {
+ if (!isDebugValueLike() || !Other.isDebugValueLike())
+ return false;
+ if (getDebugLoc() != Other.getDebugLoc())
+ return false;
+ if (getDebugVariable() != Other.getDebugVariable())
+ return false;
+ if (getNumDebugOperands() != Other.getNumDebugOperands())
+ return false;
+ for (unsigned OpIdx = 0; OpIdx < getNumDebugOperands(); ++OpIdx)
+ if (!getDebugOperand(OpIdx).isIdenticalTo(Other.getDebugOperand(OpIdx)))
+ return false;
+ if (!DIExpression::isEqualExpression(
+ getDebugExpression(), isIndirectDebugValue(),
+ Other.getDebugExpression(), Other.isIndirectDebugValue()))
+ return false;
return true;
}
@@ -794,14 +849,14 @@ const DILabel *MachineInstr::getDebugLabel() const {
}
const MachineOperand &MachineInstr::getDebugVariableOp() const {
- assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*");
- unsigned VariableOp = isDebugValueList() ? 0 : 2;
+ assert((isDebugValueLike()) && "not a DBG_VALUE*");
+ unsigned VariableOp = isNonListDebugValue() ? 2 : 0;
return getOperand(VariableOp);
}
MachineOperand &MachineInstr::getDebugVariableOp() {
- assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*");
- unsigned VariableOp = isDebugValueList() ? 0 : 2;
+ assert((isDebugValueLike()) && "not a DBG_VALUE*");
+ unsigned VariableOp = isNonListDebugValue() ? 2 : 0;
return getOperand(VariableOp);
}
@@ -810,14 +865,14 @@ const DILocalVariable *MachineInstr::getDebugVariable() const {
}
const MachineOperand &MachineInstr::getDebugExpressionOp() const {
- assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*");
- unsigned ExpressionOp = isDebugValueList() ? 1 : 3;
+ assert((isDebugValueLike()) && "not a DBG_VALUE*");
+ unsigned ExpressionOp = isNonListDebugValue() ? 3 : 1;
return getOperand(ExpressionOp);
}
MachineOperand &MachineInstr::getDebugExpressionOp() {
- assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE*");
- unsigned ExpressionOp = isDebugValueList() ? 1 : 3;
+ assert((isDebugValueLike()) && "not a DBG_VALUE*");
+ unsigned ExpressionOp = isNonListDebugValue() ? 3 : 1;
return getOperand(ExpressionOp);
}
@@ -993,7 +1048,7 @@ MachineInstr::readsWritesVirtualRegister(Register Reg,
int
MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap,
const TargetRegisterInfo *TRI) const {
- bool isPhys = Register::isPhysicalRegister(Reg);
+ bool isPhys = Reg.isPhysical();
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
// Accept regmask operands when Overlap is set.
@@ -1004,7 +1059,7 @@ MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap,
continue;
Register MOReg = MO.getReg();
bool Found = (MOReg == Reg);
- if (!Found && TRI && isPhys && Register::isPhysicalRegister(MOReg)) {
+ if (!Found && TRI && isPhys && MOReg.isPhysical()) {
if (Overlap)
Found = TRI->regsOverlap(MOReg, Reg);
else
@@ -1027,7 +1082,7 @@ int MachineInstr::findFirstPredOperandIdx() const {
const MCInstrDesc &MCID = getDesc();
if (MCID.isPredicable()) {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (MCID.OpInfo[i].isPredicate())
+ if (MCID.operands()[i].isPredicate())
return i;
}
@@ -1162,7 +1217,7 @@ void MachineInstr::clearKillInfo() {
void MachineInstr::substituteRegister(Register FromReg, Register ToReg,
unsigned SubIdx,
const TargetRegisterInfo &RegInfo) {
- if (Register::isPhysicalRegister(ToReg)) {
+ if (ToReg.isPhysical()) {
if (SubIdx)
ToReg = RegInfo.getSubReg(ToReg, SubIdx);
for (MachineOperand &MO : operands()) {
@@ -1465,7 +1520,7 @@ LLT MachineInstr::getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes,
if (isVariadic() || OpIdx >= getNumExplicitOperands())
return MRI.getType(Op.getReg());
- auto &OpInfo = getDesc().OpInfo[OpIdx];
+ auto &OpInfo = getDesc().operands()[OpIdx];
if (!OpInfo.isGenericType())
return MRI.getType(Op.getReg());
@@ -1748,6 +1803,19 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << " heap-alloc-marker ";
HeapAllocMarker->printAsOperand(OS, MST);
}
+ if (MDNode *PCSections = getPCSections()) {
+ if (!FirstOp) {
+ FirstOp = false;
+ OS << ',';
+ }
+ OS << " pcsections ";
+ PCSections->printAsOperand(OS, MST);
+ }
+ if (uint32_t CFIType = getCFIType()) {
+ if (!FirstOp)
+ OS << ',';
+ OS << " cfi-type " << CFIType;
+ }
if (DebugInstrNum) {
if (!FirstOp)
@@ -1822,7 +1890,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
bool MachineInstr::addRegisterKilled(Register IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
- bool isPhysReg = Register::isPhysicalRegister(IncomingReg);
+ bool isPhysReg = IncomingReg.isPhysical();
bool hasAliases = isPhysReg &&
MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
bool Found = false;
@@ -1853,7 +1921,7 @@ bool MachineInstr::addRegisterKilled(Register IncomingReg,
MO.setIsKill();
Found = true;
}
- } else if (hasAliases && MO.isKill() && Register::isPhysicalRegister(Reg)) {
+ } else if (hasAliases && MO.isKill() && Reg.isPhysical()) {
// A super-register kill already exists.
if (RegInfo->isSuperRegister(IncomingReg, Reg))
return true;
@@ -1887,7 +1955,7 @@ bool MachineInstr::addRegisterKilled(Register IncomingReg,
void MachineInstr::clearRegisterKills(Register Reg,
const TargetRegisterInfo *RegInfo) {
- if (!Register::isPhysicalRegister(Reg))
+ if (!Reg.isPhysical())
RegInfo = nullptr;
for (MachineOperand &MO : operands()) {
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
@@ -1901,7 +1969,7 @@ void MachineInstr::clearRegisterKills(Register Reg,
bool MachineInstr::addRegisterDead(Register Reg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
- bool isPhysReg = Register::isPhysicalRegister(Reg);
+ bool isPhysReg = Reg.isPhysical();
bool hasAliases = isPhysReg &&
MCRegAliasIterator(Reg, RegInfo, false).isValid();
bool Found = false;
@@ -1917,8 +1985,7 @@ bool MachineInstr::addRegisterDead(Register Reg,
if (MOReg == Reg) {
MO.setIsDead();
Found = true;
- } else if (hasAliases && MO.isDead() &&
- Register::isPhysicalRegister(MOReg)) {
+ } else if (hasAliases && MO.isDead() && MOReg.isPhysical()) {
// There exists a super-register that's marked dead.
if (RegInfo->isSuperRegister(Reg, MOReg))
return true;
@@ -1969,7 +2036,7 @@ void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) {
void MachineInstr::addRegisterDefined(Register Reg,
const TargetRegisterInfo *RegInfo) {
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo);
if (MO)
return;
@@ -2017,7 +2084,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
HashComponents.reserve(MI->getNumOperands() + 1);
HashComponents.push_back(MI->getOpcode());
for (const MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && MO.isDef() && MO.getReg().isVirtual())
continue; // Skip virtual register defs.
HashComponents.push_back(hash_value(MO));
@@ -2065,41 +2132,35 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
const MCInstrDesc &MCID, bool IsIndirect,
- const MachineOperand &MO,
- const MDNode *Variable, const MDNode *Expr) {
- assert(isa<DILocalVariable>(Variable) && "not a variable");
- assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
- assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
- "Expected inlined-at fields to agree");
- if (MO.isReg())
- return BuildMI(MF, DL, MCID, IsIndirect, MO.getReg(), Variable, Expr);
-
- auto MIB = BuildMI(MF, DL, MCID).add(MO);
- if (IsIndirect)
- MIB.addImm(0U);
- else
- MIB.addReg(0U);
- return MIB.addMetadata(Variable).addMetadata(Expr);
-}
-
-MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
- const MCInstrDesc &MCID, bool IsIndirect,
- ArrayRef<MachineOperand> MOs,
+ ArrayRef<MachineOperand> DebugOps,
const MDNode *Variable, const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- if (MCID.Opcode == TargetOpcode::DBG_VALUE)
- return BuildMI(MF, DL, MCID, IsIndirect, MOs[0], Variable, Expr);
+ if (MCID.Opcode == TargetOpcode::DBG_VALUE) {
+ assert(DebugOps.size() == 1 &&
+ "DBG_VALUE must contain exactly one debug operand");
+ MachineOperand DebugOp = DebugOps[0];
+ if (DebugOp.isReg())
+ return BuildMI(MF, DL, MCID, IsIndirect, DebugOp.getReg(), Variable,
+ Expr);
+
+ auto MIB = BuildMI(MF, DL, MCID).add(DebugOp);
+ if (IsIndirect)
+ MIB.addImm(0U);
+ else
+ MIB.addReg(0U);
+ return MIB.addMetadata(Variable).addMetadata(Expr);
+ }
auto MIB = BuildMI(MF, DL, MCID);
MIB.addMetadata(Variable).addMetadata(Expr);
- for (const MachineOperand &MO : MOs)
- if (MO.isReg())
- MIB.addReg(MO.getReg());
+ for (const MachineOperand &DebugOp : DebugOps)
+ if (DebugOp.isReg())
+ MIB.addReg(DebugOp.getReg());
else
- MIB.add(MO);
+ MIB.add(DebugOp);
return MIB;
}
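A minimal usage sketch of the merged overload above; TII, Reg0, Reg1, Var and Expr are illustrative names assumed to be in scope:

SmallVector<MachineOperand, 2> DebugOps;
DebugOps.push_back(MachineOperand::CreateReg(Reg0, /*isDef=*/false));
DebugOps.push_back(MachineOperand::CreateReg(Reg1, /*isDef=*/false));
// Builds a variadic DBG_VALUE_LIST; passing a single-element DebugOps with
// TargetOpcode::DBG_VALUE instead takes the one-operand path asserted above.
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE_LIST),
        /*IsIndirect=*/false, DebugOps, Var, Expr);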
@@ -2117,21 +2178,12 @@ MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, const MCInstrDesc &MCID,
- bool IsIndirect, MachineOperand &MO,
- const MDNode *Variable, const MDNode *Expr) {
- MachineFunction &MF = *BB.getParent();
- MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, MO, Variable, Expr);
- BB.insert(I, MI);
- return MachineInstrBuilder(MF, *MI);
-}
-
-MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
- MachineBasicBlock::iterator I,
- const DebugLoc &DL, const MCInstrDesc &MCID,
- bool IsIndirect, ArrayRef<MachineOperand> MOs,
+ bool IsIndirect,
+ ArrayRef<MachineOperand> DebugOps,
const MDNode *Variable, const MDNode *Expr) {
MachineFunction &MF = *BB.getParent();
- MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, MOs, Variable, Expr);
+ MachineInstr *MI =
+ BuildMI(MF, DL, MCID, IsIndirect, DebugOps, Variable, Expr);
BB.insert(I, MI);
return MachineInstrBuilder(MF, *MI);
}
@@ -2173,6 +2225,8 @@ MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const MachineInstr &Orig,
int FrameIndex, Register SpillReg) {
+ assert(!Orig.isDebugRef() &&
+ "DBG_INSTR_REF should not reference a virtual register.");
const DIExpression *Expr = computeExprForSpill(Orig, SpillReg);
MachineInstrBuilder NewMI =
BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc());
@@ -2275,7 +2329,7 @@ static unsigned getSpillSlotSize(const MMOList &Accesses,
return Size;
}
-Optional<unsigned>
+std::optional<unsigned>
MachineInstr::getSpillSize(const TargetInstrInfo *TII) const {
int FI;
if (TII->isStoreToStackSlotPostFE(*this, FI)) {
@@ -2283,18 +2337,18 @@ MachineInstr::getSpillSize(const TargetInstrInfo *TII) const {
if (MFI.isSpillSlotObjectIndex(FI))
return (*memoperands_begin())->getSize();
}
- return None;
+ return std::nullopt;
}
-Optional<unsigned>
+std::optional<unsigned>
MachineInstr::getFoldedSpillSize(const TargetInstrInfo *TII) const {
MMOList Accesses;
if (TII->hasStoreToStackSlot(*this, Accesses))
return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
- return None;
+ return std::nullopt;
}
-Optional<unsigned>
+std::optional<unsigned>
MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const {
int FI;
if (TII->isLoadFromStackSlotPostFE(*this, FI)) {
@@ -2302,15 +2356,15 @@ MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const {
if (MFI.isSpillSlotObjectIndex(FI))
return (*memoperands_begin())->getSize();
}
- return None;
+ return std::nullopt;
}
-Optional<unsigned>
+std::optional<unsigned>
MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const {
MMOList Accesses;
if (TII->hasLoadFromStackSlot(*this, Accesses))
return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
- return None;
+ return std::nullopt;
}
unsigned MachineInstr::getDebugInstrNum() {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 2f1d7b976264..0c059a145ca4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -198,7 +198,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
DeadDefSet.erase(Reg);
}
- if (!MO.isDead() && Register::isPhysicalRegister(Reg)) {
+ if (!MO.isDead() && Reg.isPhysical()) {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
unsigned SubReg = *SubRegs;
if (LocalDefSet.insert(SubReg).second)
@@ -328,7 +328,7 @@ PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg,
continue;
Register MOReg = MO.getReg();
- if (!MOReg || !Register::isPhysicalRegister(MOReg))
+ if (!MOReg || !MOReg.isPhysical())
continue;
if (!TRI->regsOverlap(MOReg, Reg))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
index df7b6c782b91..1c09c01df3aa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
@@ -452,8 +452,7 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI,
Register Reg = MO.getReg();
if (!Reg)
continue;
- assert(Register::isPhysicalRegister(Reg) &&
- "Not expecting virtual register!");
+ assert(Reg.isPhysical() && "Not expecting virtual register!");
if (!MO.isDef()) {
if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
@@ -844,7 +843,7 @@ MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
if (!MO.isReg() || MO.isImplicit())
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
// FIXME: It seems bad to use RegSeen only for some of these calculations.
@@ -916,9 +915,9 @@ static bool isInvariantStore(const MachineInstr &MI,
Register Reg = MO.getReg();
// If operand is a virtual register, check if it comes from a copy of a
// physical register.
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
return false;
if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
return false;
@@ -947,7 +946,7 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
const MachineFunction *MF = MI.getMF();
// Check that we are copying a constant physical register.
Register CopySrcReg = MI.getOperand(1).getReg();
- if (Register::isVirtualRegister(CopySrcReg))
+ if (CopySrcReg.isVirtual())
return false;
if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
@@ -955,8 +954,7 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
Register CopyDstReg = MI.getOperand(0).getReg();
// Check if any of the uses of the copy are invariant stores.
- assert(Register::isVirtualRegister(CopyDstReg) &&
- "copy dst is not a virtual reg");
+ assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");
for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
@@ -1020,7 +1018,7 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
if (!MO.isReg() || !MO.isDef())
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
// A PHI may cause a copy to be inserted.
@@ -1090,7 +1088,7 @@ bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const {
continue;
--NumDefs;
Register Reg = DefMO.getReg();
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
continue;
if (!TII->hasLowDefLatency(SchedModel, MI, i))
@@ -1183,7 +1181,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
if (!MO.isReg() || MO.isImplicit())
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
@@ -1340,13 +1338,11 @@ bool MachineLICMBase::EliminateCSE(
const MachineOperand &MO = MI->getOperand(i);
// Physical registers may not differ here.
- assert((!MO.isReg() || MO.getReg() == 0 ||
- !Register::isPhysicalRegister(MO.getReg()) ||
+ assert((!MO.isReg() || MO.getReg() == 0 || !MO.getReg().isPhysical() ||
MO.getReg() == Dup->getOperand(i).getReg()) &&
"Instructions with different phys regs are not identical!");
- if (MO.isReg() && MO.isDef() &&
- !Register::isPhysicalRegister(MO.getReg()))
+ if (MO.isReg() && MO.isDef() && !MO.getReg().isPhysical())
Defs.push_back(i);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
new file mode 100644
index 000000000000..c400ce190b46
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -0,0 +1,239 @@
+//==--- MachineLateInstrsCleanup.cpp - Late Instructions Cleanup Pass -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass removes redundant, identical immediate or address loads
+// into the same register. The removed immediate loads are typically the
+// result of rematerialization, while the redundant address loads are
+// frame-addressing anchor points created during frame-index elimination.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-latecleanup"
+
+STATISTIC(NumRemoved, "Number of redundant instructions removed.");
+
+namespace {
+
+class MachineLateInstrsCleanup : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // Data structures to map regs to their definitions per MBB.
+ using Reg2DefMap = std::map<Register, MachineInstr*>;
+ std::vector<Reg2DefMap> RegDefs;
+
+ // Walk through the instructions in MBB and remove any redundant
+ // instructions.
+ bool processBlock(MachineBasicBlock *MBB);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ MachineLateInstrsCleanup() : MachineFunctionPass(ID) {
+ initializeMachineLateInstrsCleanupPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+};
+
+} // end anonymous namespace
+
+char MachineLateInstrsCleanup::ID = 0;
+
+char &llvm::MachineLateInstrsCleanupID = MachineLateInstrsCleanup::ID;
+
+INITIALIZE_PASS(MachineLateInstrsCleanup, DEBUG_TYPE,
+ "Machine Late Instructions Cleanup Pass", false, false)
+
+bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ RegDefs.clear();
+ RegDefs.resize(MF.getNumBlockIDs());
+
+ // Visit all MBBs in an order that maximises the reuse from predecessors.
+ bool Changed = false;
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ for (MachineBasicBlock *MBB : RPOT)
+ Changed |= processBlock(MBB);
+
+ return Changed;
+}
+
+// Clear any previous kill flag on Reg found before I in MBB. Walk backwards
+// in MBB and, if needed, continue into predecessors until a use or def of
+// Reg is encountered. This seems to be faster in practice than tracking
+// kill flags in a map.
+static void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ BitVector &VisitedPreds,
+ const TargetRegisterInfo *TRI) {
+ VisitedPreds.set(MBB->getNumber());
+ while (I != MBB->begin()) {
+ --I;
+ bool Found = false;
+ for (auto &MO : I->operands())
+ if (MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg)) {
+ if (MO.isDef())
+ return;
+ if (MO.readsReg()) {
+ MO.setIsKill(false);
+ Found = true; // Keep going for an implicit kill of the super-reg.
+ }
+ }
+ if (Found)
+ return;
+ }
+
+ // If an earlier def is not in MBB, continue in predecessors.
+ if (!MBB->isLiveIn(Reg))
+ MBB->addLiveIn(Reg);
+ assert(!MBB->pred_empty() && "Predecessor def not found!");
+ for (MachineBasicBlock *Pred : MBB->predecessors())
+ if (!VisitedPreds.test(Pred->getNumber()))
+ clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds, TRI);
+}
+
+static void removeRedundantDef(MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ Register Reg = MI->getOperand(0).getReg();
+ BitVector VisitedPreds(MI->getMF()->getNumBlockIDs());
+ clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds, TRI);
+ MI->eraseFromParent();
+ ++NumRemoved;
+}
+
+// Return true if MI is a potential candidate for reuse/removal, and if so
+// return the register it defines in DefedReg. A candidate is a simple
+// instruction that does not touch memory, has only one register definition,
+// and the only register it may use is FrameReg. Typically this is an
+// immediate load or a load-address instruction.
+static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
+ Register FrameReg) {
+ DefedReg = MCRegister::NoRegister;
+ bool SawStore = true;
+ if (!MI->isSafeToMove(nullptr, SawStore) || MI->isImplicitDef() ||
+ MI->isInlineAsm())
+ return false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ if (MO.isDef()) {
+ if (i == 0 && !MO.isImplicit() && !MO.isDead())
+ DefedReg = MO.getReg();
+ else
+ return false;
+ } else if (MO.getReg() && MO.getReg() != FrameReg)
+ return false;
+ } else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
+ MO.isGlobal() || MO.isSymbol()))
+ return false;
+ }
+ return DefedReg.isValid();
+}
+
+bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ Reg2DefMap &MBBDefs = RegDefs[MBB->getNumber()];
+
+ // Find reusable definitions in the predecessor(s).
+ if (!MBB->pred_empty() && !MBB->isEHPad()) {
+ MachineBasicBlock *FirstPred = *MBB->pred_begin();
+ for (auto [Reg, DefMI] : RegDefs[FirstPred->getNumber()])
+ if (llvm::all_of(
+ drop_begin(MBB->predecessors()),
+ [&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) {
+ auto PredDefI = RegDefs[Pred->getNumber()].find(Reg);
+ return PredDefI != RegDefs[Pred->getNumber()].end() &&
+ DefMI->isIdenticalTo(*PredDefI->second);
+ })) {
+ MBBDefs[Reg] = DefMI;
+ LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in "
+ << printMBBReference(*MBB) << ": " << *DefMI;);
+ }
+ }
+
+ // Process MBB.
+ MachineFunction *MF = MBB->getParent();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ Register FrameReg = TRI->getFrameRegister(*MF);
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ // If FrameReg is modified, no previous load-address instructions (using
+ // it) are valid.
+ if (MI.modifiesRegister(FrameReg, TRI)) {
+ MBBDefs.clear();
+ continue;
+ }
+
+ Register DefedReg;
+ bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);
+
+ // Check for an earlier identical and reusable instruction.
+ if (IsCandidate) {
+ auto DefI = MBBDefs.find(DefedReg);
+ if (DefI != MBBDefs.end() && MI.isIdenticalTo(*DefI->second)) {
+ LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
+ << printMBBReference(*MBB) << ": " << MI;);
+ removeRedundantDef(&MI, TRI);
+ Changed = true;
+ continue;
+ }
+ }
+
+ // Clear any entries in map that MI clobbers.
+ for (auto DefI = MBBDefs.begin(); DefI != MBBDefs.end();) {
+ Register Reg = DefI->first;
+ if (MI.modifiesRegister(Reg, TRI))
+ DefI = MBBDefs.erase(DefI);
+ else
+ ++DefI;
+ }
+
+ // Record this MI for potential later reuse.
+ if (IsCandidate) {
+ LLVM_DEBUG(dbgs() << "Found interesting instruction in "
+ << printMBBReference(*MBB) << ": " << MI;);
+ MBBDefs[DefedReg] = &MI;
+ }
+ }
+
+ return Changed;
+}
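A sketch of the redundancy the new pass removes, in MIR-like pseudocode (register names and opcode are made up):

//   bb.1:
//     $r1 = LOAD-IMM 42     ; kept, recorded in RegDefs[bb.1]
//     ...
//   bb.2:                   ; predecessor bb.1, $r1 not clobbered in between
//     $r1 = LOAD-IMM 42     ; identical to the recorded def -> erased
// After erasing the second def, clearKillsForDef() walks backwards (into
// predecessors if needed) to drop any kill flag on $r1 and adds $r1 as a
// live-in, since the first def now has to stay live across the gap.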
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 5cbded4b9264..fb3af385a0c1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -168,7 +168,7 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
// An instruction that uses or defines a physical register can't e.g. be
// hoisted, so mark this as not invariant.
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 23d55a5df9f5..a0c0166d06f0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -47,8 +47,6 @@ void MachineModuleInfo::initialize() {
}
void MachineModuleInfo::finalize() {
- Personalities.clear();
-
Context.reset();
// We don't clear the ExternalContext.
@@ -89,16 +87,6 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM,
MachineModuleInfo::~MachineModuleInfo() { finalize(); }
-/// \name Exception Handling
-/// \{
-
-void MachineModuleInfo::addPersonality(const Function *Personality) {
- if (!llvm::is_contained(Personalities, Personality))
- Personalities.push_back(Personality);
-}
-
-/// \}
-
MachineFunction *
MachineModuleInfo::getMachineFunction(const Function &F) const {
auto I = MachineFunctions.find(&F);
@@ -118,6 +106,7 @@ MachineFunction &MachineModuleInfo::getOrCreateMachineFunction(Function &F) {
// No pre-existing machine function, create a new one.
const TargetSubtargetInfo &STI = *TM.getSubtargetImpl(F);
MF = new MachineFunction(F, TM, STI, NextFnNum++, *this);
+ MF->initTargetMachineFunctionInfo(STI);
// Update the set entry.
I.first->second.reset(MF);
} else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
index 46ad1de78c46..0a7b12e9ccb9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StableHashing.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
@@ -28,6 +29,7 @@
#include "llvm/MC/MCDwarf.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include <optional>
using namespace llvm;
@@ -45,6 +47,7 @@ static const MachineFunction *getMFIfAvailable(const MachineOperand &MO) {
return MF;
return nullptr;
}
+
static MachineFunction *getMFIfAvailable(MachineOperand &MO) {
return const_cast<MachineFunction *>(
getMFIfAvailable(const_cast<const MachineOperand &>(MO)));
@@ -115,7 +118,7 @@ void MachineOperand::setIsDef(bool Val) {
bool MachineOperand::isRenamable() const {
assert(isReg() && "Wrong MachineOperand accessor");
- assert(Register::isPhysicalRegister(getReg()) &&
+ assert(getReg().isPhysical() &&
"isRenamable should only be checked on physical registers");
if (!IsRenamable)
return false;
@@ -133,7 +136,7 @@ bool MachineOperand::isRenamable() const {
void MachineOperand::setIsRenamable(bool Val) {
assert(isReg() && "Wrong MachineOperand accessor");
- assert(Register::isPhysicalRegister(getReg()) &&
+ assert(getReg().isPhysical() &&
"setIsRenamable should only be called on physical registers");
IsRenamable = Val;
}
@@ -233,6 +236,19 @@ void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset,
setTargetFlags(TargetFlags);
}
+void MachineOperand::ChangeToDbgInstrRef(unsigned InstrIdx, unsigned OpIdx,
+ unsigned TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into a DbgInstrRef");
+
+ removeRegFromUses();
+
+ OpKind = MO_DbgInstrRef;
+ setInstrRefInstrIndex(InstrIdx);
+ setInstrRefOpIndex(OpIdx);
+ setTargetFlags(TargetFlags);
+}
+
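A hedged usage sketch for the new operand kind: converting a debug instruction's register operand into an instruction reference. DbgMI, InstrNum and OpNum are illustrative names assumed to come from the instruction-referencing bookkeeping:

MachineOperand &MO = DbgMI.getDebugOperand(0);
MO.ChangeToDbgInstrRef(InstrNum, OpNum, /*TargetFlags=*/0);
// Prints as dbg-instr-ref(InstrNum, OpNum), per the MO_DbgInstrRef case below.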
/// ChangeToRegister - Replace this operand with a new register operand of
/// the specified value. If an operand is known to be a register already,
/// the setReg method should be used.
@@ -323,10 +339,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return true;
if (const MachineFunction *MF = getMFIfAvailable(*this)) {
- // Calculate the size of the RegMask
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
-
+ unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
// Deep compare of the two RegMasks
return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask);
}
@@ -336,6 +350,9 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
}
case MachineOperand::MO_MCSymbol:
return getMCSymbol() == Other.getMCSymbol();
+ case MachineOperand::MO_DbgInstrRef:
+ return getInstrRefInstrIndex() == Other.getInstrRefInstrIndex() &&
+ getInstrRefOpIndex() == Other.getInstrRefOpIndex();
case MachineOperand::MO_CFIIndex:
return getCFIIndex() == Other.getCFIIndex();
case MachineOperand::MO_Metadata:
@@ -382,12 +399,27 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getBlockAddress(),
MO.getOffset());
case MachineOperand::MO_RegisterMask:
- case MachineOperand::MO_RegisterLiveOut:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+ case MachineOperand::MO_RegisterLiveOut: {
+ if (const MachineFunction *MF = getMFIfAvailable(MO)) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
+ const uint32_t *RegMask = MO.getRegMask();
+ std::vector<stable_hash> RegMaskHashes(RegMask, RegMask + RegMaskSize);
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_array(RegMaskHashes.data(),
+ RegMaskHashes.size()));
+ }
+
+ assert(0 && "MachineOperand not associated with any MachineFunction");
+ return hash_combine(MO.getType(), MO.getTargetFlags());
+ }
case MachineOperand::MO_Metadata:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
case MachineOperand::MO_MCSymbol:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
+ case MachineOperand::MO_DbgInstrRef:
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getInstrRefInstrIndex(), MO.getInstrRefOpIndex());
case MachineOperand::MO_CFIIndex:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex());
case MachineOperand::MO_IntrinsicID:
@@ -445,7 +477,7 @@ static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
return;
}
- if (Optional<unsigned> Reg = TRI->getLLVMRegNum(DwarfReg, true))
+ if (std::optional<unsigned> Reg = TRI->getLLVMRegNum(DwarfReg, true))
OS << printReg(*Reg, TRI);
else
OS << "<badreg>";
@@ -458,7 +490,7 @@ static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB,
printLLVMNameWithoutPrefix(OS, BB.getName());
return;
}
- Optional<int> Slot;
+ std::optional<int> Slot;
if (const Function *F = BB.getParent()) {
if (F == MST.getCurrentFunction()) {
Slot = MST.getLocalSlot(&BB);
@@ -519,7 +551,7 @@ static void printFrameIndex(raw_ostream& OS, int FrameIndex, bool IsFixed,
void MachineOperand::printSubRegIdx(raw_ostream &OS, uint64_t Index,
const TargetRegisterInfo *TRI) {
OS << "%subreg.";
- if (TRI)
+ if (TRI && Index != 0 && Index < TRI->getNumSubRegIndices())
OS << TRI->getSubRegIndexName(Index);
else
OS << Index;
@@ -736,15 +768,16 @@ void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint,
const TargetIntrinsicInfo *IntrinsicInfo) const {
tryToGetTargetInfo(*this, TRI, IntrinsicInfo);
ModuleSlotTracker DummyMST(nullptr);
- print(OS, DummyMST, TypeToPrint, None, /*PrintDef=*/false,
+ print(OS, DummyMST, TypeToPrint, std::nullopt, /*PrintDef=*/false,
/*IsStandalone=*/true,
/*ShouldPrintRegisterTies=*/true,
/*TiedOperandIdx=*/0, TRI, IntrinsicInfo);
}
void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
- LLT TypeToPrint, Optional<unsigned> OpIdx, bool PrintDef,
- bool IsStandalone, bool ShouldPrintRegisterTies,
+ LLT TypeToPrint, std::optional<unsigned> OpIdx,
+ bool PrintDef, bool IsStandalone,
+ bool ShouldPrintRegisterTies,
unsigned TiedOperandIdx,
const TargetRegisterInfo *TRI,
const TargetIntrinsicInfo *IntrinsicInfo) const {
@@ -767,13 +800,13 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "undef ";
if (isEarlyClobber())
OS << "early-clobber ";
- if (Register::isPhysicalRegister(getReg()) && isRenamable())
+ if (getReg().isPhysical() && isRenamable())
OS << "renamable ";
// isDebug() is exactly true for register operands of a DBG_VALUE. So we
// simply infer it when parsing and do not need to print it.
const MachineRegisterInfo *MRI = nullptr;
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
if (const MachineFunction *MF = getMFIfAvailable(*this)) {
MRI = &MF->getRegInfo();
}
@@ -788,7 +821,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << ".subreg" << SubReg;
}
// Print the register class / bank.
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
if (const MachineFunction *MF = getMFIfAvailable(*this)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) {
@@ -928,6 +961,11 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
case MachineOperand::MO_MCSymbol:
printSymbol(OS, *getMCSymbol());
break;
+ case MachineOperand::MO_DbgInstrRef: {
+ OS << "dbg-instr-ref(" << getInstrRefInstrIndex() << ", "
+ << getInstrRefOpIndex() << ')';
+ break;
+ }
case MachineOperand::MO_CFIIndex: {
if (const MachineFunction *MF = getMFIfAvailable(*this))
printCFI(OS, MF->getFrameInstructions()[getCFIIndex()], TRI);
@@ -1102,15 +1140,24 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "dereferenceable ";
if (isInvariant())
OS << "invariant ";
- if (getFlags() & MachineMemOperand::MOTargetFlag1)
- OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag1)
- << "\" ";
- if (getFlags() & MachineMemOperand::MOTargetFlag2)
- OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag2)
- << "\" ";
- if (getFlags() & MachineMemOperand::MOTargetFlag3)
- OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag3)
- << "\" ";
+ if (TII) {
+ if (getFlags() & MachineMemOperand::MOTargetFlag1)
+ OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag1)
+ << "\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag2)
+ OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag2)
+ << "\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag3)
+ OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag3)
+ << "\" ";
+ } else {
+ if (getFlags() & MachineMemOperand::MOTargetFlag1)
+ OS << "\"MOTargetFlag1\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag2)
+ OS << "\"MOTargetFlag2\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag3)
+ OS << "\"MOTargetFlag3\" ";
+ }
assert((isLoad() || isStore()) &&
"machine memory operand must be a load or store (or both)");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 631768ec986c..1c31eba909e7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
+#include <optional>
using namespace llvm;
@@ -30,10 +31,10 @@ DiagnosticInfoMIROptimization::MachineArgument::MachineArgument(
/*SkipDebugLoc=*/true);
}
-Optional<uint64_t>
+std::optional<uint64_t>
MachineOptimizationRemarkEmitter::computeHotness(const MachineBasicBlock &MBB) {
if (!MBFI)
- return None;
+ return std::nullopt;
return MBFI->getBlockProfileCount(&MBB);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
index 5da68abc8f6a..c7ba66bd3678 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -727,7 +727,8 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
Unit /* File */,
0 /* Line 0 is reserved for compiler-generated code. */,
- DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
+ DB.createSubroutineType(
+ DB.getOrCreateTypeArray(std::nullopt)), /* void type */
0, /* Line 0 is reserved for compiler-generated code. */
DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
/* Outlined code is optimized code by definition. */
@@ -879,10 +880,13 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
// iterating over each Function in M.
for (Function &F : M) {
- // If there's nothing in F, then there's no reason to try and outline from
- // it.
- if (F.empty())
+ if (F.hasFnAttribute("nooutline")) {
+ LLVM_DEBUG({
+ dbgs() << "... Skipping function with nooutline attribute: "
+ << F.getName() << "\n";
+ });
continue;
+ }
// There's something in F. Check if it has a MachineFunction associated with
// it.
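The loop above now skips any function carrying the "nooutline" string attribute. A hedged sketch of opting a function out, assuming an llvm::Function &F in scope:

F.addFnAttr("nooutline"); // matched by the hasFnAttribute("nooutline") check above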
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
index 476dc059d2b5..039634f3d047 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
@@ -41,7 +41,7 @@ Error MachineFunctionPassManager::run(Module &M,
// current pipeline is the top-level pipeline. Callbacks are not used after
// current pipeline.
PI.pushBeforeNonSkippedPassCallback([&MFAM](StringRef PassID, Any IR) {
- assert(any_isa<const MachineFunction *>(IR));
+ assert(any_cast<const MachineFunction *>(&IR));
const MachineFunction *MF = any_cast<const MachineFunction *>(IR);
assert(MF && "Machine function should be valid for printing");
std::string Banner = std::string("After ") + std::string(PassID);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index 52501ca7c871..adb630469003 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -43,6 +43,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -84,9 +85,11 @@
#include <cstdint>
#include <deque>
#include <functional>
+#include <iomanip>
#include <iterator>
#include <map>
#include <memory>
+#include <sstream>
#include <tuple>
#include <utility>
#include <vector>
@@ -121,6 +124,12 @@ static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
cl::desc("Size limit for the MII."),
cl::Hidden, cl::init(27));
+/// A command line argument to force the pipeliner to use the specified
+/// initiation interval.
+static cl::opt<int> SwpForceII("pipeliner-force-ii",
+ cl::desc("Force pipeliner to use specified II."),
+ cl::Hidden, cl::init(-1));
+
/// A command line argument to limit the number of stages in the pipeline.
static cl::opt<int>
SwpMaxStages("pipeliner-max-stages",
@@ -172,6 +181,13 @@ cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
cl::init(true),
cl::desc("Enable CopyToPhi DAG Mutation"));
+/// A command line argument to force the pipeliner to use the specified
+/// issue width.
+cl::opt<int> SwpForceIssueWidth(
+ "pipeliner-force-issue-width",
+ cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
+ cl::init(-1));
+
} // end namespace llvm
unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
@@ -454,14 +470,18 @@ void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const {
}
void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
- if (II_setByPragma > 0)
+ if (SwpForceII > 0)
+ MII = SwpForceII;
+ else if (II_setByPragma > 0)
MII = II_setByPragma;
else
MII = std::max(ResMII, RecMII);
}
void SwingSchedulerDAG::setMAX_II() {
- if (II_setByPragma > 0)
+ if (SwpForceII > 0)
+ MAX_II = SwpForceII;
+ else if (II_setByPragma > 0)
MAX_II = II_setByPragma;
else
MAX_II = MII + 10;
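With these two hunks the initiation interval can be pinned from the command line, e.g. llc -pipeliner-force-ii=7, which presumably takes precedence over both a pragma-set II and the computed max(ResMII, RecMII); -pipeliner-force-issue-width similarly overrides the subtarget's issue width in the new ResMII calculation.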
@@ -560,7 +580,7 @@ void SwingSchedulerDAG::schedule() {
// check for node order issues
checkValidNodeOrder(Circuits);
- SMSchedule Schedule(Pass.MF);
+ SMSchedule Schedule(Pass.MF, this);
Scheduled = schedulePipeline(Schedule);
if (!Scheduled){
@@ -1002,7 +1022,7 @@ struct FuncUnitSorter {
make_range(InstrItins->beginStage(SchedClass),
InstrItins->endStage(SchedClass))) {
InstrStage::FuncUnits funcUnits = IS.getUnits();
- unsigned numAlternatives = countPopulation(funcUnits);
+ unsigned numAlternatives = llvm::popcount(funcUnits);
if (numAlternatives < min) {
min = numAlternatives;
F = funcUnits;
@@ -1048,7 +1068,7 @@ struct FuncUnitSorter {
make_range(InstrItins->beginStage(SchedClass),
InstrItins->endStage(SchedClass))) {
InstrStage::FuncUnits FuncUnits = IS.getUnits();
- if (countPopulation(FuncUnits) == 1)
+ if (llvm::popcount(FuncUnits) == 1)
Resources[FuncUnits]++;
}
return;
@@ -1093,72 +1113,9 @@ struct FuncUnitSorter {
/// to add it to each existing DFA, until a legal space is found. If the
/// instruction cannot be reserved in an existing DFA, we create a new one.
unsigned SwingSchedulerDAG::calculateResMII() {
-
LLVM_DEBUG(dbgs() << "calculateResMII:\n");
- SmallVector<ResourceManager*, 8> Resources;
- MachineBasicBlock *MBB = Loop.getHeader();
- Resources.push_back(new ResourceManager(&MF.getSubtarget()));
-
- // Sort the instructions by the number of available choices for scheduling,
- // least to most. Use the number of critical resources as the tie breaker.
- FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
- for (MachineInstr &MI :
- llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator()))
- FUS.calcCriticalResources(MI);
- PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
- FuncUnitOrder(FUS);
-
- for (MachineInstr &MI :
- llvm::make_range(MBB->getFirstNonPHI(), MBB->getFirstTerminator()))
- FuncUnitOrder.push(&MI);
-
- while (!FuncUnitOrder.empty()) {
- MachineInstr *MI = FuncUnitOrder.top();
- FuncUnitOrder.pop();
- if (TII->isZeroCost(MI->getOpcode()))
- continue;
- // Attempt to reserve the instruction in an existing DFA. At least one
- // DFA is needed for each cycle.
- unsigned NumCycles = getSUnit(MI)->Latency;
- unsigned ReservedCycles = 0;
- SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
- SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
- LLVM_DEBUG({
- dbgs() << "Trying to reserve resource for " << NumCycles
- << " cycles for \n";
- MI->dump();
- });
- for (unsigned C = 0; C < NumCycles; ++C)
- while (RI != RE) {
- if ((*RI)->canReserveResources(*MI)) {
- (*RI)->reserveResources(*MI);
- ++ReservedCycles;
- break;
- }
- RI++;
- }
- LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
- << ", NumCycles:" << NumCycles << "\n");
- // Add new DFAs, if needed, to reserve resources.
- for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
- LLVM_DEBUG(if (SwpDebugResource) dbgs()
- << "NewResource created to reserve resources"
- << "\n");
- ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
- assert(NewResource->canReserveResources(*MI) && "Reserve error.");
- NewResource->reserveResources(*MI);
- Resources.push_back(NewResource);
- }
- }
- int Resmii = Resources.size();
- LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
- // Delete the memory for each of the DFAs that were created earlier.
- for (ResourceManager *RI : Resources) {
- ResourceManager *D = RI;
- delete D;
- }
- Resources.clear();
- return Resmii;
+ ResourceManager RM(&MF.getSubtarget(), this);
+ return RM.calculateResMII();
}
/// Calculate the recurrence-constrained minimum initiation interval.
@@ -1605,7 +1562,7 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
for (const MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isUse()) {
Register Reg = MO.getReg();
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
Uses.insert(Reg);
else if (MRI.isAllocatable(Reg))
for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
@@ -1617,7 +1574,7 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
for (const MachineOperand &MO : SU->getInstr()->operands())
if (MO.isReg() && MO.isDef() && !MO.isDead()) {
Register Reg = MO.getReg();
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
if (!Uses.count(Reg))
LiveOutRegs.push_back(RegisterMaskPair(Reg,
LaneBitmask::getNone()));
@@ -2099,6 +2056,12 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
<< ")\n");
if (scheduleFound) {
+ scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule);
+ if (!scheduleFound)
+ LLVM_DEBUG(dbgs() << "Target rejected schedule\n");
+ }
+
+ if (scheduleFound) {
Schedule.finalizeSchedule(this);
Pass.ORE->emit([&]() {
return MachineOptimizationRemarkAnalysis(
@@ -2314,20 +2277,28 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
assert(!OffsetSIsScalable && !OffsetDIsScalable &&
"Expected offsets to be byte offsets");
- if (!BaseOpS->isIdenticalTo(*BaseOpD))
+ MachineInstr *DefS = MRI.getVRegDef(BaseOpS->getReg());
+ MachineInstr *DefD = MRI.getVRegDef(BaseOpD->getReg());
+ if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI())
+ return true;
+
+ unsigned InitValS = 0;
+ unsigned LoopValS = 0;
+ unsigned InitValD = 0;
+ unsigned LoopValD = 0;
+ getPhiRegs(*DefS, BB, InitValS, LoopValS);
+ getPhiRegs(*DefD, BB, InitValD, LoopValD);
+ MachineInstr *InitDefS = MRI.getVRegDef(InitValS);
+ MachineInstr *InitDefD = MRI.getVRegDef(InitValD);
+
+ if (!InitDefS->isIdenticalTo(*InitDefD))
return true;
// Check that the base register is incremented by a constant value for each
// iteration.
- MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
- if (!Def || !Def->isPHI())
- return true;
- unsigned InitVal = 0;
- unsigned LoopVal = 0;
- getPhiRegs(*Def, BB, InitVal, LoopVal);
- MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
+ MachineInstr *LoopDefS = MRI.getVRegDef(LoopValS);
int D = 0;
- if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
+ if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D))
return true;
uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
@@ -2369,28 +2340,15 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
for (int curCycle = StartCycle; curCycle != termCycle;
forward ? ++curCycle : --curCycle) {
- // Add the already scheduled instructions at the specified cycle to the
- // DFA.
- ProcItinResources.clearResources();
- for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
- checkCycle <= LastCycle; checkCycle += II) {
- std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
-
- for (SUnit *CI : cycleInstrs) {
- if (ST.getInstrInfo()->isZeroCost(CI->getInstr()->getOpcode()))
- continue;
- assert(ProcItinResources.canReserveResources(*CI->getInstr()) &&
- "These instructions have already been scheduled.");
- ProcItinResources.reserveResources(*CI->getInstr());
- }
- }
if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
- ProcItinResources.canReserveResources(*SU->getInstr())) {
+ ProcItinResources.canReserveResources(*SU, curCycle)) {
LLVM_DEBUG({
dbgs() << "\tinsert at cycle " << curCycle << " ";
SU->getInstr()->dump();
});
+ if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
+ ProcItinResources.reserveResources(*SU, curCycle);
ScheduledInstrs[curCycle].push_back(SU);
InstrToCycle.insert(std::make_pair(SU, curCycle));
if (curCycle > LastCycle)
@@ -2542,7 +2500,7 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
++I, ++Pos) {
for (MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
Register Reg = MO.getReg();
@@ -3019,6 +2977,26 @@ void SMSchedule::print(raw_ostream &os) const {
LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
+void ResourceManager::dumpMRT() const {
+ LLVM_DEBUG({
+ if (UseDFA)
+ return;
+ std::stringstream SS;
+ SS << "MRT:\n";
+ SS << std::setw(4) << "Slot";
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
+ SS << std::setw(3) << I;
+ SS << std::setw(7) << "#Mops"
+ << "\n";
+ for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
+ SS << std::setw(4) << Slot;
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
+ SS << std::setw(3) << MRT[Slot][I];
+ SS << std::setw(7) << NumScheduledMops[Slot] << "\n";
+ }
+ dbgs() << SS.str();
+ });
+}
#endif
void ResourceManager::initProcResourceVectors(
@@ -3063,97 +3041,244 @@ void ResourceManager::initProcResourceVectors(
});
}
-bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const {
-
+bool ResourceManager::canReserveResources(SUnit &SU, int Cycle) {
LLVM_DEBUG({
if (SwpDebugResource)
dbgs() << "canReserveResources:\n";
});
if (UseDFA)
- return DFAResources->canReserveResources(MID);
+ return DFAResources[positiveModulo(Cycle, InitiationInterval)]
+ ->canReserveResources(&SU.getInstr()->getDesc());
- unsigned InsnClass = MID->getSchedClass();
- const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
+ const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
if (!SCDesc->isValid()) {
LLVM_DEBUG({
dbgs() << "No valid Schedule Class Desc for schedClass!\n";
- dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
+ dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
});
return true;
}
- const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
- const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
- for (; I != E; ++I) {
- if (!I->Cycles)
- continue;
- const MCProcResourceDesc *ProcResource =
- SM.getProcResource(I->ProcResourceIdx);
- unsigned NumUnits = ProcResource->NumUnits;
- LLVM_DEBUG({
- if (SwpDebugResource)
- dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
- ProcResource->Name, I->ProcResourceIdx,
- ProcResourceCount[I->ProcResourceIdx], NumUnits,
- I->Cycles);
- });
- if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
- return false;
- }
- LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
- return true;
+ reserveResources(SCDesc, Cycle);
+ bool Result = !isOverbooked();
+ unreserveResources(SCDesc, Cycle);
+
+ LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return " << Result << "\n\n";);
+ return Result;
}
-void ResourceManager::reserveResources(const MCInstrDesc *MID) {
+void ResourceManager::reserveResources(SUnit &SU, int Cycle) {
LLVM_DEBUG({
if (SwpDebugResource)
dbgs() << "reserveResources:\n";
});
if (UseDFA)
- return DFAResources->reserveResources(MID);
+ return DFAResources[positiveModulo(Cycle, InitiationInterval)]
+ ->reserveResources(&SU.getInstr()->getDesc());
- unsigned InsnClass = MID->getSchedClass();
- const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
+ const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
if (!SCDesc->isValid()) {
LLVM_DEBUG({
dbgs() << "No valid Schedule Class Desc for schedClass!\n";
- dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
+ dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
});
return;
}
- for (const MCWriteProcResEntry &PRE :
- make_range(STI->getWriteProcResBegin(SCDesc),
- STI->getWriteProcResEnd(SCDesc))) {
- if (!PRE.Cycles)
- continue;
- ++ProcResourceCount[PRE.ProcResourceIdx];
- LLVM_DEBUG({
- if (SwpDebugResource) {
- const MCProcResourceDesc *ProcResource =
- SM.getProcResource(PRE.ProcResourceIdx);
- dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
- ProcResource->Name, PRE.ProcResourceIdx,
- ProcResourceCount[PRE.ProcResourceIdx],
- ProcResource->NumUnits, PRE.Cycles);
- }
- });
- }
+
+ reserveResources(SCDesc, Cycle);
+
LLVM_DEBUG({
- if (SwpDebugResource)
+ if (SwpDebugResource) {
+ dumpMRT();
dbgs() << "reserveResources: done!\n\n";
+ }
});
}
-bool ResourceManager::canReserveResources(const MachineInstr &MI) const {
- return canReserveResources(&MI.getDesc());
+void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,
+ int Cycle) {
+ assert(!UseDFA);
+ for (const MCWriteProcResEntry &PRE : make_range(
+ STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
+ for (int C = Cycle; C < Cycle + PRE.Cycles; ++C)
+ ++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
+
+ for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
+ ++NumScheduledMops[positiveModulo(C, InitiationInterval)];
+}
+
+void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,
+ int Cycle) {
+ assert(!UseDFA);
+ for (const MCWriteProcResEntry &PRE : make_range(
+ STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
+ for (int C = Cycle; C < Cycle + PRE.Cycles; ++C)
+ --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
+
+ for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
+ --NumScheduledMops[positiveModulo(C, InitiationInterval)];
}
-void ResourceManager::reserveResources(const MachineInstr &MI) {
- return reserveResources(&MI.getDesc());
+bool ResourceManager::isOverbooked() const {
+ assert(!UseDFA);
+ for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc *Desc = SM.getProcResource(I);
+ if (MRT[Slot][I] > Desc->NumUnits)
+ return true;
+ }
+ if (NumScheduledMops[Slot] > IssueWidth)
+ return true;
+ }
+ return false;
+}
+
+int ResourceManager::calculateResMIIDFA() const {
+ assert(UseDFA);
+
+ // Sort the instructions by the number of available choices for scheduling,
+ // least to most. Use the number of critical resources as the tie breaker.
+ FuncUnitSorter FUS = FuncUnitSorter(*ST);
+ for (SUnit &SU : DAG->SUnits)
+ FUS.calcCriticalResources(*SU.getInstr());
+ PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
+ FuncUnitOrder(FUS);
+
+ for (SUnit &SU : DAG->SUnits)
+ FuncUnitOrder.push(SU.getInstr());
+
+ SmallVector<std::unique_ptr<DFAPacketizer>, 8> Resources;
+ Resources.push_back(
+ std::unique_ptr<DFAPacketizer>(TII->CreateTargetScheduleState(*ST)));
+
+ while (!FuncUnitOrder.empty()) {
+ MachineInstr *MI = FuncUnitOrder.top();
+ FuncUnitOrder.pop();
+ if (TII->isZeroCost(MI->getOpcode()))
+ continue;
+
+ // Attempt to reserve the instruction in an existing DFA. At least one
+ // DFA is needed for each cycle.
+ unsigned NumCycles = DAG->getSUnit(MI)->Latency;
+ unsigned ReservedCycles = 0;
+ auto *RI = Resources.begin();
+ auto *RE = Resources.end();
+ LLVM_DEBUG({
+ dbgs() << "Trying to reserve resource for " << NumCycles
+ << " cycles for \n";
+ MI->dump();
+ });
+ for (unsigned C = 0; C < NumCycles; ++C)
+ while (RI != RE) {
+ if ((*RI)->canReserveResources(*MI)) {
+ (*RI)->reserveResources(*MI);
+ ++ReservedCycles;
+ break;
+ }
+ RI++;
+ }
+ LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
+ << ", NumCycles:" << NumCycles << "\n");
+ // Add new DFAs, if needed, to reserve resources.
+ for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
+ LLVM_DEBUG(if (SwpDebugResource) dbgs()
+ << "NewResource created to reserve resources"
+ << "\n");
+ auto *NewResource = TII->CreateTargetScheduleState(*ST);
+ assert(NewResource->canReserveResources(*MI) && "Reserve error.");
+ NewResource->reserveResources(*MI);
+ Resources.push_back(std::unique_ptr<DFAPacketizer>(NewResource));
+ }
+ }
+
+ int Resmii = Resources.size();
+ LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
+ return Resmii;
}
-void ResourceManager::clearResources() {
+int ResourceManager::calculateResMII() const {
if (UseDFA)
- return DFAResources->clearResources();
- std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
+ return calculateResMIIDFA();
+
+ // Count each resource consumption and divide it by the number of units.
+ // ResMII is the max value among them.
+
+ int NumMops = 0;
+ SmallVector<uint64_t> ResourceCount(SM.getNumProcResourceKinds());
+ for (SUnit &SU : DAG->SUnits) {
+ if (TII->isZeroCost(SU.getInstr()->getOpcode()))
+ continue;
+
+ const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
+ if (!SCDesc->isValid())
+ continue;
+
+ LLVM_DEBUG({
+ if (SwpDebugResource) {
+ DAG->dumpNode(SU);
+ dbgs() << " #Mops: " << SCDesc->NumMicroOps << "\n"
+ << " WriteProcRes: ";
+ }
+ });
+ NumMops += SCDesc->NumMicroOps;
+ for (const MCWriteProcResEntry &PRE :
+ make_range(STI->getWriteProcResBegin(SCDesc),
+ STI->getWriteProcResEnd(SCDesc))) {
+ LLVM_DEBUG({
+ if (SwpDebugResource) {
+ const MCProcResourceDesc *Desc =
+ SM.getProcResource(PRE.ProcResourceIdx);
+ dbgs() << Desc->Name << ": " << PRE.Cycles << ", ";
+ }
+ });
+ ResourceCount[PRE.ProcResourceIdx] += PRE.Cycles;
+ }
+ LLVM_DEBUG(if (SwpDebugResource) dbgs() << "\n");
+ }
+
+ int Result = (NumMops + IssueWidth - 1) / IssueWidth;
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << "#Mops: " << NumMops << ", "
+ << "IssueWidth: " << IssueWidth << ", "
+ << "Cycles: " << Result << "\n";
+ });
+
+ LLVM_DEBUG({
+ if (SwpDebugResource) {
+ std::stringstream SS;
+ SS << std::setw(2) << "ID" << std::setw(16) << "Name" << std::setw(10)
+ << "Units" << std::setw(10) << "Consumed" << std::setw(10) << "Cycles"
+ << "\n";
+ dbgs() << SS.str();
+ }
+ });
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc *Desc = SM.getProcResource(I);
+ int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits;
+ LLVM_DEBUG({
+ if (SwpDebugResource) {
+ std::stringstream SS;
+ SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10)
+ << Desc->NumUnits << std::setw(10) << ResourceCount[I]
+ << std::setw(10) << Cycles << "\n";
+ dbgs() << SS.str();
+ }
+ });
+ if (Cycles > Result)
+ Result = Cycles;
+ }
+ return Result;
+}
+
+void ResourceManager::init(int II) {
+ InitiationInterval = II;
+ DFAResources.clear();
+ DFAResources.resize(II);
+ for (auto &I : DFAResources)
+ I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
+ MRT.clear();
+ MRT.resize(II, SmallVector<uint64_t>(SM.getNumProcResourceKinds()));
+ NumScheduledMops.clear();
+ NumScheduledMops.resize(II);
}
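
A quick standalone sketch of the non-DFA bound computed above: ResMII is the
maximum of ceil(NumMops / IssueWidth) and, for each processor resource,
ceil(consumed cycles / number of units). Plain C++ with toy types standing in
for the MC scheduling model; all names here are illustrative, not LLVM's.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct ResourceUse {
      uint64_t Consumed; // total cycles consumed across all instructions
      uint64_t Units;    // number of parallel units for this resource
    };

    int resMII(int NumMops, int IssueWidth,
               const std::vector<ResourceUse> &Uses) {
      // Micro-op bound: how many cycles just to issue everything.
      int Result = (NumMops + IssueWidth - 1) / IssueWidth;
      for (const ResourceUse &U : Uses) {
        // Per-resource bound: consumption spread over the available units.
        int Cycles = static_cast<int>((U.Consumed + U.Units - 1) / U.Units);
        Result = std::max(Result, Cycles);
      }
      return Result;
    }
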
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 511bb80052c2..1ad08e19feae 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -48,6 +48,7 @@ MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
RegAllocHints.reserve(256);
UsedPhysRegMask.resize(NumRegs);
PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]());
+ TheDelegates.clear();
}
/// setRegClass - Set the register class of the specified virtual register.
@@ -79,10 +80,10 @@ constrainRegClass(MachineRegisterInfo &MRI, Register Reg,
return NewRC;
}
-const TargetRegisterClass *
-MachineRegisterInfo::constrainRegClass(Register Reg,
- const TargetRegisterClass *RC,
- unsigned MinNumRegs) {
+const TargetRegisterClass *MachineRegisterInfo::constrainRegClass(
+ Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs) {
+ if (Reg.isPhysical())
+ return nullptr;
return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs);
}
@@ -162,8 +163,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
// New virtual register number.
Register Reg = createIncompleteVirtualRegister(Name);
VRegInfo[Reg].first = RegClass;
- if (TheDelegate)
- TheDelegate->MRI_NoteNewVirtualRegister(Reg);
+ noteNewVirtualRegister(Reg);
return Reg;
}
@@ -172,8 +172,7 @@ Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
Register Reg = createIncompleteVirtualRegister(Name);
VRegInfo[Reg].first = VRegInfo[VReg].first;
setType(Reg, getType(VReg));
- if (TheDelegate)
- TheDelegate->MRI_NoteNewVirtualRegister(Reg);
+ noteCloneVirtualRegister(Reg, VReg);
return Reg;
}
@@ -189,8 +188,7 @@ MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) {
// FIXME: Should we use a dummy register class?
VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr);
setType(Reg, Ty);
- if (TheDelegate)
- TheDelegate->MRI_NoteNewVirtualRegister(Reg);
+ noteNewVirtualRegister(Reg);
return Reg;
}
@@ -204,7 +202,11 @@ void MachineRegisterInfo::clearVirtRegs() {
if (!VRegInfo[Reg].second)
continue;
verifyUseList(Reg);
- llvm_unreachable("Remaining virtual register operands");
+ errs() << "Remaining virtual register "
+ << printReg(Reg, getTargetRegisterInfo()) << "...\n";
+ for (MachineInstr &MI : reg_instructions(Reg))
+ errs() << "...in instruction: " << MI << "\n";
+ std::abort();
}
#endif
VRegInfo.clear();
@@ -382,7 +384,7 @@ void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) {
// TODO: This could be more efficient by bulk changing the operands.
for (MachineOperand &O : llvm::make_early_inc_range(reg_operands(FromReg))) {
- if (Register::isPhysicalRegister(ToReg)) {
+ if (ToReg.isPhysical()) {
O.substPhysReg(ToReg, *TRI);
} else {
O.setReg(ToReg);
@@ -420,6 +422,12 @@ bool MachineRegisterInfo::hasOneNonDBGUser(Register RegNo) const {
return hasSingleElement(use_nodbg_instructions(RegNo));
}
+bool MachineRegisterInfo::hasAtMostUserInstrs(Register Reg,
+ unsigned MaxUsers) const {
+ return hasNItemsOrLess(use_instr_nodbg_begin(Reg), use_instr_nodbg_end(),
+ MaxUsers);
+}
+
/// clearKillFlags - Iterate over all the uses of the given register and
/// clear the kill flag from the MachineOperand. This function is used by
/// optimization passes which extend register lifetimes and need only
@@ -488,7 +496,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(Register Reg) const {
// Lane masks are only defined for vregs.
- assert(Register::isVirtualRegister(Reg));
+ assert(Reg.isVirtual());
const TargetRegisterClass &TRC = *getRegClass(Reg);
return TRC.getLaneMask();
}
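
The new hasAtMostUserInstrs answers "are there at most N users?" without
walking the entire use list, by advancing at most N+1 iterator steps. A
self-contained sketch of that bounded-count idea (the name mirrors LLVM's
hasNItemsOrLess helper, but this version is standalone):

    template <typename It>
    bool hasNItemsOrLess(It First, It Last, unsigned N) {
      // Take at most N steps; if we hit the end first, there are <= N items.
      for (; N; --N, ++First)
        if (First == Last)
          return true;
      // We consumed N items; any remaining item means "more than N".
      return First == Last;
    }
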
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
index 01cea85ecc7c..6de8f8da9254 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -21,20 +21,52 @@
using namespace llvm;
-MachineBasicBlock *MachineSSAContext::getEntryBlock(MachineFunction &F) {
- return &F.front();
-}
+const Register MachineSSAContext::ValueRefNull{};
void MachineSSAContext::setFunction(MachineFunction &Fn) {
MF = &Fn;
RegInfo = &MF->getRegInfo();
}
-Printable MachineSSAContext::print(MachineBasicBlock *Block) const {
+MachineBasicBlock *MachineSSAContext::getEntryBlock(MachineFunction &F) {
+ return &F.front();
+}
+
+void MachineSSAContext::appendBlockTerms(
+ SmallVectorImpl<const MachineInstr *> &terms,
+ const MachineBasicBlock &block) {
+ for (auto &T : block.terminators())
+ terms.push_back(&T);
+}
+
+void MachineSSAContext::appendBlockDefs(SmallVectorImpl<Register> &defs,
+ const MachineBasicBlock &block) {
+ for (const MachineInstr &instr : block.instrs()) {
+ for (const MachineOperand &op : instr.operands()) {
+ if (op.isReg() && op.isDef())
+ defs.push_back(op.getReg());
+ }
+ }
+}
+
+/// Get the defining block of a value.
+MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) const {
+ if (!value)
+ return nullptr;
+ return RegInfo->getVRegDef(value)->getParent();
+}
+
+bool MachineSSAContext::isConstantValuePhi(const MachineInstr &Phi) {
+ return Phi.isConstantValuePHI();
+}
+
+Printable MachineSSAContext::print(const MachineBasicBlock *Block) const {
+ if (!Block)
+ return Printable([](raw_ostream &Out) { Out << "<nullptr>"; });
return Printable([Block](raw_ostream &Out) { Block->printName(Out); });
}
-Printable MachineSSAContext::print(MachineInstr *I) const {
+Printable MachineSSAContext::print(const MachineInstr *I) const {
return Printable([I](raw_ostream &Out) { I->print(Out); });
}
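
The added hooks make MachineSSAContext a complete traits-style adapter: a
generic analysis asks the context for the entry block, the defs in a block,
and the block terminators, without knowing it is looking at Machine IR. A toy
sketch of the same shape, with invented stand-in types:

    #include <vector>

    struct ToyInstr { std::vector<int> Defs; bool IsTerminator = false; };
    struct ToyBlock { std::vector<ToyInstr> Instrs; };
    struct ToyFunc  { std::vector<ToyBlock> Blocks; };

    struct ToySSAContext {
      static ToyBlock *getEntryBlock(ToyFunc &F) { return &F.Blocks.front(); }

      // Collect every value defined in the block.
      static void appendBlockDefs(std::vector<int> &Defs, const ToyBlock &B) {
        for (const ToyInstr &I : B.Instrs)
          for (int D : I.Defs)
            Defs.push_back(D);
      }

      // Collect the block's terminator instructions.
      static void appendBlockTerms(std::vector<const ToyInstr *> &Terms,
                                   const ToyBlock &B) {
        for (const ToyInstr &I : B.Instrs)
          if (I.IsTerminator)
            Terms.push_back(&I);
      }
    };
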
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
index e5cd46268600..5ab5a40e7574 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -95,9 +95,15 @@ cl::opt<bool> ViewMISchedDAGs(
cl::desc("Pop up a window to show MISched dags after they are processed"));
cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
cl::desc("Print schedule DAGs"));
+cl::opt<bool> MISchedDumpReservedCycles(
+ "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Dump resource usage at schedule boundary."));
#else
const bool ViewMISchedDAGs = false;
const bool PrintDAGs = false;
+#ifdef LLVM_ENABLE_DUMP
+const bool MISchedDumpReservedCycles = false;
+#endif // LLVM_ENABLE_DUMP
#endif // NDEBUG
} // end namespace llvm
@@ -955,7 +961,7 @@ void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
// Ignore re-defs.
@@ -1116,7 +1122,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
for (const RegisterMaskPair &P : LiveUses) {
Register Reg = P.RegUnit;
/// FIXME: Currently assuming single-use physregs.
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
if (ShouldTrackLaneMasks) {
@@ -1340,7 +1346,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
// Visit each live out vreg def to find def/use pairs that cross iterations.
for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
Register Reg = P.RegUnit;
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
const LiveInterval &LI = LIS->getInterval(Reg);
const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
@@ -1823,12 +1829,12 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
// Check for pure vreg copies.
const MachineOperand &SrcOp = Copy->getOperand(1);
Register SrcReg = SrcOp.getReg();
- if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
+ if (!SrcReg.isVirtual() || !SrcOp.readsReg())
return;
const MachineOperand &DstOp = Copy->getOperand(0);
Register DstReg = DstOp.getReg();
- if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
+ if (!DstReg.isVirtual() || DstOp.isDead())
return;
// Check if either the dest or source is local. If it's live across a back
@@ -2589,6 +2595,28 @@ SUnit *SchedBoundary::pickOnlyChoice() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+/// Dump the content of the \ref ReservedCycles vector for the
+/// resources that are used in the basic block.
+///
+LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
+ if (!SchedModel->hasInstrSchedModel())
+ return;
+
+ unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
+ unsigned StartIdx = 0;
+
+ for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
+ const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
+ std::string ResName = SchedModel->getResourceName(ResIdx);
+ for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
+ dbgs() << ResName << "(" << UnitIdx
+ << ") = " << ReservedCycles[StartIdx + UnitIdx] << "\n";
+ }
+ StartIdx += NumUnits;
+ }
+}
+
// This is useful information to dump after bumpNode.
// Note that the Queue contents are more useful before pickNodeFromQueue.
LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
@@ -2611,6 +2639,8 @@ LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
<< "\n ExpectedLatency: " << ExpectedLatency << "c\n"
<< (IsResourceLimited ? " - Resource" : " - Latency")
<< " limited.\n";
+ if (MISchedDumpReservedCycles)
+ dumpReservedCycles();
}
#endif
@@ -3102,12 +3132,12 @@ int biasPhysReg(const SUnit *SU, bool isTop) {
unsigned UnscheduledOper = isTop ? 0 : 1;
// If we have already scheduled the physreg produce/consumer, immediately
// schedule the copy.
- if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
+ if (MI->getOperand(ScheduledOper).getReg().isPhysical())
return 1;
// If the physreg is at the boundary, defer it. Otherwise schedule it
// immediately to free the dependent. We can hoist the copy later.
bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
- if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
+ if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
return AtBoundary ? -1 : 1;
}
@@ -3117,7 +3147,7 @@ int biasPhysReg(const SUnit *SU, bool isTop) {
// physical registers.
bool DoBias = true;
for (const MachineOperand &Op : MI->defs()) {
- if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
+ if (Op.isReg() && !Op.getReg().isPhysical()) {
DoBias = false;
break;
}
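
dumpReservedCycles relies on ReservedCycles being a flat array where the
units of resource R occupy the slots [StartIdx, StartIdx + NumUnits), with
StartIdx the running sum of earlier resources' unit counts. A standalone
sketch of that layout and walk (toy types, not the scheduler's own):

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Resource { std::string Name; unsigned NumUnits; };

    void dumpReservedCycles(const std::vector<Resource> &Resources,
                            const std::vector<unsigned> &ReservedCycles) {
      unsigned StartIdx = 0;
      for (const Resource &R : Resources) {
        // Each unit of this resource has its own reserved-cycle slot.
        for (unsigned U = 0; U < R.NumUnits; ++U)
          std::printf("%s(%u) = %u\n", R.Name.c_str(), U,
                      ReservedCycles[StartIdx + U]);
        StartIdx += R.NumUnits;
      }
    }
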
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index 0568bc6a4600..8429d468254a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -275,8 +275,8 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
Register SrcReg = MI.getOperand(1).getReg();
Register DstReg = MI.getOperand(0).getReg();
- if (!Register::isVirtualRegister(SrcReg) ||
- !Register::isVirtualRegister(DstReg) || !MRI->hasOneNonDBGUse(SrcReg))
+ if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
+ !MRI->hasOneNonDBGUse(SrcReg))
return false;
const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
@@ -309,7 +309,7 @@ bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge,
bool &LocalUse) const {
- assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs");
+ assert(Reg.isVirtual() && "Only makes sense for vregs");
// Ignore debug uses because debug info doesn't affect the code.
if (MRI->use_nodbg_empty(Reg))
@@ -611,7 +611,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
// We don't move live definitions of physical registers,
// so sinking their uses won't enable any opportunities.
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
continue;
// If this instruction is the only user of a virtual register,
@@ -805,7 +805,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
if (Reg == 0)
continue;
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
if (MO.isUse() &&
(MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
continue;
@@ -910,7 +910,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
Register Reg = MO.getReg();
if (Reg == 0) continue;
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
@@ -1323,7 +1323,7 @@ static bool blockPrologueInterferes(MachineBasicBlock *BB,
if (!Reg)
continue;
if (MO.isUse()) {
- if (Register::isPhysicalRegister(Reg) &&
+ if (Reg.isPhysical() &&
(TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
continue;
if (PI->modifiesRegister(Reg, TRI))
@@ -1387,7 +1387,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
if (!MO.isReg() || MO.isUse())
continue;
Register Reg = MO.getReg();
- if (Reg == 0 || !Register::isPhysicalRegister(Reg))
+ if (Reg == 0 || !Reg.isPhysical())
continue;
if (SuccToSinkTo->isLiveIn(Reg))
return false;
@@ -1779,11 +1779,11 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// We must sink this DBG_VALUE if its operand is sunk. To avoid searching
// for DBG_VALUEs later, record them when they're encountered.
- if (MI.isDebugValue()) {
+ if (MI.isDebugValue() && !MI.isDebugRef()) {
SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
bool IsValid = true;
for (MachineOperand &MO : MI.debug_operands()) {
- if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
+ if (MO.isReg() && MO.getReg().isPhysical()) {
// Bail if we can already tell the sink would be rejected, rather
// than needlessly accumulating lots of DBG_VALUEs.
if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
index b546a5082b07..9628e4c5aeb5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -63,7 +63,7 @@ STATISTIC(StableHashBailingMetadataUnsupported,
stable_hash llvm::stableHashValue(const MachineOperand &MO) {
switch (MO.getType()) {
case MachineOperand::MO_Register:
- if (Register::isVirtualRegister(MO.getReg())) {
+ if (MO.getReg().isVirtual()) {
const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo();
SmallVector<unsigned> DefOpcodes;
for (auto &Def : MRI.def_instructions(MO.getReg()))
@@ -119,8 +119,26 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
stable_hash_combine_string(MO.getSymbolName()));
case MachineOperand::MO_RegisterMask:
- case MachineOperand::MO_RegisterLiveOut:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+ case MachineOperand::MO_RegisterLiveOut: {
+ if (const MachineInstr *MI = MO.getParent()) {
+ if (const MachineBasicBlock *MBB = MI->getParent()) {
+ if (const MachineFunction *MF = MBB->getParent()) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ unsigned RegMaskSize =
+ MachineOperand::getRegMaskSize(TRI->getNumRegs());
+ const uint32_t *RegMask = MO.getRegMask();
+ std::vector<llvm::stable_hash> RegMaskHashes(RegMask,
+ RegMask + RegMaskSize);
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_array(RegMaskHashes.data(),
+ RegMaskHashes.size()));
+ }
+ }
+ }
+
+ assert(0 && "MachineOperand not associated with any MachineFunction");
+ return hash_combine(MO.getType(), MO.getTargetFlags());
+ }
case MachineOperand::MO_ShuffleMask: {
std::vector<llvm::stable_hash> ShuffleMaskHashes;
@@ -147,6 +165,9 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_Predicate:
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
MO.getPredicate());
+ case MachineOperand::MO_DbgInstrRef:
+ return stable_hash_combine(MO.getType(), MO.getInstrRefInstrIndex(),
+ MO.getInstrRefOpIndex());
}
llvm_unreachable("Invalid machine operand type");
}
@@ -164,8 +185,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
HashComponents.push_back(MI.getOpcode());
HashComponents.push_back(MI.getFlags());
for (const MachineOperand &MO : MI.operands()) {
- if (!HashVRegs && MO.isReg() && MO.isDef() &&
- Register::isVirtualRegister(MO.getReg()))
+ if (!HashVRegs && MO.isReg() && MO.isDef() && MO.getReg().isVirtual())
continue; // Skip virtual register defs.
if (MO.isCPI()) {
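
The regmask change above hashes the mask contents (one bit per physical
register, packed 32 registers per 32-bit word) instead of the mask pointer,
so identical masks hash identically across runs. A minimal sketch of the
idea; the combiner below is a stand-in, not LLVM's stable_hash:

    #include <cstdint>

    uint64_t combine(uint64_t Seed, uint64_t V) {
      // Boost-style mixing; any decent deterministic combiner works here.
      return Seed ^ (V + 0x9e3779b97f4a7c15ULL + (Seed << 6) + (Seed >> 2));
    }

    uint64_t hashRegMask(const uint32_t *Mask, unsigned NumRegs) {
      unsigned MaskWords = (NumRegs + 31) / 32; // getRegMaskSize equivalent
      uint64_t H = 0;
      for (unsigned I = 0; I < MaskWords; ++I)
        H = combine(H, Mask[I]);
      return H;
    }
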
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 715e5da26989..5c6efd4af074 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -9,7 +9,6 @@
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -147,7 +146,7 @@ MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
"getResources() must be called before getProcResourceCycles()");
unsigned PRKinds = SchedModel.getNumProcResourceKinds();
assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
- return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
+ return ArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
}
//===----------------------------------------------------------------------===//
@@ -265,7 +264,7 @@ MachineTraceMetrics::Ensemble::
getProcResourceDepths(unsigned MBBNum) const {
unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
- return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
+ return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
}
/// Get an array of processor resource heights for MBB. Indexed by processor
@@ -278,7 +277,7 @@ MachineTraceMetrics::Ensemble::
getProcResourceHeights(unsigned MBBNum) const {
unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
- return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
+ return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
}
//===----------------------------------------------------------------------===//
@@ -352,7 +351,7 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
// Select the preferred successor for MBB.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
- if (MBB->pred_empty())
+ if (MBB->succ_empty())
return nullptr;
const MachineLoop *CurLoop = getLoopFor(MBB);
const MachineBasicBlock *Best = nullptr;
@@ -449,7 +448,7 @@ public:
void finishPostorder(const MachineBasicBlock*) {}
- bool insertEdge(Optional<const MachineBasicBlock *> From,
+ bool insertEdge(std::optional<const MachineBasicBlock *> From,
const MachineBasicBlock *To) {
// Skip already visited To blocks.
MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
@@ -664,7 +663,7 @@ static bool getDataDeps(const MachineInstr &UseMI,
Register Reg = MO.getReg();
if (!Reg)
continue;
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
HasPhysRegs = true;
continue;
}
@@ -903,7 +902,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
- if (!Register::isPhysicalRegister(Reg))
+ if (!Reg.isPhysical())
continue;
if (MO.readsReg())
ReadOps.push_back(MI.getOperandNo(MOI));
@@ -980,7 +979,7 @@ addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace) {
assert(!Trace.empty() && "Trace should contain at least one block");
Register Reg = DefMI->getOperand(DefOp).getReg();
- assert(Register::isVirtualRegister(Reg));
+ assert(Reg.isVirtual());
const MachineBasicBlock *DefMBB = DefMI->getParent();
// Reg is live-in to all blocks in Trace that follow DefMBB.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
new file mode 100644
index 000000000000..2fe5e40a58c2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -0,0 +1,223 @@
+//===- MachineUniformityAnalysis.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineUniformityAnalysis.h"
+#include "llvm/ADT/GenericUniformityImpl.h"
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+template <>
+bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::hasDivergentDefs(
+ const MachineInstr &I) const {
+ for (auto &op : I.operands()) {
+ if (!op.isReg() || !op.isDef())
+ continue;
+ if (isDivergent(op.getReg()))
+ return true;
+ }
+ return false;
+}
+
+template <>
+bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::markDefsDivergent(
+ const MachineInstr &Instr, bool AllDefsDivergent) {
+ bool insertedDivergent = false;
+ const auto &MRI = F.getRegInfo();
+ const auto &TRI = *MRI.getTargetRegisterInfo();
+ for (auto &op : Instr.operands()) {
+ if (!op.isReg() || !op.isDef())
+ continue;
+ if (!op.getReg().isVirtual())
+ continue;
+ assert(!op.getSubReg());
+ if (!AllDefsDivergent) {
+ auto *RC = MRI.getRegClassOrNull(op.getReg());
+ if (RC && !TRI.isDivergentRegClass(RC))
+ continue;
+ }
+ insertedDivergent |= markDivergent(op.getReg());
+ }
+ return insertedDivergent;
+}
+
+template <>
+void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::initialize() {
+ const auto &InstrInfo = *F.getSubtarget().getInstrInfo();
+
+ for (const MachineBasicBlock &block : F) {
+ for (const MachineInstr &instr : block) {
+ auto uniformity = InstrInfo.getInstructionUniformity(instr);
+ if (uniformity == InstructionUniformity::AlwaysUniform) {
+ addUniformOverride(instr);
+ continue;
+ }
+
+ if (uniformity == InstructionUniformity::NeverUniform) {
+ markDefsDivergent(instr, /* AllDefsDivergent = */ false);
+ }
+ }
+ }
+}
+
+template <>
+void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::pushUsers(
+ Register Reg) {
+ const auto &RegInfo = F.getRegInfo();
+ for (MachineInstr &UserInstr : RegInfo.use_instructions(Reg)) {
+ if (isAlwaysUniform(UserInstr))
+ continue;
+ if (markDivergent(UserInstr))
+ Worklist.push_back(&UserInstr);
+ }
+}
+
+template <>
+void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::pushUsers(
+ const MachineInstr &Instr) {
+ assert(!isAlwaysUniform(Instr));
+ if (Instr.isTerminator())
+ return;
+ for (const MachineOperand &op : Instr.operands()) {
+ if (op.isReg() && op.isDef() && op.getReg().isVirtual())
+ pushUsers(op.getReg());
+ }
+}
+
+template <>
+bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::usesValueFromCycle(
+ const MachineInstr &I, const MachineCycle &DefCycle) const {
+ assert(!isAlwaysUniform(I));
+ for (auto &Op : I.operands()) {
+ if (!Op.isReg() || !Op.readsReg())
+ continue;
+ auto Reg = Op.getReg();
+ assert(Reg.isVirtual());
+ auto *Def = F.getRegInfo().getVRegDef(Reg);
+ if (DefCycle.contains(Def->getParent()))
+ return true;
+ }
+ return false;
+}
+
+// This ensures explicit instantiation of
+// GenericUniformityAnalysisImpl::ImplDeleter::operator()
+template class llvm::GenericUniformityInfo<MachineSSAContext>;
+template struct llvm::GenericUniformityAnalysisImplDeleter<
+ llvm::GenericUniformityAnalysisImpl<MachineSSAContext>>;
+
+MachineUniformityInfo
+llvm::computeMachineUniformityInfo(MachineFunction &F,
+ const MachineCycleInfo &cycleInfo,
+ const MachineDomTree &domTree) {
+ assert(F.getRegInfo().isSSA() && "Expected to be run on SSA form!");
+ return MachineUniformityInfo(F, domTree, cycleInfo);
+}
+
+namespace {
+
+/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
+class MachineUniformityAnalysisPass : public MachineFunctionPass {
+ MachineUniformityInfo UI;
+
+public:
+ static char ID;
+
+ MachineUniformityAnalysisPass();
+
+ MachineUniformityInfo &getUniformityInfo() { return UI; }
+ const MachineUniformityInfo &getUniformityInfo() const { return UI; }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+ // TODO: verify analysis
+};
+
+class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MachineUniformityInfoPrinterPass();
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+} // namespace
+
+char MachineUniformityAnalysisPass::ID = 0;
+
+MachineUniformityAnalysisPass::MachineUniformityAnalysisPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineUniformityAnalysisPassPass(*PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(MachineUniformityAnalysisPass, "machine-uniformity",
+ "Machine Uniformity Info Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineUniformityAnalysisPass, "machine-uniformity",
+ "Machine Uniformity Info Analysis", true, true)
+
+void MachineUniformityAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineCycleInfoWrapperPass>();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineUniformityAnalysisPass::runOnMachineFunction(MachineFunction &MF) {
+ auto &DomTree = getAnalysis<MachineDominatorTree>().getBase();
+ auto &CI = getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
+ UI = computeMachineUniformityInfo(MF, CI, DomTree);
+ return false;
+}
+
+void MachineUniformityAnalysisPass::print(raw_ostream &OS,
+ const Module *) const {
+ OS << "MachineUniformityInfo for function: " << UI.getFunction().getName()
+ << "\n";
+ UI.print(OS);
+}
+
+char MachineUniformityInfoPrinterPass::ID = 0;
+
+MachineUniformityInfoPrinterPass::MachineUniformityInfoPrinterPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineUniformityInfoPrinterPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+INITIALIZE_PASS_BEGIN(MachineUniformityInfoPrinterPass,
+ "print-machine-uniformity",
+ "Print Machine Uniformity Info Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
+INITIALIZE_PASS_END(MachineUniformityInfoPrinterPass,
+ "print-machine-uniformity",
+ "Print Machine Uniformity Info Analysis", true, true)
+
+void MachineUniformityInfoPrinterPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineUniformityAnalysisPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineUniformityInfoPrinterPass::runOnMachineFunction(
+ MachineFunction &F) {
+ auto &UI = getAnalysis<MachineUniformityAnalysisPass>();
+ UI.print(errs());
+ return false;
+}
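
At its core, the new analysis is a worklist fixed point: seed the defs of
never-uniform instructions as divergent, then push users until nothing
changes. A standalone sketch of that propagation with toy integer
"registers" (control-induced divergence through cycles, which the real
analysis also handles, is omitted):

    #include <algorithm>
    #include <set>
    #include <vector>

    struct Inst {
      std::vector<int> Defs, Uses;
      bool NeverUniform = false;  // e.g. produces a per-lane value
      bool AlwaysUniform = false; // uniformity override, never divergent
    };

    std::set<int> computeDivergent(const std::vector<Inst> &Prog) {
      std::set<int> Divergent;
      std::vector<int> Worklist;
      auto MarkDef = [&](int R) {
        if (Divergent.insert(R).second)
          Worklist.push_back(R);
      };
      // Initialization: seed the defs of never-uniform instructions.
      for (const Inst &I : Prog)
        if (I.NeverUniform)
          for (int D : I.Defs)
            MarkDef(D);
      // Propagation: a non-overridden user of a divergent value defines
      // divergent values itself.
      while (!Worklist.empty()) {
        int R = Worklist.back();
        Worklist.pop_back();
        for (const Inst &I : Prog) {
          if (I.AlwaysUniform)
            continue;
          if (std::find(I.Uses.begin(), I.Uses.end(), R) == I.Uses.end())
            continue;
          for (int D : I.Defs)
            MarkDef(D);
        }
      }
      return Divergent;
    }
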
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index 93e68918b632..ddd5a027c2cd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -73,6 +73,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
@@ -294,6 +295,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addUsedIfAvailable<LiveStacks>();
+ AU.addUsedIfAvailable<LiveVariables>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -564,7 +566,7 @@ void MachineVerifier::report_context_vreg(Register VReg) const {
}
void MachineVerifier::report_context_vreg_regunit(Register VRegOrUnit) const {
- if (Register::isVirtualRegister(VRegOrUnit)) {
+ if (VRegOrUnit.isVirtual()) {
report_context_vreg(VRegOrUnit);
} else {
errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n';
@@ -632,6 +634,13 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
}
+ if (MBB->isIRBlockAddressTaken()) {
+ if (!MBB->getAddressTakenIRBlock()->hasAddressTaken())
+ report("ir-block-address-taken is associated with basic block not used by "
+ "a blockaddress.",
+ MBB);
+ }
+
// Count the number of landing pad successors.
SmallPtrSet<const MachineBasicBlock*, 4> LandingPadSuccs;
for (const auto *succ : MBB->successors()) {
@@ -821,8 +830,12 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
if (!FirstTerminator)
FirstTerminator = MI;
} else if (FirstTerminator) {
- report("Non-terminator instruction after the first terminator", MI);
- errs() << "First terminator was:\t" << *FirstTerminator;
+ // For GlobalISel, G_INVOKE_REGION_START is a terminator that we allow to
+ // precede non-terminators.
+ if (FirstTerminator->getOpcode() != TargetOpcode::G_INVOKE_REGION_START) {
+ report("Non-terminator instruction after the first terminator", MI);
+ errs() << "First terminator was:\t" << *FirstTerminator;
+ }
}
}
@@ -869,6 +882,34 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
if (!MO.isReg() || !MO.isImplicit())
report("Expected implicit register after groups", &MO, OpNo);
}
+
+ if (MI->getOpcode() == TargetOpcode::INLINEASM_BR) {
+ const MachineBasicBlock *MBB = MI->getParent();
+
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isMBB())
+ continue;
+
+ // Check that the successor and predecessor lists are consistent rather
+ // than assuming they are. Find the indirect target without going through
+ // the successor list.
+ const MachineBasicBlock *IndirectTargetMBB = MO.getMBB();
+ if (!IndirectTargetMBB) {
+ report("INLINEASM_BR indirect target does not exist", &MO, i);
+ break;
+ }
+
+ if (!MBB->isSuccessor(IndirectTargetMBB))
+ report("INLINEASM_BR indirect target missing from successor list", &MO,
+ i);
+
+ if (!IndirectTargetMBB->isPredecessor(MBB))
+ report("INLINEASM_BR indirect target predecessor list missing parent",
+ &MO, i);
+ }
+ }
}
bool MachineVerifier::verifyAllRegOpsScalar(const MachineInstr &MI,
@@ -937,11 +978,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
SmallVector<LLT, 4> Types;
for (unsigned I = 0, E = std::min(MCID.getNumOperands(), NumOps);
I != E; ++I) {
- if (!MCID.OpInfo[I].isGenericType())
+ if (!MCID.operands()[I].isGenericType())
continue;
// Generic instructions specify type equality constraints between some of
// their operands. Make sure these are consistent.
- size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex();
+ size_t TypeIdx = MCID.operands()[I].getGenericTypeIndex();
Types.resize(std::max(TypeIdx + 1, Types.size()));
const MachineOperand *MO = &MI->getOperand(I);
@@ -969,7 +1010,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
// Generic opcodes must not have physical register operands.
for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
const MachineOperand *MO = &MI->getOperand(I);
- if (MO->isReg() && Register::isPhysicalRegister(MO->getReg()))
+ if (MO->isReg() && MO->getReg().isPhysical())
report("Generic instruction cannot have physical register", MO, I);
}
@@ -1274,17 +1315,38 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
case TargetOpcode::G_UNMERGE_VALUES: {
+ unsigned NumDsts = MI->getNumOperands() - 1;
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
- LLT SrcTy = MRI->getType(MI->getOperand(MI->getNumOperands()-1).getReg());
- // For now G_UNMERGE can split vectors.
- for (unsigned i = 0; i < MI->getNumOperands()-1; ++i) {
- if (MRI->getType(MI->getOperand(i).getReg()) != DstTy)
+ for (unsigned i = 1; i < NumDsts; ++i) {
+ if (MRI->getType(MI->getOperand(i).getReg()) != DstTy) {
report("G_UNMERGE_VALUES destination types do not match", MI);
+ break;
+ }
}
- if (SrcTy.getSizeInBits() !=
- (DstTy.getSizeInBits() * (MI->getNumOperands() - 1))) {
- report("G_UNMERGE_VALUES source operand does not cover dest operands",
- MI);
+
+ LLT SrcTy = MRI->getType(MI->getOperand(NumDsts).getReg());
+ if (DstTy.isVector()) {
+ // This case is the converse of G_CONCAT_VECTORS.
+ if (!SrcTy.isVector() || SrcTy.getScalarType() != DstTy.getScalarType() ||
+ SrcTy.getNumElements() != NumDsts * DstTy.getNumElements())
+ report("G_UNMERGE_VALUES source operand does not match vector "
+ "destination operands",
+ MI);
+ } else if (SrcTy.isVector()) {
+ // This case is the converse of G_BUILD_VECTOR, but relaxed to allow
+ // mismatched types as long as the total size matches:
+ // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<4 x s32>)
+ if (SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits())
+ report("G_UNMERGE_VALUES vector source operand does not match scalar "
+ "destination operands",
+ MI);
+ } else {
+ // This case is the converse of G_MERGE_VALUES.
+ if (SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits()) {
+ report("G_UNMERGE_VALUES scalar source operand does not match scalar "
+ "destination operands",
+ MI);
+ }
}
break;
}
@@ -1438,10 +1500,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC;
unsigned IntrID = IntrIDOp.getIntrinsicID();
if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
- AttributeList Attrs
- = Intrinsic::getAttributes(MF->getFunction().getContext(),
- static_cast<Intrinsic::ID>(IntrID));
- bool DeclHasSideEffects = !Attrs.hasFnAttr(Attribute::ReadNone);
+ AttributeList Attrs = Intrinsic::getAttributes(
+ MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
+ bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
if (NoSideEffects && DeclHasSideEffects) {
report("G_INTRINSIC used with intrinsic that accesses memory", MI);
break;
@@ -1678,16 +1739,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
report("Incorrect floating-point class set (operand 2)", MI);
break;
}
- const MachineOperand &SemanticsMO = MI->getOperand(3);
- if (!SemanticsMO.isImm()) {
- report("floating-point semantics (operand 3) must be an immediate", MI);
- break;
- }
- int64_t Semantics = SemanticsMO.getImm();
- if (Semantics < 0 || Semantics > APFloat::S_MaxSemantics) {
- report("Incorrect floating-point semantics (operand 3)", MI);
- break;
- }
+ break;
+ }
+ case TargetOpcode::G_ASSERT_ALIGN: {
+ if (MI->getOperand(2).getImm() < 1)
+ report("alignment immediate must be >= 1", MI);
break;
}
default:
@@ -1888,6 +1944,36 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
break;
}
} break;
+ case TargetOpcode::REG_SEQUENCE: {
+ unsigned NumOps = MI->getNumOperands();
+ if (!(NumOps & 1)) {
+ report("Invalid number of operands for REG_SEQUENCE", MI);
+ break;
+ }
+
+ for (unsigned I = 1; I != NumOps; I += 2) {
+ const MachineOperand &RegOp = MI->getOperand(I);
+ const MachineOperand &SubRegOp = MI->getOperand(I + 1);
+
+ if (!RegOp.isReg())
+ report("Invalid register operand for REG_SEQUENCE", &RegOp, I);
+
+ if (!SubRegOp.isImm() || SubRegOp.getImm() == 0 ||
+ SubRegOp.getImm() >= TRI->getNumSubRegIndices()) {
+ report("Invalid subregister index operand for REG_SEQUENCE",
+ &SubRegOp, I + 1);
+ }
+ }
+
+ Register DstReg = MI->getOperand(0).getReg();
+ if (DstReg.isPhysical())
+ report("REG_SEQUENCE does not support physical register results", MI);
+
+ if (MI->getOperand(0).getSubReg())
+ report("Invalid subreg result for REG_SEQUENCE", MI);
+
+ break;
+ }
}
}
@@ -1901,7 +1987,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// The first MCID.NumDefs operands must be explicit register defines
if (MONum < NumDefs) {
- const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
+ const MCOperandInfo &MCOI = MCID.operands()[MONum];
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
else if (!MO->isDef() && !MCOI.isOptionalDef())
@@ -1909,7 +1995,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
} else if (MONum < MCID.getNumOperands()) {
- const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
+ const MCOperandInfo &MCOI = MCID.operands()[MONum];
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end. Check non-variadic operands only.
bool IsOptional = MI->isVariadic() && MONum == MCID.getNumOperands() - 1;
@@ -1941,11 +2027,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("Operand should be tied", MO, MONum);
else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum))
report("Tied def doesn't match MCInstrDesc", MO, MONum);
- else if (Register::isPhysicalRegister(MO->getReg())) {
+ else if (MO->getReg().isPhysical()) {
const MachineOperand &MOTied = MI->getOperand(TiedTo);
if (!MOTied.isReg())
report("Tied counterpart must be a register", &MOTied, TiedTo);
- else if (Register::isPhysicalRegister(MOTied.getReg()) &&
+ else if (MOTied.getReg().isPhysical() &&
MO->getReg() != MOTied.getReg())
report("Tied physical registers must match.", &MOTied, TiedTo);
}
@@ -2017,7 +2103,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Check register classes.
unsigned SubIdx = MO->getSubReg();
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
if (SubIdx) {
report("Illegal subregister index for physical register", MO, MONum);
return;
@@ -2255,8 +2341,18 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
bool SubRangeCheck,
LaneBitmask LaneMask) {
if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) {
- assert(VNI && "NULL valno is not allowed");
- if (VNI->def != DefIdx) {
+ // The LR can correspond to the whole register, and its def slot need not
+ // match this MO's def slot: e.g. when checking a "normal" subreg MO here
+ // while another early-clobber subreg MO in the same instruction gives the
+ // whole register an early-clobber def slot differing from the checked MO's
+ // def slot. For example:
+ // %0 [16e,32r:0) 0@16e L..3 [16e,32r:0) 0@16e L..C [16r,32r:0) 0@16r
+ // The check that an early-clobber def of the same superregister exists
+ // somewhere is performed in visitMachineFunctionAfter().
+ if (((SubRangeCheck || MO->getSubReg() == 0) && VNI->def != DefIdx) ||
+ !SlotIndex::isSameInstr(VNI->def, DefIdx) ||
+ (VNI->def != DefIdx &&
+ (!VNI->def.isEarlyClobber() || !DefIdx.isRegister()))) {
report("Inconsistent valno->def", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
@@ -2277,8 +2373,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
if (MO->isDead()) {
LiveQueryResult LRQ = LR.Query(DefIdx);
if (!LRQ.isDeadDef()) {
- assert(Register::isVirtualRegister(VRegOrUnit) &&
- "Expecting a virtual register.");
+ assert(VRegOrUnit.isVirtual() && "Expecting a virtual register.");
// A dead subreg def only tells us that the specific subreg is dead. There
// could be other non-dead defs of other subregs, or we could have other
// parts of the register being live through the instruction. So unless we
@@ -2688,7 +2783,7 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) {
MODef.isEarlyClobber() || MODef.isDebug())
report("Unexpected flag on PHI operand", &MODef, 0);
Register DefReg = MODef.getReg();
- if (!Register::isVirtualRegister(DefReg))
+ if (!DefReg.isVirtual())
report("Expected first PHI operand to be a virtual register", &MODef, 0);
for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
@@ -2920,12 +3015,11 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
if (!MOI->isReg() || !MOI->isDef())
continue;
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
if (MOI->getReg() != Reg)
continue;
} else {
- if (!Register::isPhysicalRegister(MOI->getReg()) ||
- !TRI->hasRegUnit(MOI->getReg(), Reg))
+ if (!MOI->getReg().isPhysical() || !TRI->hasRegUnit(MOI->getReg(), Reg))
continue;
}
if (LaneMask.any() &&
@@ -3007,8 +3101,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
return;
// RegUnit intervals are allowed dead phis.
- if (!Register::isVirtualRegister(Reg) && VNI->isPHIDef() &&
- S.start == VNI->def && S.end == VNI->def.getDeadSlot())
+ if (!Reg.isVirtual() && VNI->isPHIDef() && S.start == VNI->def &&
+ S.end == VNI->def.getDeadSlot())
return;
// The live segment is ending inside EndMBB
@@ -3055,7 +3149,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// The following checks only apply to virtual registers. Physreg liveness
// is too weird to check.
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
// A live segment can end with either a redefinition, a kill flag on a
// use, or a dead flag on a def.
bool hasRead = false;
@@ -3128,7 +3222,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
while (true) {
assert(LiveInts->isLiveInToMBB(LR, &*MFI));
// We don't know how to track physregs into a landing pad.
- if (!Register::isVirtualRegister(Reg) && MFI->isEHPad()) {
+ if (!Reg.isVirtual() && MFI->isEHPad()) {
if (&*MFI == EndMBB)
break;
++MFI;
@@ -3196,7 +3290,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, Register Reg,
void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
Register Reg = LI.reg();
- assert(Register::isVirtualRegister(Reg));
+ assert(Reg.isVirtual());
verifyLiveRange(LI, Reg);
LaneBitmask Mask;
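
The new REG_SEQUENCE case enforces a simple operand shape: one register def
followed by (register, subregister-index) pairs, which keeps the operand
count odd, with every index a valid nonzero subregister index. A standalone
sketch of that shape check over toy operands (this version also insists on
at least one pair, which the verifier leaves implicit):

    #include <vector>

    struct Op { bool IsReg; int Imm; }; // toy operand: register or immediate

    bool checkRegSequence(const std::vector<Op> &Ops, int NumSubRegIndices) {
      if (Ops.size() < 3 || Ops.size() % 2 == 0)
        return false; // def plus pairs keeps the operand count odd
      for (size_t I = 1; I + 1 < Ops.size(); I += 2) {
        if (!Ops[I].IsReg)
          return false; // first element of each pair must be a register
        int SubIdx = Ops[I + 1].Imm;
        if (Ops[I + 1].IsReg || SubIdx == 0 || SubIdx >= NumSubRegIndices)
          return false; // second element must be a valid subreg index
      }
      return true;
    }
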
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index 581168b31384..af9fef0720f9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -116,6 +116,12 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// a map between register names in the original block and the names created
// in each stage of the pipelined loop.
ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
+
+ // The phi-based renaming destination for registers that live across
+ // stages. This map is updated during phi generation to point at the most
+ // recent renaming destination.
+ ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
+
InstrMapTy InstrMap;
SmallVector<MachineBasicBlock *, 4> PrologBBs;
@@ -151,14 +157,15 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
InstrMap, MaxStageCount, MaxStageCount, false);
- generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap,
- MaxStageCount, MaxStageCount, false);
+ generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
+ InstrMap, MaxStageCount, MaxStageCount, false);
LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
SmallVector<MachineBasicBlock *, 4> EpilogBBs;
// Generate the epilog instructions to complete the pipeline.
- generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs);
+ generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
+ PrologBBs);
// We need this step because the register allocation doesn't handle some
// situations well, so we insert copies to help out.
@@ -171,6 +178,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
delete[] VRMap;
+ delete[] VRMapPhi;
}
void ModuloScheduleExpander::cleanup() {
@@ -242,7 +250,8 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
/// block for each stage that needs to complete.
void ModuloScheduleExpander::generateEpilog(
unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
- ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) {
+ ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs) {
// We need to change the branch from the kernel to the first epilog block, so
// this call to analyze branch uses the kernel rather than the original BB.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
@@ -296,8 +305,8 @@ void ModuloScheduleExpander::generateEpilog(
}
generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
InstrMap, LastStage, EpilogStage, i == 1);
- generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap,
- LastStage, EpilogStage, i == 1);
+ generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
+ InstrMap, LastStage, EpilogStage, i == 1);
PredBB = NewBB;
LLVM_DEBUG({
@@ -593,8 +602,9 @@ void ModuloScheduleExpander::generateExistingPhis(
/// use in the pipelined sequence.
void ModuloScheduleExpander::generatePhis(
MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
- unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
+ InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
+ bool IsLast) {
// Compute the stage number that contains the initial Phi value, and
// the Phi from the previous stage.
unsigned PrologStage = 0;
@@ -614,8 +624,7 @@ void ModuloScheduleExpander::generatePhis(
BBI != BBE; ++BBI) {
for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = BBI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() ||
- !Register::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
continue;
int StageScheduled = Schedule.getStage(&*BBI);
@@ -631,26 +640,49 @@ void ModuloScheduleExpander::generatePhis(
if (!InKernel && (unsigned)StageScheduled > PrologStage)
continue;
- unsigned PhiOp2 = VRMap[PrevStage][Def];
- if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
- if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
- PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+ unsigned PhiOp2;
+ if (InKernel) {
+ PhiOp2 = VRMap[PrevStage][Def];
+ if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
+ if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
+ PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+ }
// The number of Phis can't exceed the number of prolog stages. The
// prolog stage number is zero based.
if (NumPhis > PrologStage + 1 - StageScheduled)
NumPhis = PrologStage + 1 - StageScheduled;
for (unsigned np = 0; np < NumPhis; ++np) {
+ // Example for
+ // Org:
+ // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
+ //
+ // Prolog0 (Stage0):
+ // %Clone0 = ...
+ // Prolog1 (Stage1):
+ // %Clone1 = ...
+ // Kernel (Stage2):
+ // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
+ // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
+ // %Clone2 = ...
+ // Epilog0 (Stage3):
+ // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
+ // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
+ // Epilog1 (Stage4):
+ // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
+ //
+ // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
+ // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
+ // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
+
unsigned PhiOp1 = VRMap[PrologStage][Def];
if (np <= PrologStage)
PhiOp1 = VRMap[PrologStage - np][Def];
- if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
- if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
- PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
- if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
- PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
+ if (!InKernel) {
+ if (PrevStage == LastStageNum && np == 0)
+ PhiOp2 = VRMap[LastStageNum][Def];
+ else
+ PhiOp2 = VRMapPhi[PrevStage - np][Def];
}
- if (!InKernel)
- PhiOp2 = VRMap[PrevStage - np][Def];
const TargetRegisterClass *RC = MRI.getRegClass(Def);
Register NewReg = MRI.createVirtualRegister(RC);
@@ -672,9 +704,9 @@ void ModuloScheduleExpander::generatePhis(
NewReg);
PhiOp2 = NewReg;
- VRMap[PrevStage - np - 1][Def] = NewReg;
+ VRMapPhi[PrevStage - np - 1][Def] = NewReg;
} else {
- VRMap[CurStageNum - np][Def] = NewReg;
+ VRMapPhi[CurStageNum - np][Def] = NewReg;
if (np == NumPhis - 1)
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
NewReg);
@@ -716,7 +748,7 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
continue;
Register reg = MO.getReg();
// Assume physical registers are used, unless they are marked dead.
- if (Register::isPhysicalRegister(reg)) {
+ if (reg.isPhysical()) {
used = !MO.isDead();
if (used)
break;
@@ -847,7 +879,7 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
MachineBasicBlock *Epilog = EpilogBBs[i];
SmallVector<MachineOperand, 4> Cond;
- Optional<bool> StaticallyGreater =
+ std::optional<bool> StaticallyGreater =
LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
unsigned numAdded = 0;
if (!StaticallyGreater) {
@@ -965,17 +997,6 @@ MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
unsigned CurStageNum,
unsigned InstStageNum) {
MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
- // Check for tied operands in inline asm instructions. This should be handled
- // elsewhere, but I'm not sure of the best solution.
- if (OldMI->isInlineAsm())
- for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
- const auto &MO = OldMI->getOperand(i);
- if (MO.isReg() && MO.isUse())
- break;
- unsigned UseIdx;
- if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
- NewMI->tieOperands(i, UseIdx);
- }
updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
return NewMI;
}
@@ -1010,7 +1031,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
unsigned InstrStageNum,
ValueMapTy *VRMap) {
for (MachineOperand &MO : NewMI->operands()) {
- if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
Register reg = MO.getReg();
if (MO.isDef()) {
@@ -1259,7 +1280,7 @@ class KernelRewriter {
// Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
// If InitReg is not given it is chosen arbitrarily. It will either be undef
// or will be chosen so as to share another phi.
- Register phi(Register LoopReg, Optional<Register> InitReg = {},
+ Register phi(Register LoopReg, std::optional<Register> InitReg = {},
const TargetRegisterClass *RC = nullptr);
// Create an undef register of the given register class.
Register undef(const TargetRegisterClass *RC);
@@ -1367,7 +1388,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
// First, dive through the phi chain to find the defaults for the generated
// phis.
- SmallVector<Optional<Register>, 4> Defaults;
+ SmallVector<std::optional<Register>, 4> Defaults;
Register LoopReg = Reg;
auto LoopProducer = Producer;
while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
@@ -1378,7 +1399,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
}
int LoopProducerStage = S.getStage(LoopProducer);
- Optional<Register> IllegalPhiDefault;
+ std::optional<Register> IllegalPhiDefault;
if (LoopProducerStage == -1) {
// Do nothing.
@@ -1410,9 +1431,9 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
// If we need more phis than we have defaults for, pad out with undefs for
// the earliest phis, which are at the end of the defaults chain (the
// chain is in reverse order).
- Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
- ? Optional<Register>()
- : Defaults.back());
+ Defaults.resize(Defaults.size() + StageDiff,
+ Defaults.empty() ? std::optional<Register>()
+ : Defaults.back());
}
}
@@ -1444,11 +1465,11 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
return LoopReg;
}
-Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
+Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
const TargetRegisterClass *RC) {
// If the init register is not undef, try and find an existing phi.
if (InitReg) {
- auto I = Phis.find({LoopReg, InitReg.value()});
+ auto I = Phis.find({LoopReg, *InitReg});
if (I != Phis.end())
return I->second;
} else {
@@ -1469,10 +1490,10 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
return R;
// Found a phi taking undef as input, so rewrite it to take InitReg.
MachineInstr *MI = MRI.getVRegDef(R);
- MI->getOperand(1).setReg(InitReg.value());
- Phis.insert({{LoopReg, InitReg.value()}, R});
+ MI->getOperand(1).setReg(*InitReg);
+ Phis.insert({{LoopReg, *InitReg}, R});
const TargetRegisterClass *ConstrainRegClass =
- MRI.constrainRegClass(R, MRI.getRegClass(InitReg.value()));
+ MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
assert(ConstrainRegClass && "Expected a valid constrained register class!");
(void)ConstrainRegClass;
UndefPhis.erase(I);
@@ -1941,7 +1962,7 @@ void PeelingModuloScheduleExpander::fixupBranches() {
MachineBasicBlock *Epilog = *EI;
SmallVector<MachineOperand, 4> Cond;
TII->removeBranch(*Prolog);
- Optional<bool> StaticallyGreater =
+ std::optional<bool> StaticallyGreater =
LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
if (!StaticallyGreater) {
LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
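
The VRMapPhi split keeps two renamings per stage: VRMap holds the cloned def
of each original vreg, while VRMapPhi tracks the most recent phi renaming,
so epilog phis chain through earlier phis instead of clobbering the clone
map. A simplified sketch of the bookkeeping (the per-phi counter np from the
loop above is omitted; names are illustrative):

    #include <map>
    #include <vector>

    using ValueMap = std::map<int, int>; // original vreg -> renamed vreg

    struct StageMaps {
      std::vector<ValueMap> VRMap;    // per-stage clone of each def
      std::vector<ValueMap> VRMapPhi; // per-stage phi renaming of each def
      explicit StageMaps(unsigned NumStages)
          : VRMap(NumStages), VRMapPhi(NumStages) {}

      // Second phi operand in an epilog block: the clone when the previous
      // stage is the last one, otherwise the phi recorded for that stage.
      int epilogPhiOperand(unsigned PrevStage, unsigned LastStage, int Def) {
        if (PrevStage == LastStage)
          return VRMap[LastStage][Def];
        return VRMapPhi[PrevStage][Def];
      }
    };
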
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp
index d5d262e4047a..e68a6398cf51 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -117,7 +117,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
// Skip over register-to-register moves.
if (SrcMI && SrcMI->isCopy() && !SrcMI->getOperand(0).getSubReg() &&
!SrcMI->getOperand(1).getSubReg() &&
- Register::isVirtualRegister(SrcMI->getOperand(1).getReg())) {
+ SrcMI->getOperand(1).getReg().isVirtual()) {
SrcReg = SrcMI->getOperand(1).getReg();
SrcMI = MRI->getVRegDef(SrcReg);
}
@@ -142,8 +142,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction");
Register DstReg = MI->getOperand(0).getReg();
- assert(Register::isVirtualRegister(DstReg) &&
- "PHI destination is not a virtual register");
+ assert(DstReg.isVirtual() && "PHI destination is not a virtual register");
// See if we already saw this register.
if (!PHIsInCycle.insert(MI).second)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
index 7709095cd683..51035d2e442f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
@@ -161,7 +161,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
for (unsigned Index = 0, e = MRI->getNumVirtRegs(); Index != e; ++Index) {
// Set the bit for this register for each MBB where it is
// live-through or live-in (killed).
- unsigned VirtReg = Register::index2VirtReg(Index);
+ Register VirtReg = Register::index2VirtReg(Index);
MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
if (!DefMI)
continue;
@@ -441,7 +441,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
isImplicitlyDefined(SrcReg, *MRI);
- assert(Register::isVirtualRegister(SrcReg) &&
+ assert(SrcReg.isVirtual() &&
"Machine PHI Operands must all be virtual registers!");
// Get the MachineBasicBlock equivalent of the BasicBlock that is the source
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
index 0f9da0637ced..9449f143366f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -37,23 +37,6 @@ struct PatchableFunction : public MachineFunctionPass {
};
}
-/// Returns true if instruction \p MI will not result in actual machine code
-/// instructions.
-static bool doesNotGeneratecode(const MachineInstr &MI) {
- // TODO: Introduce an MCInstrDesc flag for this
- switch (MI.getOpcode()) {
- default: return false;
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::CFI_INSTRUCTION:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::GC_LABEL:
- case TargetOpcode::DBG_VALUE:
- case TargetOpcode::DBG_LABEL:
- return true;
- }
-}
-
bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
if (MF.getFunction().hasFnAttribute("patchable-function-entry")) {
MachineBasicBlock &FirstMBB = *MF.begin();
@@ -74,11 +57,28 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
#endif
auto &FirstMBB = *MF.begin();
- MachineBasicBlock::iterator FirstActualI = FirstMBB.begin();
- for (; doesNotGeneratecode(*FirstActualI); ++FirstActualI)
- assert(FirstActualI != FirstMBB.end());
-
auto *TII = MF.getSubtarget().getInstrInfo();
+
+ MachineBasicBlock::iterator FirstActualI = llvm::find_if(
+ FirstMBB, [](const MachineInstr &MI) { return !MI.isMetaInstruction(); });
+
+ if (FirstActualI == FirstMBB.end()) {
+ // Per the Microsoft documentation on the /hotpatch feature, we must ensure that
+ // "the first instruction of each function is at least two bytes, and no
+ // jump within the function goes to the first instruction"
+
+ // When the first MBB is empty, insert a patchable no-op. This ensures the
+ // first instruction is patchable in two special cases:
+ // - the function is empty (e.g. unreachable)
+ // - the function jumps back to the first instruction, which is in a
+ // successor MBB.
+ BuildMI(&FirstMBB, DebugLoc(), TII->get(TargetOpcode::PATCHABLE_OP))
+ .addImm(2)
+ .addImm(TargetOpcode::PATCHABLE_OP);
+ MF.ensureAlignment(Align(16));
+ return true;
+ }
+
auto MIB = BuildMI(FirstMBB, FirstActualI, FirstActualI->getDebugLoc(),
TII->get(TargetOpcode::PATCHABLE_OP))
.addImm(2)
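
The rewritten PatchableFunction scan replaces a hand-rolled loop with llvm::find_if over the block, skipping meta instructions (those that emit no machine code); when none is found, the pass emits a standalone two-byte PATCHABLE_OP instead. A self-contained approximation of the search with std::find_if and a toy instruction type:

#include <algorithm>
#include <cassert>
#include <vector>

struct Instr {            // toy MachineInstr with just the queried property
  bool Meta;              // CFI_INSTRUCTION, DBG_VALUE, ... emit no code
  bool isMetaInstruction() const { return Meta; }
};

int main() {
  std::vector<Instr> MBB = {{true}, {true}, {false}, {true}};
  // Equivalent of llvm::find_if(FirstMBB, [](const MachineInstr &MI) {
  //   return !MI.isMetaInstruction(); });
  auto FirstActualI = std::find_if(MBB.begin(), MBB.end(),
      [](const Instr &MI) { return !MI.isMetaInstruction(); });
  assert(FirstActualI == MBB.begin() + 2); // first code-emitting instruction
}
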
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 31e37c4cd7e3..c3458be0f883 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -66,7 +66,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -273,11 +272,11 @@ namespace {
: MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
MachineInstr *getMI() const { return MI; }
- Optional<IndexPair> getCommutePair() const { return CommutePair; }
+ std::optional<IndexPair> getCommutePair() const { return CommutePair; }
private:
MachineInstr *MI;
- Optional<IndexPair> CommutePair;
+ std::optional<IndexPair> CommutePair;
};
/// Helper class to hold a reply for ValueTracker queries.
@@ -696,7 +695,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
do {
CurSrcPair = SrcToLook.pop_back_val();
// As explained above, do not handle physical registers
- if (Register::isPhysicalRegister(CurSrcPair.Reg))
+ if (CurSrcPair.Reg.isPhysical())
return false;
ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
@@ -744,7 +743,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
// constraints to the register allocator. Moreover, if we want to extend
// the live-range of a physical register, unlike SSA virtual register,
// we will have to check that they aren't redefined before the related use.
- if (Register::isPhysicalRegister(CurSrcPair.Reg))
+ if (CurSrcPair.Reg.isPhysical())
return false;
// Keep following the chain if the value isn't any better yet.
@@ -1191,7 +1190,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
"Coalescer can understand multiple defs?!");
const MachineOperand &MODef = MI.getOperand(0);
// Do not rewrite physical definitions.
- if (Register::isPhysicalRegister(MODef.getReg()))
+ if (MODef.getReg().isPhysical())
return false;
bool Changed = false;
@@ -1242,8 +1241,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
MachineInstr &
PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
RegSubRegPair Def, RewriteMapTy &RewriteMap) {
- assert(!Register::isPhysicalRegister(Def.Reg) &&
- "We do not rewrite physical registers");
+ assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers");
// Find the new source to use in the COPY rewrite.
RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);
@@ -1301,7 +1299,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
while (CpyRewriter.getNextRewritableSource(Src, Def)) {
// If a physical register is here, this is probably for a good reason.
// Do not rewrite that.
- if (Register::isPhysicalRegister(Def.Reg))
+ if (Def.Reg.isPhysical())
return false;
// If we do not know how to rewrite this definition, there is no point
@@ -1460,7 +1458,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
- if (isNAPhysCopy(SrcReg) && Register::isVirtualRegister(DstReg)) {
+ if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) {
// %vreg = COPY $physreg
// Avoid using a data structure which can track multiple live non-allocatable
// phys->virt copies since LLVM doesn't seem to do this.
@@ -2110,7 +2108,7 @@ ValueTrackerResult ValueTracker::getNextSource() {
// If we can still move up in the use-def chain, move to the next
// definition.
- if (!Register::isPhysicalRegister(Reg) && OneRegSrc) {
+ if (!Reg.isPhysical() && OneRegSrc) {
MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
if (DI != MRI.def_end()) {
Def = DI->getParent();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 54bb4a31ef49..7e46dd35ce47 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -82,7 +82,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
LLVM_DEBUG(dbgs() << "Processing " << *MI);
Register Reg = MI->getOperand(0).getReg();
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
// For virtual registers, mark all uses as <undef>, and convert users to
// implicit-def when possible.
for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
@@ -108,8 +108,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
if (!MO.isReg())
continue;
Register UserReg = MO.getReg();
- if (!Register::isPhysicalRegister(UserReg) ||
- !TRI->regsOverlap(Reg, UserReg))
+ if (!UserReg.isPhysical() || !TRI->regsOverlap(Reg, UserReg))
continue;
// UserMI uses or redefines Reg. Set <undef> flags on all uses.
Found = true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index a8d40edd88d3..cc70ec477650 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -57,6 +57,7 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -127,6 +128,17 @@ private:
void replaceFrameIndices(MachineFunction &MF);
void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
int &SPAdj);
+ // Frame indices in debug values are encoded in a target-independent
+ // way, using just the frame index and offset rather than any
+ // target-specific addressing mode.
+ bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
+ unsigned OpIdx, int SPAdj = 0);
+ // Does the same as replaceFrameIndices, but uses a backward MIR walk and
+ // a backward register scavenger walk. Does not yet support call sequence
+ // processing.
+ void replaceFrameIndicesBackward(MachineBasicBlock *BB, MachineFunction &MF,
+ int &SPAdj);
+
void insertPrologEpilogCode(MachineFunction &MF);
void insertZeroCallUsedRegs(MachineFunction &MF);
};
@@ -283,13 +295,35 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
assert(!Failed && "Invalid warn-stack-size fn attr value");
(void)Failed;
}
- if (MF.getFunction().hasFnAttribute(Attribute::SafeStack)) {
- StackSize += MFI.getUnsafeStackSize();
- }
+ uint64_t UnsafeStackSize = MFI.getUnsafeStackSize();
+ if (MF.getFunction().hasFnAttribute(Attribute::SafeStack))
+ StackSize += UnsafeStackSize;
+
if (StackSize > Threshold) {
DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning);
F.getContext().diagnose(DiagStackSize);
+ int64_t SpillSize = 0;
+ for (int Idx = MFI.getObjectIndexBegin(), End = MFI.getObjectIndexEnd();
+ Idx != End; ++Idx) {
+ if (MFI.isSpillSlotObjectIndex(Idx))
+ SpillSize += MFI.getObjectSize(Idx);
+ }
+
+ float SpillPct =
+ static_cast<float>(SpillSize) / static_cast<float>(StackSize);
+ float VarPct = 1.0f - SpillPct;
+ int64_t VariableSize = StackSize - SpillSize;
+ dbgs() << formatv("{0}/{1} ({3:P}) spills, {2}/{1} ({4:P}) variables",
+ SpillSize, StackSize, VariableSize, SpillPct, VarPct);
+ if (UnsafeStackSize != 0) {
+ float UnsafePct =
+ static_cast<float>(UnsafeStackSize) / static_cast<float>(StackSize);
+ dbgs() << formatv(", {0}/{2} ({1:P}) unsafe stack", UnsafeStackSize,
+ UnsafePct, StackSize);
+ }
+ dbgs() << "\n";
}
+
ORE->emit([&]() {
return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize",
MF.getFunction().getSubprogram(),
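
The new stack-size diagnostic leans on llvm::formatv's percentage style for floats. A minimal sketch, assuming an LLVM build environment; per llvm/Support/FormatVariadic.h, `{N:P}` renders a float in [0,1] as a percentage (two decimal places by default), and positional arguments such as `{1}` may repeat:

#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  int64_t SpillSize = 48, StackSize = 160;
  float SpillPct = float(SpillSize) / float(StackSize);
  // Same shape as the PEI diagnostic above.
  llvm::outs() << llvm::formatv("{0}/{1} ({2:P}) spills\n",
                                SpillSize, StackSize, SpillPct);
  // Expected output: "48/160 (30.00%) spills"
}
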
@@ -575,7 +609,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
} else {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
- TRI);
+ TRI, Register());
}
}
}
@@ -601,7 +635,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
.addReg(CI.getDstReg(), getKillRegState(true));
} else {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
+ TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC,
+ TRI, Register());
assert(I != RestoreBlock.begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert
@@ -1195,7 +1230,11 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
BitVector UsedRegs(TRI.getNumRegs());
if (OnlyUsed)
for (const MachineBasicBlock &MBB : MF)
- for (const MachineInstr &MI : MBB)
+ for (const MachineInstr &MI : MBB) {
+ // Skip debug instructions.
+ if (MI.isDebugInstr())
+ continue;
+
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
@@ -1205,6 +1244,12 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
(MO.isDef() || MO.isUse()))
UsedRegs.set(Reg);
}
+ }
+
+ // Get a list of registers that are used.
+ BitVector LiveIns(TRI.getNumRegs());
+ for (const MachineBasicBlock::RegisterMaskPair &LI : MF.front().liveins())
+ LiveIns.set(LI.PhysReg);
BitVector RegsToZero(TRI.getNumRegs());
for (MCRegister Reg : AllocatableSet.set_bits()) {
@@ -1221,8 +1266,14 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
continue;
// Want only registers used for arguments.
- if (OnlyArg && !TRI.isArgumentRegister(MF, Reg))
- continue;
+ if (OnlyArg) {
+ if (OnlyUsed) {
+ if (!LiveIns[Reg])
+ continue;
+ } else if (!TRI.isArgumentRegister(MF, Reg)) {
+ continue;
+ }
+ }
RegsToZero.set(Reg);
}
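
The argument-register zeroing change above keys off a BitVector of entry-block live-ins. A reduced sketch (toy register numbers, assuming llvm::BitVector from llvm/ADT/BitVector.h) of the test-and-filter pattern the hunk adds:

#include "llvm/ADT/BitVector.h"
#include <cassert>

int main() {
  const unsigned NumRegs = 64;
  llvm::BitVector LiveIns(NumRegs);
  for (unsigned PhysReg : {3u, 7u}) // stand-in for MF.front().liveins()
    LiveIns.set(PhysReg);

  // OnlyUsed + OnlyArg case from the hunk: keep a candidate register only
  // if it actually arrives live into the function.
  auto keep = [&](unsigned Reg) { return LiveIns[Reg]; };
  assert(keep(3) && !keep(4));
}
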
@@ -1325,6 +1376,154 @@ void PEI::replaceFrameIndices(MachineFunction &MF) {
}
}
+bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
+ unsigned OpIdx, int SPAdj) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ if (MI.isDebugValue()) {
+
+ MachineOperand &Op = MI.getOperand(OpIdx);
+ assert(MI.isDebugOperand(&Op) &&
+ "Frame indices can only appear as a debug operand in a DBG_VALUE*"
+ " machine instruction");
+ Register Reg;
+ unsigned FrameIdx = Op.getIndex();
+ unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);
+
+ StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg);
+ Op.ChangeToRegister(Reg, false /*isDef*/);
+
+ const DIExpression *DIExpr = MI.getDebugExpression();
+
+ // If we have a direct DBG_VALUE, and its location expression isn't
+ // currently complex, then adding an offset will morph it into a
+ // complex location that is interpreted as being a memory address.
+ // This changes a pointer-valued variable to dereference that pointer,
+ // which is incorrect. Fix by adding DW_OP_stack_value.
+
+ if (MI.isNonListDebugValue()) {
+ unsigned PrependFlags = DIExpression::ApplyOffset;
+ if (!MI.isIndirectDebugValue() && !DIExpr->isComplex())
+ PrependFlags |= DIExpression::StackValue;
+
+ // If we have a DBG_VALUE that is indirect and has an Implicit location
+ // expression, we need to insert a deref before prepending a Memory
+ // location expression. After doing this we also change the DBG_VALUE
+ // to be direct.
+ if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) {
+ SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
+ bool WithStackValue = true;
+ DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+ // Make the DBG_VALUE direct.
+ MI.getDebugOffset().ChangeToRegister(0, false);
+ }
+ DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset);
+ } else {
+ // The debug operand at DebugOpIndex was a frame index at offset
+ // `Offset`; now that the operand has been replaced with the frame
+ // register, we must add Offset, expressed as `register x, plus Offset`.
+ unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
+ SmallVector<uint64_t, 3> Ops;
+ TRI.getOffsetOpcodes(Offset, Ops);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
+ }
+ MI.getDebugExpressionOp().setMetadata(DIExpr);
+ return true;
+ }
+
+ if (MI.isDebugPHI()) {
+ // Allow stack ref to continue onwards.
+ return true;
+ }
+
+ // TODO: This code should be commoned with the code for
+ // PATCHPOINT. There's no good reason for the difference in
+ // implementation other than historical accident. The only
+ // remaining difference is the unconditional use of the stack
+ // pointer as the base register.
+ if (MI.getOpcode() == TargetOpcode::STATEPOINT) {
+ assert((!MI.isDebugValue() || OpIdx == 0) &&
+ "Frame indicies can only appear as the first operand of a "
+ "DBG_VALUE machine instruction");
+ Register Reg;
+ MachineOperand &Offset = MI.getOperand(OpIdx + 1);
+ StackOffset refOffset = TFI->getFrameIndexReferencePreferSP(
+ MF, MI.getOperand(OpIdx).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
+ assert(!refOffset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj);
+ MI.getOperand(OpIdx).ChangeToRegister(Reg, false /*isDef*/);
+ return true;
+ }
+ return false;
+}
+
+void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB,
+ MachineFunction &MF, int &SPAdj) {
+ assert(MF.getSubtarget().getRegisterInfo() &&
+ "getRegisterInfo() must be implemented!");
+
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ RS->enterBasicBlockEnd(*BB);
+
+ for (MachineInstr &MI : make_early_inc_range(reverse(*BB))) {
+
+ // Register scavenger backward step
+ MachineBasicBlock::iterator Step(MI);
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
+ if (!MI.getOperand(i).isFI())
+ continue;
+
+ if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
+ continue;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+
+ // TRI.eliminateFrameIndex may lower the frame index to a sequence of
+ // instructions. It can also remove or change instructions reached through
+ // the iterator and invalidate the iterator. To handle this we maintain
+ // two iterators: *Step* - points to the position up to
+ // which the scavenger should scan by the next iteration to have liveness
+ // information up to date. *Curr* - keeps track of the correct RS->MBBI -
+ // the scan start point. It points to the currently processed instruction
+ // right before the frame lowering.
+ //
+ // ITERATORS WORK AS FOLLOWS:
+ // *Step* is shifted one step back right before the frame lowering and
+ // one step forward right after it. No matter how many instructions were
+ // inserted, *Step* will be right after the position which is going to be
+ // processed in the next iteration, thus, in the correct position for the
+ // scavenger to go up to.
+ // *Curr* is shifted one step forward right before calling
+ // TRI.eliminateFrameIndex and one step backward after. Thus, we make sure
+ // it points right to the position that is the correct starting point for
+ // the scavenger to scan.
+ MachineBasicBlock::iterator Curr = ++RS->getCurrentPosition();
+
+ // Shift back
+ --Step;
+
+ bool Removed = TRI.eliminateFrameIndex(MI, SPAdj, i, RS);
+ // Restore to unify logic with a shift back that happens in the end of
+ // the outer loop.
+ ++Step;
+ RS->skipTo(--Curr);
+ if (Removed)
+ break;
+ }
+
+ // Shift it to make RS collect reg info up to the current instruction.
+ if (Step != BB->begin())
+ Step--;
+
+ // Update register states.
+ RS->backward(Step);
+ }
+}
+
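
The Step/Curr bookkeeping above exists because eliminateFrameIndex may erase or replace the very instruction the reverse walk is standing on. A language-level miniature of the same trick (std::list, a toy "elimination" that erases the node): step the shadow iterator off the node before mutating, restore it afterwards, and the walk stays valid.

#include <cassert>
#include <list>

int main() {
  std::list<int> BB = {1, 2, 3, 4};
  // Walk backwards; nodes may be erased mid-walk, as eliminateFrameIndex
  // may do to the MachineInstr being processed.
  for (auto It = BB.end(); It != BB.begin();) {
    --It;
    auto Step = It;               // shadow iterator, like *Step* above
    bool Erase = (*It % 2 == 0);  // toy stand-in for frame-index elimination
    if (Erase) {
      ++Step;                     // move off the node before it disappears
      BB.erase(It);
      It = Step;                  // restore: the next loop step is still valid
    }
  }
  assert(BB == std::list<int>({1, 3}));
}
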
void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
int &SPAdj) {
assert(MF.getSubtarget().getRegisterInfo() &&
@@ -1333,6 +1532,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ if (RS && TRI.supportsBackwardScavenger())
+ return replaceFrameIndicesBackward(BB, MF, SPAdj);
+
if (RS && FrameIndexEliminationScavenging)
RS->enterBasicBlock(*BB);
@@ -1353,83 +1555,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
if (!MI.getOperand(i).isFI())
continue;
- // Frame indices in debug values are encoded in a target independent
- // way with simply the frame index and offset rather than any
- // target-specific addressing mode.
- if (MI.isDebugValue()) {
- MachineOperand &Op = MI.getOperand(i);
- assert(
- MI.isDebugOperand(&Op) &&
- "Frame indices can only appear as a debug operand in a DBG_VALUE*"
- " machine instruction");
- Register Reg;
- unsigned FrameIdx = Op.getIndex();
- unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);
-
- StackOffset Offset =
- TFI->getFrameIndexReference(MF, FrameIdx, Reg);
- Op.ChangeToRegister(Reg, false /*isDef*/);
-
- const DIExpression *DIExpr = MI.getDebugExpression();
-
- // If we have a direct DBG_VALUE, and its location expression isn't
- // currently complex, then adding an offset will morph it into a
- // complex location that is interpreted as being a memory address.
- // This changes a pointer-valued variable to dereference that pointer,
- // which is incorrect. Fix by adding DW_OP_stack_value.
-
- if (MI.isNonListDebugValue()) {
- unsigned PrependFlags = DIExpression::ApplyOffset;
- if (!MI.isIndirectDebugValue() && !DIExpr->isComplex())
- PrependFlags |= DIExpression::StackValue;
-
- // If we have DBG_VALUE that is indirect and has a Implicit location
- // expression need to insert a deref before prepending a Memory
- // location expression. Also after doing this we change the DBG_VALUE
- // to be direct.
- if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) {
- SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
- bool WithStackValue = true;
- DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
- // Make the DBG_VALUE direct.
- MI.getDebugOffset().ChangeToRegister(0, false);
- }
- DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset);
- } else {
- // The debug operand at DebugOpIndex was a frame index at offset
- // `Offset`; now the operand has been replaced with the frame
- // register, we must add Offset with `register x, plus Offset`.
- unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
- SmallVector<uint64_t, 3> Ops;
- TRI.getOffsetOpcodes(Offset, Ops);
- DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
- }
- MI.getDebugExpressionOp().setMetadata(DIExpr);
+ if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
continue;
- } else if (MI.isDebugPHI()) {
- // Allow stack ref to continue onwards.
- continue;
- }
-
- // TODO: This code should be commoned with the code for
- // PATCHPOINT. There's no good reason for the difference in
- // implementation other than historical accident. The only
- // remaining difference is the unconditional use of the stack
- // pointer as the base register.
- if (MI.getOpcode() == TargetOpcode::STATEPOINT) {
- assert((!MI.isDebugValue() || i == 0) &&
- "Frame indicies can only appear as the first operand of a "
- "DBG_VALUE machine instruction");
- Register Reg;
- MachineOperand &Offset = MI.getOperand(i + 1);
- StackOffset refOffset = TFI->getFrameIndexReferencePreferSP(
- MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
- assert(!refOffset.getScalable() &&
- "Frame offsets with a scalable component are not supported");
- Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj);
- MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
- continue;
- }
// Some instructions (e.g. inline asm instructions) can have
// multiple frame indices and/or cause eliminateFrameIndex
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
index 51de99b81057..dcb1a44c75e4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
@@ -105,8 +105,8 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
const DataFlowGraph &G) {
- OS << Print<NodeId>(RA.Id, G) << '<'
- << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
+ OS << Print(RA.Id, G) << '<'
+ << Print(RA.Addr->getRegRef(G), G) << '>';
if (RA.Addr->getFlags() & NodeAttrs::Fixed)
OS << '!';
}
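
Dropping the explicit template arguments from `Print<NodeId>` and `Print<RegisterRef>` throughout RDFGraph relies on C++17 class template argument deduction. A compile-checkable toy of the same pattern:

#include <iostream>

struct Graph {};  // stand-in for DataFlowGraph

template <typename T> struct Print {
  const T &Obj;
  const Graph &G;
  Print(const T &Obj, const Graph &G) : Obj(Obj), G(G) {}
};

template <typename T>
std::ostream &operator<<(std::ostream &OS, const Print<T> &P) {
  return OS << P.Obj;  // the real RDF printers dispatch on T here
}

int main() {
  Graph G;
  unsigned Id = 42;                             // stand-in for NodeId
  std::cout << Print<unsigned>(Id, G) << '\n';  // pre-patch spelling
  std::cout << Print(Id, G) << '\n';            // CTAD deduces Print<unsigned>
}
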
@@ -115,16 +115,16 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
printRefHeader(OS, P.Obj, P.G);
OS << '(';
if (NodeId N = P.Obj.Addr->getReachingDef())
- OS << Print<NodeId>(N, P.G);
+ OS << Print(N, P.G);
OS << ',';
if (NodeId N = P.Obj.Addr->getReachedDef())
- OS << Print<NodeId>(N, P.G);
+ OS << Print(N, P.G);
OS << ',';
if (NodeId N = P.Obj.Addr->getReachedUse())
- OS << Print<NodeId>(N, P.G);
+ OS << Print(N, P.G);
OS << "):";
if (NodeId N = P.Obj.Addr->getSibling())
- OS << Print<NodeId>(N, P.G);
+ OS << Print(N, P.G);
return OS;
}
@@ -132,10 +132,10 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
printRefHeader(OS, P.Obj, P.G);
OS << '(';
if (NodeId N = P.Obj.Addr->getReachingDef())
- OS << Print<NodeId>(N, P.G);
+ OS << Print(N, P.G);
OS << "):";
if (NodeId N = P.Obj.Addr->getSibling())
- OS << Print<NodeId>(N, P.G);
+ OS << Print(N, P.G);
return OS;
}
@@ -144,13 +144,13 @@ raw_ostream &operator<< (raw_ostream &OS,
printRefHeader(OS, P.Obj, P.G);
OS << '(';
if (NodeId N = P.Obj.Addr->getReachingDef())
- OS << Print<NodeId>(N, P.G);
+ OS << Print(N, P.G);
OS << ',';
if (NodeId N = P.Obj.Addr->getPredecessor())
- OS << Print<NodeId>(N, P.G);
+ OS << Print(N, P.G);
OS << "):";
if (NodeId N = P.Obj.Addr->getSibling())
- OS << Print<NodeId>(N, P.G);
+ OS << Print(N, P.G);
return OS;
}
@@ -172,7 +172,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) {
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
unsigned N = P.Obj.size();
for (auto I : P.Obj) {
- OS << Print<NodeId>(I.Id, P.G);
+ OS << Print(I.Id, P.G);
if (--N)
OS << ' ';
}
@@ -182,7 +182,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
unsigned N = P.Obj.size();
for (auto I : P.Obj) {
- OS << Print<NodeId>(I, P.G);
+ OS << Print(I, P.G);
if (--N)
OS << ' ';
}
@@ -214,7 +214,7 @@ namespace {
} // end anonymous namespace
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
- OS << Print<NodeId>(P.Obj.Id, P.G) << ": phi ["
+ OS << Print(P.Obj.Id, P.G) << ": phi ["
<< PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
return OS;
}
@@ -222,7 +222,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<StmtNode *>> &P) {
const MachineInstr &MI = *P.Obj.Addr->getCode();
unsigned Opc = MI.getOpcode();
- OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc);
+ OS << Print(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc);
// Print the target for calls and branches (for readability).
if (MI.isCall() || MI.isBranch()) {
MachineInstr::const_mop_iterator T =
@@ -254,7 +254,7 @@ raw_ostream &operator<< (raw_ostream &OS,
OS << PrintNode<StmtNode*>(P.Obj, P.G);
break;
default:
- OS << "instr? " << Print<NodeId>(P.Obj.Id, P.G);
+ OS << "instr? " << Print(P.Obj.Id, P.G);
break;
}
return OS;
@@ -274,7 +274,7 @@ raw_ostream &operator<< (raw_ostream &OS,
}
};
- OS << Print<NodeId>(P.Obj.Id, P.G) << ": --- " << printMBBReference(*BB)
+ OS << Print(P.Obj.Id, P.G) << ": --- " << printMBBReference(*BB)
<< " --- preds(" << NP << "): ";
for (MachineBasicBlock *B : BB->predecessors())
Ns.push_back(B->getNumber());
@@ -294,7 +294,7 @@ raw_ostream &operator<< (raw_ostream &OS,
}
raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<FuncNode *>> &P) {
- OS << "DFG dump:[\n" << Print<NodeId>(P.Obj.Id, P.G) << ": Function: "
+ OS << "DFG dump:[\n" << Print(P.Obj.Id, P.G) << ": Function: "
<< P.Obj.Addr->getCode()->getName() << '\n';
for (auto I : P.Obj.Addr->members(P.G))
OS << PrintNode<BlockNode*>(I, P.G) << '\n';
@@ -305,7 +305,7 @@ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<FuncNode *>> &P) {
raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
OS << '{';
for (auto I : P.Obj)
- OS << ' ' << Print<RegisterRef>(I, P.G);
+ OS << ' ' << Print(I, P.G);
OS << " }";
return OS;
}
@@ -318,8 +318,8 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterAggr> &P) {
raw_ostream &operator<< (raw_ostream &OS,
const Print<DataFlowGraph::DefStack> &P) {
for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) {
- OS << Print<NodeId>(I->Id, P.G)
- << '<' << Print<RegisterRef>(I->Addr->getRegRef(P.G), P.G) << '>';
+ OS << Print(I->Id, P.G)
+ << '<' << Print(I->Addr->getRegRef(P.G), P.G) << '>';
I.down();
if (I != E)
OS << ' ';
@@ -623,7 +623,7 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
return true;
const MCInstrDesc &D = In.getDesc();
- if (!D.getImplicitDefs() && !D.getImplicitUses())
+ if (D.implicit_defs().empty() && D.implicit_uses().empty())
return false;
const MachineOperand &Op = In.getOperand(OpNum);
// If there is a sub-register, treat the operand as non-fixed. Currently,
@@ -632,14 +632,9 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
if (Op.getSubReg() != 0)
return false;
Register Reg = Op.getReg();
- const MCPhysReg *ImpR = Op.isDef() ? D.getImplicitDefs()
- : D.getImplicitUses();
- if (!ImpR)
- return false;
- while (*ImpR)
- if (*ImpR++ == Reg)
- return true;
- return false;
+ ArrayRef<MCPhysReg> ImpOps =
+ Op.isDef() ? D.implicit_defs() : D.implicit_uses();
+ return is_contained(ImpOps, Reg);
}
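
The isFixedReg rewrite replaces a walk over a sentinel-terminated MCPhysReg array with is_contained over the new sized implicit_defs()/implicit_uses() accessors. A standard-library rendering of the same before/after:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

using MCPhysReg = uint16_t;

// Old shape: implicit-def/use lists were null-terminated C arrays.
static bool containsOld(const MCPhysReg *ImpR, MCPhysReg Reg) {
  if (!ImpR) return false;
  while (*ImpR)
    if (*ImpR++ == Reg) return true;
  return false;
}

// New shape: a sized range; llvm::is_contained is std::find under the hood.
static bool containsNew(const std::vector<MCPhysReg> &Ops, MCPhysReg Reg) {
  return std::find(Ops.begin(), Ops.end(), Reg) != Ops.end();
}

int main() {
  const MCPhysReg Arr[] = {3, 5, 8, 0};
  std::vector<MCPhysReg> Vec = {3, 5, 8};
  assert(containsOld(Arr, 5) && containsNew(Vec, 5));
  assert(!containsOld(Arr, 9) && !containsNew(Vec, 9));
}
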
//
@@ -648,6 +643,14 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf)
+ : DefaultTOI(std::make_unique<TargetOperandInfo>(tii)), MF(mf), TII(tii),
+ TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(*DefaultTOI),
+ LiveIns(PRI) {
+}
+
+DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
: MF(mf), TII(tii), TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(toi),
LiveIns(PRI) {
@@ -1087,7 +1090,7 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
if (!Defined.insert(RR.Reg).second) {
MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
dbgs() << "Multiple definitions of register: "
- << Print<RegisterRef>(RR, *this) << " in\n " << *MI << "in "
+ << Print(RR, *this) << " in\n " << *MI << "in "
<< printMBBReference(*MI->getParent()) << '\n';
llvm_unreachable(nullptr);
}
@@ -1275,7 +1278,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
continue;
Register R = Op.getReg();
- if (!R || !Register::isPhysicalRegister(R))
+ if (!R || !R.isPhysical())
continue;
uint16_t Flags = NodeAttrs::None;
if (TOI.isPreserving(In, OpN)) {
@@ -1320,7 +1323,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
continue;
Register R = Op.getReg();
- if (!R || !Register::isPhysicalRegister(R) || DoneDefs.test(R))
+ if (!R || !R.isPhysical() || DoneDefs.test(R))
continue;
RegisterRef RR = makeRegRef(Op);
uint16_t Flags = NodeAttrs::None;
@@ -1349,7 +1352,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (!Op.isReg() || !Op.isUse())
continue;
Register R = Op.getReg();
- if (!R || !Register::isPhysicalRegister(R))
+ if (!R || !R.isPhysical())
continue;
uint16_t Flags = NodeAttrs::None;
if (Op.isUndef())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
index d8eac20d16b6..902b29d41ce1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -64,7 +64,7 @@ namespace rdf {
for (const auto &I : P.Obj) {
OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{';
for (auto J = I.second.begin(), E = I.second.end(); J != E; ) {
- OS << Print<NodeId>(J->first, P.G) << PrintLaneMaskOpt(J->second);
+ OS << Print(J->first, P.G) << PrintLaneMaskOpt(J->second);
if (++J != E)
OS << ',';
}
@@ -619,10 +619,9 @@ void Liveness::computePhiInfo() {
if (Trace) {
dbgs() << "Phi-up-to-phi map with intervening defs:\n";
for (auto I : PhiUp) {
- dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {";
+ dbgs() << "phi " << Print(I.first, DFG) << " -> {";
for (auto R : I.second)
- dbgs() << ' ' << Print<NodeId>(R.first, DFG)
- << Print<RegisterAggr>(R.second, DFG);
+ dbgs() << ' ' << Print(R.first, DFG) << Print(R.second, DFG);
dbgs() << " }\n";
}
}
@@ -720,16 +719,16 @@ void Liveness::computePhiInfo() {
if (Trace) {
dbgs() << "Real use map:\n";
for (auto I : RealUseMap) {
- dbgs() << "phi " << Print<NodeId>(I.first, DFG);
+ dbgs() << "phi " << Print(I.first, DFG);
NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first);
NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG);
if (!Ds.empty()) {
RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(DFG);
- dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>';
+ dbgs() << '<' << Print(RR, DFG) << '>';
} else {
dbgs() << "<noreg>";
}
- dbgs() << " -> " << Print<RefMap>(I.second, DFG) << '\n';
+ dbgs() << " -> " << Print(I.second, DFG) << '\n';
}
}
}
@@ -788,7 +787,7 @@ void Liveness::computeLiveIns() {
dbgs() << "Phi live-on-entry map:\n";
for (auto &I : PhiLON)
dbgs() << "block #" << I.first->getNumber() << " -> "
- << Print<RefMap>(I.second, DFG) << '\n';
+ << Print(I.second, DFG) << '\n';
}
// Build the phi live-on-exit map. Each phi node has some set of reached
@@ -851,7 +850,7 @@ void Liveness::computeLiveIns() {
dbgs() << "Phi live-on-exit map:\n";
for (auto &I : PhiLOX)
dbgs() << "block #" << I.first->getNumber() << " -> "
- << Print<RefMap>(I.second, DFG) << '\n';
+ << Print(I.second, DFG) << '\n';
}
RefMap LiveIn;
@@ -869,9 +868,9 @@ void Liveness::computeLiveIns() {
llvm::sort(LV);
dbgs() << printMBBReference(B) << "\t rec = {";
for (auto I : LV)
- dbgs() << ' ' << Print<RegisterRef>(I, DFG);
+ dbgs() << ' ' << Print(I, DFG);
dbgs() << " }\n";
- //dbgs() << "\tcomp = " << Print<RegisterAggr>(LiveMap[&B], DFG) << '\n';
+ //dbgs() << "\tcomp = " << Print(LiveMap[&B], DFG) << '\n';
LV.clear();
const RegisterAggr &LG = LiveMap[&B];
@@ -880,7 +879,7 @@ void Liveness::computeLiveIns() {
llvm::sort(LV);
dbgs() << "\tcomp = {";
for (auto I : LV)
- dbgs() << ' ' << Print<RegisterRef>(I, DFG);
+ dbgs() << ' ' << Print(I, DFG);
dbgs() << " }\n";
}
@@ -942,7 +941,7 @@ void Liveness::resetKills(MachineBasicBlock *B) {
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
continue;
Register R = Op.getReg();
- if (!Register::isPhysicalRegister(R))
+ if (!R.isPhysical())
continue;
for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
Live.reset(*SR);
@@ -951,7 +950,7 @@ void Liveness::resetKills(MachineBasicBlock *B) {
if (!Op.isReg() || !Op.isUse() || Op.isUndef())
continue;
Register R = Op.getReg();
- if (!Register::isPhysicalRegister(R))
+ if (!R.isPhysical())
continue;
bool IsLive = false;
for (MCRegAliasIterator AR(R, &TRI, true); AR.isValid(); ++AR) {
@@ -1018,8 +1017,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
for (auto *I : *N)
dbgs() << ' ' << I->getBlock()->getNumber();
dbgs() << " }\n";
- dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
+ dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n';
}
// Add reaching defs of phi uses that are live on exit from this block.
@@ -1029,8 +1028,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
if (Trace) {
dbgs() << "after LOX\n";
- dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
+ dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n';
}
// The LiveIn map at this point has all defs that are live-on-exit from B,
@@ -1113,8 +1112,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
if (Trace) {
dbgs() << "after defs in block\n";
- dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
+ dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n';
}
// Scan the block for upward-exposed uses and add them to the tracking set.
@@ -1134,8 +1133,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
if (Trace) {
dbgs() << "after uses in block\n";
- dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
+ dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print(LiveMap[B], DFG) << '\n';
}
// Phi uses should not be propagated up the dominator tree, since they
@@ -1151,8 +1150,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
if (Trace) {
dbgs() << "after phi uses in block\n";
- dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterAggr>(Local, DFG) << '\n';
+ dbgs() << " LiveIn: " << Print(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print(Local, DFG) << '\n';
}
for (auto *C : IIDF[B]) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
index 990dd84c829d..900f0e9079d6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -156,7 +156,7 @@ void RegAllocBase::allocatePhysRegs() {
continue;
}
LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
- assert(Register::isVirtualRegister(SplitVirtReg->reg()) &&
+ assert(SplitVirtReg->reg().isVirtual() &&
"expect split value in virtual register");
enqueue(SplitVirtReg);
++NumNewQueued;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index ee03feda796f..b1743d3f987d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -95,7 +95,7 @@ template <> Pass *llvm::callDefaultCtor<RegAllocEvictionAdvisorAnalysis>() {
Ret = new DefaultEvictionAdvisorAnalysis(/*NotAsRequested*/ false);
break;
case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development:
-#if defined(LLVM_HAVE_TF_API)
+#if defined(LLVM_HAVE_TFLITE)
Ret = createDevelopmentModeAdvisor();
#endif
break;
@@ -210,7 +210,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
// Check if any interfering live range is heavier than MaxWeight.
for (const LiveInterval *Intf : reverse(Interferences)) {
- assert(Register::isVirtualRegister(Intf->reg()) &&
+ assert(Intf->reg().isVirtual() &&
"Only expecting virtual register interference from query");
// Do not allow eviction of a virtual register if we are in the middle
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index d6a3997e4b70..46838570a2fc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -10,7 +10,6 @@
#define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Register.h"
@@ -126,9 +125,9 @@ protected:
// Get the upper limit of elements in the given Order we need to analyze.
// TODO: this is a heuristic; we could consider learning it.
- Optional<unsigned> getOrderLimit(const LiveInterval &VirtReg,
- const AllocationOrder &Order,
- unsigned CostPerUseLimit) const;
+ std::optional<unsigned> getOrderLimit(const LiveInterval &VirtReg,
+ const AllocationOrder &Order,
+ unsigned CostPerUseLimit) const;
// Determine if it's worth trying to allocate this reg, given the
// CostPerUseLimit
@@ -177,6 +176,8 @@ public:
virtual std::unique_ptr<RegAllocEvictionAdvisor>
getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
AdvisorMode getAdvisorMode() const { return Mode; }
+ virtual void logRewardIfNeeded(const MachineFunction &MF,
+ llvm::function_ref<float()> GetReward){};
protected:
// This analysis preserves everything, and subclasses may have additional
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index cb552f212fbb..775e66e48406 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -281,6 +281,7 @@ namespace {
Register traceCopies(Register VirtReg) const;
Register traceCopyChain(Register Reg) const;
+ bool shouldAllocateRegister(const Register Reg) const;
int getStackSpaceFor(Register VirtReg);
void spill(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg AssignedReg, bool Kill, bool LiveOut);
@@ -300,6 +301,12 @@ char RegAllocFast::ID = 0;
INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
false)
+bool RegAllocFast::shouldAllocateRegister(const Register Reg) const {
+ assert(Reg.isVirtual());
+ const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
+ return ShouldAllocateClass(*TRI, RC);
+}
+
void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
RegUnitStates[*UI] = NewState;
@@ -428,7 +435,8 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
- TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI);
+ TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI,
+ VirtReg);
++NumStores;
MachineBasicBlock::iterator FirstTerm = MBB->getFirstTerminator();
@@ -485,7 +493,7 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg,
<< printReg(PhysReg, TRI) << '\n');
int FI = getStackSpaceFor(VirtReg);
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI);
+ TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI, VirtReg);
++NumLoads;
}
@@ -841,7 +849,9 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
assert(MO.isUndef() && "expected undef use");
Register VirtReg = MO.getReg();
- assert(Register::isVirtualRegister(VirtReg) && "Expected virtreg");
+ assert(VirtReg.isVirtual() && "Expected virtreg");
+ if (!shouldAllocateRegister(VirtReg))
+ return;
LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
MCPhysReg PhysReg;
@@ -867,6 +877,8 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
/// (tied or earlyclobber) that may interfere with preassigned uses.
void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
Register VirtReg) {
+ if (!shouldAllocateRegister(VirtReg))
+ return;
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
if (LRI != LiveVirtRegs.end()) {
MCPhysReg PrevReg = LRI->PhysReg;
@@ -900,6 +912,8 @@ void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
Register VirtReg, bool LookAtPhysRegUses) {
assert(VirtReg.isVirtual() && "Not a virtual register");
+ if (!shouldAllocateRegister(VirtReg))
+ return;
MachineOperand &MO = MI.getOperand(OpNum);
LiveRegMap::iterator LRI;
bool New;
@@ -950,6 +964,8 @@ void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
Register VirtReg) {
assert(VirtReg.isVirtual() && "Not a virtual register");
+ if (!shouldAllocateRegister(VirtReg))
+ return;
MachineOperand &MO = MI.getOperand(OpNum);
LiveRegMap::iterator LRI;
bool New;
@@ -974,8 +990,13 @@ void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
Register Hint;
if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) {
Hint = MI.getOperand(0).getReg();
- assert(Hint.isPhysical() &&
- "Copy destination should already be assigned");
+ if (Hint.isVirtual()) {
+ assert(!shouldAllocateRegister(Hint));
+ Hint = Register();
+ } else {
+ assert(Hint.isPhysical() &&
+ "Copy destination should already be assigned");
+ }
}
allocVirtReg(MI, *LRI, Hint, false);
if (LRI->Error) {
@@ -1083,6 +1104,8 @@ void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts
assert(RegClassDefCounts.size() == TRI->getNumRegClasses());
if (Reg.isVirtual()) {
+ if (!shouldAllocateRegister(Reg))
+ return;
const TargetRegisterClass *OpRC = MRI->getRegClass(Reg);
for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
RCIdx != RCIdxEnd; ++RCIdx) {
@@ -1142,6 +1165,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
if (MO.isReg()) {
Register Reg = MO.getReg();
if (Reg.isVirtual()) {
+ if (!shouldAllocateRegister(Reg))
+ continue;
if (MO.isDef()) {
HasDef = true;
HasVRegDef = true;
@@ -1205,7 +1230,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
if (MO.isDef()) {
- if (Reg.isVirtual())
+ if (Reg.isVirtual() && shouldAllocateRegister(Reg))
DefOperandIndexes.push_back(I);
addRegClassDefCounts(RegClassDefCounts, Reg);
@@ -1295,6 +1320,10 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
Register Reg = MO.getReg();
if (!Reg)
continue;
+ if (Reg.isVirtual()) {
+ assert(!shouldAllocateRegister(Reg));
+ continue;
+ }
assert(Reg.isPhysical());
if (MRI->isReserved(Reg))
continue;
@@ -1329,7 +1358,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
if (MRI->isReserved(Reg))
continue;
bool displacedAny = usePhysReg(MI, Reg);
- if (!displacedAny && !MRI->isReserved(Reg))
+ if (!displacedAny)
MO.setIsKill(true);
}
}
@@ -1341,7 +1370,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg = MO.getReg();
- if (!Reg.isVirtual())
+ if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
continue;
if (MO.isUndef()) {
@@ -1368,7 +1397,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg = MO.getReg();
- if (!Reg.isVirtual())
+ if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
continue;
assert(MO.isUndef() && "Should only have undef virtreg uses left");
@@ -1381,16 +1410,15 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
for (MachineOperand &MO : llvm::reverse(MI.operands())) {
if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber())
continue;
- // subreg defs don't free the full register. We left the subreg number
- // around as a marker in setPhysReg() to recognize this case here.
- if (MO.getSubReg() != 0) {
- MO.setSubReg(0);
- continue;
- }
+ assert(!MO.getSubReg() && "should be already handled in def processing");
Register Reg = MO.getReg();
if (!Reg)
continue;
+ if (Reg.isVirtual()) {
+ assert(!shouldAllocateRegister(Reg));
+ continue;
+ }
assert(Reg.isPhysical() && "should have register assigned");
// We sometimes get odd situations like:
@@ -1418,7 +1446,9 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
// Ignore DBG_VALUEs that aren't based on virtual registers. These are
// mostly constants and frame indices.
for (Register Reg : MI.getUsedDebugRegs()) {
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
+ continue;
+ if (!shouldAllocateRegister(Reg))
continue;
// Already spilled to a stackslot?
@@ -1460,7 +1490,7 @@ void RegAllocFast::handleBundle(MachineInstr &MI) {
continue;
Register Reg = MO.getReg();
- if (!Reg.isVirtual())
+ if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
continue;
DenseMap<Register, MCPhysReg>::iterator DI;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 9c6cb7c3a4e2..b43a4d2a4b85 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -17,12 +17,12 @@
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
+#include "RegAllocPriorityAdvisor.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -163,6 +163,7 @@ INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis)
+INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysis)
INITIALIZE_PASS_END(RAGreedy, "greedy",
"Greedy Register Allocator", false, false)
@@ -219,6 +220,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<SpillPlacement>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
+ AU.addRequired<RegAllocPriorityAdvisorAnalysis>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -279,16 +281,28 @@ void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }
void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
// Prioritize live ranges by size, assigning larger ranges first.
// The queue holds (size, reg) pairs.
- const unsigned Size = LI->getSize();
const Register Reg = LI->reg();
assert(Reg.isVirtual() && "Can only enqueue virtual registers");
- unsigned Prio;
auto Stage = ExtraInfo->getOrInitStage(Reg);
if (Stage == RS_New) {
Stage = RS_Assign;
ExtraInfo->setStage(Reg, Stage);
}
+
+ unsigned Ret = PriorityAdvisor->getPriority(*LI);
+
+ // The virtual register number is a tie breaker for same-sized ranges.
+ // Give lower vreg numbers higher priority to assign them first.
+ CurQueue.push(std::make_pair(Ret, ~Reg));
+}
+
+unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
+ const unsigned Size = LI.getSize();
+ const Register Reg = LI.reg();
+ unsigned Prio;
+ LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);
+
if (Stage == RS_Split) {
// Unsplit ranges that couldn't be allocated immediately are deferred until
// everything else has been allocated.
@@ -304,23 +318,24 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
- bool ForceGlobal = !ReverseLocalAssignment &&
- (Size / SlotIndex::InstrDist) >
- (2 * RegClassInfo.getNumAllocatableRegs(&RC));
+ bool ForceGlobal = RC.GlobalPriority ||
+ (!ReverseLocalAssignment &&
+ (Size / SlotIndex::InstrDist) >
+ (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
unsigned GlobalBit = 0;
- if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
- LIS->intervalIsInOneMBB(*LI)) {
+ if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
+ LIS->intervalIsInOneMBB(LI)) {
// Allocate original local ranges in linear instruction order. Since they
// are singly defined, this produces optimal coloring in the absence of
// global interference and other constraints.
if (!ReverseLocalAssignment)
- Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
+ Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
else {
// Allocating bottom up may allow many short LRGs to be assigned first
// to one of the cheap registers. This could be much faster for very
// large blocks on targets with many physical registers.
- Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
+ Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
}
} else {
// Allocate global and split ranges in long->short order. Long ranges that
@@ -329,6 +344,22 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
Prio = Size;
GlobalBit = 1;
}
+
+ // Priority bit layout:
+ // 31 RS_Assign priority
+ // 30 Preference priority
+ // if (RegClassPriorityTrumpsGlobalness)
+ // 29-25 AllocPriority
+ // 24 GlobalBit
+ // else
+ // 29 Global bit
+ // 28-24 AllocPriority
+ // 0-23 Size/Instr distance
+
+ // Clamp the size to fit with the priority masking scheme
+ Prio = std::min(Prio, (unsigned)maxUIntN(24));
+ assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");
+
if (RegClassPriorityTrumpsGlobalness)
Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
else
@@ -341,9 +372,8 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
if (VRM->hasKnownPreference(Reg))
Prio |= (1u << 30);
}
- // The virtual register number is a tie breaker for same-sized ranges.
- // Give lower vreg numbers higher priority to assign them first.
- CurQueue.push(std::make_pair(Prio, ~Reg));
+
+ return Prio;
}
const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
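
The bit-layout comment added to getPriority above packs several fields into one 32-bit priority word. A small plain-C++ sketch making the RegClassPriorityTrumpsGlobalness branch of that packing concrete (field widths taken from the comment; toy values):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Pack per the comment: bits 0-23 size/instr distance, bit 24 global bit,
// bits 25-29 AllocationPriority (when class priority trumps globalness).
static uint32_t packPriority(uint32_t Size, bool Global, uint32_t AllocPrio) {
  uint32_t Prio = std::min<uint32_t>(Size, (1u << 24) - 1); // clamp to 24 bits
  assert(AllocPrio < 32 && "allocation priority overflow"); // fits in 5 bits
  Prio |= AllocPrio << 25 | uint32_t(Global) << 24;
  return Prio;
}

int main() {
  uint32_t P = packPriority(0x123456, /*Global=*/true, /*AllocPrio=*/3);
  assert((P & 0xFFFFFF) == 0x123456);  // size field survives intact
  assert((P >> 24 & 1) == 1);          // global bit
  assert((P >> 25 & 0x1F) == 3);       // allocation priority
}
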
@@ -493,7 +523,7 @@ bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
return !Matrix->isPhysRegUsed(PhysReg);
}
-Optional<unsigned>
+std::optional<unsigned>
RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
const AllocationOrder &Order,
unsigned CostPerUseLimit) const {
@@ -506,7 +536,7 @@ RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
if (MinCost >= CostPerUseLimit) {
LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
<< MinCost << ", no cheaper registers to be found.\n");
- return None;
+ return std::nullopt;
}
// It is normal for register classes to have a long tail of registers with
@@ -651,7 +681,7 @@ bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
assert(T < GroupSize && "Array overflow");
TBS[T] = Number;
if (++T == GroupSize) {
- SpillPlacer->addLinks(makeArrayRef(TBS, T));
+ SpillPlacer->addLinks(ArrayRef(TBS, T));
T = 0;
}
continue;
@@ -680,13 +710,13 @@ bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
BCS[B].Exit = SpillPlacement::PrefSpill;
if (++B == GroupSize) {
- SpillPlacer->addConstraints(makeArrayRef(BCS, B));
+ SpillPlacer->addConstraints(ArrayRef(BCS, B));
B = 0;
}
}
- SpillPlacer->addConstraints(makeArrayRef(BCS, B));
- SpillPlacer->addLinks(makeArrayRef(TBS, T));
+ SpillPlacer->addConstraints(ArrayRef(BCS, B));
+ SpillPlacer->addLinks(ArrayRef(TBS, T));
return true;
}
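
Like the Print changes in RDFGraph, the `makeArrayRef(TBS, T)` -> `ArrayRef(TBS, T)` rewrites rely on C++17 deduction guides, which llvm::ArrayRef gained when makeArrayRef was deprecated. A tiny sketch, assuming llvm/ADT/ArrayRef.h:

#include "llvm/ADT/ArrayRef.h"
#include <cassert>

int main() {
  unsigned TBS[4] = {10, 11, 12, 13};
  unsigned T = 2;
  // Pre-patch spelling: llvm::makeArrayRef(TBS, T) (now deprecated).
  llvm::ArrayRef<unsigned> Old(TBS, T);
  // Post-patch: the (pointer, length) deduction guide infers ArrayRef<unsigned>.
  auto New = llvm::ArrayRef(TBS, T);
  assert(Old.size() == 2 && New.size() == 2 && New[1] == 11);
}
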
@@ -727,7 +757,7 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Compute through constraints from the interference, or assume that all
// through blocks prefer spilling when forming compact regions.
- auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
+ auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
if (Cand.PhysReg) {
if (!addThroughConstraints(Cand.Intf, NewBlocks))
return false;
@@ -1227,6 +1257,55 @@ static unsigned getNumAllocatableRegsForConstraints(
return RCI.getNumAllocatableRegs(ConstrainedRC);
}
+static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const MachineInstr &MI, Register Reg) {
+ LaneBitmask Mask;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0 && MO.isUse()) {
+ Mask |= MRI.getMaxLaneMaskForVReg(Reg);
+ continue;
+ }
+
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
+ if (MO.isDef()) {
+ if (!MO.isUndef())
+ Mask |= ~SubRegMask;
+ } else
+ Mask |= SubRegMask;
+ }
+
+ return Mask;
+}
+
+/// Return true if \p MI at \p Use reads a subset of the lanes live in \p
+/// VirtReg.
+static bool readsLaneSubset(const MachineRegisterInfo &MRI,
+ const MachineInstr *MI, const LiveInterval &VirtReg,
+ const TargetRegisterInfo *TRI, SlotIndex Use) {
+ // Early check the common case.
+ if (MI->isCopy() &&
+ MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg())
+ return false;
+
+ // FIXME: We're only considering uses; should we consider defs too?
+ LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());
+
+ LaneBitmask LiveAtMask;
+ for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
+ if (S.liveAt(Use))
+ LiveAtMask |= S.LaneMask;
+ }
+
+ // If the live lanes aren't different from the lanes used by the instruction,
+ // this doesn't help.
+ return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
+}
+
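
readsLaneSubset compares the lanes an instruction reads against the lanes live at that point, so tryInstructionSplit can split around uses that touch only part of the register. A plain-bitmask miniature of the underlying idea (toy masks only; the real predicate also folds in TRI->getCoveringLanes(), which this sketch deliberately ignores):

#include <cassert>
#include <cstdint>

// Toy lane masks: one bit per subregister lane.
static bool readsStrictSubset(uint32_t ReadMask, uint32_t LiveAtMask) {
  // The instruction reads no lane that isn't live, and fewer lanes than
  // are live overall -- splitting here can shrink the live range.
  return (ReadMask & ~LiveAtMask) == 0 && ReadMask != LiveAtMask;
}

int main() {
  assert(readsStrictSubset(0b0011, 0b1111));  // reads 2 of 4 live lanes
  assert(!readsStrictSubset(0b0011, 0b0011)); // reads everything live
  assert(!readsStrictSubset(0b0100, 0b0011)); // reads a non-live lane
}
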
/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
@@ -1239,8 +1318,13 @@ unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
SmallVectorImpl<Register> &NewVRegs) {
const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
// There is no point to this if there are no larger sub-classes.
- if (!RegClassInfo.isProperSubClass(CurRC))
- return 0;
+
+ bool SplitSubClass = true;
+ if (!RegClassInfo.isProperSubClass(CurRC)) {
+ if (!VirtReg.hasSubRanges())
+ return 0;
+ SplitSubClass = false;
+ }
// Always enable split spill mode, since we're effectively spilling to a
// register.
@@ -1263,14 +1347,19 @@ unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
// Otherwise, splitting just inserts uncoalescable copies that do not help
// the allocation.
for (const SlotIndex Use : Uses) {
- if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
+ if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
if (MI->isFullCopy() ||
- SuperRCNumAllocatableRegs ==
- getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
- TII, TRI, RegClassInfo)) {
+ (SplitSubClass &&
+ SuperRCNumAllocatableRegs ==
+ getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
+ TII, TRI, RegClassInfo)) ||
+ // TODO: Handle split for subranges with subclass constraints?
+ (!SplitSubClass && VirtReg.hasSubRanges() &&
+ !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) {
LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
continue;
}
+ }
SE->openIntv();
SlotIndex SegStart = SE->enterIntvBefore(Use);
SlotIndex SegStop = SE->leaveIntvAfter(Use);
@@ -2113,7 +2202,7 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
Reg = RecoloringCandidates.pop_back_val();
// We cannot recolor physical registers.
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
continue;
// This may be a skipped class
@@ -2207,7 +2296,7 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
for (const LiveInterval *LI : SetOfBrokenHints) {
- assert(Register::isVirtualRegister(LI->reg()) &&
+ assert(LI->reg().isVirtual() &&
"Recoloring is possible only for virtual registers");
// Some dead defs may be around (e.g., because of debug uses).
// Ignore those.
@@ -2369,11 +2458,25 @@ RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
};
for (MachineInstr &MI : MBB) {
if (MI.isCopy()) {
- MachineOperand &Dest = MI.getOperand(0);
- MachineOperand &Src = MI.getOperand(1);
- if (Dest.isReg() && Src.isReg() && Dest.getReg().isVirtual() &&
- Src.getReg().isVirtual())
- ++Stats.Copies;
+ const MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Src = MI.getOperand(1);
+ Register SrcReg = Src.getReg();
+ Register DestReg = Dest.getReg();
+ // Only count `COPY`s with a virtual register as source or destination.
+ if (SrcReg.isVirtual() || DestReg.isVirtual()) {
+ if (SrcReg.isVirtual()) {
+ SrcReg = VRM->getPhys(SrcReg);
+ if (Src.getSubReg())
+ SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
+ }
+ if (DestReg.isVirtual()) {
+ DestReg = VRM->getPhys(DestReg);
+ if (Dest.getSubReg())
+ DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
+ }
+ if (SrcReg != DestReg)
+ ++Stats.Copies;
+ }
continue;
}
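With virtual operands mapped through VRM to their assigned physical registers (adjusted for subregister indices), the statistic now skips identity copies, which the virtual-register rewriter deletes anyway. A hypothetical walkthrough, assuming the allocator assigned %v0 to $eax:

    // $eax = COPY %v0   -> SrcReg = getPhys(%v0) = $eax, DestReg = $eax
    //                      SrcReg == DestReg: identity copy, not counted.
    // $ecx = COPY %v0   -> SrcReg = $eax, DestReg = $ecx
    //                      SrcReg != DestReg: a real copy, counted.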
@@ -2540,6 +2643,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
ExtraInfo.emplace();
EvictAdvisor =
getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
+ PriorityAdvisor =
+ getAnalysis<RegAllocPriorityAdvisorAnalysis>().getAdvisor(*MF, *this);
VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
index 483f59ed8e8e..e0ac88c0aeb9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -15,6 +15,7 @@
#include "InterferenceCache.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
+#include "RegAllocPriorityAdvisor.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
@@ -79,7 +80,7 @@ public:
unsigned NextCascade = 1;
public:
- ExtraRegInfo() = default;
+ ExtraRegInfo() {}
ExtraRegInfo(const ExtraRegInfo &) = delete;
LiveRangeStage getStage(Register Reg) const { return Info[Reg].Stage; }
@@ -147,10 +148,17 @@ public:
size_t getQueueSize() const { return Queue.size(); }
// end (interface to eviction advisers)
+ // Interface to priority advisers
+ bool getRegClassPriorityTrumpsGlobalness() const {
+ return RegClassPriorityTrumpsGlobalness;
+ }
+ bool getReverseLocalAssignment() const { return ReverseLocalAssignment; }
+ // end (interface to priority advisers)
+
private:
// Convenient shortcuts.
using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
- using SmallLISet = SmallPtrSet<const LiveInterval *, 4>;
+ using SmallLISet = SmallSetVector<const LiveInterval *, 4>;
// We need to track all tentative recolorings so we can roll back any
// successful and unsuccessful recoloring attempts.
@@ -177,9 +185,11 @@ private:
std::unique_ptr<Spiller> SpillerInstance;
PQueue Queue;
std::unique_ptr<VirtRegAuxInfo> VRAI;
- Optional<ExtraRegInfo> ExtraInfo;
+ std::optional<ExtraRegInfo> ExtraInfo;
std::unique_ptr<RegAllocEvictionAdvisor> EvictAdvisor;
+ std::unique_ptr<RegAllocPriorityAdvisor> PriorityAdvisor;
+
// Enum CutOffStage to keep a track whether the register allocation failed
// because of the cutoffs encountered in last chance recoloring.
// Note: This is used as bitmask. New value should be next power of 2.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
new file mode 100644
index 000000000000..b3a13cc92316
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
@@ -0,0 +1,114 @@
+//===- RegAllocPriorityAdvisor.cpp - live ranges priority advisor ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the default priority advisor and of the Analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocPriorityAdvisor.h"
+#include "RegAllocGreedy.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+static cl::opt<RegAllocPriorityAdvisorAnalysis::AdvisorMode> Mode(
+ "regalloc-enable-priority-advisor", cl::Hidden,
+ cl::init(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default),
+ cl::desc("Enable regalloc advisor mode"),
+ cl::values(
+ clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default,
+ "default", "Default"),
+ clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release,
+ "release", "precompiled"),
+ clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development,
+ "development", "for training")));
+
+char RegAllocPriorityAdvisorAnalysis::ID = 0;
+INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority",
+ "Regalloc priority policy", false, true)
+
+namespace {
+class DefaultPriorityAdvisorAnalysis final
+ : public RegAllocPriorityAdvisorAnalysis {
+public:
+ DefaultPriorityAdvisorAnalysis(bool NotAsRequested)
+ : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Default),
+ NotAsRequested(NotAsRequested) {}
+
+ // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocPriorityAdvisorAnalysis *R) {
+ return R->getAdvisorMode() == AdvisorMode::Default;
+ }
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<SlotIndexes>();
+ RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU);
+ }
+ std::unique_ptr<RegAllocPriorityAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ return std::make_unique<DefaultPriorityAdvisor>(
+ MF, RA, &getAnalysis<SlotIndexes>());
+ }
+ bool doInitialization(Module &M) override {
+ if (NotAsRequested)
+ M.getContext().emitError("Requested regalloc priority advisor analysis "
+ "could be created. Using default");
+ return RegAllocPriorityAdvisorAnalysis::doInitialization(M);
+ }
+ const bool NotAsRequested;
+};
+} // namespace
+
+template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() {
+ Pass *Ret = nullptr;
+ switch (Mode) {
+ case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default:
+ Ret = new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ false);
+ break;
+ case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development:
+#if defined(LLVM_HAVE_TFLITE)
+ Ret = createDevelopmentModePriorityAdvisor();
+#endif
+ break;
+ case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release:
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCPRIORITYMODEL)
+ Ret = createReleaseModePriorityAdvisor();
+#endif
+ break;
+ }
+ if (Ret)
+ return Ret;
+ return new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ true);
+}
+
+StringRef RegAllocPriorityAdvisorAnalysis::getPassName() const {
+ switch (getAdvisorMode()) {
+ case AdvisorMode::Default:
+ return "Default Regalloc Priority Advisor";
+ case AdvisorMode::Release:
+ return "Release mode Regalloc Priority Advisor";
+ case AdvisorMode::Development:
+ return "Development mode Regalloc Priority Advisor";
+ }
+ llvm_unreachable("Unknown advisor kind");
+}
+
+RegAllocPriorityAdvisor::RegAllocPriorityAdvisor(const MachineFunction &MF,
+ const RAGreedy &RA,
+ SlotIndexes *const Indexes)
+ : RA(RA), LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()),
+ MRI(&VRM->getRegInfo()), TRI(MF.getSubtarget().getRegisterInfo()),
+ RegClassInfo(RA.getRegClassInfo()), Indexes(Indexes),
+ RegClassPriorityTrumpsGlobalness(
+ RA.getRegClassPriorityTrumpsGlobalness()),
+ ReverseLocalAssignment(RA.getReverseLocalAssignment()) {}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
new file mode 100644
index 000000000000..1e9fa967214c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
@@ -0,0 +1,96 @@
+//===- RegAllocPriorityAdvisor.h - live ranges priority advisor -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
+#define LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
+
+#include "RegAllocEvictionAdvisor.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+class MachineFunction;
+class VirtRegMap;
+class RAGreedy;
+
+/// Interface to the priority advisor, which is responsible for prioritizing
+/// live ranges.
+class RegAllocPriorityAdvisor {
+public:
+ RegAllocPriorityAdvisor(const RegAllocPriorityAdvisor &) = delete;
+ RegAllocPriorityAdvisor(RegAllocPriorityAdvisor &&) = delete;
+ virtual ~RegAllocPriorityAdvisor() = default;
+
+ /// Find the priority value for a live range. The ML-based advisors model
+ /// this as a float internally, but the interface returns an unsigned.
+ virtual unsigned getPriority(const LiveInterval &LI) const = 0;
+
+ RegAllocPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *const Indexes);
+
+protected:
+ const RAGreedy &RA;
+ LiveIntervals *const LIS;
+ VirtRegMap *const VRM;
+ MachineRegisterInfo *const MRI;
+ const TargetRegisterInfo *const TRI;
+ const RegisterClassInfo &RegClassInfo;
+ SlotIndexes *const Indexes;
+ const bool RegClassPriorityTrumpsGlobalness;
+ const bool ReverseLocalAssignment;
+};
+
+class DefaultPriorityAdvisor : public RegAllocPriorityAdvisor {
+public:
+ DefaultPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ SlotIndexes *const Indexes)
+ : RegAllocPriorityAdvisor(MF, RA, Indexes) {}
+
+private:
+ unsigned getPriority(const LiveInterval &LI) const override;
+};
+
+class RegAllocPriorityAdvisorAnalysis : public ImmutablePass {
+public:
+ enum class AdvisorMode : int { Default, Release, Development };
+
+ RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode)
+ : ImmutablePass(ID), Mode(Mode) {}
+ static char ID;
+
+ /// Get an advisor for the given context (i.e. machine function, etc)
+ virtual std::unique_ptr<RegAllocPriorityAdvisor>
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
+ AdvisorMode getAdvisorMode() const { return Mode; }
+ virtual void logRewardIfNeeded(const MachineFunction &MF,
+ llvm::function_ref<float()> GetReward) {}
+
+protected:
+ // This analysis preserves everything, and subclasses may have additional
+ // requirements.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+private:
+ StringRef getPassName() const override;
+ const AdvisorMode Mode;
+};
+
+/// Specialization for the API used by the analysis infrastructure to create
+/// an instance of the priority advisor.
+template <> Pass *callDefaultCtor<RegAllocPriorityAdvisorAnalysis>();
+
+RegAllocPriorityAdvisorAnalysis *createReleaseModePriorityAdvisor();
+
+RegAllocPriorityAdvisorAnalysis *createDevelopmentModePriorityAdvisor();
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
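The pure-virtual getPriority() hook is the only thing a new advisor must supply. A minimal sketch of a custom advisor built on this interface — the class name and its policy are hypothetical, and it assumes the headers above plus <climits>:

    #include <climits>

    class ShortRangeFirstAdvisor : public llvm::RegAllocPriorityAdvisor {
    public:
      // Reuse the base constructor (MF, RA, Indexes).
      using RegAllocPriorityAdvisor::RegAllocPriorityAdvisor;

      // Hypothetical policy: shorter live ranges get a higher priority,
      // so they are dequeued and assigned first.
      unsigned getPriority(const llvm::LiveInterval &LI) const override {
        return UINT_MAX - LI.getSize();
      }
    };

Plugging it in would also require a RegAllocPriorityAdvisorAnalysis subclass whose getAdvisor() returns an instance, mirroring DefaultPriorityAdvisorAnalysis in the .cpp above.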
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
index 17e3eeef664b..e420283dfcfa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
@@ -14,8 +14,6 @@
#include "RegAllocScore.h"
#include "llvm/ADT/DenseMapInfo.h"
-#include "llvm/ADT/STLForwardCompat.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/ilist_iterator.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
index de851ffc7fdc..27ed17b9f4f6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
@@ -79,7 +79,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
const RegisterBank *
RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
// FIXME: This was probably a copy to a virtual register that does have a
// type we could use.
return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT());
@@ -97,7 +97,7 @@ RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterClass &
RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
const TargetRegisterInfo &TRI) const {
- assert(Register::isPhysicalRegister(Reg) && "Reg must be a physreg");
+ assert(Reg.isPhysical() && "Reg must be a physreg");
const auto &RegRCIt = PhysRegMinimalRCs.find(Reg);
if (RegRCIt != PhysRegMinimalRCs.end())
return *RegRCIt->second;
@@ -449,6 +449,9 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
LLVM_DEBUG(dbgs() << " is $noreg, nothing to be done\n");
continue;
}
+ LLT Ty = MRI.getType(MO.getReg());
+ if (!Ty.isValid())
+ continue;
assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns !=
0 &&
"Invalid mapping");
@@ -490,7 +493,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
unsigned RegisterBankInfo::getSizeInBits(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
// The size is not directly available for physical registers.
// Instead, we need to access a register class that contains Reg and
// get the size of that register class.
@@ -601,6 +604,7 @@ bool RegisterBankInfo::InstructionMapping::verify(
const MachineFunction &MF = *MI.getMF();
const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo();
(void)RBI;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
const MachineOperand &MO = MI.getOperand(Idx);
@@ -612,6 +616,9 @@ bool RegisterBankInfo::InstructionMapping::verify(
Register Reg = MO.getReg();
if (!Reg)
continue;
+ LLT Ty = MRI.getType(Reg);
+ if (!Ty.isValid())
+ continue;
assert(getOperandMapping(Idx).isValid() &&
"We must have a mapping for reg operands");
const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 374fcc9a6014..fba8c35ecec2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -52,22 +52,43 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
Update = true;
}
- // Does this MF have different CSRs?
- assert(TRI && "no register info set");
+ // Test if CSRs have changed from the previous function.
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const MCPhysReg *CSR = MRI.getCalleeSavedRegs();
+ bool CSRChanged = true;
+ if (!Update) {
+ CSRChanged = false;
+ size_t LastSize = LastCalleeSavedRegs.size();
+ for (unsigned I = 0;; ++I) {
+ if (CSR[I] == 0) {
+ CSRChanged = I != LastSize;
+ break;
+ }
+ if (I >= LastSize) {
+ CSRChanged = true;
+ break;
+ }
+ if (CSR[I] != LastCalleeSavedRegs[I]) {
+ CSRChanged = true;
+ break;
+ }
+ }
+ }
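The loop walks the target's zero-terminated CSR array in lockstep with the cached copy and flags any length or element mismatch. A hedged standalone equivalent (the helper name is hypothetical; assumes the usual LLVM headers):

    #include <algorithm>

    static bool csrListChanged(const llvm::MCPhysReg *CSR,
                               llvm::ArrayRef<llvm::MCPhysReg> LastCSR) {
      size_t N = 0;
      while (CSR[N] != 0) // the list is 0-terminated
        ++N;
      return N != LastCSR.size() ||
             !std::equal(CSR, CSR + N, LastCSR.begin());
    }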
// Get the callee saved registers.
- const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs();
- if (Update || CSR != CalleeSavedRegs) {
+ if (CSRChanged) {
+ LastCalleeSavedRegs.clear();
// Build a CSRAlias map. Every CSR alias saves the last
// overlapping CSR.
CalleeSavedAliases.assign(TRI->getNumRegs(), 0);
- for (const MCPhysReg *I = CSR; *I; ++I)
+ for (const MCPhysReg *I = CSR; *I; ++I) {
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
CalleeSavedAliases[*AI] = *I;
+ LastCalleeSavedRegs.push_back(*I);
+ }
Update = true;
}
- CalleeSavedRegs = CSR;
// Even if the CSR list is the same, we could have had a different allocation
// order if ignoreCSRForAllocationOrder is evaluated differently.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 8a6f823c8a0c..ab1215974fc5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -199,12 +199,7 @@ namespace {
DenseMap<Register, unsigned long> LargeLIVisitCounter;
/// Recursively eliminate dead defs in DeadDefs.
- void eliminateDeadDefs();
-
- /// allUsesAvailableAt - Return true if all registers used by OrigMI at
- /// OrigIdx are also available with the same value at UseIdx.
- bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
- SlotIndex UseIdx);
+ void eliminateDeadDefs(LiveRangeEdit *Edit = nullptr);
/// LiveRangeEdit callback for eliminateDeadDefs().
void LRE_WillEraseInstruction(MachineInstr *MI) override;
@@ -418,24 +413,24 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
-LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri,
- const MachineInstr *MI, Register &Src,
- Register &Dst, unsigned &SrcSub,
- unsigned &DstSub) {
- if (MI->isCopy()) {
- Dst = MI->getOperand(0).getReg();
- DstSub = MI->getOperand(0).getSubReg();
- Src = MI->getOperand(1).getReg();
- SrcSub = MI->getOperand(1).getSubReg();
- } else if (MI->isSubregToReg()) {
- Dst = MI->getOperand(0).getReg();
- DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
- MI->getOperand(3).getImm());
- Src = MI->getOperand(2).getReg();
- SrcSub = MI->getOperand(2).getSubReg();
- } else
- return false;
- return true;
+[[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri,
+ const MachineInstr *MI, Register &Src,
+ Register &Dst, unsigned &SrcSub,
+ unsigned &DstSub) {
+ if (MI->isCopy()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = MI->getOperand(0).getSubReg();
+ Src = MI->getOperand(1).getReg();
+ SrcSub = MI->getOperand(1).getSubReg();
+ } else if (MI->isSubregToReg()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
+ MI->getOperand(3).getImm());
+ Src = MI->getOperand(2).getReg();
+ SrcSub = MI->getOperand(2).getSubReg();
+ } else
+ return false;
+ return true;
}
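The SUBREG_TO_REG branch relies on the fixed operand layout of that pseudo, shown here as a commented example (register names illustrative):

    //   %dst:dsub = SUBREG_TO_REG 0, %src:ssub, subidx
    //   operand 0: %dst (def)       operand 1: immediate (typically 0)
    //   operand 2: %src (use)       operand 3: subregister index immediate
    // Hence Src/SrcSub come from operand 2, and DstSub is the composition
    // of the def's subregister index with the operand-3 index, as coded
    // above.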
/// Return true if this block should be vacated by the coalescer to eliminate
@@ -467,8 +462,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
Partial = SrcSub || DstSub;
// If one register is a physreg, it must be Dst.
- if (Register::isPhysicalRegister(Src)) {
- if (Register::isPhysicalRegister(Dst))
+ if (Src.isPhysical()) {
+ if (Dst.isPhysical())
return false;
std::swap(Src, Dst);
std::swap(SrcSub, DstSub);
@@ -477,7 +472,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
- if (Register::isPhysicalRegister(Dst)) {
+ if (Dst.isPhysical()) {
// Eliminate DstSub on a physreg.
if (DstSub) {
Dst = TRI.getSubReg(Dst, DstSub);
@@ -535,16 +530,15 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
CrossClass = NewRC != DstRC || NewRC != SrcRC;
}
// Check our invariants
- assert(Register::isVirtualRegister(Src) && "Src must be virtual");
- assert(!(Register::isPhysicalRegister(Dst) && DstSub) &&
- "Cannot have a physical SubIdx");
+ assert(Src.isVirtual() && "Src must be virtual");
+ assert(!(Dst.isPhysical() && DstSub) && "Cannot have a physical SubIdx");
SrcReg = Src;
DstReg = Dst;
return true;
}
bool CoalescerPair::flip() {
- if (Register::isPhysicalRegister(DstReg))
+ if (DstReg.isPhysical())
return false;
std::swap(SrcReg, DstReg);
std::swap(SrcIdx, DstIdx);
@@ -603,20 +597,16 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-void RegisterCoalescer::eliminateDeadDefs() {
+void RegisterCoalescer::eliminateDeadDefs(LiveRangeEdit *Edit) {
+ if (Edit) {
+ Edit->eliminateDeadDefs(DeadDefs);
+ return;
+ }
SmallVector<Register, 8> NewRegs;
LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
nullptr, this).eliminateDeadDefs(DeadDefs);
}
-bool RegisterCoalescer::allUsesAvailableAt(const MachineInstr *OrigMI,
- SlotIndex OrigIdx,
- SlotIndex UseIdx) {
- SmallVector<Register, 8> NewRegs;
- return LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, nullptr, this)
- .allUsesAvailableAt(OrigMI, OrigIdx, UseIdx);
-}
-
void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
// MI may be in WorkList. Make sure we don't visit it.
ErasedInstrs.insert(MI);
@@ -911,8 +901,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
if (!NewMI)
return { false, false };
- if (Register::isVirtualRegister(IntA.reg()) &&
- Register::isVirtualRegister(IntB.reg()) &&
+ if (IntA.reg().isVirtual() && IntB.reg().isVirtual() &&
!MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg())))
return { false, false };
if (NewMI != DefMI) {
@@ -950,7 +939,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
continue;
// Kill flags are no longer accurate. They are recomputed after RA.
UseMO.setIsKill(false);
- if (Register::isPhysicalRegister(NewReg))
+ if (NewReg.isPhysical())
UseMO.substPhysReg(NewReg, *TRI);
else
UseMO.setReg(NewReg);
@@ -1287,7 +1276,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
- if (Register::isPhysicalRegister(SrcReg))
+ if (SrcReg.isPhysical())
return false;
LiveInterval &SrcInt = LIS->getInterval(SrcReg);
@@ -1306,8 +1295,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
}
if (!TII->isAsCheapAsAMove(*DefMI))
return false;
- if (!TII->isTriviallyReMaterializable(*DefMI))
+
+ SmallVector<Register, 8> NewRegs;
+ LiveRangeEdit Edit(&SrcInt, NewRegs, *MF, *LIS, nullptr, this);
+ if (!Edit.checkRematerializable(ValNo, DefMI))
return false;
+
if (!definesFullReg(*DefMI, SrcReg))
return false;
bool SawStore = false;
@@ -1347,19 +1340,21 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else {
// Theoretically, some stack frame reference could exist. Just make sure
// it hasn't actually happened.
- assert(Register::isVirtualRegister(DstReg) &&
+ assert(DstReg.isVirtual() &&
"Only expect to deal with virtual or physical registers");
}
}
- if (!allUsesAvailableAt(DefMI, ValNo->def, CopyIdx))
+ LiveRangeEdit::Remat RM(ValNo);
+ RM.OrigMI = DefMI;
+ if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx, true))
return false;
DebugLoc DL = CopyMI->getDebugLoc();
MachineBasicBlock *MBB = CopyMI->getParent();
MachineBasicBlock::iterator MII =
std::next(MachineBasicBlock::iterator(CopyMI));
- TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI);
+ Edit.rematerializeAt(*MBB, MII, DstReg, RM, *TRI, false, SrcIdx, CopyMI);
MachineInstr &NewMI = *std::prev(MII);
NewMI.setDebugLoc(DL);
@@ -1379,8 +1374,18 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
TRI->getCommonSubClass(DefRC, DstRC);
if (CommonRC != nullptr) {
NewRC = CommonRC;
+
+ // Instruction might contain "undef %0:subreg" as use operand:
+ // %0:subreg = instr op_1, ..., op_N, undef %0:subreg, op_N+2, ...
+ //
+ // Need to check all operands.
+ for (MachineOperand &MO : NewMI.operands()) {
+ if (MO.isReg() && MO.getReg() == DstReg && MO.getSubReg() == DstIdx) {
+ MO.setSubReg(0);
+ }
+ }
+
DstIdx = 0;
- DefMO.setSubReg(0);
DefMO.setIsUndef(false); // Only subregs can have def+undef.
}
}
@@ -1398,12 +1403,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (MO.isReg()) {
assert(MO.isImplicit() && "No explicit operands after implicit operands.");
// Discard VReg implicit defs.
- if (Register::isPhysicalRegister(MO.getReg()))
+ if (MO.getReg().isPhysical())
ImplicitOps.push_back(MO);
}
}
- LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI);
CopyMI->eraseFromParent();
ErasedInstrs.insert(CopyMI);
@@ -1416,8 +1420,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
i != e; ++i) {
MachineOperand &MO = NewMI.getOperand(i);
if (MO.isReg() && MO.isDef()) {
- assert(MO.isImplicit() && MO.isDead() &&
- Register::isPhysicalRegister(MO.getReg()));
+ assert(MO.isImplicit() && MO.isDead() && MO.getReg().isPhysical());
NewMIImplDefs.push_back(MO.getReg().asMCReg());
}
}
@@ -1520,7 +1523,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
// The New instruction may be defining a sub-register of what's actually
// been asked for. If so it must implicitly define the whole thing.
- assert(Register::isPhysicalRegister(DstReg) &&
+ assert(DstReg.isPhysical() &&
"Only expect virtual or physical registers in remat");
NewMI.getOperand(0).setIsDead(true);
NewMI.addOperand(MachineOperand::CreateReg(
@@ -1573,7 +1576,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {
MachineInstr *UseMI = UseMO.getParent();
if (UseMI->isDebugInstr()) {
- if (Register::isPhysicalRegister(DstReg))
+ if (DstReg.isPhysical())
UseMO.substPhysReg(DstReg, *TRI);
else
UseMO.setReg(DstReg);
@@ -1597,7 +1600,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// The source interval can become smaller because we removed a use.
shrinkToUses(&SrcInt, &DeadDefs);
if (!DeadDefs.empty())
- eliminateDeadDefs();
+ eliminateDeadDefs(&Edit);
} else {
ToBeUpdated.insert(SrcReg);
}
@@ -1641,18 +1644,20 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
SlotIndex RegIndex = Idx.getRegSlot();
LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
assert(Seg != nullptr && "No segment for defining instruction");
- if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) {
- if (V->isPHIDef()) {
- CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
- for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
- MachineOperand &MO = CopyMI->getOperand(i-1);
- if (MO.isReg() && MO.isUse())
- CopyMI->removeOperand(i-1);
- }
- LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
- "implicit def\n");
- return CopyMI;
+ VNInfo *V = DstLI.getVNInfoAt(Seg->end);
+
+ // The source interval may also have been on an undef use, in which case the
+ // copy introduced a live value.
+ if (((V && V->isPHIDef()) || (!V && !DstLI.liveAt(Idx)))) {
+ CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
+ MachineOperand &MO = CopyMI->getOperand(i-1);
+ if (MO.isReg() && MO.isUse())
+ CopyMI->removeOperand(i-1);
}
+ LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
+ "implicit def\n");
+ return CopyMI;
}
// Remove any DstReg segments starting at the instruction.
@@ -1744,7 +1749,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
unsigned SubIdx) {
- bool DstIsPhys = Register::isPhysicalRegister(DstReg);
+ bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
@@ -2103,6 +2108,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
<< ")\n");
LIS->shrinkToUses(S, LI.reg());
+ ShrinkMainRange = true;
}
LI.removeEmptySubRanges();
}
@@ -2742,8 +2748,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
}
V.OtherVNI = OtherVNI;
Val &OtherV = Other.Vals[OtherVNI->id];
- // Keep this value, check for conflicts when analyzing OtherVNI.
- if (!OtherV.isAnalyzed())
+ // Keep this value, check for conflicts when analyzing OtherVNI. Avoid
+ // revisiting OtherVNI->id in JoinVals::computeAssignment() below before it
+ // is assigned.
+ if (!OtherV.isAnalyzed() || Other.Assignments[OtherVNI->id] == -1)
return CR_Keep;
// Both sides have been analyzed now.
// Allow overlapping PHI values. Any real interference would show up in a
@@ -2955,7 +2963,7 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
}
OtherV.Pruned = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
default:
// This value number needs to go in the final joined live range.
@@ -3398,7 +3406,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
if (LI != nullptr)
dbgs() << "\t\t LHS = " << *LI << '\n';
});
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case CR_Erase: {
@@ -3406,8 +3414,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
assert(MI && "No instruction to erase");
if (MI->isCopy()) {
Register Reg = MI->getOperand(1).getReg();
- if (Register::isVirtualRegister(Reg) && Reg != CP.getSrcReg() &&
- Reg != CP.getDstReg())
+ if (Reg.isVirtual() && Reg != CP.getSrcReg() && Reg != CP.getDstReg())
ShrinkRegs.push_back(Reg);
}
ErasedInstrs.insert(MI);
@@ -3885,8 +3892,7 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
Register SrcReg = Copy->getOperand(1).getReg();
Register DstReg = Copy->getOperand(0).getReg();
- if (Register::isPhysicalRegister(SrcReg) ||
- Register::isPhysicalRegister(DstReg))
+ if (SrcReg.isPhysical() || DstReg.isPhysical())
return false;
return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
@@ -3975,8 +3981,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
if (OtherReg == SrcReg)
OtherReg = OtherSrcReg;
// Check if OtherReg is a non-terminal.
- if (Register::isPhysicalRegister(OtherReg) ||
- isTerminalReg(OtherReg, MI, MRI))
+ if (OtherReg.isPhysical() || isTerminalReg(OtherReg, MI, MRI))
continue;
// Check that OtherReg interferes with DstReg.
if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
@@ -4107,7 +4112,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// calls
if (fn.exposesReturnsTwice()) {
LLVM_DEBUG(
- dbgs() << "* Skipped as it exposes funcions that returns twice.\n");
+ dbgs() << "* Skipped as it exposes functions that returns twice.\n");
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
index b14a36e4eeb4..d4c29f96a4f9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -361,8 +361,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
assert(isBottomClosed() && "need bottom-up tracking to initialize.");
for (const RegisterMaskPair &Pair : P.LiveOutRegs) {
Register RegUnit = Pair.RegUnit;
- if (Register::isVirtualRegister(RegUnit)
- && !RPTracker.hasUntiedDef(RegUnit))
+ if (RegUnit.isVirtual() && !RPTracker.hasUntiedDef(RegUnit))
increaseSetPressure(LiveThruPressure, *MRI, RegUnit,
LaneBitmask::getNone(), Pair.LaneMask);
}
@@ -608,8 +607,8 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
// If the def is all that is live after the instruction, then in case
// of a subregister def we need a read-undef flag.
Register RegUnit = I->RegUnit;
- if (Register::isVirtualRegister(RegUnit) &&
- AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none())
+ if (RegUnit.isVirtual() && AddFlagsMI != nullptr &&
+ (LiveAfter & ~I->LaneMask).none())
AddFlagsMI->setRegisterDefReadUndef(RegUnit);
LaneBitmask ActualDef = I->LaneMask & LiveAfter;
@@ -634,7 +633,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
if (AddFlagsMI != nullptr) {
for (const RegisterMaskPair &P : DeadDefs) {
Register RegUnit = P.RegUnit;
- if (!Register::isVirtualRegister(RegUnit))
+ if (!RegUnit.isVirtual())
continue;
LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit,
Pos.getDeadSlot());
@@ -843,7 +842,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
if (TrackUntiedDefs) {
for (const RegisterMaskPair &Def : RegOpers.Defs) {
Register RegUnit = Def.RegUnit;
- if (Register::isVirtualRegister(RegUnit) &&
+ if (RegUnit.isVirtual() &&
(LiveRegs.contains(RegUnit) & Def.LaneMask).none())
UntiedDefs.insert(RegUnit);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
index 289d31be2d2d..8d10a5558315 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -184,7 +184,7 @@ void RegScavenger::forward() {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
- if (!Register::isPhysicalRegister(Reg) || isReserved(Reg))
+ if (!Reg.isPhysical() || isReserved(Reg))
continue;
if (MO.isUse()) {
if (MO.isUndef())
@@ -308,7 +308,7 @@ Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
Candidates.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg() || MO.isUndef() || !MO.getReg())
continue;
- if (Register::isVirtualRegister(MO.getReg())) {
+ if (MO.getReg().isVirtual()) {
if (MO.isDef())
isVirtDefInsn = true;
else if (MO.isKill())
@@ -394,6 +394,13 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI,
Used.accumulate(*std::next(From));
}
if (FoundTo) {
+ // Don't extend the search into FrameSetup instructions if it started from
+ // a non-FrameSetup instruction. Otherwise, the spill position could end up
+ // before the FrameSetup instructions.
+ if (!From->getFlag(MachineInstr::FrameSetup) &&
+ MI.getFlag(MachineInstr::FrameSetup))
+ break;
+
if (Survivor == 0 || !Used.available(Survivor)) {
MCPhysReg AvilableReg = 0;
for (MCPhysReg Reg : AllocationOrder) {
@@ -413,7 +420,7 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI,
// be useful for this other vreg as well later.
bool FoundVReg = false;
for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
+ if (MO.isReg() && MO.getReg().isVirtual()) {
FoundVReg = true;
break;
}
@@ -499,14 +506,14 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
": Cannot scavenge register without an emergency "
"spill slot!");
}
- TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI);
+ TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI, Register());
MachineBasicBlock::iterator II = std::prev(Before);
unsigned FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
- TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI);
+ TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI, Register());
II = std::prev(UseMI);
FIOperandNum = getFrameIndexOperandNum(*II);
@@ -526,7 +533,7 @@ Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// Exclude all the registers being used by the instruction.
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
- !Register::isVirtualRegister(MO.getReg()))
+ !MO.getReg().isVirtual())
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
Candidates.reset(*AI);
}
@@ -704,7 +711,7 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
// We only care about virtual registers and ignore virtual registers
// created by the target callbacks in the process (those will be handled
// in a scavenging round).
- if (!Register::isVirtualRegister(Reg) ||
+ if (!Reg.isVirtual() ||
Register::virtReg2Index(Reg) >= InitialNumVirtRegs)
continue;
if (!MO.readsReg())
@@ -724,7 +731,7 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
continue;
Register Reg = MO.getReg();
// Only vregs, no newly created vregs (see above).
- if (!Register::isVirtualRegister(Reg) ||
+ if (!Reg.isVirtual() ||
Register::virtReg2Index(Reg) >= InitialNumVirtRegs)
continue;
// We have to look at all operands anyway so we can precalculate here
@@ -743,7 +750,7 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
}
#ifndef NDEBUG
for (const MachineOperand &MO : MBB.front().operands()) {
- if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
assert(!MO.isInternalRead() && "Cannot assign inside bundles");
assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp
index 9d9cdf9edbb3..51bac3fc0a23 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -63,7 +63,7 @@ ArrayRef<uint32_t>
PhysicalRegisterUsageInfo::getRegUsageInfo(const Function &FP) {
auto It = RegMasks.find(&FP);
if (It != RegMasks.end())
- return makeArrayRef<uint32_t>(It->second);
+ return ArrayRef<uint32_t>(It->second);
return ArrayRef<uint32_t>();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
index 01886e40a4a3..feb31e59f5fd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
@@ -89,7 +89,7 @@ static bool reduceDbgValsForwardScan(MachineBasicBlock &MBB) {
for (auto &MI : MBB) {
if (MI.isDebugValue()) {
- DebugVariable Var(MI.getDebugVariable(), NoneType(),
+ DebugVariable Var(MI.getDebugVariable(), std::nullopt,
MI.getDebugLoc()->getInlinedAt());
auto VMI = VariableMap.find(Var);
// Just stop tracking this variable, until we cover DBG_VALUE_LIST.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index 466022ae0ac1..05bbd1a2d03b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -130,7 +130,7 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
return false;
// Create a new VReg for each class.
- unsigned Reg = LI.reg();
+ Register Reg = LI.reg();
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
SmallVector<LiveInterval*, 4> Intervals;
Intervals.push_back(&LI);
@@ -175,7 +175,7 @@ bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes,
// across subranges when they are affected by the same MachineOperand.
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
Classes.grow(NumComponents);
- unsigned Reg = LI.reg();
+ Register Reg = LI.reg();
for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
if (!MO.isDef() && !MO.readsReg())
continue;
@@ -304,7 +304,7 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
const SlotIndexes &Indexes = *LIS->getSlotIndexes();
for (size_t I = 0, E = Intervals.size(); I < E; ++I) {
LiveInterval &LI = *Intervals[I];
- unsigned Reg = LI.reg();
+ Register Reg = LI.reg();
LI.removeEmptySubRanges();
@@ -391,7 +391,7 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
// there can't be any further splitting.
bool Changed = false;
for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
if (!LIS->hasInterval(Reg))
continue;
LiveInterval &LI = LIS->getInterval(Reg);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
index 0f73973c8a51..0ad6ef84220a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -66,6 +66,8 @@ namespace {
LLVM_DEBUG(dbgs() << "Resetting: " << MF.getName() << '\n');
++NumFunctionsReset;
MF.reset();
+ MF.initTargetMachineFunctionInfo(MF.getSubtarget());
+
if (EmitFallbackDiag) {
const Function &F = MF.getFunction();
DiagnosticInfoISelFallback DiagFallback(F);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
index 00a551ade213..bcad7a3f24da 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
@@ -67,6 +67,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <optional>
#include <string>
#include <utility>
@@ -896,7 +897,7 @@ public:
DominatorTree *DT;
bool ShouldPreserveDominatorTree;
- Optional<DominatorTree> LazilyComputedDomTree;
+ std::optional<DominatorTree> LazilyComputedDomTree;
// Do we already have a DominatorTree available from the previous pass?
// Note that we should *NOT* require it, to avoid the case where we end up
@@ -907,7 +908,7 @@ public:
} else {
// Otherwise, we need to compute it.
LazilyComputedDomTree.emplace(F);
- DT = LazilyComputedDomTree.getPointer();
+ DT = &*LazilyComputedDomTree;
ShouldPreserveDominatorTree = false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
new file mode 100644
index 000000000000..dd70a2f23e45
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
@@ -0,0 +1,80 @@
+//===- SanitizerBinaryMetadata.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of SanitizerBinaryMetadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+
+using namespace llvm;
+
+namespace {
+class MachineSanitizerBinaryMetadata : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MachineSanitizerBinaryMetadata();
+ bool runOnMachineFunction(MachineFunction &F) override;
+};
+} // namespace
+
+INITIALIZE_PASS(MachineSanitizerBinaryMetadata, "machine-sanmd",
+ "Machine Sanitizer Binary Metadata", false, false)
+
+char MachineSanitizerBinaryMetadata::ID = 0;
+char &llvm::MachineSanitizerBinaryMetadataID =
+ MachineSanitizerBinaryMetadata::ID;
+
+MachineSanitizerBinaryMetadata::MachineSanitizerBinaryMetadata()
+ : MachineFunctionPass(ID) {
+ initializeMachineSanitizerBinaryMetadataPass(
+ *PassRegistry::getPassRegistry());
+}
+
+bool MachineSanitizerBinaryMetadata::runOnMachineFunction(MachineFunction &MF) {
+ MDNode *MD = MF.getFunction().getMetadata(LLVMContext::MD_pcsections);
+ if (!MD)
+ return false;
+ const auto &Section = *cast<MDString>(MD->getOperand(0));
+ if (!Section.getString().equals(kSanitizerBinaryMetadataCoveredSection))
+ return false;
+ auto &AuxMDs = *cast<MDTuple>(MD->getOperand(1));
+ // Assume it currently only has features.
+ assert(AuxMDs.getNumOperands() == 1);
+ auto *Features = cast<ConstantAsMetadata>(AuxMDs.getOperand(0))->getValue();
+ if (!Features->getUniqueInteger()[kSanitizerBinaryMetadataUARBit])
+ return false;
+ // Calculate size of stack args for the function.
+ int64_t Size = 0;
+ uint64_t Align = 0;
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ for (int i = -1; i >= (int)-MFI.getNumFixedObjects(); --i) {
+ Size = std::max(Size, MFI.getObjectOffset(i) + MFI.getObjectSize(i));
+ Align = std::max(Align, MFI.getObjectAlign(i).value());
+ }
+ Size = (Size + Align - 1) & ~(Align - 1);
+ auto &F = MF.getFunction();
+ IRBuilder<> IRB(F.getContext());
+ MDBuilder MDB(F.getContext());
+ // Keep the features and append size of stack args to the metadata.
+ F.setMetadata(LLVMContext::MD_pcsections,
+ MDB.createPCSections(
+ {{Section.getString(), {Features, IRB.getInt32(Size)}}}));
+ return false;
+}
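The masking expression in the size computation is the standard power-of-two align-up idiom (LLVM also ships it as llvm::alignTo in Support/MathExtras.h). A self-contained sketch:

    #include <cstdint>

    // Align must be a power of two; adding Align-1 and clearing the low
    // bits rounds Size up to the next multiple of Align.
    constexpr uint64_t alignUp(uint64_t Size, uint64_t Align) {
      return (Size + Align - 1) & ~(Align - 1);
    }

    static_assert(alignUp(20, 16) == 32, "20 rounds up to 32");
    static_assert(alignUp(32, 16) == 32, "aligned values are unchanged");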
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 4fc9399c2b9e..1b213e87e75c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -53,7 +54,6 @@
#include <algorithm>
#include <cassert>
#include <iterator>
-#include <string>
#include <utility>
#include <vector>
@@ -84,6 +84,12 @@ static cl::opt<unsigned> ReductionSize(
cl::desc("A huge scheduling region will have maps reduced by this many "
"nodes at a time. Defaults to HugeRegion / 2."));
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static cl::opt<bool> SchedPrintCycles(
+ "sched-print-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Report top/bottom cycles when dumping SUnit instances"));
+#endif
+
static unsigned getReductionSize() {
// Always reduce a huge region with half of the elements, except
// when user sets this number explicitly.
@@ -92,12 +98,12 @@ static unsigned getReductionSize() {
return ReductionSize;
}
-static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
+static void dumpSUList(const ScheduleDAGInstrs::SUList &L) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << "{ ";
- for (const SUnit *su : L) {
- dbgs() << "SU(" << su->NodeNum << ")";
- if (su != L.back())
+ for (const SUnit *SU : L) {
+ dbgs() << "SU(" << SU->NodeNum << ")";
+ if (SU != L.back())
dbgs() << ", ";
}
dbgs() << "}\n";
@@ -125,7 +131,7 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo &MFI,
UnderlyingObjectsVector &Objects,
const DataLayout &DL) {
- auto allMMOsOkay = [&]() {
+ auto AllMMOsOkay = [&]() {
for (const MachineMemOperand *MMO : MI->memoperands()) {
// TODO: Figure out whether isAtomic is really necessary (see D57601).
if (MMO->isVolatile() || MMO->isAtomic())
@@ -147,7 +153,7 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
return false;
bool MayAlias = PSV->mayAlias(&MFI);
- Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
+ Objects.emplace_back(PSV, MayAlias);
} else if (const Value *V = MMO->getValue()) {
SmallVector<Value *, 4> Objs;
if (!getUnderlyingObjectsForCodeGen(V, Objs))
@@ -155,7 +161,7 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
for (Value *V : Objs) {
assert(isIdentifiedObject(V));
- Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
+ Objects.emplace_back(V, true);
}
} else
return false;
@@ -163,7 +169,7 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
return true;
};
- if (!allMMOsOkay()) {
+ if (!AllMMOsOkay()) {
Objects.clear();
return false;
}
@@ -205,9 +211,9 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
for (const MachineOperand &MO : ExitMI->operands()) {
if (!MO.isReg() || MO.isDef()) continue;
Register Reg = MO.getReg();
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
- } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
+ } else if (Reg.isVirtual() && MO.readsReg()) {
addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
}
}
@@ -676,9 +682,9 @@ void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
assert(BarrierChain != nullptr);
- for (auto &I : map) {
- SUList &sus = I.second;
- for (auto *SU : sus)
+ for (auto &[V, SUs] : map) {
+ (void)V;
+ for (auto *SU : SUs)
SU->addPredBarrier(BarrierChain);
}
map.clear();
@@ -793,7 +799,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
MII != MIE; --MII) {
MachineInstr &MI = *std::prev(MII);
if (DbgMI) {
- DbgValues.push_back(std::make_pair(DbgMI, &MI));
+ DbgValues.emplace_back(DbgMI, &MI);
DbgMI = nullptr;
}
@@ -839,9 +845,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
if (!MO.isReg() || !MO.isDef())
continue;
Register Reg = MO.getReg();
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
addPhysRegDeps(SU, j);
- } else if (Register::isVirtualRegister(Reg)) {
+ } else if (Reg.isVirtual()) {
HasVRegDef = true;
addVRegDefDeps(SU, j);
}
@@ -856,9 +862,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg = MO.getReg();
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
addPhysRegDeps(SU, j);
- } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
+ } else if (Reg.isVirtual() && MO.readsReg()) {
addVRegUseDeps(SU, j);
}
}
@@ -1019,21 +1025,21 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
}
void ScheduleDAGInstrs::Value2SUsMap::dump() {
- for (auto &Itr : *this) {
- if (Itr.first.is<const Value*>()) {
- const Value *V = Itr.first.get<const Value*>();
+ for (const auto &[ValType, SUs] : *this) {
+ if (ValType.is<const Value*>()) {
+ const Value *V = ValType.get<const Value*>();
if (isa<UndefValue>(V))
dbgs() << "Unknown";
else
V->printAsOperand(dbgs());
}
- else if (Itr.first.is<const PseudoSourceValue*>())
- dbgs() << Itr.first.get<const PseudoSourceValue*>();
+ else if (ValType.is<const PseudoSourceValue*>())
+ dbgs() << ValType.get<const PseudoSourceValue*>();
else
llvm_unreachable("Unknown Value type.");
dbgs() << " : ";
- dumpSUList(Itr.second);
+ dumpSUList(SUs);
}
}
@@ -1045,12 +1051,16 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
// Insert all SU's NodeNums into a vector and sort it.
std::vector<unsigned> NodeNums;
NodeNums.reserve(stores.size() + loads.size());
- for (auto &I : stores)
- for (auto *SU : I.second)
+ for (const auto &[V, SUs] : stores) {
+ (void)V;
+ for (const auto *SU : SUs)
NodeNums.push_back(SU->NodeNum);
- for (auto &I : loads)
- for (auto *SU : I.second)
+ }
+ for (const auto &[V, SUs] : loads) {
+ (void)V;
+ for (const auto *SU : SUs)
NodeNums.push_back(SU->NodeNum);
+ }
llvm::sort(NodeNums);
// The N last elements in NodeNums will be removed, and the SU with
@@ -1154,6 +1164,9 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dumpNodeName(SU);
+ if (SchedPrintCycles)
+ dbgs() << " [TopReadyCycle = " << SU.TopReadyCycle
+ << ", BottomReadyCycle = " << SU.BotReadyCycle << "]";
dbgs() << ": ";
SU.getInstr()->dump();
#endif
@@ -1308,7 +1321,7 @@ public:
/// Adds a connection for cross edges.
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
- ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
+ ConnectionPairs.emplace_back(PredDep.getSUnit(), Succ);
}
/// Sets each node's subtree ID to the representative ID and record
@@ -1336,12 +1349,12 @@ public:
LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree "
<< R.DFSNodeData[Idx].SubtreeID << '\n');
}
- for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
- unsigned PredTree = SubtreeClasses[P.first->NodeNum];
- unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
+ for (const auto &[Pred, Succ] : ConnectionPairs) {
+ unsigned PredTree = SubtreeClasses[Pred->NodeNum];
+ unsigned SuccTree = SubtreeClasses[Succ->NodeNum];
if (PredTree == SuccTree)
continue;
- unsigned Depth = P.first->getDepth();
+ unsigned Depth = Pred->getDepth();
addConnection(PredTree, SuccTree, Depth);
addConnection(SuccTree, PredTree, Depth);
}
@@ -1408,7 +1421,7 @@ public:
bool isComplete() const { return DFSStack.empty(); }
void follow(const SUnit *SU) {
- DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
+ DFSStack.emplace_back(SU, SU->Preds.begin());
}
void advance() { ++DFSStack.back().second; }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index a61a2b2728fa..209c6d81f602 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -147,7 +147,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[StageCycle];
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case InstrStage::Reserved:
// Reserved FUs can conflict only with required ones.
freeUnits &= ~RequiredScoreboard[StageCycle];
@@ -198,7 +198,7 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[cycle + i];
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case InstrStage::Reserved:
// Reserved FUs can conflict only with required ones.
freeUnits &= ~RequiredScoreboard[cycle + i];
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
index 011f55efce1d..5fd78eccf732 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -29,6 +28,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/ScaledNumber.h"
@@ -180,7 +180,7 @@ private:
// consisting of instructions exclusively computed for producing the operands
// of the source instruction.
void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
- bool ForSinking = false);
+ Instruction *SI, bool ForSinking = false);
// Returns true if the condition of the select is highly predictable.
bool isSelectHighlyPredictable(const SelectInst *SI);
@@ -199,7 +199,7 @@ private:
SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
// Returns the latency cost of a given instruction.
- Optional<uint64_t> computeInstCost(const Instruction *I);
+ std::optional<uint64_t> computeInstCost(const Instruction *I);
// Returns the misprediction cost of a given select when converted to branch.
Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
@@ -242,6 +242,10 @@ bool SelectOptimize::runOnFunction(Function &F) {
return false;
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ if (!TTI->enableSelectOptimize())
+ return false;
+
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.reset(new BranchProbabilityInfo(F, *LI));
@@ -375,13 +379,13 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
// false operands.
if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
std::stack<Instruction *> TrueSlice;
- getExclBackwardsSlice(TI, TrueSlice, true);
+ getExclBackwardsSlice(TI, TrueSlice, SI, true);
maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
TrueSlices.push_back(TrueSlice);
}
if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
std::stack<Instruction *> FalseSlice;
- getExclBackwardsSlice(FI, FalseSlice, true);
+ getExclBackwardsSlice(FI, FalseSlice, SI, true);
maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
FalseSlices.push_back(FalseSlice);
}
@@ -514,12 +518,27 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
}
}
+static bool isSpecialSelect(SelectInst *SI) {
+ using namespace llvm::PatternMatch;
+
+ // If the select is a logical-and/logical-or then it is better treated as
+ // an and/or by the backend.
+ if (match(SI, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
+ m_LogicalOr(m_Value(), m_Value()))))
+ return true;
+
+ return false;
+}
+
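As a quick sanity check on why such selects are left to the backend: a select of the form (select c, b, false) has the semantics of a logical and, and (select c, true, b) of a logical or. A standalone C++ sketch (illustrative only, not part of the patch) verifying the equivalence:

#include <cassert>
int main() {
  for (bool c : {false, true})
    for (bool b : {false, true}) {
      assert((c ? b : false) == (c && b)); // logical-and select form
      assert((c ? true : b) == (c || b));  // logical-or select form
    }
}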
void SelectOptimize::collectSelectGroups(BasicBlock &BB,
SelectGroups &SIGroups) {
BasicBlock::iterator BBIt = BB.begin();
while (BBIt != BB.end()) {
Instruction *I = &*BBIt++;
if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ if (isSpecialSelect(SI))
+ continue;
+
SelectGroup SIGroup;
SIGroup.push_back(SI);
while (BBIt != BB.end()) {
@@ -554,6 +573,12 @@ void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
}
}
+static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
+ DiagnosticInfoOptimizationBase &Rem) {
+ LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
+ ORE->emit(Rem);
+}
+
void SelectOptimize::findProfitableSIGroupsInnerLoops(
const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
NumSelectOptAnalyzed += SIGroups.size();
@@ -588,7 +613,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(
OR << "Profitable to convert to branch (loop analysis). BranchCost="
<< BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
<< ". ";
- ORE->emit(OR);
+ EmitAndPrintRemark(ORE, OR);
++NumSelectConvertedLoop;
ProfSIGroups.push_back(ASI);
} else {
@@ -596,7 +621,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(
ORmiss << "Select is more profitable (loop analysis). BranchCost="
<< BranchCost.toString()
<< ", SelectCost=" << SelectCost.toString() << ". ";
- ORE->emit(ORmiss);
+ EmitAndPrintRemark(ORE, ORmiss);
}
}
}
@@ -604,6 +629,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(
bool SelectOptimize::isConvertToBranchProfitableBase(
const SmallVector<SelectInst *, 2> &ASI) {
SelectInst *SI = ASI.front();
+ LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
@@ -611,7 +637,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
++NumSelectColdBB;
ORmiss << "Not converted to branch because of cold basic block. ";
- ORE->emit(ORmiss);
+ EmitAndPrintRemark(ORE, ORmiss);
return false;
}
@@ -619,7 +645,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
++NumSelectUnPred;
ORmiss << "Not converted to branch because of unpredictable branch. ";
- ORE->emit(ORmiss);
+ EmitAndPrintRemark(ORE, ORmiss);
return false;
}
@@ -628,7 +654,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
++NumSelectConvertedHighPred;
OR << "Converted to branch because of highly predictable branch. ";
- ORE->emit(OR);
+ EmitAndPrintRemark(ORE, OR);
return true;
}
@@ -637,12 +663,12 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
if (hasExpensiveColdOperand(ASI)) {
++NumSelectConvertedExpColdOperand;
OR << "Converted to branch because of expensive cold operand.";
- ORE->emit(OR);
+ EmitAndPrintRemark(ORE, OR);
return true;
}
ORmiss << "Not profitable to convert to branch (base heuristic).";
- ORE->emit(ORmiss);
+ EmitAndPrintRemark(ORE, ORmiss);
return false;
}
@@ -655,7 +681,7 @@ bool SelectOptimize::hasExpensiveColdOperand(
const SmallVector<SelectInst *, 2> &ASI) {
bool ColdOperand = false;
uint64_t TrueWeight, FalseWeight, TotalWeight;
- if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) {
+ if (extractBranchWeights(*ASI.front(), TrueWeight, FalseWeight)) {
uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
TotalWeight = TrueWeight + FalseWeight;
// Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
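To make the threshold concrete, here is a standalone sketch of the weight check (illustrative only; the 20% value mirrors the pass's default cold-operand threshold, and all names are local to this example):

#include <algorithm>
#include <cassert>
#include <cstdint>
int main() {
  uint64_t TrueWeight = 1, FalseWeight = 9; // profile says 10% / 90%
  uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
  uint64_t TotalWeight = TrueWeight + FalseWeight;
  unsigned ColdOperandThreshold = 20; // percent
  // One arm is cold when min/total falls below the threshold.
  bool ColdOperand = 100 * MinWeight < ColdOperandThreshold * TotalWeight;
  assert(ColdOperand); // 10% < 20%
}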
@@ -664,7 +690,7 @@ bool SelectOptimize::hasExpensiveColdOperand(
OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
ORmiss << "Profile data available but missing branch-weights metadata for "
"select instruction. ";
- ORE->emit(ORmiss);
+ EmitAndPrintRemark(ORE, ORmiss);
}
if (!ColdOperand)
return false;
@@ -682,7 +708,7 @@ bool SelectOptimize::hasExpensiveColdOperand(
}
if (ColdI) {
std::stack<Instruction *> ColdSlice;
- getExclBackwardsSlice(ColdI, ColdSlice);
+ getExclBackwardsSlice(ColdI, ColdSlice, SI);
InstructionCost SliceCost = 0;
while (!ColdSlice.empty()) {
SliceCost += TTI->getInstructionCost(ColdSlice.top(),
@@ -703,6 +729,22 @@ bool SelectOptimize::hasExpensiveColdOperand(
return false;
}
+// Check if it is safe to move LoadI next to the SI.
+// Conservatively assume it is safe only if there is no instruction
+// modifying memory in-between the load and the select instruction.
+static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
+ // Assume loads from different basic blocks are unsafe to move.
+ if (LoadI->getParent() != SI->getParent())
+ return false;
+ auto It = LoadI->getIterator();
+ while (&*It != SI) {
+ if (It->mayWriteToMemory())
+ return false;
+ ++It;
+ }
+ return true;
+}
+
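The hazard this guards against is easiest to see in a plain C++ analog (standalone, not part of the patch): if any instruction between the load and the select may write memory that aliases the loaded address, sinking the load changes the value it observes.

#include <cassert>
int main() {
  int x = 1;
  int *p = &x, *q = &x;      // p and q alias
  int v = *p;                // the load, in its original position
  *q = 2;                    // intervening write (mayWriteToMemory)
  bool c = false;
  int sel = c ? v : 0;       // the select uses the pre-write value
  assert(v == 1 && *p == 2); // sinking the load here would read 2, not 1
  return sel;
}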
// For a given source instruction, collect its backwards dependence slice
// consisting of instructions exclusively computed for the purpose of producing
// the operands of the source instruction. As an approximation
@@ -711,7 +753,7 @@ bool SelectOptimize::hasExpensiveColdOperand(
// form a one-use chain that leads to the source instruction.
void SelectOptimize::getExclBackwardsSlice(Instruction *I,
std::stack<Instruction *> &Slice,
- bool ForSinking) {
+ Instruction *SI, bool ForSinking) {
SmallPtrSet<Instruction *, 2> Visited;
std::queue<Instruction *> Worklist;
Worklist.push(I);
@@ -733,6 +775,13 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I,
isa<SelectInst>(II) || isa<PHINode>(II)))
continue;
+ // Avoid sinking loads so as not to skip state-modifying instructions
+ // that may alias with the loaded address.
+ // Only allow sinking of loads within the same basic block that are
+ // conservatively proven to be safe.
+ if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
+ continue;
+
// Avoid considering instructions with less frequency than the source
// instruction (i.e., avoid colder code regions of the dependence slice).
if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
@@ -750,7 +799,7 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I,
bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
uint64_t TrueWeight, FalseWeight;
- if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
uint64_t Max = std::max(TrueWeight, FalseWeight);
uint64_t Sum = TrueWeight + FalseWeight;
if (Sum != 0) {
@@ -777,7 +826,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
ORmissL << "No select conversion in the loop due to no reduction of loop's "
"critical path. ";
- ORE->emit(ORmissL);
+ EmitAndPrintRemark(ORE, ORmissL);
return false;
}
@@ -794,7 +843,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
"loop's critical path. Gain="
<< Gain[1].toString()
<< ", RelativeGain=" << RelativeGain.toString() << "%. ";
- ORE->emit(ORmissL);
+ EmitAndPrintRemark(ORE, ORmissL);
return false;
}
@@ -810,7 +859,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
ORmissL << "No select conversion in the loop due to small gradient gain. "
"GradientGain="
<< GradientGain.toString() << "%. ";
- ORE->emit(ORmissL);
+ EmitAndPrintRemark(ORE, ORmissL);
return false;
}
}
@@ -818,7 +867,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
else if (Gain[1] < Gain[0]) {
ORmissL
<< "No select conversion in the loop due to negative gradient gain. ";
- ORE->emit(ORmissL);
+ EmitAndPrintRemark(ORE, ORmissL);
return false;
}
@@ -834,6 +883,8 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
bool SelectOptimize::computeLoopCosts(
const Loop *L, const SelectGroups &SIGroups,
DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
+ LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
+ << L->getHeader()->getName() << "\n");
const auto &SIset = getSIset(SIGroups);
// Compute instruction and loop-critical-path costs across two iterations for
// both predicated and non-predicated version.
@@ -867,11 +918,11 @@ bool SelectOptimize::computeLoopCosts(
ORmissL << "Invalid instruction cost preventing analysis and "
"optimization of the inner-most loop containing this "
"instruction. ";
- ORE->emit(ORmissL);
+ EmitAndPrintRemark(ORE, ORmissL);
return false;
}
- IPredCost += Scaled64::get(ILatency.value());
- INonPredCost += Scaled64::get(ILatency.value());
+ IPredCost += Scaled64::get(*ILatency);
+ INonPredCost += Scaled64::get(*ILatency);
// For a select that can be converted to branch,
// compute its cost as a branch (non-predicated cost).
@@ -880,7 +931,7 @@ bool SelectOptimize::computeLoopCosts(
// PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
// MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
if (SIset.contains(&I)) {
- auto SI = dyn_cast<SelectInst>(&I);
+ auto SI = cast<SelectInst>(&I);
Scaled64 TrueOpCost = Scaled64::getZero(),
FalseOpCost = Scaled64::getZero();
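A worked instance of the cost model quoted above (numbers invented purely for illustration):

#include <algorithm>
#include <cassert>
#include <cmath>
int main() {
  double TrueOpCost = 4.0, FalseOpCost = 1.0; // operand slice latencies
  double TrueProb = 0.9, FalseProb = 0.1;     // from branch weights
  double MispredictPenalty = 10.0, CondCost = 1.0, MispredictRate = 0.05;
  double PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb;
  double MispredictCost =
      std::max(MispredictPenalty, CondCost) * MispredictRate;
  // Non-predicated (branch) cost of the select: 3.7 + 0.5 = 4.2.
  assert(std::abs((PredictedPathCost + MispredictCost) - 4.2) < 1e-9);
}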
@@ -901,12 +952,17 @@ bool SelectOptimize::computeLoopCosts(
INonPredCost = PredictedPathCost + MispredictCost;
}
+ LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
+ << INonPredCost << " for " << I << "\n");
InstCostMap[&I] = {IPredCost, INonPredCost};
MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
}
}
+ LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
+ << " MaxCost = " << MaxCost.PredCost << " "
+ << MaxCost.NonPredCost << "\n");
}
return true;
}
@@ -920,12 +976,12 @@ SelectOptimize::getSIset(const SelectGroups &SIGroups) {
return SIset;
}
-Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
InstructionCost ICost =
TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
if (auto OC = ICost.getValue())
- return Optional<uint64_t>(*OC);
- return Optional<uint64_t>(None);
+ return std::optional<uint64_t>(*OC);
+ return std::nullopt;
}
ScaledNumber<uint64_t>
@@ -959,7 +1015,7 @@ SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
const SelectInst *SI) {
Scaled64 PredPathCost;
uint64_t TrueWeight, FalseWeight;
- if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
uint64_t SumWeight = TrueWeight + FalseWeight;
if (SumWeight != 0) {
PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a7f9382478d4..0a3ebd73d272 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20,8 +20,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -69,9 +67,11 @@
#include <cstdint>
#include <functional>
#include <iterator>
+#include <optional>
#include <string>
#include <tuple>
#include <utility>
+#include <variant>
using namespace llvm;
@@ -135,6 +135,11 @@ static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
cl::desc("DAG combiner enable load/<replace bytes>/store with "
"a narrower store"));
+static cl::opt<bool> EnableVectorFCopySignExtendRound(
+ "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
+ cl::desc(
+ "Enable merging extends and rounds into FCOPYSIGN on vector types"));
+
namespace {
class DAGCombiner {
@@ -246,8 +251,8 @@ namespace {
for (MVT VT : MVT::all_valuetypes())
if (EVT(VT).isSimple() && VT != MVT::Other &&
TLI.isTypeLegal(EVT(VT)) &&
- VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
- MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
+ VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
}
void ConsiderForPruning(SDNode *N) {
@@ -382,6 +387,10 @@ namespace {
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
+ SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
+ ISD::CondCode CC);
+
/// Call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
@@ -434,6 +443,7 @@ namespace {
SDValue visitOR(SDNode *N);
SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
@@ -494,6 +504,8 @@ namespace {
SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
+ bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
+
SDValue visitSTORE(SDNode *N);
SDValue visitLIFETIME_END(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
@@ -508,6 +520,8 @@ namespace {
SDValue visitMSTORE(SDNode *N);
SDValue visitMGATHER(SDNode *N);
SDValue visitMSCATTER(SDNode *N);
+ SDValue visitVPGATHER(SDNode *N);
+ SDValue visitVPSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitFP_TO_BF16(SDNode *N);
@@ -551,6 +565,7 @@ namespace {
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL);
SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
+ SDValue foldABSToABD(SDNode *N);
SDValue unfoldMaskedMerge(SDNode *N);
SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
@@ -567,6 +582,7 @@ namespace {
SDValue CombineExtLoad(SDNode *N);
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
+ SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
@@ -602,6 +618,7 @@ namespace {
SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue convertBuildVecZextToZext(SDNode *N);
+ SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecTruncToBitCast(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
@@ -1204,19 +1221,14 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
- // Replace all uses. If any nodes become isomorphic to other nodes and
- // are deleted, make sure to remove them from our worklist.
- WorklistRemover DeadNodes(*this);
+ // Replace all uses.
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
AddToWorklistWithUsers(TLO.New.getNode());
- // Finally, if the node is now dead, remove it from the graph. The node
- // may not be dead if the replacement process recursively simplified to
- // something else needing this node.
- if (TLO.Old->use_empty())
- deleteAndRecombine(TLO.Old.getNode());
+ // Finally, if the node is now dead, remove it from the graph.
+ recursivelyDeleteUnusedNodes(TLO.Old.getNode());
}
/// Check the specified integer node value to see if it can be simplified or if
@@ -1263,11 +1275,12 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
Trunc.dump(&DAG); dbgs() << '\n');
- WorklistRemover DeadNodes(*this);
+
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
- deleteAndRecombine(Load);
+
AddToWorklist(Trunc.getNode());
+ recursivelyDeleteUnusedNodes(Load);
}
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
@@ -1522,13 +1535,15 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
Result.dump(&DAG); dbgs() << '\n');
- WorklistRemover DeadNodes(*this);
+
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
- deleteAndRecombine(N);
+
AddToWorklist(Result.getNode());
+ recursivelyDeleteUnusedNodes(N);
return true;
}
+
return false;
}
@@ -1746,7 +1761,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::AssertAlign: return visitAssertAlign(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
@@ -1964,7 +1980,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
Changed = true;
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
default:
// Only add if it isn't already in the list.
@@ -2187,54 +2203,29 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
std::swap(N0, N1);
// TODO: Should this apply to scalar select too?
- if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT)
+ if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
return SDValue();
+ // We can't hoist div/rem because of immediate UB (not speculatable).
unsigned Opcode = N->getOpcode();
+ if (!DAG.isSafeToSpeculativelyExecute(Opcode))
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDValue Cond = N1.getOperand(0);
SDValue TVal = N1.getOperand(1);
SDValue FVal = N1.getOperand(2);
- // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity().
- // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()".
- // TODO: With fast-math (NSZ), allow the opposite-sign form of zero?
- auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) {
- if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) {
- switch (Opcode) {
- case ISD::FADD: // X + -0.0 --> X
- return C->isZero() && C->isNegative();
- case ISD::FSUB: // X - 0.0 --> X
- return C->isZero() && !C->isNegative();
- case ISD::FMUL: // X * 1.0 --> X
- case ISD::FDIV: // X / 1.0 --> X
- return C->isExactlyValue(1.0);
- }
- }
- if (ConstantSDNode *C = isConstOrConstSplat(V)) {
- switch (Opcode) {
- case ISD::ADD: // X + 0 --> X
- case ISD::SUB: // X - 0 --> X
- case ISD::SHL: // X << 0 --> X
- case ISD::SRA: // X s>> 0 --> X
- case ISD::SRL: // X u>> 0 --> X
- return C->isZero();
- case ISD::MUL: // X * 1 --> X
- return C->isOne();
- }
- }
- return false;
- };
-
// This transform increases uses of N0, so freeze it to be safe.
// binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
- if (isIdentityConstantForOpcode(Opcode, TVal)) {
+ unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
+ if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
SDValue F0 = DAG.getFreeze(N0);
SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
}
// binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
- if (isIdentityConstantForOpcode(Opcode, FVal)) {
+ if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
SDValue F0 = DAG.getFreeze(N0);
SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
@@ -2289,8 +2280,8 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
// or X, (select Cond, -1, 0) --> select Cond, -1, X
bool CanFoldNonConst =
(BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
- (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
- (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
+ ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
+ (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!CanFoldNonConst &&
@@ -2298,23 +2289,41 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
!DAG.isConstantFPBuildVectorOrConstantFP(CBO))
return SDValue();
- // We have a select-of-constants followed by a binary operator with a
- // constant. Eliminate the binop by pulling the constant math into the select.
- // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
SDLoc DL(Sel);
- SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
- : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
- if (!CanFoldNonConst && !NewCT.isUndef() &&
- !isConstantOrConstantVector(NewCT, true) &&
- !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
- return SDValue();
+ SDValue NewCT, NewCF;
- SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
- : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
- if (!CanFoldNonConst && !NewCF.isUndef() &&
- !isConstantOrConstantVector(NewCF, true) &&
- !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
- return SDValue();
+ if (CanFoldNonConst) {
+ // If CBO is an opaque constant, we can't rely on getNode to constant fold.
+ if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
+ (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
+ NewCT = CT;
+ else
+ NewCT = CBO;
+
+ if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
+ (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
+ NewCF = CF;
+ else
+ NewCF = CBO;
+ } else {
+ // We have a select-of-constants followed by a binary operator with a
+ // constant. Eliminate the binop by pulling the constant math into the
+ // select. Example:
+ // add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
+ NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
+ : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
+ if (!CanFoldNonConst && !NewCT.isUndef() &&
+ !isConstantOrConstantVector(NewCT, true) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
+ return SDValue();
+
+ NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
+ : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
+ if (!CanFoldNonConst && !NewCF.isUndef() &&
+ !isConstantOrConstantVector(NewCF, true) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
+ return SDValue();
+ }
SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
SelectOp->setFlags(BO->getFlags());
@@ -2668,9 +2677,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
// fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
- if ((N0.getOpcode() == ISD::ADD) &&
- (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
- (N1.getOpcode() == ISD::VSCALE)) {
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::VSCALE &&
+ N1.getOpcode() == ISD::VSCALE) {
const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
const APInt &VS1 = N1->getConstantOperandAPInt(0);
SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
@@ -2687,9 +2696,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
// Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
- if ((N0.getOpcode() == ISD::ADD) &&
- (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
- (N1.getOpcode() == ISD::STEP_VECTOR)) {
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
+ N1.getOpcode() == ISD::STEP_VECTOR) {
const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
const APInt &SV1 = N1->getConstantOperandAPInt(0);
APInt NewStep = SV0 + SV1;
@@ -2789,16 +2798,26 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
/// the opcode and bypass the mask operation.
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
SelectionDAG &DAG, const SDLoc &DL) {
+ if (N1.getOpcode() == ISD::ZERO_EXTEND)
+ N1 = N1.getOperand(0);
+
if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
return SDValue();
EVT VT = N0.getValueType();
- if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
+ N10 = N10.getOperand(0);
+
+ if (N10.getValueType() != VT)
+ return SDValue();
+
+ if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
return SDValue();
// add N0, (and (AssertSext X, i1), 1) --> sub N0, X
// sub N0, (and (AssertSext X, i1), 1) --> add N0, X
- return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
}
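The arithmetic behind this fold, checked in a standalone C++ sketch (illustrative only): a value sign-extended from i1 is either 0 or -1, so masking it with 1 yields its negation.

#include <cassert>
#include <cstdint>
int main() {
  for (int32_t x : {0, -1}) {      // the only values of (AssertSext X, i1)
    int32_t masked = x & 1;        // (and X, 1) is 0 or 1, i.e. -x
    assert(masked == -x);
    int32_t n0 = 42;
    assert(n0 + masked == n0 - x); // add N0, (and X, 1) --> sub N0, X
    assert(n0 - masked == n0 + x); // sub N0, (and X, 1) --> add N0, X
  }
}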
/// Helper for doing combines based on N0 and N1 being added to each other.
@@ -3079,6 +3098,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
return Combined;
+ // We want to avoid useless duplication.
+ // TODO: This is done automatically for binary operations. As ADDCARRY is
+ // not a binary operation, it is not really possible to leverage this
+ // existing mechanism for it. However, if more operations require the same
+ // deduplication logic, then it may be worth generalizing.
+ SDValue Ops[] = {N1, N0, CarryIn};
+ SDNode *CSENode =
+ DAG.getNodeIfExists(ISD::ADDCARRY, N->getVTList(), Ops, N->getFlags());
+ if (CSENode)
+ return SDValue(CSENode, 0);
+
return SDValue();
}
@@ -3110,7 +3140,7 @@ SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
* (addcarry X, 0, (addcarry A, B, Z):Carry)
*
* The end result is usually an increase in operation required, but because the
- * carry is now linearized, other tranforms can kick in and optimize the DAG.
+ * carry is now linearized, other transforms can kick in and optimize the DAG.
*
* Patterns typically look something like
* (uaddo A, B)
@@ -3492,11 +3522,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (VT.isVector()) {
SDValue N1S = DAG.getSplatValue(N1, true);
if (N1S && N1S.getOpcode() == ISD::SUB &&
- isNullConstant(N1S.getOperand(0))) {
- if (VT.isScalableVector())
- return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
- return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
- }
+ isNullConstant(N1S.getOperand(0)))
+ return DAG.getSplat(VT, DL, N1S.getOperand(1));
}
}
@@ -3625,7 +3652,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return V;
// (x - y) - 1 -> add (xor y, -1), x
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
@@ -3642,26 +3669,26 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// Hoist one-use addition by non-opaque constant:
// (x + C) - y -> (x - y) + C
- if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
+ if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
}
// y - (x + C) -> (y - x) - C
- if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
+ if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
}
// (x - C) - y -> (x - y) - C
// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
}
// (C - x) - y -> C - (x + y)
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
@@ -3716,7 +3743,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
- if (N1.getOpcode() == ISD::VSCALE) {
+ if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
const APInt &IntVal = N1.getConstantOperandAPInt(0);
return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
}
@@ -3749,6 +3776,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
}
+ // (sub (subcarry X, 0, Carry), Y) -> (subcarry X, Y, Carry)
+ if (N0.getOpcode() == ISD::SUBCARRY && isNullConstant(N0.getOperand(1)) &&
+ N0.getResNo() == 0 && N0.hasOneUse())
+ return DAG.getNode(ISD::SUBCARRY, DL, N0->getVTList(),
+ N0.getOperand(0), N1, N0.getOperand(2));
+
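Why this is sound (standalone sketch, illustrative only): with a borrow-in C of 0 or 1, (X - 0 - C) - Y and X - Y - C are the same value, so the zero subtrahend can be replaced by Y and the outer sub dropped.

#include <cassert>
int main() {
  for (int C : {0, 1}) {
    int X = 100, Y = 37;
    assert(((X - 0 - C) - Y) == (X - Y - C));
  }
}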
if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
// (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
if (SDValue Carry = getAsCarry(TLI, N0)) {
@@ -3772,6 +3805,24 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // max(a,b) - min(a,b) --> abd(a,b)
+ auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) {
+ if (N0.getOpcode() != Max || N1.getOpcode() != Min)
+ return SDValue();
+ if ((N0.getOperand(0) != N1.getOperand(0) ||
+ N0.getOperand(1) != N1.getOperand(1)) &&
+ (N0.getOperand(0) != N1.getOperand(1) ||
+ N0.getOperand(1) != N1.getOperand(0)))
+ return SDValue();
+ if (!TLI.isOperationLegalOrCustom(Abd, VT))
+ return SDValue();
+ return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1));
+ };
+ if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS))
+ return R;
+ if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU))
+ return R;
+
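The identity being matched, checked in plain C++ (illustrative only; ABDS/ABDU are the signed/unsigned absolute-difference nodes):

#include <algorithm>
#include <cassert>
#include <cstdlib>
int main() {
  for (int a : {-7, 0, 3})
    for (int b : {-2, 5})
      assert(std::max(a, b) - std::min(a, b) == std::abs(a - b));
  unsigned ua = 3, ub = 7; // unsigned flavor uses unsigned comparisons
  assert(std::max(ua, ub) - std::min(ua, ub) == 4u);
}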
return SDValue();
}
@@ -3996,8 +4047,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnes())
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT), N0);
+ return DAG.getNegative(N0, DL, VT);
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
@@ -4021,6 +4071,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
getShiftAmountTy(N0.getValueType()))));
}
+ // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
+ // hi result is in use in case we hit this mid-legalization.
+ for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
+ SDVTList LoHiVT = DAG.getVTList(VT, VT);
+ // TODO: Can we match commutable operands with getNodeIfExists?
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ }
+ }
+
// Try to transform:
// (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
// mul x, (2^N + 1) --> add (shl x, N), x
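A quick numeric check of the two shapes (standalone, illustrative only):

#include <cassert>
int main() {
  for (unsigned x : {0u, 1u, 5u, 123u}) {
    assert(x * 9 == (x << 3) + x); // 9 = 2^3 + 1 --> add (shl x, 3), x
    assert(x * 7 == (x << 3) - x); // 7 = 2^3 - 1 --> sub (shl x, 3), x
  }
}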
@@ -4064,7 +4129,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
DAG.getConstant(TZeros, DL, VT)))
: DAG.getNode(MathOp, DL, VT, Shl, N0);
if (ConstValue1.isNegative())
- R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
+ R = DAG.getNegative(R, DL, VT);
return R;
}
}
@@ -4108,21 +4173,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
- if (N0.getOpcode() == ISD::VSCALE)
- if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
- const APInt &C0 = N0.getConstantOperandAPInt(0);
- const APInt &C1 = NC1->getAPIntValue();
- return DAG.getVScale(DL, VT, C0 * C1);
- }
+ ConstantSDNode *NC1 = isConstOrConstSplat(N1);
+ if (N0.getOpcode() == ISD::VSCALE && NC1) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = NC1->getAPIntValue();
+ return DAG.getVScale(DL, VT, C0 * C1);
+ }
// Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
APInt MulVal;
- if (N0.getOpcode() == ISD::STEP_VECTOR)
- if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
- const APInt &C0 = N0.getConstantOperandAPInt(0);
- APInt NewStep = C0 * MulVal;
- return DAG.getStepVector(DL, VT, NewStep);
- }
+ if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ APInt NewStep = C0 * MulVal;
+ return DAG.getStepVector(DL, VT, NewStep);
+ }
// Fold ((mul x, 0/undef) -> 0,
// (mul x, 1) -> x)
@@ -4318,7 +4383,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// fold (sdiv X, -1) -> 0-X
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && N1C->isAllOnes())
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+ return DAG.getNegative(N0, DL, VT);
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
if (N1C && N1C->getAPIntValue().isMinSignedValue())
@@ -4465,10 +4530,11 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
// fold (udiv X, -1) -> select(X == -1, 1, 0)
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N1C && N1C->isAllOnes())
+ if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
+ }
if (SDValue V = simplifyDivRem(N, DAG))
return V;
@@ -4571,7 +4637,8 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
// fold (urem X, -1) -> select(FX == -1, 0, FX)
// Freeze the numerator to avoid a miscompile with an undefined value.
- if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) {
+ if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
+ CCVT.isVector() == VT.isVector()) {
SDValue F0 = DAG.getFreeze(N0);
SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
@@ -5328,6 +5395,21 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
+ // For funnel shifts FSHL/FSHR:
+ // logic_op (OP x, x1, s), (OP y, y1, s) -->
+ // --> OP (logic_op x, y), (logic_op x1, y1), s
+ if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
+ N0.getOperand(2) == N1.getOperand(2)) {
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue X1 = N0.getOperand(1);
+ SDValue Y1 = N1.getOperand(1);
+ SDValue S = N0.getOperand(2);
+ SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
+ SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
+ return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
+ }
+
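The distribution property this relies on, checked for a scalar funnel shift (standalone sketch, illustrative only; fshl here models ISD::FSHL for a 32-bit word with a shift amount in [0, 31]):

#include <cassert>
#include <cstdint>

static uint32_t fshl(uint32_t Hi, uint32_t Lo, unsigned S) {
  // Concatenate Hi:Lo, shift left by S, keep the high word.
  return S == 0 ? Hi : (Hi << S) | (Lo >> (32 - S));
}

int main() {
  uint32_t X = 0xDEADBEEF, X1 = 0x12345678;
  uint32_t Y = 0xCAFEF00D, Y1 = 0x0F0F0F0F;
  for (unsigned S : {0u, 1u, 13u, 31u}) {
    // logic_op (FSHL x, x1, s), (FSHL y, y1, s)
    //   == FSHL (logic_op x, y), (logic_op x1, y1), s
    assert((fshl(X, X1, S) | fshl(Y, Y1, S)) == fshl(X | Y, X1 | Y1, S));
    assert((fshl(X, X1, S) & fshl(Y, Y1, S)) == fshl(X & Y, X1 & Y1, S));
    assert((fshl(X, X1, S) ^ fshl(Y, Y1, S)) == fshl(X ^ Y, X1 ^ Y1, S));
  }
}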
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
// Only perform this optimization up until type legalization, before
// LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
@@ -6139,6 +6221,43 @@ static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
}
+/// Given a tree of logic operations with shape like
+/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
+/// try to match and fold shift operations with the same shift amount.
+/// For example:
+/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
+/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
+static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
+ SDValue RightHand, SelectionDAG &DAG) {
+ unsigned LogicOpcode = N->getOpcode();
+ assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+ LogicOpcode == ISD::XOR));
+ if (LeftHand.getOpcode() != LogicOpcode ||
+ RightHand.getOpcode() != LogicOpcode)
+ return SDValue();
+ if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
+ return SDValue();
+
+ // Try to match one of following patterns:
+ // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
+ // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
+ // Note that foldLogicOfShifts will handle commuted versions of the left hand
+ // itself.
+ SDValue CombinedShifts, W;
+ SDValue R0 = RightHand.getOperand(0);
+ SDValue R1 = RightHand.getOperand(1);
+ if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
+ W = R1;
+ else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
+ W = R0;
+ else
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -6175,8 +6294,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
- if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() &&
- Splat && N1.hasOneUse()) {
+ if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
+ N1.hasOneUse()) {
EVT LoadVT = MLoad->getMemoryVT();
EVT ExtVT = VT;
if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
@@ -6186,11 +6305,16 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
uint64_t ElementSize =
LoadVT.getVectorElementType().getScalarSizeInBits();
if (Splat->getAPIntValue().isMask(ElementSize)) {
- return DAG.getMaskedLoad(
+ auto NewLoad = DAG.getMaskedLoad(
ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
ISD::ZEXTLOAD, MLoad->isExpandingLoad());
+ bool LoadHasOtherUsers = !N0.hasOneUse();
+ CombineTo(N, NewLoad);
+ if (LoadHasOtherUsers)
+ CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
+ return SDValue(N, 0);
}
}
}
@@ -6213,14 +6337,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
- // Try to convert a constant mask AND into a shuffle clear mask.
- if (VT.isVector())
- if (SDValue Shuffle = XformToShuffleWithZero(N))
- return Shuffle;
-
- if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
- return Combined;
-
// fold (and (or x, C), D) -> D if (C & D) == D
auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
@@ -6228,23 +6344,32 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N0.getOpcode() == ISD::OR &&
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
return N1;
+
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
- if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
- SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
- N0.getValueType(), N0Op0);
-
- // Replace uses of the AND with uses of the Zero extend node.
- CombineTo(N, Zext);
+ if (DAG.MaskedValueIsZero(N0Op0, Mask))
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0);
+ }
- // We actually want to replace all uses of the any_extend with the
- // zero_extend, to avoid duplicating things. This will later cause this
- // AND to be folded.
- CombineTo(N0.getNode(), Zext);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
+ if (ISD::isExtOpcode(N0.getOpcode())) {
+ unsigned ExtOpc = N0.getOpcode();
+ SDValue N0Op0 = N0.getOperand(0);
+ if (N0Op0.getOpcode() == ISD::AND &&
+ (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
+ N0->hasOneUse() && N0Op0->hasOneUse()) {
+ SDLoc DL(N);
+ SDValue NewMask =
+ DAG.getNode(ISD::AND, DL, VT, N1,
+ DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
+ NewMask);
}
}
@@ -6353,6 +6478,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
+ // Try to convert a constant mask AND into a shuffle clear mask.
+ if (VT.isVector())
+ if (SDValue Shuffle = XformToShuffleWithZero(N))
+ return Shuffle;
+
+ if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
+ return Combined;
+
if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
SDValue Ext = N0.getOperand(0);
@@ -6512,6 +6645,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue V = foldAndToUsubsat(N, DAG))
return V;
+ // Postpone until legalization has completed to avoid interference with
+ // bswap folding.
+ if (LegalOperations || VT.isVector())
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
+
return SDValue();
}
@@ -6911,6 +7050,10 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
+ // fold or (and x, y), x --> x
+ if (N00 == N1 || N01 == N1)
+ return N1;
+
// fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
// TODO: Set AllowUndefs = true.
if (getBitwiseNotOperand(N01, N00,
@@ -6923,6 +7066,24 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
}
+ if (N0.getOpcode() == ISD::XOR) {
+ // fold or (xor x, y), x --> or x, y
+ // or (xor x, y), (x and/or y) --> or x, y
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ if (N00 == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
+ if (N01 == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
+
+ if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) {
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+ if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01);
+ }
+ }
+
if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
return R;
@@ -7112,10 +7273,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue Combined = visitADDLike(N))
return Combined;
+ // Postpone until legalization has completed to avoid interference with
+ // bswap folding.
+ if (LegalOperations || VT.isVector())
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
+
return SDValue();
}
-static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
+static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
+ SDValue &Mask) {
if (Op.getOpcode() == ISD::AND &&
DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
Mask = Op.getOperand(1);
@@ -7125,7 +7293,7 @@ static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
}
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
-static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
+static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
SDValue &Mask) {
Op = stripConstantMask(DAG, Op, Mask);
if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
@@ -7612,7 +7780,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
};
// TODO: Support pre-legalization funnel-shift by constant.
- bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
+ bool IsRotate = LHSShiftArg == RHSShiftArg;
if (!IsRotate && !(HasFSHL || HasFSHR)) {
if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
@@ -7740,87 +7908,135 @@ struct ByteProvider {
// ByteOffset is the offset of the byte in the value produced by the load.
LoadSDNode *Load = nullptr;
unsigned ByteOffset = 0;
+ unsigned VectorOffset = 0;
ByteProvider() = default;
- static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
- return ByteProvider(Load, ByteOffset);
+ static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset,
+ unsigned VectorOffset) {
+ return ByteProvider(Load, ByteOffset, VectorOffset);
}
- static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
+ static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0, 0); }
bool isConstantZero() const { return !Load; }
bool isMemory() const { return Load; }
bool operator==(const ByteProvider &Other) const {
- return Other.Load == Load && Other.ByteOffset == ByteOffset;
+ return Other.Load == Load && Other.ByteOffset == ByteOffset &&
+ Other.VectorOffset == VectorOffset;
}
private:
- ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
- : Load(Load), ByteOffset(ByteOffset) {}
+ ByteProvider(LoadSDNode *Load, unsigned ByteOffset, unsigned VectorOffset)
+ : Load(Load), ByteOffset(ByteOffset), VectorOffset(VectorOffset) {}
};
} // end anonymous namespace
/// Recursively traverses the expression calculating the origin of the requested
-/// byte of the given value. Returns None if the provider can't be calculated.
+/// byte of the given value. Returns std::nullopt if the provider can't be
+/// calculated.
+///
+/// For all the values except the root of the expression, we verify that the
+/// value has exactly one use and if not then return std::nullopt. This way if
+/// the origin of the byte is returned it's guaranteed that the values which
+/// contribute to the byte are not used outside of this expression.
+///
+/// However, there is a special case when dealing with vector loads -- we allow
+/// more than one use if the load is a vector type. Since the values that
+/// contribute to the byte ultimately come from the ExtractVectorElements of the
+/// Load, we don't care if the Load has uses other than ExtractVectorElements,
+/// because those operations are independent from the pattern to be combined.
+/// For vector loads, we simply care that the ByteProviders are adjacent
+/// positions of the same vector, and their index matches the byte that is being
+/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
+/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
+/// byte position we are trying to provide for the LoadCombine. If these do
+/// not match, then we can not combine the vector loads. \p Index uses the
+/// byte position we are trying to provide for and is matched against the
+/// shl and load size. The \p Index algorithm ensures the requested byte is
+/// provided for by the pattern, and the pattern does not over provide bytes.
///
-/// For all the values except the root of the expression verifies that the value
-/// has exactly one use and if it's not true return None. This way if the origin
-/// of the byte is returned it's guaranteed that the values which contribute to
-/// the byte are not used outside of this expression.
///
-/// Because the parts of the expression are not allowed to have more than one
-/// use this function iterates over trees, not DAGs. So it never visits the same
-/// node more than once.
-static const Optional<ByteProvider>
+/// The supported LoadCombine pattern for vector loads is as follows
+///                              or
+///                          /        \
+///                         or        shl
+///                       /     \      |
+///                      or      shl   zext
+///                    /    \     |     |
+///                   shl   zext  zext  EVE*
+///                    |     |     |     |
+///                   zext  EVE*  EVE*  LOAD
+///                    |     |     |
+///                   EVE*  LOAD  LOAD
+///                    |
+///                   LOAD
+///
+/// *ExtractVectorElement
+static const std::optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
- bool Root = false) {
+ std::optional<uint64_t> VectorIndex,
+ unsigned StartingIndex = 0) {
+
// Typical i64 by i8 pattern requires recursion up to 8 calls depth
if (Depth == 10)
- return None;
+ return std::nullopt;
+
+ // Only allow multiple uses if the instruction is a vector load (in which
+ // case we will use the load for every ExtractVectorElement)
+ if (Depth && !Op.hasOneUse() &&
+ (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
+ return std::nullopt;
- if (!Root && !Op.hasOneUse())
- return None;
+ // Fail to combine if we have encountered anything but a LOAD after handling
+ // an ExtractVectorElement.
+ if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
+ return std::nullopt;
- assert(Op.getValueType().isScalarInteger() && "can't handle other types");
unsigned BitWidth = Op.getValueSizeInBits();
if (BitWidth % 8 != 0)
- return None;
+ return std::nullopt;
unsigned ByteWidth = BitWidth / 8;
assert(Index < ByteWidth && "invalid index requested");
(void) ByteWidth;
switch (Op.getOpcode()) {
case ISD::OR: {
- auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
+ auto LHS =
+ calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
if (!LHS)
- return None;
- auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
+ return std::nullopt;
+ auto RHS =
+ calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
if (!RHS)
- return None;
+ return std::nullopt;
if (LHS->isConstantZero())
return RHS;
if (RHS->isConstantZero())
return LHS;
- return None;
+ return std::nullopt;
}
case ISD::SHL: {
auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!ShiftOp)
- return None;
+ return std::nullopt;
uint64_t BitShift = ShiftOp->getZExtValue();
+
if (BitShift % 8 != 0)
- return None;
+ return std::nullopt;
uint64_t ByteShift = BitShift / 8;
+ // If we are shifting by an amount greater than the index we are trying to
+ // provide, then do not provide anything. Otherwise, subtract the shift
+ // amount from the index.
return Index < ByteShift
? ByteProvider::getConstantZero()
: calculateByteProvider(Op->getOperand(0), Index - ByteShift,
- Depth + 1);
+ Depth + 1, VectorIndex, Index);
}
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
@@ -7828,37 +8044,70 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
SDValue NarrowOp = Op->getOperand(0);
unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
if (NarrowBitWidth % 8 != 0)
- return None;
+ return std::nullopt;
uint64_t NarrowByteWidth = NarrowBitWidth / 8;
if (Index >= NarrowByteWidth)
return Op.getOpcode() == ISD::ZERO_EXTEND
- ? Optional<ByteProvider>(ByteProvider::getConstantZero())
- : None;
- return calculateByteProvider(NarrowOp, Index, Depth + 1);
+ ? std::optional<ByteProvider>(ByteProvider::getConstantZero())
+ : std::nullopt;
+ return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
+ StartingIndex);
}
case ISD::BSWAP:
return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
- Depth + 1);
+ Depth + 1, VectorIndex, StartingIndex);
+ case ISD::EXTRACT_VECTOR_ELT: {
+ auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!OffsetOp)
+ return std::nullopt;
+
+ VectorIndex = OffsetOp->getZExtValue();
+
+ SDValue NarrowOp = Op->getOperand(0);
+ unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return std::nullopt;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ // Check to see if the position of the element in the vector corresponds
+ // with the byte we are trying to provide for. In the case of a vector of
+ // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
+ // the element will provide a range of bytes. For example, if we have a
+ // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
+ // 3).
+ if (*VectorIndex * NarrowByteWidth > StartingIndex)
+ return std::nullopt;
+ if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
+ return std::nullopt;
+
+ return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
+ VectorIndex, StartingIndex);
+ }
case ISD::LOAD: {
auto L = cast<LoadSDNode>(Op.getNode());
if (!L->isSimple() || L->isIndexed())
- return None;
+ return std::nullopt;
unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
if (NarrowBitWidth % 8 != 0)
- return None;
+ return std::nullopt;
uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+ // If the width of the load does not reach the byte we are trying to
+ // provide for and it is not a ZEXTLOAD, then the load does not provide for
+ // the byte in question.
if (Index >= NarrowByteWidth)
return L->getExtensionType() == ISD::ZEXTLOAD
- ? Optional<ByteProvider>(ByteProvider::getConstantZero())
- : None;
- return ByteProvider::getMemory(L, Index);
+ ? std::optional<ByteProvider>(ByteProvider::getConstantZero())
+ : std::nullopt;
+
+ unsigned BPVectorIndex = VectorIndex.value_or(0U);
+ return ByteProvider::getMemory(L, Index, BPVectorIndex);
}
}
- return None;
+ return std::nullopt;
}
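A concrete instance of the EXTRACT_VECTOR_ELT byte-range check above (standalone sketch, illustrative only): for a vector of i16, element VectorIndex covers bytes [VectorIndex * 2, (VectorIndex + 1) * 2).

#include <cassert>
int main() {
  unsigned NarrowByteWidth = 2; // vector of i16 elements
  auto provides = [&](unsigned VectorIndex, unsigned StartingIndex) {
    // Mirrors the two rejections in the EXTRACT_VECTOR_ELT case.
    return !(VectorIndex * NarrowByteWidth > StartingIndex) &&
           !((VectorIndex + 1) * NarrowByteWidth <= StartingIndex);
  };
  assert(provides(1, 2) && provides(1, 3));   // V[1] provides bytes 2 and 3
  assert(!provides(1, 0) && !provides(1, 4)); // but not bytes 0 or 4
}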
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
@@ -7871,13 +8120,13 @@ static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
// Check if the byte offsets we are looking at match with either big or
// little endian value loaded. Return true for big endian, false for little
-// endian, and None if match failed.
-static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
- int64_t FirstOffset) {
+// endian, and std::nullopt if match failed.
+static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
+ int64_t FirstOffset) {
// The endian can be decided only when it is 2 bytes at least.
unsigned Width = ByteOffsets.size();
if (Width < 2)
- return None;
+ return std::nullopt;
bool BigEndian = true, LittleEndian = true;
for (unsigned i = 0; i < Width; i++) {
@@ -7885,7 +8134,7 @@ static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
if (!BigEndian && !LittleEndian)
- return None;
+ return std::nullopt;
}
assert((BigEndian != LittleEndian) && "It should be either big endian or"
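The two offset patterns being matched (standalone sketch, illustrative only): for a W-byte value, byte i counted from the least-significant end sits at memory offset i on a little-endian target and at W - 1 - i on a big-endian one.

#include <cassert>
int main() {
  unsigned W = 4;
  unsigned little[4], big[4];
  for (unsigned i = 0; i < W; ++i) {
    little[i] = i;      // littleEndianByteAt(4, i)
    big[i] = W - i - 1; // bigEndianByteAt(4, i)
  }
  // Offsets {0,1,2,3} match little endian, {3,2,1,0} match big endian;
  // any other arrangement makes isBigEndian return std::nullopt.
  assert(little[0] == 0 && big[0] == 3);
}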
@@ -7948,9 +8197,13 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
// All stores must be the same size to ensure that we are writing all of the
// bytes in the wide value.
+ // This store should have exactly one use as a chain operand for another
+ // store in the merging set. If there are other chain uses, then the
+ // transform may not be safe because the order of loads/stores outside of
+ // this set may not be preserved.
// TODO: We could allow multiple sizes by tracking each stored byte.
if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
- Store->isIndexed())
+ Store->isIndexed() || !Store->hasOneUse())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
@@ -7974,7 +8227,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
StoreSDNode *FirstStore = nullptr;
- Optional<BaseIndexOffset> Base;
+ std::optional<BaseIndexOffset> Base;
for (auto *Store : Stores) {
// All the stores store different parts of the CombinedValue. A truncate is
// required to get the partial value.
@@ -8042,7 +8295,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// Check that a store of the wide type is both allowed and fast on the target
const DataLayout &Layout = DAG.getDataLayout();
- bool Fast = false;
+ unsigned Fast = 0;
bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
*FirstStore->getMemOperand(), &Fast);
if (!Allowed || !Fast)
@@ -8146,7 +8399,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto MemoryByteOffset = [&] (ByteProvider P) {
assert(P.isMemory() && "Must be a memory byte provider");
- unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
+ unsigned LoadBitWidth = P.Load->getMemoryVT().getScalarSizeInBits();
+
assert(LoadBitWidth % 8 == 0 &&
"can only analyze providers for individual bytes not bit");
unsigned LoadByteWidth = LoadBitWidth / 8;
@@ -8155,11 +8409,11 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
: littleEndianByteAt(LoadByteWidth, P.ByteOffset);
};
- Optional<BaseIndexOffset> Base;
+ std::optional<BaseIndexOffset> Base;
SDValue Chain;
SmallPtrSet<LoadSDNode *, 8> Loads;
- Optional<ByteProvider> FirstByteProvider;
+ std::optional<ByteProvider> FirstByteProvider;
int64_t FirstOffset = INT64_MAX;
// Check if all the bytes of the OR we are looking at are loaded from the same
@@ -8167,7 +8421,9 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
unsigned ZeroExtendedBytes = 0;
for (int i = ByteWidth - 1; i >= 0; --i) {
- auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
+ auto P =
+ calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
+ /*StartingIndex*/ i);
if (!P)
return SDValue();
@@ -8181,10 +8437,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
assert(P->isMemory() && "provenance should either be memory or zero");
LoadSDNode *L = P->Load;
- assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
- !L->isIndexed() &&
- "Must be enforced by calculateByteProvider");
- assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
// All loads must share the same chain
SDValue LChain = L->getChain();
@@ -8196,8 +8448,25 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Loads must share the same base address
BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
int64_t ByteOffsetFromBase = 0;
+
+ // For vector loads, the expected load combine pattern will have an
+ // ExtractElement for each index in the vector. While each of these
+ // ExtractElements will be accessing the same base address as determined
+ // by the load instruction, the actual bytes they interact with will differ
+ // due to different ExtractElement indices. To accurately determine the
+ // byte position of an ExtractElement, we offset the base load ptr with
+ // the index multiplied by the byte size of each element in the vector.
+ if (L->getMemoryVT().isVector()) {
+ unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
+ if (LoadWidthInBit % 8 != 0)
+ return SDValue();
+ unsigned ByteOffsetFromVector = P->VectorOffset * LoadWidthInBit / 8;
+ Ptr.addToOffset(ByteOffsetFromVector);
+ }
+
if (!Base)
Base = Ptr;
+
else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
return SDValue();
@@ -8213,6 +8482,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
Loads.insert(L);
}
+
assert(!Loads.empty() && "All the bytes of the value must be loaded from "
"memory, so there must be at least one load which produces the value");
assert(Base && "Base address of the accessed memory location must be set");
@@ -8236,8 +8506,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check if the bytes of the OR we are looking at match with either big or
// little endian value load
- Optional<bool> IsBigEndian = isBigEndian(
- makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
+ std::optional<bool> IsBigEndian = isBigEndian(
+ ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
if (!IsBigEndian)
return SDValue();
@@ -8272,7 +8542,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
// Check that a load of the wide type is both allowed and fast on the target
- bool Fast = false;
+ unsigned Fast = 0;
bool Allowed =
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
*FirstLoad->getMemOperand(), &Fast);
@@ -8445,6 +8715,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
+ // fold (a^b) -> (a|b) iff a and b share no bits.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+
// look for 'add-like' folds:
// XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
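The new XOR→OR fold rests on a simple identity: when two values share no set bits, XOR, OR, and ADD all agree. A minimal sketch (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t a = 0xF0F0F0F0u, b = 0x01010101u;
      assert((a & b) == 0);        // no common bits set
      assert((a ^ b) == (a | b));  // so xor can be rewritten as or
      assert((a ^ b) == a + b);    // (and both equal the carry-less add)
      return 0;
    }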
@@ -8536,8 +8811,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (not (add X, -1)) -> (neg X)
if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- N0.getOperand(0));
+ return DAG.getNegative(N0.getOperand(0), DL, VT);
}
// fold (xor (and x, y), y) -> (and (not x), y)
@@ -8599,6 +8873,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return R;
if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
return R;
+ if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+ return R;
// Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
if (SDValue MM = unfoldMaskedMerge(N))
@@ -8698,13 +8974,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
return SDValue();
- // TODO: This is limited to early combining because it may reveal regressions
- // otherwise. But since we just checked a target hook to see if this is
- // desirable, that should have filtered out cases where this interferes
- // with some other pattern matching.
- if (!LegalTypes)
- if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
- return R;
+ // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
+ if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+ return R;
// We want to pull some binops through shifts, so that we have (and (shift))
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
@@ -8723,11 +8995,6 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
break;
}
- // We require the RHS of the binop to be a constant and not opaque as well.
- ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
- if (!BinOpCst)
- return SDValue();
-
// FIXME: disable this unless the input to the binop is a shift by a constant
// or is copy/select. Enable this in other cases when figure out it's exactly
// profitable.
@@ -8745,16 +9012,17 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
if (IsCopyOrSelect && N->hasOneUse())
return SDValue();
- // Fold the constants, shifting the binop RHS by the shift amount.
+ // Attempt to fold the constants, shifting the binop RHS by the shift amount.
SDLoc DL(N);
EVT VT = N->getValueType(0);
- SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
- N->getOperand(1));
- assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
+ if (SDValue NewRHS = DAG.FoldConstantArithmetic(
+ N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
+ SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
+ N->getOperand(1));
+ return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
+ }
- SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
- N->getOperand(1));
- return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
+ return SDValue();
}
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
@@ -8832,7 +9100,7 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
unsigned NextOp = N0.getOpcode();
// fold (rot* (rot* x, c2), c1)
- // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize)
+ // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
@@ -8848,6 +9116,8 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
if (Norm1 && Norm2)
if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
+ CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
+ {CombinedShift, BitsizeC});
SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
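The extra ADD of the bitsize above matters when the two rotates go in opposite directions: the combined amount (c1 % bitsize) - (c2 % bitsize) can be negative, and the fold reduces it with an unsigned remainder. Biasing by bitsize first keeps the intermediate non-negative, as this sketch of the arithmetic shows (illustrative only):

    #include <cassert>

    int main() {
      // rotl(rotr(x, 4), 1) on an 8-bit value is rotr(x, 3), i.e. rotl(x, 5).
      unsigned bw = 8, c1 = 1, c2 = 4;
      int diff = int(c1 % bw) - int(c2 % bw);        // -3, in (-bw, bw)
      unsigned norm = unsigned(diff + int(bw)) % bw; // bias, then reduce
      assert(norm == 5);
      return 0;
    }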
@@ -9112,23 +9382,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return NewSHL;
// Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
- if (N0.getOpcode() == ISD::VSCALE)
- if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
- const APInt &C0 = N0.getConstantOperandAPInt(0);
- const APInt &C1 = NC1->getAPIntValue();
- return DAG.getVScale(SDLoc(N), VT, C0 << C1);
- }
+ if (N0.getOpcode() == ISD::VSCALE && N1C) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = N1C->getAPIntValue();
+ return DAG.getVScale(SDLoc(N), VT, C0 << C1);
+ }
// Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
APInt ShlVal;
- if (N0.getOpcode() == ISD::STEP_VECTOR)
- if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
- const APInt &C0 = N0.getConstantOperandAPInt(0);
- if (ShlVal.ult(C0.getBitWidth())) {
- APInt NewStep = C0 << ShlVal;
- return DAG.getStepVector(SDLoc(N), VT, NewStep);
- }
+ if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ if (ShlVal.ult(C0.getBitWidth())) {
+ APInt NewStep = C0 << ShlVal;
+ return DAG.getStepVector(SDLoc(N), VT, NewStep);
}
+ }
return SDValue();
}
@@ -9168,6 +9437,28 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
EVT NarrowVT = LeftOp.getOperand(0).getValueType();
unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
+  // Return true if U may use the lower bits of its operands.
+ auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
+ if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
+ return true;
+ }
+ ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
+ if (!UShiftAmtSrc) {
+ return true;
+ }
+ unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
+ return UShiftAmt < NarrowVTSize;
+ };
+
+  // If the lower part of the MUL is also used and MUL_LOHI is supported,
+  // do not introduce the MULH in favor of MUL_LOHI.
+ unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ if (!ShiftOperand.hasOneUse() &&
+ TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
+ llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
+ return SDValue();
+ }
+
SDValue MulhRightOp;
if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
unsigned ActiveBits = IsSignExt
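The shape this combine targets is the high half of a widened multiply, which is exactly what a MULH instruction computes. A standalone sketch of the equivalence (illustrative only):

    #include <cassert>
    #include <cstdint>

    // (trunc (srl (mul (zext a), (zext b)), 32)) == umulh(a, b)
    static uint32_t umulh32(uint32_t a, uint32_t b) {
      return uint32_t((uint64_t(a) * uint64_t(b)) >> 32);
    }

    int main() {
      assert(umulh32(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFEu);
      assert(umulh32(0x10000u, 0x10000u) == 1u); // 2^16 * 2^16 == 2^32
      return 0;
    }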
@@ -9675,16 +9966,23 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// However when after the source operand of SRL is optimized into AND, the SRL
// itself may not be optimized further. Look for it and add the BRCOND into
// the worklist.
+ //
+  // This also tends to happen for binary operations when
+  // SimplifyDemandedBits is involved.
+  //
+  // FIXME: This is unnecessary if we process the DAG in topological order,
+ // which we plan to do. This workaround can be removed once the DAG is
+ // processed in topological order.
if (N->hasOneUse()) {
SDNode *Use = *N->use_begin();
- if (Use->getOpcode() == ISD::BRCOND)
- AddToWorklist(Use);
- else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
- // Also look pass the truncate.
+
+    // Look past the truncate.
+ if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
Use = *Use->use_begin();
- if (Use->getOpcode() == ISD::BRCOND)
- AddToWorklist(Use);
- }
+
+ if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
+ Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
+ AddToWorklist(Use);
}
// Try to transform this shift into a multiply-high if
@@ -9760,7 +10058,7 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
uint64_t PtrOff =
IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
- bool Fast = false;
+ unsigned Fast = 0;
if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
RHS->getAddressSpace(), NewAlign,
RHS->getMemOperand()->getFlags(), &Fast) &&
@@ -9843,8 +10141,8 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
// Given a ABS node, detect the following pattern:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// Generates UABD/SABD instruction.
-static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+SDValue DAGCombiner::foldABSToABD(SDNode *N) {
+ EVT VT = N->getValueType(0);
SDValue AbsOp1 = N->getOperand(0);
SDValue Op0, Op1;
@@ -9857,10 +10155,14 @@ static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
unsigned Opc0 = Op0.getOpcode();
// Check if the operands of the sub are (zero|sign)-extended.
if (Opc0 != Op1.getOpcode() ||
- (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
+ (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) {
+ // fold (abs (sub nsw x, y)) -> abds(x, y)
+ if (AbsOp1->getFlags().hasNoSignedWrap() &&
+ TLI.isOperationLegalOrCustom(ISD::ABDS, VT))
+ return DAG.getNode(ISD::ABDS, SDLoc(N), VT, Op0, Op1);
return SDValue();
+ }
- EVT VT = N->getValueType(0);
EVT VT1 = Op0.getOperand(0).getValueType();
EVT VT2 = Op1.getOperand(0).getValueType();
unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
@@ -9897,9 +10199,24 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
if (DAG.SignBitIsZero(N0))
return N0;
- if (SDValue ABD = combineABSToABD(N, DAG, TLI))
+ if (SDValue ABD = foldABSToABD(N))
return ABD;
+ // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
+ // iff zero_extend/truncate are free.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
+ TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
+ hasOperation(ISD::ABS, ExtVT)) {
+ SDLoc DL(N);
+ return DAG.getNode(
+ ISD::ZERO_EXTEND, DL, VT,
+ DAG.getNode(ISD::ABS, DL, ExtVT,
+ DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
+ }
+ }
+
return SDValue();
}
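The new abs(sub nsw x, y) → abds(x, y) fold relies on the subtract not wrapping, so |x - y| is the true signed absolute difference. A sketch that mimics the no-wrap condition by widening (illustrative only):

    #include <cassert>
    #include <cstdint>

    static int32_t abds32(int32_t x, int32_t y) {
      int64_t d = int64_t(x) - int64_t(y); // cannot wrap in the wider type
      return int32_t(d < 0 ? -d : d);
    }

    int main() {
      assert(abds32(10, 25) == 15);
      assert(abds32(-5, 7) == 12);
      return 0;
    }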
@@ -10053,14 +10370,11 @@ static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
}
-/// Generate Min/Max node
-static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
- SDValue RHS, SDValue True, SDValue False,
- ISD::CondCode CC, const TargetLowering &TLI,
- SelectionDAG &DAG) {
- if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
- return SDValue();
-
+static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
+ ISD::CondCode CC,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
switch (CC) {
case ISD::SETOLT:
@@ -10101,6 +10415,46 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
+/// Generate Min/Max node
+SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True,
+ SDValue False, ISD::CondCode CC) {
+ if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
+ return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
+
+ // If we can't directly match this, try to see if we can pull an fneg out of
+ // the select.
+ SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression(
+ True, DAG, LegalOperations, ForCodeSize);
+ if (!NegTrue)
+ return SDValue();
+
+ HandleSDNode NegTrueHandle(NegTrue);
+
+ // Try to unfold an fneg from the select if we are comparing the negated
+ // constant.
+ //
+ // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
+ //
+ // TODO: Handle fabs
+ if (LHS == NegTrue) {
+    // See if we can also pull an fneg out of the RHS so that it matches the
+    // negated constant.
+ SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression(
+ RHS, DAG, LegalOperations, ForCodeSize);
+ if (NegRHS) {
+ HandleSDNode NegRHSHandle(NegRHS);
+ if (NegRHS == False) {
+ SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
+ False, CC, TLI, DAG);
+ return DAG.getNode(ISD::FNEG, DL, VT, Combined);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
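For NaN-free inputs the unfolded pattern above is a plain algebraic identity: negating both select arms of a min is the same as negating the min. A sketch (illustrative only):

    #include <cassert>
    #include <cmath>

    int main() {
      // select (x < K), (fneg x), -K  ==  fneg(minnum(x, K))
      float K = 1.0f;
      for (float x : {-2.0f, 0.5f, 3.0f}) {
        float Sel = (x < K) ? -x : -K;
        assert(Sel == -std::fmin(x, K));
      }
      return 0;
    }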
/// If a (v)select has a condition value that is a sign-bit test, try to smear
/// the condition operand sign-bit across the value width and use it as a mask.
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
@@ -10138,6 +10492,25 @@ static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
+ const TargetLowering &TLI) {
+ if (!TLI.convertSelectOfConstantsToMath(VT))
+ return false;
+
+ if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
+ return true;
+ if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ return true;
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
+ return true;
+ if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
+ return true;
+
+ return false;
+}
+
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -10154,96 +10527,106 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
if (!C1 || !C2)
return SDValue();
+ if (CondVT != MVT::i1 || LegalOperations) {
+ // fold (select Cond, 0, 1) -> (xor Cond, 1)
+    // We can't do this reliably if integer-based booleans have different
+    // contents from floating-point-based booleans. This is because we can't
+    // tell whether we
+ // have an integer-based boolean or a floating-point-based boolean unless we
+ // can find the SETCC that produced it and inspect its operands. This is
+ // fairly easy if C is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
+ if (CondVT.isInteger() &&
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ C1->isZero() && C2->isOne()) {
+ SDValue NotCond =
+ DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
+ if (VT.bitsEq(CondVT))
+ return NotCond;
+ return DAG.getZExtOrTrunc(NotCond, DL, VT);
+ }
+
+ return SDValue();
+ }
+
// Only do this before legalization to avoid conflicting with target-specific
// transforms in the other direction (create a select from a zext/sext). There
// is also a target-independent combine here in DAGCombiner in the other
// direction for (select Cond, -1, 0) when the condition is not i1.
- if (CondVT == MVT::i1 && !LegalOperations) {
- if (C1->isZero() && C2->isOne()) {
- // select Cond, 0, 1 --> zext (!Cond)
- SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
- if (VT != MVT::i1)
- NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
- return NotCond;
- }
- if (C1->isZero() && C2->isAllOnes()) {
- // select Cond, 0, -1 --> sext (!Cond)
- SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
- if (VT != MVT::i1)
- NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
- return NotCond;
- }
- if (C1->isOne() && C2->isZero()) {
- // select Cond, 1, 0 --> zext (Cond)
- if (VT != MVT::i1)
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
- return Cond;
- }
- if (C1->isAllOnes() && C2->isZero()) {
- // select Cond, -1, 0 --> sext (Cond)
- if (VT != MVT::i1)
- Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
- return Cond;
- }
-
- // Use a target hook because some targets may prefer to transform in the
- // other direction.
- if (TLI.convertSelectOfConstantsToMath(VT)) {
- // For any constants that differ by 1, we can transform the select into an
- // extend and add.
- const APInt &C1Val = C1->getAPIntValue();
- const APInt &C2Val = C2->getAPIntValue();
- if (C1Val - 1 == C2Val) {
- // select Cond, C1, C1-1 --> add (zext Cond), C1-1
- if (VT != MVT::i1)
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
- return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
- }
- if (C1Val + 1 == C2Val) {
- // select Cond, C1, C1+1 --> add (sext Cond), C1+1
- if (VT != MVT::i1)
- Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
- return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
- }
+ assert(CondVT == MVT::i1 && !LegalOperations);
- // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
- if (C1Val.isPowerOf2() && C2Val.isZero()) {
- if (VT != MVT::i1)
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
- SDValue ShAmtC =
- DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
- return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
- }
+ // select Cond, 1, 0 --> zext (Cond)
+ if (C1->isOne() && C2->isZero())
+ return DAG.getZExtOrTrunc(Cond, DL, VT);
- if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
- return V;
- }
+ // select Cond, -1, 0 --> sext (Cond)
+ if (C1->isAllOnes() && C2->isZero())
+ return DAG.getSExtOrTrunc(Cond, DL, VT);
+
+ // select Cond, 0, 1 --> zext (!Cond)
+ if (C1->isZero() && C2->isOne()) {
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
+ return NotCond;
+ }
+
+ // select Cond, 0, -1 --> sext (!Cond)
+ if (C1->isZero() && C2->isAllOnes()) {
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
+ return NotCond;
+ }
+ // Use a target hook because some targets may prefer to transform in the
+ // other direction.
+ if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
return SDValue();
+
+ // For any constants that differ by 1, we can transform the select into
+ // an extend and add.
+ const APInt &C1Val = C1->getAPIntValue();
+ const APInt &C2Val = C2->getAPIntValue();
+
+ // select Cond, C1, C1-1 --> add (zext Cond), C1-1
+ if (C1Val - 1 == C2Val) {
+ Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
- // fold (select Cond, 0, 1) -> (xor Cond, 1)
- // We can't do this reliably if integer based booleans have different contents
- // to floating point based booleans. This is because we can't tell whether we
- // have an integer-based boolean or a floating-point-based boolean unless we
- // can find the SETCC that produced it and inspect its operands. This is
- // fairly easy if C is the SETCC node, but it can potentially be
- // undiscoverable (or not reasonably discoverable). For example, it could be
- // in another basic block or it could require searching a complicated
- // expression.
- if (CondVT.isInteger() &&
- TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
- TargetLowering::ZeroOrOneBooleanContent &&
- TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
- TargetLowering::ZeroOrOneBooleanContent &&
- C1->isZero() && C2->isOne()) {
- SDValue NotCond =
- DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
- if (VT.bitsEq(CondVT))
- return NotCond;
- return DAG.getZExtOrTrunc(NotCond, DL, VT);
+ // select Cond, C1, C1+1 --> add (sext Cond), C1+1
+ if (C1Val + 1 == C2Val) {
+ Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+
+ // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
+ if (C1Val.isPowerOf2() && C2Val.isZero()) {
+ Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ SDValue ShAmtC =
+ DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
+ return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
}
+ // select Cond, -1, C --> or (sext Cond), C
+ if (C1->isAllOnes()) {
+ Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
+ }
+
+ // select Cond, C, -1 --> or (sext (not Cond)), C
+ if (C2->isAllOnes()) {
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
+ }
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
+
return SDValue();
}
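Each of the constant-select folds above is an exact integer identity over the 0/1 (zext) or 0/-1 (sext) encoding of the condition, as this sketch checks (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (bool Cond : {false, true}) {
        int32_t Z = Cond ? 1 : 0;  // zext Cond
        int32_t S = Cond ? -1 : 0; // sext Cond
        int32_t C1 = 8, C2 = 7;    // constants differing by 1
        assert((Cond ? C1 : C2) == Z + C2);  // add (zext Cond), C1-1
        assert((Cond ? 8 : 0) == (Z << 3));  // (zext Cond) << log2(Pow2)
        assert((Cond ? -1 : 5) == (S | 5));  // or (sext Cond), C
      }
      return 0;
    }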
@@ -10351,10 +10734,17 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
- if (SDValue V = foldSelectOfConstants(N))
+ if (SDValue V = foldBoolSelectToLogic(N, DAG))
return V;
- if (SDValue V = foldBoolSelectToLogic(N, DAG))
+ // select (not Cond), N1, N2 -> select Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
+ SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
+ SelectOp->setFlags(Flags);
+ return SelectOp;
+ }
+
+ if (SDValue V = foldSelectOfConstants(N))
return V;
// If we can fold this based on the true/false value, do so.
@@ -10439,13 +10829,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
- // select (not Cond), N1, N2 -> select Cond, N2, N1
- if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
- SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
- SelectOp->setFlags(Flags);
- return SelectOp;
- }
-
// Fold selects based on a setcc into other things, such as min/max/abs.
if (N0.getOpcode() == ISD::SETCC) {
SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
@@ -10456,8 +10839,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
//
// This is OK if we don't care what happens if either operand is a NaN.
if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
- if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
- CC, TLI, DAG))
+ if (SDValue FMinMax =
+ combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
return FMinMax;
// Use 'unsigned add with overflow' to optimize an unsigned saturating add.
@@ -10568,23 +10951,37 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
}
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
- SelectionDAG &DAG) {
- if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
+ SelectionDAG &DAG, const SDLoc &DL) {
+ if (Index.getOpcode() != ISD::ADD)
return false;
// Only perform the transformation when existing operands can be reused.
if (IndexIsScaled)
return false;
- // For now we check only the LHS of the add.
- SDValue LHS = Index.getOperand(0);
- SDValue SplatVal = DAG.getSplatValue(LHS);
- if (!SplatVal || SplatVal.getValueType() != BasePtr.getValueType())
+ if (!isNullConstant(BasePtr) && !Index.hasOneUse())
return false;
- BasePtr = SplatVal;
- Index = Index.getOperand(1);
- return true;
+ EVT VT = BasePtr.getValueType();
+ if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
+ SplatVal && SplatVal.getValueType() == VT) {
+ if (isNullConstant(BasePtr))
+ BasePtr = SplatVal;
+ else
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ Index = Index.getOperand(1);
+ return true;
+ }
+ if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
+ SplatVal && SplatVal.getValueType() == VT) {
+ if (isNullConstant(BasePtr))
+ BasePtr = SplatVal;
+ else
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ Index = Index.getOperand(0);
+ return true;
+ }
+ return false;
}
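The generalized refinement above is justified lane-by-lane by simple pointer arithmetic: folding a splatted offset S into the scalar base does not change any address. A sketch (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Base = 0x1000, S = 0x40; // scalar base, splatted index part
      int64_t V[4] = {0, 8, 16, 24};    // per-lane variable index part
      for (int i = 0; i < 4; ++i)
        assert(Base + (S + V[i]) == (Base + S) + V[i]);
      return 0;
    }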
// Fold sext/zext of index into index type.
@@ -10619,6 +11016,37 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
return false;
}
+SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
+ VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
+ SDValue Mask = MSC->getMask();
+ SDValue Chain = MSC->getChain();
+ SDValue Index = MSC->getIndex();
+ SDValue Scale = MSC->getScale();
+ SDValue StoreVal = MSC->getValue();
+ SDValue BasePtr = MSC->getBasePtr();
+ SDValue VL = MSC->getVectorLength();
+ ISD::MemIndexType IndexType = MSC->getIndexType();
+ SDLoc DL(N);
+
+ // Zap scatters with a zero mask.
+ if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
+ SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType);
+ }
+
+ if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
@@ -10634,7 +11062,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
- if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) {
+ if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
DL, Ops, MSC->getMemOperand(), IndexType,
@@ -10700,8 +11128,9 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// If this is a TRUNC followed by a masked store, fold this into a masked
// truncating store. We can do this even if this is already a masked
// truncstore.
+  // TODO: Try to combine this into a masked compress store if possible.
if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
- MST->isUnindexed() &&
+ MST->isUnindexed() && !MST->isCompressingStore() &&
TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
MST->getMemoryVT(), LegalOperations)) {
auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
@@ -10715,6 +11144,34 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
+ VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
+ SDValue Mask = MGT->getMask();
+ SDValue Chain = MGT->getChain();
+ SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
+ SDValue BasePtr = MGT->getBasePtr();
+ SDValue VL = MGT->getVectorLength();
+ ISD::MemIndexType IndexType = MGT->getIndexType();
+ SDLoc DL(N);
+
+ if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
+ SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getGatherVP(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType);
+ }
+
+ if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
+ SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
+ return DAG.getGatherVP(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
@@ -10730,7 +11187,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return CombineTo(N, PassThru, MGT->getChain());
- if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) {
+ if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(
DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
@@ -10782,7 +11239,7 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
- !TLI.convertSelectOfConstantsToMath(VT) ||
+ !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
!ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
!ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
return SDValue();
@@ -10895,8 +11352,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// NaN.
//
if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
- if (SDValue FMinMax =
- combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
+ if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
return FMinMax;
}
@@ -11037,8 +11493,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
/*AllowUndefs*/ true)) {
- OpRHS = DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT), OpRHS);
+ OpRHS = DAG.getNegative(OpRHS, DL, VT);
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
@@ -11109,6 +11564,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
if (N2 == N3)
return N2;
+ // select_cc bool, 0, x, y, seteq -> select bool, y, x
+ if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
+ isNullConstant(N1))
+ return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
+
// Determine if the condition we're dealing with is constant
if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
CC, SDLoc(N), false)) {
@@ -11323,9 +11783,11 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
SDLoc DL(N);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
- Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
- && "Expected EXTEND dag node in input!");
+ Opcode == ISD::ANY_EXTEND ||
+ Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ "Expected EXTEND dag node in input!");
// fold (sext c1) -> c1
// fold (zext c1) -> c1
@@ -11373,15 +11835,13 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
SmallVector<SDValue, 8> Elts;
unsigned NumElts = VT.getVectorNumElements();
- // For zero-extensions, UNDEF elements still guarantee to have the upper
- // bits set to zero.
- bool IsZext =
- Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
-
for (unsigned i = 0; i != NumElts; ++i) {
SDValue Op = N0.getOperand(i);
if (Op.isUndef()) {
- Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
+ if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
+ Elts.push_back(DAG.getUNDEF(SVT));
+ else
+ Elts.push_back(DAG.getConstant(0, DL, SVT));
continue;
}
@@ -11952,7 +12412,7 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
return SCC;
- if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
+ if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
EVT SetCCVT = getSetCCResultType(N00VT);
// Don't do this transform for i1 because there's a select transform
// that would reverse it.
@@ -11973,6 +12433,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
+ return FoldedVOp;
+
// sext(undef) = 0 because the top bit will all be the same.
if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -11985,6 +12449,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
+ // fold (sext (sext_inreg x)) -> (sext (trunc x))
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ SDValue N00 = N0.getOperand(0);
+ EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
+    if (N00.getOpcode() == ISD::TRUNCATE &&
+        (!LegalOperations || TLI.isTypeLegal(ExtVT))) {
+ SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00.getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
+ }
+ }
+
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
@@ -12121,7 +12595,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
+ return DAG.getNegative(Zext, DL, VT);
}
// Eliminate this sign extend by doing a decrement in the destination type:
// sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
@@ -12218,10 +12692,41 @@ static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
}
+// If we have (zext (abs X)) where X is a type that will be promoted by type
+// legalization, convert to (abs (sext X)). But don't extend past a legal type.
+static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
+ assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
+
+ EVT VT = Extend->getValueType(0);
+ if (VT.isVector())
+ return SDValue();
+
+ SDValue Abs = Extend->getOperand(0);
+ if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
+ return SDValue();
+
+ EVT AbsVT = Abs.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
+ TargetLowering::TypePromoteInteger)
+ return SDValue();
+
+ EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
+
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
+ SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
+ return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
+}
+
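The widenAbs rewrite is exhaustively checkable for a narrow type: doing the abs in the promoted type after a sign extend yields the same low bits. A sketch for i8 (illustrative only):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    int main() {
      for (int v = -128; v <= 127; ++v) {
        int8_t X = int8_t(v);
        uint32_t Narrow = uint8_t(std::abs(int(X))); // zext (abs X)
        int32_t SExt = X;                            // sext X to the legal type
        uint32_t Wide = uint8_t(std::abs(SExt));     // trunc/zext of abs(sext X)
        assert(Narrow == Wide);
      }
      return 0;
    }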
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
+ return FoldedVOp;
+
// zext(undef) = 0
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
@@ -12478,6 +12983,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SDValue NewCtPop = widenCtPop(N, DAG))
return NewCtPop;
+ if (SDValue V = widenAbs(N, DAG))
+ return V;
+
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
return Res;
@@ -12904,8 +13412,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
auto AdjustBigEndianShift = [&](unsigned ShAmt) {
unsigned LVTStoreBits =
- LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
- unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
+ LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
return LVTStoreBits - EVTStoreBits - ShAmt;
};
@@ -13146,16 +13654,75 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
}
+ // Fold (iM_signext_inreg
+ // (extract_subvector (zext|anyext|sext iN_v to _) _)
+ // from iN)
+ // -> (extract_subvector (signext iN_v to iM))
+ if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
+ ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
+ SDValue InnerExt = N0.getOperand(0);
+ EVT InnerExtVT = InnerExt->getValueType(0);
+ SDValue Extendee = InnerExt->getOperand(0);
+
+ if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
+ SDValue SignExtExtendee =
+ DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
+ N0.getOperand(1));
+ }
+ }
+
return SDValue();
}
+static SDValue
+foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG,
+ bool LegalOperations) {
+ unsigned InregOpcode = N->getOpcode();
+ unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
+
+ SDValue Src = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
+ Src.getValueType().getVectorElementType(),
+ VT.getVectorElementCount());
+
+ assert((InregOpcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ InregOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ InregOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ "Expected EXTEND_VECTOR_INREG dag node in input!");
+
+  // Profitability check: our operand must be a one-use CONCAT_VECTORS.
+ // FIXME: one-use check may be overly restrictive
+ if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+
+  // Profitability check: we must be extending exactly one of its operands.
+ // FIXME: this is probably overly restrictive.
+ Src = Src.getOperand(0);
+ if (Src.getValueType() != SrcVT)
+ return SDValue();
+
+ if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
+ return SDValue();
+
+ return DAG.getNode(Opcode, SDLoc(N), VT, Src);
+}
+
SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
- if (N0.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ if (N0.isUndef()) {
+ // aext_vector_inreg(undef) = undef because the top bits are undefined.
+ // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
+ return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
+ ? DAG.getUNDEF(VT)
+ : DAG.getConstant(0, SDLoc(N), VT);
+ }
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -13163,6 +13730,10 @@ SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
+ if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, TLI, DAG,
+ LegalOperations))
+ return R;
+
return SDValue();
}
@@ -13420,18 +13991,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- // See if we can simplify the input to this truncate through knowledge that
- // only the low bits are being used.
- // For example "trunc (or (shl x, 8), y)" // -> trunc y
- // Currently we only perform this optimization on scalars because vectors
- // may have different active low bits.
- if (!VT.isVector()) {
- APInt Mask =
- APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
- if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
- }
-
// fold (truncate (extract_subvector(ext x))) ->
// (extract_subvector x)
// TODO: This can be generalized to cover cases where the truncate and extract
@@ -13536,7 +14095,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
- bool LD1Fast = false;
+ unsigned LD1Fast = 0;
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
@@ -13866,15 +14425,72 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;
- // Fold freeze(bitcast(x)) -> bitcast(freeze(x)).
- // TODO: Replace with pushFreezeToPreventPoisonFromPropagating fold.
- if (N0.getOpcode() == ISD::BITCAST)
- return DAG.getBitcast(N->getValueType(0),
- DAG.getNode(ISD::FREEZE, SDLoc(N0),
- N0.getOperand(0).getValueType(),
- N0.getOperand(0)));
+ // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
+ // Try to push freeze through instructions that propagate but don't produce
+  // poison as far as possible. If an operand of freeze meets three
+  // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
+  // but one of its operands are guaranteed non-poison (or it is a
+  // BUILD_VECTOR or similar), then push the freeze through to the operands
+  // that are not guaranteed non-poison.
+ // NOTE: we will strip poison-generating flags, so ignore them here.
+ if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
+ /*ConsiderFlags*/ false) ||
+ N0->getNumValues() != 1 || !N0->hasOneUse())
+ return SDValue();
- return SDValue();
+ bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR;
+
+ SmallSetVector<SDValue, 8> MaybePoisonOperands;
+ for (SDValue Op : N0->ops()) {
+ if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
+ /*Depth*/ 1))
+ continue;
+ bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
+ bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
+ if (!HadMaybePoisonOperands)
+ continue;
+ if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
+ // Multiple maybe-poison ops when not allowed - bail out.
+ return SDValue();
+ }
+ }
+  // NOTE: the whole op may still not be guaranteed not to be undef or
+  // poison, because it could create undef or poison via its
+  // poison-generating flags. So not finding any maybe-poison operands is
+  // fine.
+
+ for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
+ // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
+ if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
+ continue;
+ // First, freeze each offending operand.
+ SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
+ // Then, change all other uses of unfrozen operand to use frozen operand.
+ DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
+ if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
+ FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
+ // But, that also updated the use in the freeze we just created, thus
+ // creating a cycle in a DAG. Let's undo that by mutating the freeze.
+ DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
+ MaybePoisonOperand);
+ }
+ }
+
+ // The whole node may have been updated, so the value we were holding
+ // may no longer be valid. Re-fetch the operand we're `freeze`ing.
+ N0 = N->getOperand(0);
+
+  // Finally, recreate the node; its operands were updated to use frozen
+  // operands, so we just need to use its "original" operands.
+ SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
+  // Special-handle ISD::UNDEF; each single one of them can be its own thing.
+ for (SDValue &Op : Ops) {
+ if (Op.getOpcode() == ISD::UNDEF)
+ Op = DAG.getFreeze(Op);
+ }
+ // NOTE: this strips poison generating flags.
+ SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
+ assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
+ "Can't create node that may be undef/poison!");
+ return R;
}
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
@@ -14038,26 +14654,37 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
+  // This also works with nested fma instructions:
+  // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
+  // fma A, B, (fma C, D, (fma E, F, G))
+  // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
+  // fma A, B, (fma C, D, (fma E, F, G)).
// This requires reassociation because it changes the order of operations.
- SDValue FMA, E;
- if (CanReassociate && isFusedOp(N0) &&
- N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
- N0.getOperand(2).hasOneUse()) {
- FMA = N0;
- E = N1;
- } else if (CanReassociate && isFusedOp(N1) &&
- N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
- N1.getOperand(2).hasOneUse()) {
- FMA = N1;
- E = N0;
- }
- if (FMA && E) {
- SDValue A = FMA.getOperand(0);
- SDValue B = FMA.getOperand(1);
- SDValue C = FMA.getOperand(2).getOperand(0);
- SDValue D = FMA.getOperand(2).getOperand(1);
- SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
- return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
+ if (CanReassociate) {
+ SDValue FMA, E;
+ if (isFusedOp(N0) && N0.hasOneUse()) {
+ FMA = N0;
+ E = N1;
+ } else if (isFusedOp(N1) && N1.hasOneUse()) {
+ FMA = N1;
+ E = N0;
+ }
+
+ SDValue TmpFMA = FMA;
+ while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
+ SDValue FMul = TmpFMA->getOperand(2);
+ if (FMul.getOpcode() == ISD::FMUL && FMul.hasOneUse()) {
+ SDValue C = FMul.getOperand(0);
+ SDValue D = FMul.getOperand(1);
+ SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
+ DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
+ // Replacing the inner FMul could cause the outer FMA to be simplified
+ // away.
+ return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue() : FMA;
+ }
+
+ TmpFMA = TmpFMA->getOperand(2);
+ }
}
// Look through FP_EXTEND nodes to do more combining.
@@ -14357,8 +14984,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
};
- auto isContractableAndReassociableFMUL = [isContractableFMUL,
- isReassociable](SDValue N) {
+ auto isContractableAndReassociableFMUL = [&isContractableFMUL,
+ &isReassociable](SDValue N) {
return isContractableFMUL(N) && isReassociable(N.getNode());
};
@@ -14593,8 +15220,8 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
- bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
+ SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
+ SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -14691,8 +15318,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
- bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
- bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
+ SDNode *CFP00 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ SDNode *CFP01 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
@@ -14712,8 +15341,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
if (N1.getOpcode() == ISD::FMUL) {
- bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
- bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
+ SDNode *CFP10 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ SDNode *CFP11 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
@@ -14733,7 +15364,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
if (N0.getOpcode() == ISD::FADD) {
- bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ SDNode *CFP00 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
@@ -14743,7 +15375,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
if (N1.getOpcode() == ISD::FADD) {
- bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ SDNode *CFP10 =
+ DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
@@ -14956,12 +15589,14 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
- SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
- if (NegN0 && NegN1 &&
- (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
- CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
+ if (NegN0) {
+ HandleSDNode NegN0Handle(NegN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
+ return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
+ }
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
// fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
@@ -14990,7 +15625,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
case ISD::SETLT:
case ISD::SETLE:
std::swap(TrueOpnd, FalseOpnd);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOGT:
case ISD::SETUGT:
case ISD::SETOGE:
@@ -15047,12 +15682,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
- SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
- if (NegN0 && NegN1 &&
- (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
- CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
+ if (NegN0) {
+ HandleSDNode NegN0Handle(NegN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
+ return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
+ }
// FIXME: use fast math flags instead of Options.UnsafeFPMath
if (Options.UnsafeFPMath) {
@@ -15350,12 +15987,14 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
- SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
- if (NegN0 && NegN1 &&
- (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
- CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
+ if (NegN0) {
+ HandleSDNode NegN0Handle(NegN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
+ }
return SDValue();
}
@@ -15422,11 +16061,7 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
if (N1Op0VT == MVT::f128)
return false;
- // Avoid mismatched vector operand types, for better instruction selection.
- if (N1Op0VT.isVector())
- return false;
-
- return true;
+ return !N1Op0VT.isVector() || EnableVectorFCopySignExtendRound;
}
return false;
}
@@ -15748,12 +16383,12 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_round c1fp) -> c1fp
- if (N0CFP)
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
+ return C;
// fold (fp_round (fp_extend x)) -> x
if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
@@ -15781,8 +16416,9 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// Also, this is a value preserving truncation iff both fp_round's are.
if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
SDLoc DL(N);
- return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
- DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
+ return DAG.getNode(
+ ISD::FP_ROUND, DL, VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
}
}
@@ -15805,6 +16441,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
+ return FoldedVOp;
+
// If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
if (N->hasOneUse() &&
N->use_begin()->getOpcode() == ISD::FP_ROUND)
@@ -15840,11 +16480,11 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
CombineTo(N, ExtLoad);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
- N0.getValueType(), ExtLoad,
- DAG.getIntPtrConstant(1, SDLoc(N0))),
- ExtLoad.getValue(1));
+ CombineTo(
+ N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
+ DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
+ ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -16599,7 +17239,6 @@ static inline ElementCount numVectorEltsOrZero(EVT T) {
}
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
- Val = ST->getValue();
EVT STType = Val.getValueType();
EVT STMemType = ST->getMemoryVT();
if (STType == STMemType)
@@ -16655,7 +17294,7 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
SDValue Chain = LD->getOperand(0);
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
// TODO: Relax this restriction for unordered atomics (see D66309)
- if (!ST || !ST->isSimple())
+ if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
return SDValue();
EVT LDType = LD->getValueType(0);
@@ -16691,9 +17330,10 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// significant bit in the loaded value maps to the least significant bit in
// the stored value). With Offset=n (for n > 0) the loaded value starts at the
// n-th least significant byte of the stored value.
+ int64_t OrigOffset = Offset;
if (DAG.getDataLayout().isBigEndian())
- Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
- (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
+ Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
+ (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
8 -
Offset;
@@ -16705,8 +17345,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (LdStScalable)
STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
else
- STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
- StMemSize.getFixedSize());
+ STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
+ StMemSize.getFixedValue());
auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
if (LD->isIndexed()) {
@@ -16735,18 +17375,30 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// Mask to size of LDMemType
auto Mask =
DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
- StMemSize.getFixedSize()),
+ StMemSize.getFixedValue()),
SDLoc(ST), STType);
auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
return ReplaceLd(LD, Val, Chain);
}
}
+  // Handle some cases for big-endian that would have been Offset 0 (and thus
+  // already handled) on little-endian.
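+  // For example, storing an i32 and then loading an i16 from the same base
+  // address reads the value's high half on big-endian; shifting the stored
+  // value right by Offset * 8 (here 16) lets the Offset == 0 path handle it.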
+ SDValue Val = ST->getValue();
+ if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
+ if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
+ !LDType.isVector() && isTypeLegal(STType) &&
+ TLI.isOperationLegal(ISD::SRL, STType)) {
+ Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
+ DAG.getConstant(Offset * 8, SDLoc(LD), STType));
+ Offset = 0;
+ }
+ }
+
// TODO: Deal with nonzero offset.
if (LD->getBasePtr().isUndef() || Offset != 0)
return SDValue();
// Model necessary truncations / extensions.
- SDValue Val;
// Truncate Value To Stored Memory Size.
do {
if (!getTruncatedStoreValue(ST, Val))
@@ -17186,7 +17838,7 @@ struct LoadedSlice {
// Check if it will be merged with the load.
// 1. Check the alignment / fast memory access constraint.
- bool IsFast = false;
+ unsigned IsFast = 0;
if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
Origin->getAddressSpace(), getAlign(),
Origin->getMemOperand()->getFlags(), &IsFast) ||
@@ -17689,7 +18341,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
- bool IsFast = false;
+ unsigned IsFast = 0;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
LD->getAddressSpace(), NewAlign,
@@ -17748,8 +18400,8 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
if (VTSize.isScalable())
return SDValue();
- bool FastLD = false, FastST = false;
- EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
+ unsigned FastLD = 0, FastST = 0;
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
@@ -17892,7 +18544,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
- Optional<MachineMemOperand::Flags> Flags;
+ std::optional<MachineMemOperand::Flags> Flags;
AAMDNodes AAInfo;
for (unsigned I = 0; I != NumStores; ++I) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
@@ -17967,6 +18619,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// We may need to add a bitcast here to get types to line up.
if (MemVTScalarTy != Val.getValueType().getScalarType()) {
Val = DAG.getBitcast(MemVT, Val);
+ } else if (MemVT.isVector() &&
+ Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
} else {
unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
: ISD::EXTRACT_VECTOR_ELT;
@@ -18357,7 +19012,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- bool IsFast = false;
+ unsigned IsFast = 0;
// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
@@ -18467,7 +19122,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
- bool IsFast = false;
+ unsigned IsFast = 0;
// Break early when size is too large to be legal.
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
@@ -18620,8 +19275,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;
- bool IsFastSt = false;
- bool IsFastLd = false;
+ unsigned IsFastSt = 0;
+ unsigned IsFastLd = 0;
// Don't try vector types if we need a rotate. We may still fail the
// legality checks for the integer type, but we can't handle the rotate
// case with vectors.
@@ -19076,16 +19731,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
ST->getMemoryVT().getScalarSizeInBits());
- // See if we can simplify the input to this truncstore with knowledge that
- // only the low bits are being used. For example:
- // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ // See if we can simplify the operation with SimplifyDemandedBits, which
+ // only works if the value has a single use.
AddToWorklist(Value.getNode());
- if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
- return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
- ST->getMemOperand());
-
- // Otherwise, see if we can simplify the operation with
- // SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
// Re-visit the store if anything changed and the store hasn't been merged
// with another node (N is deleted) SimplifyDemandedBits will add Value's
@@ -19095,6 +19743,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
AddToWorklist(N);
return SDValue(N, 0);
}
+
+ // Otherwise, see if we can simplify the input to this truncstore with
+ // knowledge that only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ if (SDValue Shorter =
+ TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
+ return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
+ ST->getMemOperand());
+
+ // If we're storing a truncated constant, see if we can simplify it.
+ // TODO: Move this to targetShrinkDemandedConstant?
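+    // For example: "truncstore (i32 0x12345678), i16" can become
+    // "truncstore (i32 0x5678), i16", exposing a cheaper constant.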
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
+ if (!Cst->isOpaque()) {
+ const APInt &CValue = Cst->getAPIntValue();
+ APInt NewVal = CValue & TruncDemandedBits;
+ if (NewVal != CValue) {
+ SDValue Shorter =
+ DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
+ return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
+ ST->getMemoryVT(), ST->getMemOperand());
+ }
+ }
}
// If this is a load followed by a store to the same location, then the store
@@ -19235,7 +19905,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
// If we store purely within object bounds just before its lifetime ends,
// we can remove the store.
if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
- StoreSize.getFixedSize() * 8)) {
+ StoreSize.getFixedValue() * 8)) {
LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
dbgs() << "\nwithin LIFETIME_END of : ";
LifetimeEndBase.dump(); dbgs() << "\n");
@@ -19355,94 +20025,113 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
return St1;
}
-/// Convert a disguised subvector insertion into a shuffle:
-SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
- assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
- "Expected extract_vector_elt");
- SDValue InsertVal = N->getOperand(1);
- SDValue Vec = N->getOperand(0);
+// Merge an insertion into an existing shuffle:
+// (insert_vector_elt (vector_shuffle X, Y, Mask),
+//   (extract_vector_elt X, N), InsIndex)
+// --> (vector_shuffle X, Y, NewMask)
+// and variations where shuffle operands may be CONCAT_VECTORS.
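+//
+// For example, inserting X[1] into lane 2 of (vector_shuffle<0,1,2,3> X, Y)
+// just rewrites the mask: --> (vector_shuffle<0,1,1,3> X, Y).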
+static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &NewMask, SDValue Elt,
+ unsigned InsIndex) {
+ if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Elt.getOperand(1)))
+ return false;
- // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
- // InsIndex)
- // --> (vector_shuffle X, Y) and variations where shuffle operands may be
- // CONCAT_VECTORS.
- if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
- InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(InsertVal.getOperand(1))) {
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
- ArrayRef<int> Mask = SVN->getMask();
+ // Vec's operand 0 is using indices from 0 to N-1 and
+ // operand 1 from N to 2N - 1, where N is the number of
+ // elements in the vectors.
+ SDValue InsertVal0 = Elt.getOperand(0);
+ int ElementOffset = -1;
+
+ // We explore the inputs of the shuffle in order to see if we find the
+ // source of the extract_vector_elt. If so, we can use it to modify the
+ // shuffle rather than perform an insert_vector_elt.
+ SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
+ ArgWorkList.emplace_back(Mask.size(), Y);
+ ArgWorkList.emplace_back(0, X);
+
+ while (!ArgWorkList.empty()) {
+ int ArgOffset;
+ SDValue ArgVal;
+ std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
+
+ if (ArgVal == InsertVal0) {
+ ElementOffset = ArgOffset;
+ break;
+ }
- SDValue X = Vec.getOperand(0);
- SDValue Y = Vec.getOperand(1);
-
- // Vec's operand 0 is using indices from 0 to N-1 and
- // operand 1 from N to 2N - 1, where N is the number of
- // elements in the vectors.
- SDValue InsertVal0 = InsertVal.getOperand(0);
- int ElementOffset = -1;
-
- // We explore the inputs of the shuffle in order to see if we find the
- // source of the extract_vector_elt. If so, we can use it to modify the
- // shuffle rather than perform an insert_vector_elt.
- SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
- ArgWorkList.emplace_back(Mask.size(), Y);
- ArgWorkList.emplace_back(0, X);
-
- while (!ArgWorkList.empty()) {
- int ArgOffset;
- SDValue ArgVal;
- std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
-
- if (ArgVal == InsertVal0) {
- ElementOffset = ArgOffset;
- break;
+ // Peek through concat_vector.
+ if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
+ int CurrentArgOffset =
+ ArgOffset + ArgVal.getValueType().getVectorNumElements();
+ int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
+ for (SDValue Op : reverse(ArgVal->ops())) {
+ CurrentArgOffset -= Step;
+ ArgWorkList.emplace_back(CurrentArgOffset, Op);
}
- // Peek through concat_vector.
- if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
- int CurrentArgOffset =
- ArgOffset + ArgVal.getValueType().getVectorNumElements();
- int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
- for (SDValue Op : reverse(ArgVal->ops())) {
- CurrentArgOffset -= Step;
- ArgWorkList.emplace_back(CurrentArgOffset, Op);
- }
-
- // Make sure we went through all the elements and did not screw up index
- // computation.
- assert(CurrentArgOffset == ArgOffset);
- }
+ // Make sure we went through all the elements and did not screw up index
+ // computation.
+ assert(CurrentArgOffset == ArgOffset);
}
+ }
- // If we failed to find a match, see if we can replace an UNDEF shuffle
- // operand.
- if (ElementOffset == -1 && Y.isUndef() &&
- InsertVal0.getValueType() == Y.getValueType()) {
- ElementOffset = Mask.size();
- Y = InsertVal0;
- }
+ // If we failed to find a match, see if we can replace an UNDEF shuffle
+ // operand.
+ if (ElementOffset == -1) {
+ if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
+ return false;
+ ElementOffset = Mask.size();
+ Y = InsertVal0;
+ }
- if (ElementOffset != -1) {
- SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
+ NewMask.assign(Mask.begin(), Mask.end());
+ NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
+ assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
+ "NewMask[InsIndex] is out of bound");
+ return true;
+}
- auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
- NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
- assert(NewMask[InsIndex] <
- (int)(2 * Vec.getValueType().getVectorNumElements()) &&
- NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
+// Merge an insertion into an existing shuffle:
+// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
+// InsIndex)
+// --> (vector_shuffle X, Y) and variations where shuffle operands may be
+// CONCAT_VECTORS.
+SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
+ assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ "Expected extract_vector_elt");
+ SDValue InsertVal = N->getOperand(1);
+ SDValue Vec = N->getOperand(0);
- SDValue LegalShuffle =
- TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
- Y, NewMask, DAG);
- if (LegalShuffle)
- return LegalShuffle;
- }
+ auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
+ if (!SVN || !Vec.hasOneUse())
+ return SDValue();
+
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue X = Vec.getOperand(0);
+ SDValue Y = Vec.getOperand(1);
+
+ SmallVector<int, 16> NewMask(Mask);
+ if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
+ SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
+ Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
}
- // insert_vector_elt V, (bitcast X from vector type), IdxC -->
- // bitcast(shuffle (bitcast V), (extended X), Mask)
- // Note: We do not use an insert_subvector node because that requires a
- // legal subvector type.
+ return SDValue();
+}
+
+// Convert a disguised subvector insertion into a shuffle:
+// insert_vector_elt V, (bitcast X from vector type), IdxC -->
+// bitcast(shuffle (bitcast V), (extended X), Mask)
+// Note: We do not use an insert_subvector node because that requires a
+// legal subvector type.
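+// For example (little-endian, illustrative): inserting (i32 bitcast (v2i16 X))
+// into lane 1 of v4i32 V can become a v8i16 shuffle of (bitcast V) and a
+// widened X with mask <0,1,8,9,4,5,6,7>, bitcast back to v4i32.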
+SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+ assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ "Expected extract_vector_elt");
+ SDValue InsertVal = N->getOperand(1);
+
if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
!InsertVal.getOperand(0).getValueType().isVector())
return SDValue();
@@ -19517,13 +20206,8 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (!IndexC) {
// If this is variable insert to undef vector, it might be better to splat:
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
- if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
- if (VT.isScalableVector())
- return DAG.getSplatVector(VT, DL, InVal);
-
- SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
- return DAG.getBuildVector(VT, DL, Ops);
- }
+ if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
+ return DAG.getSplat(VT, DL, InVal);
return SDValue();
}
@@ -19535,9 +20219,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// We must know which element is being inserted for folds below here.
unsigned Elt = IndexC->getZExtValue();
- if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
- return Shuf;
-
// Handle <1 x ???> vector insertion special cases.
if (NumElts == 1) {
// insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
@@ -19567,6 +20248,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
}
+ if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
+ return Shuf;
+
+ if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
+ return Shuf;
+
// Attempt to convert an insert_vector_elt chain into a legal build_vector.
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
// vXi1 vector - we don't need to recurse.
@@ -19636,9 +20323,52 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
continue;
}
+ // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
+ // update the shuffle mask (and second operand if we started with unary
+ // shuffle) and create a new legal shuffle.
+ if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
+ auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
+ SDValue LHS = SVN->getOperand(0);
+ SDValue RHS = SVN->getOperand(1);
+ SmallVector<int, 16> Mask(SVN->getMask());
+ bool Merged = true;
+ for (auto I : enumerate(Ops)) {
+ SDValue &Op = I.value();
+ if (Op) {
+ SmallVector<int, 16> NewMask;
+ if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
+ Merged = false;
+ break;
+ }
+ Mask = std::move(NewMask);
+ }
+ }
+ if (Merged)
+ if (SDValue NewShuffle =
+ TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
+ return NewShuffle;
+ }
+
// Failed to find a match in the chain - bail.
break;
}
+
+ // See if we can fill in the missing constant elements as zeros.
+ // TODO: Should we do this for any constant?
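+    // For example, if lanes 2 and 3 were never inserted but InVec is known
+    // zero in those lanes, we can emit build_vector X, Y, 0, 0 directly.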
+ APInt DemandedZeroElts = APInt::getZero(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (!Ops[I])
+ DemandedZeroElts.setBit(I);
+
+ if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
+ SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
+ : DAG.getConstantFP(0, DL, MaxEltVT);
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (!Ops[I])
+ Ops[I] = Zero;
+
+ return CanonicalizeBuildVector(Ops);
+ }
}
return SDValue();
@@ -19679,7 +20409,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
}
- bool IsFast = false;
+ unsigned IsFast = 0;
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
OriginalLoad->getAddressSpace(), Alignment,
OriginalLoad->getMemOperand()->getFlags(),
@@ -19757,6 +20487,168 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
return SDValue();
}
+// Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
+// recursively analyse all of its users, and try to model them as bit
+// sequence extractions. If all of them agree on the new, narrower element
+// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
+// new element type, do so now.
+// This is mainly useful to recover from legalization that scalarized
+// the vector as wide elements; this combine tries to rebuild it with
+// narrower elements.
+//
+// Some more nodes could be modelled if that helps cover interesting patterns.
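+//
+// For example, if (i64 extract_vector_elt v2i64:V, 0) is only used via
+// (i32 trunc) and (i32 trunc (srl ..., 32)), we can bitcast V to v4i32 and
+// produce the truncated values as extracts of lanes 0 and 1 instead.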
+bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
+ SDNode *N) {
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+  // Performing this optimization earlier may cause legalization cycles.
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+ return false;
+
+ // TODO: Add support for big-endian.
+ if (DAG.getDataLayout().isBigEndian())
+ return false;
+
+ SDValue VecOp = N->getOperand(0);
+ EVT VecVT = VecOp.getValueType();
+ assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
+
+ // We must start with a constant extraction index.
+ auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!IndexC)
+ return false;
+
+ assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
+ "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
+
+ // TODO: deal with the case of implicit anyext of the extraction.
+ unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+ EVT ScalarVT = N->getValueType(0);
+ if (VecVT.getScalarType() != ScalarVT)
+ return false;
+
+ // TODO: deal with the cases other than everything being integer-typed.
+ if (!ScalarVT.isScalarInteger())
+ return false;
+
+ struct Entry {
+ SDNode *Producer;
+
+ // Which bits of VecOp does it contain?
+ unsigned BitPos;
+ int NumBits;
+ // NOTE: the actual width of \p Producer may be wider than NumBits!
+
+ Entry(Entry &&) = default;
+ Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
+ : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
+
+ Entry() = delete;
+ Entry(const Entry &) = delete;
+ Entry &operator=(const Entry &) = delete;
+ Entry &operator=(Entry &&) = delete;
+ };
+ SmallVector<Entry, 32> Worklist;
+ SmallVector<Entry, 32> Leafs;
+
+ // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
+ Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
+ /*NumBits=*/VecEltBitWidth);
+
+ while (!Worklist.empty()) {
+ Entry E = Worklist.pop_back_val();
+ // Does the node not even use any of the VecOp bits?
+ if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
+ E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
+      return false; // Let's allow the other combines to clean this up first.
+ // Did we fail to model any of the users of the Producer?
+ bool ProducerIsLeaf = false;
+ // Look at each user of this Producer.
+ for (SDNode *User : E.Producer->uses()) {
+ switch (User->getOpcode()) {
+ // TODO: support ISD::BITCAST
+ // TODO: support ISD::ANY_EXTEND
+ // TODO: support ISD::ZERO_EXTEND
+ // TODO: support ISD::SIGN_EXTEND
+ case ISD::TRUNCATE:
+ // Truncation simply means we keep position, but extract less bits.
+ Worklist.emplace_back(User, E.BitPos,
+ /*NumBits=*/User->getValueSizeInBits(0));
+ break;
+ // TODO: support ISD::SRA
+ // TODO: support ISD::SHL
+ case ISD::SRL:
+ // We should be shifting the Producer by a constant amount.
+ if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
+ User->getOperand(0).getNode() == E.Producer && ShAmtC) {
+ // Logical right-shift means that we start extraction later,
+ // but stop it at the same position we did previously.
+ unsigned ShAmt = ShAmtC->getZExtValue();
+ Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
+ break;
+ }
+ [[fallthrough]];
+ default:
+        // We cannot model this user of the Producer, which means the current
+        // Producer will become an ISD::EXTRACT_VECTOR_ELT.
+        ProducerIsLeaf = true;
+        // Profitability check: all users that we cannot model
+        // must be ISD::BUILD_VECTOR's.
+ if (User->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ break;
+ }
+ }
+ if (ProducerIsLeaf)
+ Leafs.emplace_back(std::move(E));
+ }
+
+ unsigned NewVecEltBitWidth = Leafs.front().NumBits;
+
+  // If we are still at the same element granularity, give up.
+ if (NewVecEltBitWidth == VecEltBitWidth)
+ return false;
+
+ // The vector width must be a multiple of the new element width.
+ if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
+ return false;
+
+  // All leafs must agree on the new element width.
+  // No leaf may expect any "padding" bits on top of that width.
+  // Every leaf must start extraction at a multiple of that width.
+ if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
+ return (unsigned)E.NumBits == NewVecEltBitWidth &&
+ E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
+ E.BitPos % NewVecEltBitWidth == 0;
+ }))
+ return false;
+
+ EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
+ VecVT.getSizeInBits() / NewVecEltBitWidth);
+
+ if (LegalTypes &&
+ !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
+ return false;
+
+ if (LegalOperations &&
+ !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
+ return false;
+
+ SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
+ for (const Entry &E : Leafs) {
+ SDLoc DL(E.Producer);
+ unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
+ assert(NewIndex < NewVecVT.getVectorNumElements() &&
+ "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
+ SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
+ DAG.getVectorIdxConstant(NewIndex, DL));
+ CombineTo(E.Producer, V);
+ }
+
+ return true;
+}
+
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue VecOp = N->getOperand(0);
SDValue Index = N->getOperand(1);
@@ -19800,6 +20692,12 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
return DAG.getUNDEF(ScalarVT);
+ // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx
+ if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
+ return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
+ VecOp.getOperand(0), Index));
+ }
+
// extract_vector_elt (build_vector x, y), 1 -> y
if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
@@ -19845,7 +20743,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
SDValue BCSrc = VecOp.getOperand(0);
if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
- return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
+ return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
if (LegalTypes && BCSrc.getValueType().isInteger() &&
BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
@@ -19945,6 +20843,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
}
+ if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
+ return SDValue(N, 0);
+
// Everything under here is trying to match an extract of a loaded value.
// If the result of load has to be truncated, then it's not necessarily
// profitable.
@@ -20186,7 +21087,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
// Simplify (build_vec (trunc $1)
// (trunc (srl $1 half-width))
-// (trunc (srl $1 (2 * half-width))) …)
+// (trunc (srl $1 (2 * half-width))))
// to (bitcast $1)
SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
@@ -20339,6 +21240,29 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
ConcatOps[0] = VecIn2;
VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
+ if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
+ !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
+ return SDValue();
+    // If the dest vector has fewer than two elements, then using a shuffle
+    // and extracting from larger regs will cost even more.
+ if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
+ return SDValue();
+ assert(InVT2Size <= InVT1Size &&
+ "Second input is not going to be larger than the first one.");
+
+ // VecIn1 is wider than the output, and we have another, possibly
+ // smaller input. Pad the smaller input with undefs, shuffle at the
+ // input vector width, and extract the output.
+ // The shuffle type is different than VT, so check legality again.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
+ return SDValue();
+
+ if (InVT1 != InVT2) {
+ VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
+ DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
+ }
+ ShuffleNumElems = InVT1Size / VTSize * NumElems;
} else {
// TODO: Support cases where the length mismatch isn't exactly by a
// factor of 2.
@@ -20779,6 +21703,127 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
VT, In);
}
+// If this is a very simple BUILD_VECTOR with its first element being a
+// ZERO_EXTEND, and all other elements being constant zeros, granularize the
+// BUILD_VECTOR's element width, absorbing the ZERO_EXTEND and turning it into
+// a constant zero op.
+// This pattern can appear during legalization.
+//
+// NOTE: This can be generalized to allow more than a single
+// non-constant-zero op, UNDEF's, and to be KnownBits-based.
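+//
+// For example (little-endian):
+//   (v2i64 build_vector (i64 zero_extend (i32 X)), (i64 0))
+// can be rebuilt as
+//   (v2i64 bitcast (v4i32 build_vector X, 0, 0, 0))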
+SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
+ // Don't run this after legalization. Targets may have other preferences.
+ if (Level >= AfterLegalizeDAG)
+ return SDValue();
+
+ // FIXME: support big-endian.
+ if (DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
+
+ EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+
+ if (!TLI.isTypeLegal(OpIntVT) ||
+ (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
+ return SDValue();
+
+ unsigned EltBitwidth = VT.getScalarSizeInBits();
+ // NOTE: the actual width of operands may be wider than that!
+
+ // Analyze all operands of this BUILD_VECTOR. What is the largest number of
+ // active bits they all have? We'll want to truncate them all to that width.
+ unsigned ActiveBits = 0;
+ APInt KnownZeroOps(VT.getVectorNumElements(), 0);
+ for (auto I : enumerate(N->ops())) {
+ SDValue Op = I.value();
+ // FIXME: support UNDEF elements?
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
+ unsigned OpActiveBits =
+ Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
+ if (OpActiveBits == 0) {
+ KnownZeroOps.setBit(I.index());
+ continue;
+ }
+ // Profitability check: don't allow non-zero constant operands.
+ return SDValue();
+ }
+ // Profitability check: there must only be a single non-zero operand,
+ // and it must be the first operand of the BUILD_VECTOR.
+ if (I.index() != 0)
+ return SDValue();
+ // The operand must be a zero-extension itself.
+ // FIXME: this could be generalized to known leading zeros check.
+ if (Op.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ unsigned CurrActiveBits =
+ Op.getOperand(0).getValueSizeInBits().getFixedValue();
+ assert(!ActiveBits && "Already encountered non-constant-zero operand?");
+ ActiveBits = CurrActiveBits;
+ // We want to at least halve the element size.
+ if (2 * ActiveBits > EltBitwidth)
+ return SDValue();
+ }
+
+ // This BUILD_VECTOR must have at least one non-constant-zero operand.
+ if (ActiveBits == 0)
+ return SDValue();
+
+  // We have EltBitwidth bits and the *minimal* chunk size is ActiveBits;
+  // into how many chunks can we split our element width?
+ EVT NewScalarIntVT, NewIntVT;
+ std::optional<unsigned> Factor;
+  // We can split the element into at least two chunks, but not into more
+  // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
+  // that divides the element width evenly and for which the resulting
+  // types/operations on that chunk width are legal.
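+  // For example, with EltBitwidth = 64 and ActiveBits = 8 we try Scale = 8,
+  // then 4, then 2 (chunk widths of 8, 16, then 32 bits), keeping the first
+  // combination whose types and operations are legal.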
+  assert(2 * ActiveBits <= EltBitwidth &&
+         "We know that at most half of the bits of the element are active.");
+ for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
+ if (EltBitwidth % Scale != 0)
+ continue;
+ unsigned ChunkBitwidth = EltBitwidth / Scale;
+ assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
+ NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
+ NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
+ Scale * N->getNumOperands());
+ if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
+ (LegalOperations &&
+ !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
+ TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
+ continue;
+ Factor = Scale;
+ break;
+ }
+ if (!Factor)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
+
+ // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
+ SmallVector<SDValue, 16> NewOps;
+ NewOps.reserve(NewIntVT.getVectorNumElements());
+ for (auto I : enumerate(N->ops())) {
+ SDValue Op = I.value();
+ assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
+ unsigned SrcOpIdx = I.index();
+ if (KnownZeroOps[SrcOpIdx]) {
+ NewOps.append(*Factor, ZeroOp);
+ continue;
+ }
+ Op = DAG.getBitcast(OpIntVT, Op);
+ Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
+ NewOps.emplace_back(Op);
+ NewOps.append(*Factor - 1, ZeroOp);
+ }
+ assert(NewOps.size() == NewIntVT.getVectorNumElements());
+ SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
+ NewBV = DAG.getBitcast(VT, NewBV);
+ return NewBV;
+}
+
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
@@ -20844,6 +21889,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (SDValue V = convertBuildVecZextToZext(N))
return V;
+ if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
+ return V;
+
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
@@ -21104,6 +22152,109 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(CastOpcode, DL, VT, NewConcat);
}
+// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
+// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
+// to that SHUFFLE_VECTOR, create a wider SHUFFLE_VECTOR.
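+//
+// For example:
+//   concat_vectors (shuffle<1,0> X, undef), X
+//   --> shuffle<1,0,0,1> (concat_vectors X, undef), undef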
+static SDValue combineConcatVectorOfShuffleAndItsOperands(
+ SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
+ bool LegalOperations) {
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ if (VT.isScalableVector())
+ return SDValue();
+
+ // For now, only allow simple 2-operand concatenations.
+ if (N->getNumOperands() != 2)
+ return SDValue();
+
+ // Don't create illegal types/shuffles when not allowed to.
+ if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
+ (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
+ return SDValue();
+
+  // Analyze all of the operands of the CONCAT_VECTORS. We want to find one
+  // that is (1) a SHUFFLE_VECTOR and (2) only used by us. Furthermore,
+  // (3) every operand of the CONCAT_VECTORS must be either that SHUFFLE_VECTOR
+  // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!), and
+  // (4) for now, the SHUFFLE_VECTOR must be unary.
+ ShuffleVectorSDNode *SVN = nullptr;
+ for (SDValue Op : N->ops()) {
+ if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
+ CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
+ all_of(N->ops(), [CurSVN](SDValue Op) {
+ // FIXME: can we allow UNDEF operands?
+ return !Op.isUndef() &&
+ (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
+ })) {
+ SVN = CurSVN;
+ break;
+ }
+ }
+ if (!SVN)
+ return SDValue();
+
+  // We are going to pad the shuffle operands, so any index that was picking
+  // from the second operand must be adjusted.
+ SmallVector<int, 16> AdjustedMask;
+ AdjustedMask.reserve(SVN->getMask().size());
+ assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
+ append_range(AdjustedMask, SVN->getMask());
+
+ // Identity masks for the operands of the (padded) shuffle.
+ SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
+ MutableArrayRef<int> FirstShufOpIdentityMask =
+ MutableArrayRef<int>(IdentityMask)
+ .take_front(OpVT.getVectorNumElements());
+ MutableArrayRef<int> SecondShufOpIdentityMask =
+ MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
+ std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
+ std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
+ VT.getVectorNumElements());
+
+ // New combined shuffle mask.
+ SmallVector<int, 32> Mask;
+ Mask.reserve(VT.getVectorNumElements());
+ for (SDValue Op : N->ops()) {
+ assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
+ if (Op.getNode() == SVN) {
+ append_range(Mask, AdjustedMask);
+ continue;
+ }
+ if (Op == SVN->getOperand(0)) {
+ append_range(Mask, FirstShufOpIdentityMask);
+ continue;
+ }
+ if (Op == SVN->getOperand(1)) {
+ append_range(Mask, SecondShufOpIdentityMask);
+ continue;
+ }
+ llvm_unreachable("Unexpected operand!");
+ }
+
+ // Don't create illegal shuffle masks.
+ if (!TLI.isShuffleMaskLegal(Mask, VT))
+ return SDValue();
+
+ // Pad the shuffle operands with UNDEF.
+ SDLoc dl(N);
+ std::array<SDValue, 2> ShufOps;
+ for (auto I : zip(SVN->ops(), ShufOps)) {
+ SDValue ShufOp = std::get<0>(I);
+ SDValue &NewShufOp = std::get<1>(I);
+ if (ShufOp.isUndef())
+ NewShufOp = DAG.getUNDEF(VT);
+ else {
+ SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
+ DAG.getUNDEF(OpVT));
+ ShufOpParts[0] = ShufOp;
+ NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
+ }
+ }
+ // Finally, create the new wide shuffle.
+ return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
+}
+
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
@@ -21239,6 +22390,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (SDValue V = combineConcatVectorOfCasts(N, DAG))
return V;
+ if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
+ N, DAG, TLI, LegalTypes, LegalOperations))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
// operands and look for a CONCAT operations that place the incoming vectors
@@ -21516,7 +22671,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
} else
- MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
+ MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
StoreSize);
SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
@@ -22076,14 +23231,53 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
+// Match shuffles that can be converted to *_vector_extend_in_reg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)).
+// Returns the EVT to which the extension should be performed.
+// NOTE: this assumes that the src is the first operand of the shuffle.
+static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
+ unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
+ SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
+ bool LegalOperations) {
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return std::nullopt;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
+ // power-of-2 extensions as they are the most likely.
+ // FIXME: should try Scale == NumElts case too,
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+ // The vector width must be a multiple of Scale.
+ if (NumElts % Scale != 0)
+ continue;
+
+ EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
+
+ if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
+ (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
+ continue;
+
+ if (Match(Scale))
+ return OutVT;
+ }
+
+ return std::nullopt;
+}
+
// Match shuffles that can be converted to any_vector_extend_in_reg.
// This is often generated during legalization.
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
-// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
-static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
- SelectionDAG &DAG,
- const TargetLowering &TLI,
- bool LegalOperations) {
+static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
@@ -22091,13 +23285,9 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
if (!VT.isInteger() || IsBigEndian)
return SDValue();
- unsigned NumElts = VT.getVectorNumElements();
- unsigned EltSizeInBits = VT.getScalarSizeInBits();
- ArrayRef<int> Mask = SVN->getMask();
- SDValue N0 = SVN->getOperand(0);
-
// shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
- auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
+ auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
+ Mask = SVN->getMask()](unsigned Scale) {
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] < 0)
continue;
@@ -22108,27 +23298,138 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
return true;
};
- // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
- // power-of-2 extensions as they are the most likely.
- for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
- // Check for non power of 2 vector sizes
- if (NumElts % Scale != 0)
- continue;
- if (!isAnyExtend(Scale))
- continue;
+ unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
+ SDValue N0 = SVN->getOperand(0);
+ // Never create an illegal type. Only create unsupported operations if we
+ // are pre-legalization.
+ std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
+ Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
+ if (!OutVT)
+ return SDValue();
+ return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
+}
- EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
- EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
- // Never create an illegal type. Only create unsupported operations if we
- // are pre-legalization.
- if (TLI.isTypeLegal(OutVT))
- if (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
- return DAG.getBitcast(VT,
- DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
- SDLoc(SVN), OutVT, N0));
- }
+// Match shuffles that can be converted to zero_extend_vector_inreg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
+static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ bool LegalTypes = true;
+ EVT VT = SVN->getValueType(0);
+ assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ // TODO: add support for big-endian when we have a test case.
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
+ auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
+ for (int &Indice : Mask) {
+ if (Indice < 0)
+ continue;
+ int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
+ int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
+ Fn(Indice, OpIdx, OpEltIdx);
+ }
+ };
+
+ // Which elements of which operand does this shuffle demand?
+ std::array<APInt, 2> OpsDemandedElts;
+ for (APInt &OpDemandedElts : OpsDemandedElts)
+ OpDemandedElts = APInt::getZero(NumElts);
+ ForEachDecomposedIndice(
+ [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
+ OpsDemandedElts[OpIdx].setBit(OpEltIdx);
+ });
+
+  // Element-wise(!), which of these demanded elements are known to be zero?
+ std::array<APInt, 2> OpsKnownZeroElts;
+ for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
+ std::get<2>(I) =
+ DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
+
+ // Manifest zeroable element knowledge in the shuffle mask.
+  // NOTE: we don't have a 'zeroable' sentinel value in the generic DAG;
+  // this is a local invention, but it won't leak into the DAG.
+ // FIXME: should we not manifest them, but just check when matching?
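+  // For example, a mask <0,4,1,5> where both demanded lanes of the second
+  // operand are known zero becomes <0,-2,1,-2>, which the zero-extension
+  // matching below can then recognize.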
+ bool HadZeroableElts = false;
+ ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
+ int &Indice, int OpIdx, int OpEltIdx) {
+ if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
+ Indice = -2; // Zeroable element.
+ HadZeroableElts = true;
+ }
+ });
+
+  // Don't proceed unless we've refined at least one zeroable mask index.
+ // If we didn't, then we are still trying to match the same shuffle mask
+ // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
+ // and evidently failed. Proceeding will lead to endless combine loops.
+ if (!HadZeroableElts)
+ return SDValue();
+
+ // The shuffle may be more fine-grained than we want. Widen elements first.
+ // FIXME: should we do this before manifesting zeroable shuffle mask indices?
+ SmallVector<int, 16> ScaledMask;
+ getShuffleMaskWithWidestElts(Mask, ScaledMask);
+ assert(Mask.size() >= ScaledMask.size() &&
+ Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
+ int Prescale = Mask.size() / ScaledMask.size();
+
+ NumElts = ScaledMask.size();
+ EltSizeInBits *= Prescale;
+
+ EVT PrescaledVT = EVT::getVectorVT(
+ *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
+ NumElts);
+
+ if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // For example,
+ // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
+  // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1>! (for the same types)
+ auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
+ assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
+ "Unexpected mask scaling factor.");
+ ArrayRef<int> Mask = ScaledMask;
+ for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
+ SrcElt != NumSrcElts; ++SrcElt) {
+ // Analyze the shuffle mask in Scale-sized chunks.
+ ArrayRef<int> MaskChunk = Mask.take_front(Scale);
+ assert(MaskChunk.size() == Scale && "Unexpected mask size.");
+ Mask = Mask.drop_front(MaskChunk.size());
+      // The first index in this chunk must be SrcElt, but not zero!
+      // FIXME: undef should be fine, but that results in a more-defined result.
+ if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
+ return false;
+ // The rest of the indices in this chunk must be zeros.
+      // FIXME: undef should be fine, but that results in a more-defined result.
+ if (!all_of(MaskChunk.drop_front(1),
+ [](int Indice) { return Indice == -2; }))
+ return false;
+ }
+ assert(Mask.empty() && "Did not process the whole mask?");
+ return true;
+ };
+ unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
+ for (bool Commuted : {false, true}) {
+ SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
+ if (Commuted)
+ ShuffleVectorSDNode::commuteMask(ScaledMask);
+ std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
+ Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
+ LegalOperations);
+ if (OutVT)
+ return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
+ DAG.getBitcast(PrescaledVT, Op)));
+ }
return SDValue();
}
@@ -22200,9 +23501,52 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
// the masks of the shuffles.
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
SelectionDAG &DAG) {
+ EVT VT = Shuf->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
if (!Shuf->getOperand(1).isUndef())
return SDValue();
+  // See if this unary non-splat shuffle actually *is* a splat shuffle in
+  // disguise, with all demanded elements being identical.
+ // FIXME: this can be done per-operand.
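+  // For example, shuffle<1,2,-1,3> is really a splat if lanes 1 through 3 of
+  // the source are identical; it can then be rewritten as shuffle<1,1,-1,1>.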
+ if (!Shuf->isSplat()) {
+ APInt DemandedElts(NumElts, 0);
+ for (int Idx : Shuf->getMask()) {
+ if (Idx < 0)
+ continue; // Ignore sentinel indices.
+      assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle index?");
+ DemandedElts.setBit(Idx);
+ }
+ assert(DemandedElts.countPopulation() > 1 && "Is a splat shuffle already?");
+ APInt UndefElts;
+ if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
+ // Even if all demanded elements are splat, some of them could be undef.
+ // Which lowest demanded element is *not* known-undef?
+ std::optional<unsigned> MinNonUndefIdx;
+ for (int Idx : Shuf->getMask()) {
+ if (Idx < 0 || UndefElts[Idx])
+ continue; // Ignore sentinel indices, and undef elements.
+ MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
+ }
+ if (!MinNonUndefIdx)
+ return DAG.getUNDEF(VT); // All undef - result is undef.
+ assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
+ SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
+ Shuf->getMask().end());
+ for (int &Idx : SplatMask) {
+ if (Idx < 0)
+ continue; // Passthrough sentinel indices.
+ // Otherwise, just pick the lowest demanded non-undef element.
+ // Or sentinel undef, if we know we'd pick a known-undef element.
+ Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
+ }
+ assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
+ return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
+ Shuf->getOperand(1), SplatMask);
+ }
+ }
+
// If the inner operand is a known splat with no undefs, just return that directly.
// TODO: Create DemandedElts mask from Shuf's mask.
// TODO: Allow undef elements and merge with the shuffle code below.
@@ -22386,7 +23730,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
// First, check if we are taking one element of a vector and shuffling that
// element into another vector.
ArrayRef<int> Mask = Shuf->getMask();
- SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
+ SmallVector<int, 16> CommutedMask(Mask);
SDValue Op0 = Shuf->getOperand(0);
SDValue Op1 = Shuf->getOperand(1);
int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
@@ -22540,6 +23884,23 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
if (Idx->getAPIntValue() == SplatIndex)
return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
+
+ // Look through a bitcast if LE and splatting lane 0, through to a
+ // scalar_to_vector or a build_vector.
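+      // For example, splatting lane 0 of (v4i32 bitcast (v2i64
+      // scalar_to_vector (i64 X))) becomes a v4i32 splat build_vector
+      // of (i32 trunc X).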
+ if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
+ SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
+ (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
+ VT.isInteger() && N00VT.isInteger()) {
+ EVT InVT =
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
+ SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
+ SDLoc(N), InVT);
+ return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
+ }
+ }
}
// If this is a bit convert that changes the element type of the vector but
@@ -22600,7 +23961,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return ShufOp;
// Match shuffles that can be converted to any_vector_extend_in_reg.
- if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
+ if (SDValue V =
+ combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
return V;
// Combine "truncate_vector_in_reg" style shuffles.
@@ -22697,7 +24059,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
return InsertN1;
if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
- SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
+ SmallVector<int> CommuteMask(Mask);
ShuffleVectorSDNode::commuteMask(CommuteMask);
if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
return InsertN0;
@@ -23086,55 +24448,101 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
return V;
+ // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
+ // Perform this really late, because it could eliminate knowledge
+ // of undef elements created by this shuffle.
+ if (Level < AfterLegalizeTypes)
+ if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
+ LegalOperations))
+ return V;
+
return SDValue();
}
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
- SDValue InVal = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (!VT.isFixedLengthVector())
+ return SDValue();
+
+ // Try to convert a scalar binop with an extracted vector element to a vector
+ // binop. This is intended to reduce potentially expensive register moves.
+ // TODO: Check if both operands are extracted.
+ // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
+ SDValue Scalar = N->getOperand(0);
+ unsigned Opcode = Scalar.getOpcode();
+ EVT VecEltVT = VT.getScalarType();
+ if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
+ TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
+ Scalar.getOperand(0).getValueType() == VecEltVT &&
+ Scalar.getOperand(1).getValueType() == VecEltVT &&
+ DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
+ // Match an extract element and get a shuffle mask equivalent.
+ SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
+
+ for (int i : {0, 1}) {
+ // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
+ // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
+ SDValue EE = Scalar.getOperand(i);
+ auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
+ if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ EE.getOperand(0).getValueType() == VT &&
+ isa<ConstantSDNode>(EE.getOperand(1))) {
+ // Mask = {ExtractIndex, undef, undef....}
+ ShufMask[0] = EE.getConstantOperandVal(1);
+ // Make sure the shuffle is legal if we are crossing lanes.
+ if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
+ SDLoc DL(N);
+ SDValue V[] = {EE.getOperand(0),
+ DAG.getConstant(C->getAPIntValue(), DL, VT)};
+ SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
+ return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
+ ShufMask);
+ }
+ }
+ }
+ }
// Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
// with a VECTOR_SHUFFLE and possible truncate.
- if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- VT.isFixedLengthVector() &&
- InVal->getOperand(0).getValueType().isFixedLengthVector()) {
- SDValue InVec = InVal->getOperand(0);
- SDValue EltNo = InVal->getOperand(1);
- auto InVecT = InVec.getValueType();
- if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
- SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
- int Elt = C0->getZExtValue();
- NewMask[0] = Elt;
- // If we have an implict truncate do truncate here as long as it's legal.
- // if it's not legal, this should
- if (VT.getScalarType() != InVal.getValueType() &&
- InVal.getValueType().isScalarInteger() &&
- isTypeLegal(VT.getScalarType())) {
- SDValue Val =
- DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
- }
- if (VT.getScalarType() == InVecT.getScalarType() &&
- VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
- SDValue LegalShuffle =
- TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
- DAG.getUNDEF(InVecT), NewMask, DAG);
- if (LegalShuffle) {
- // If the initial vector is the correct size this shuffle is a
- // valid result.
- if (VT == InVecT)
- return LegalShuffle;
- // If not we must truncate the vector.
- if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
- SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
- EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
- InVecT.getVectorElementType(),
- VT.getVectorNumElements());
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
- LegalShuffle, ZeroIdx);
- }
- }
- }
+ if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+ !Scalar.getOperand(0).getValueType().isFixedLengthVector())
+ return SDValue();
+
+ // If we have an implicit truncate, truncate here if it is legal.
+ if (VecEltVT != Scalar.getValueType() &&
+ Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
+ SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
+ }
+
+ auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
+ if (!ExtIndexC)
+ return SDValue();
+
+ SDValue SrcVec = Scalar.getOperand(0);
+ EVT SrcVT = SrcVec.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ unsigned VTNumElts = VT.getVectorNumElements();
+ if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
+ // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
+ SmallVector<int, 8> Mask(SrcNumElts, -1);
+ Mask[0] = ExtIndexC->getZExtValue();
+ SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
+ SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
+ if (!LegalShuffle)
+ return SDValue();
+
+ // If the initial vector is the same size, the shuffle is the result.
+ if (VT == SrcVT)
+ return LegalShuffle;
+
+ // If not, shorten the shuffled vector.
+ if (VTNumElts != SrcNumElts) {
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
+ EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getVectorElementType(), VTNumElts);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
+ ZeroIdx);
}
}
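
Both folds above rewrite a scalar_to_vector of an extracted element into a shuffle whose mask routes the source lane into lane 0 and leaves every other lane undef. A minimal standalone sketch of that mask construction (hypothetical helper name, std::vector standing in for llvm::SmallVector):

    #include <vector>

    // Build the mask {ExtIndex, -1, -1, ...} used to model
    // scalar_to_vector(extract_vector_elt(V, ExtIndex)) as
    // shuffle(V, undef, Mask); -1 marks an undef lane, matching
    // LLVM's shuffle-mask convention.
    std::vector<int> buildS2VShuffleMask(unsigned NumElts, unsigned ExtIndex) {
      std::vector<int> Mask(NumElts, -1);   // every lane undef by default
      Mask[0] = static_cast<int>(ExtIndex); // lane 0 reads element ExtIndex of V
      return Mask;
    }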
@@ -23364,6 +24772,15 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
}
SDValue DAGCombiner::visitVPOp(SDNode *N) {
+
+ if (N->getOpcode() == ISD::VP_GATHER)
+ if (SDValue SD = visitVPGATHER(N))
+ return SD;
+
+ if (N->getOpcode() == ISD::VP_SCATTER)
+ if (SDValue SD = visitVPSCATTER(N))
+ return SD;
+
// VP operations in which all vector elements are disabled - either by
// determining that the mask is all false or that the EVL is 0 - can be
// eliminated.
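
As a scalar model of why such VP operations are removable: with EVL == 0 or an all-false mask, no lane of the inputs is ever read, so the result does not depend on the operation at all. A hedged sketch (disabled lanes yield a neutral value here; the real nodes leave them undefined):

    #include <array>

    std::array<int, 4> vpAdd(const std::array<int, 4> &A,
                             const std::array<int, 4> &B,
                             const std::array<bool, 4> &M, int EVL) {
      std::array<int, 4> R{}; // placeholder for disabled lanes
      for (int i = 0; i < EVL && i < 4; ++i)
        if (M[i])
          R[i] = A[i] + B[i]; // only enabled lanes compute anything
      return R;               // EVL == 0 or all-false M: A and B are unused
    }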
@@ -23532,10 +24949,40 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
}
// bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
- if (VT.isScalableVector())
- return DAG.getSplatVector(VT, DL, ScalarBO);
- SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
- return DAG.getBuildVector(VT, DL, Ops);
+ return DAG.getSplat(VT, DL, ScalarBO);
+}
+
+/// Visit a vector cast operation, like FP_EXTEND.
+SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
+ EVT EltVT = VT.getVectorElementType();
+ unsigned Opcode = N->getOpcode();
+
+ SDValue N0 = N->getOperand(0);
+ EVT SrcVT = N0->getValueType(0);
+ EVT SrcEltVT = SrcVT.getVectorElementType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // TODO: promoting the operation might also be good here.
+ int Index0;
+ SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
+ if (Src0 &&
+ (N0.getOpcode() == ISD::SPLAT_VECTOR ||
+ TLI.isExtractVecEltCheap(VT, Index0)) &&
+ TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
+ TLI.preferScalarizeSplat(Opcode)) {
+ SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
+ SDValue Elt =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
+ SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
+ if (VT.isScalableVector())
+ return DAG.getSplatVector(VT, DL, ScalarBO);
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+
+ return SDValue();
}
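
SimplifyVCastOp scalarizes a cast of a splat: extract the splatted scalar, cast it once, and splat the result. A scalar demonstration of the equivalence it relies on, with plain arrays standing in for DAG vectors:

    #include <array>
    #include <cassert>

    int main() {
      float X = 1.5f;
      std::array<float, 4> V;
      V.fill(X); // splat X
      // fp_extend (splat X): cast every lane.
      std::array<double, 4> CastEachLane;
      for (int i = 0; i < 4; ++i)
        CastEachLane[i] = static_cast<double>(V[i]);
      // splat (fp_extend X): cast once, then splat.
      std::array<double, 4> CastOnceThenSplat;
      CastOnceThenSplat.fill(static_cast<double>(X));
      assert(CastEachLane == CastOnceThenSplat);
    }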
/// Visit a binary vector operation, like ADD.
@@ -23555,9 +25002,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
// same types of operations that are in the original sequence. We do have to
// restrict ops like integer div that have immediate UB (e.g., div-by-zero)
// though. This code is adapted from the identical transform in instcombine.
- if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
- Opcode != ISD::UREM && Opcode != ISD::SREM &&
- Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
+ if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
@@ -23575,7 +25020,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
// demanded elements analysis. It is further limited to not change a splat
// of an inserted scalar because that may be optimized better by
// load-folding or other target-specific behaviors.
- if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
+ if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat X), (splat C) --> splat (binop X, C)
@@ -23584,7 +25029,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
Shuf0->getMask());
}
- if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
+ if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat C), (splat X) --> splat (binop C, X)
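
The is_splat → all_equal renames above track an LLVM STLExtras change; for a shuffle mask, "all elements equal" is exactly the splat-mask condition. A rough STL equivalent of the predicate:

    #include <algorithm>
    #include <functional>
    #include <vector>

    // True iff every element of the mask compares equal, i.e. the shuffle
    // broadcasts a single lane (a splat mask such as {2, 2, 2, 2}).
    bool isSplatMask(const std::vector<int> &Mask) {
      return std::adjacent_find(Mask.begin(), Mask.end(),
                                std::not_equal_to<int>()) == Mask.end();
    }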
@@ -23657,7 +25102,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2) {
- assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
+ assert(N0.getOpcode() == ISD::SETCC &&
+ "First argument must be a SetCC node!");
SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
cast<CondCodeSDNode>(N0.getOperand(2))->get());
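
The next hunk corrects the comment on the select_cc seteq (and x, y) fold: the smear requires an arithmetic shift (sra), not a logical one. A scalar sketch of the trick, assuming y has exactly one bit set (__builtin_ctzll is a GCC/Clang builtin):

    #include <cassert>
    #include <cstdint>

    // (x & y) == 0 ? 0 : A, with y == 1 << k: shift bit k of x into the
    // sign position, then arithmetic-shift right to smear it into all-ones
    // (bit set) or all-zeros (bit clear), and AND the result with A.
    int64_t foldSelectCC(uint64_t x, uint64_t y, int64_t A) {
      int k = __builtin_ctzll(y);
      int64_t smeared = static_cast<int64_t>(x << (63 - k)) >> 63; // sra
      return smeared & A;
    }

    int main() {
      assert(foldSelectCC(0b100, 0b100, 42) == 42); // bit set -> A
      assert(foldSelectCC(0b010, 0b100, 42) == 0);  // bit clear -> 0
    }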
@@ -24132,7 +25578,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
return V;
- // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+ // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
// where y has a single bit set.
// A plaintext description would be: we can turn the SELECT_CC into an AND
// when the condition can be materialized as an all-ones register. Any
@@ -24583,7 +26029,7 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
bool IsAtomic;
SDValue BasePtr;
int64_t Offset;
- Optional<int64_t> NumBytes;
+ std::optional<int64_t> NumBytes;
MachineMemOperand *MMO;
};
@@ -24598,21 +26044,26 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
: 0;
uint64_t Size =
MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
- return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
+ return {LSN->isVolatile(),
+ LSN->isAtomic(),
+ LSN->getBasePtr(),
Offset /*base offset*/,
- Optional<int64_t>(Size),
+ std::optional<int64_t>(Size),
LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
- return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
+ return {false /*isVolatile*/,
+ /*isAtomic*/ false,
+ LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,
- (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
- : Optional<int64_t>(),
+ (LN->hasOffset()) ? std::optional<int64_t>(LN->getSize())
+ : std::optional<int64_t>(),
(MachineMemOperand *)nullptr};
// Default.
- return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
- (int64_t)0 /*offset*/,
- Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
+ return {false /*isvolatile*/,
+ /*isAtomic*/ false, SDValue(),
+ (int64_t)0 /*offset*/, std::optional<int64_t>() /*size*/,
+ (MachineMemOperand *)nullptr};
};
MemUseCharacteristics MUC0 = getCharacteristics(Op0),
@@ -24839,13 +26290,6 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getTokenFactor(SDLoc(N), Aliases);
}
-namespace {
-// TODO: Replace with with std::monostate when we move to C++17.
-struct UnitT { } Unit;
-bool operator==(const UnitT &, const UnitT &) { return true; }
-bool operator!=(const UnitT &, const UnitT &) { return false; }
-} // namespace
-
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
@@ -24866,8 +26310,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// the common case, every store writes to the immediately previous address
// space and thus merged with the previous interval at insertion time.
- using IMap =
- llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
+ using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
+ IntervalMapHalfOpenInfo<int64_t>>;
IMap::Allocator A;
IMap Intervals(A);
@@ -24894,7 +26338,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
return false;
// Add ST's interval.
- Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
+ Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
+ std::monostate{});
while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
if (Chain->getMemoryVT().isScalableVector())
@@ -24923,7 +26368,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// If there's a previous interval, we should start after it.
if (I != Intervals.begin() && (--I).stop() <= Offset)
break;
- Intervals.insert(Offset, Offset + Length, Unit);
+ Intervals.insert(Offset, Offset + Length, std::monostate{});
ChainedStores.push_back(Chain);
STChain = Chain;
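
The UnitT placeholder deleted above existed only because this code predated C++17; std::monostate is the standard empty, equality-comparable unit type, so the interval map can use it directly when only the keys carry information. A minimal sketch of the pattern, with std::map standing in for llvm::IntervalMap:

    #include <cstdint>
    #include <map>
    #include <utility>
    #include <variant> // std::monostate

    int main() {
      // Only the half-open byte intervals matter; the mapped value is a
      // zero-information unit.
      std::map<std::pair<int64_t, int64_t>, std::monostate> Intervals;
      Intervals.insert({{0, 8}, std::monostate{}});  // store covering [0, 8)
      Intervals.insert({{8, 16}, std::monostate{}}); // adjacent store [8, 16)
    }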
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index ff5779967e22..2f2ae6e29855 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -42,7 +42,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -105,6 +104,7 @@
#include <cassert>
#include <cstdint>
#include <iterator>
+#include <optional>
#include <utility>
using namespace llvm;
@@ -319,7 +319,7 @@ Register FastISel::materializeConstant(const Value *V, MVT VT) {
Reg = lookUpRegForValue(Op);
} else if (isa<UndefValue>(V)) {
Reg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
}
return Reg;
@@ -405,11 +405,6 @@ void FastISel::recomputeInsertPt() {
++FuncInfo.InsertPt;
} else
FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
-
- // Now skip past any EH_LABELs, which must remain at the beginning.
- while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
- FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
- ++FuncInfo.InsertPt;
}
void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
@@ -696,20 +691,20 @@ bool FastISel::selectStackmap(const CallInst *I) {
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
auto Builder =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown));
const MCInstrDesc &MCID = Builder.getInstr()->getDesc();
for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I)
Builder.addImm(0);
// Issue STACKMAP.
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::STACKMAP));
for (auto const &MO : Ops)
MIB.add(MO);
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
.addImm(0)
.addImm(0);
@@ -878,7 +873,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
/*isImp=*/true));
// Insert the patchpoint instruction before the call generated by the target.
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, MIMD,
TII.get(TargetOpcode::PATCHPOINT));
for (auto &MO : Ops)
@@ -907,7 +902,7 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
/*isDef=*/false));
MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
for (auto &MO : Ops)
MIB.add(MO);
@@ -928,7 +923,7 @@ bool FastISel::selectXRayTypedEvent(const CallInst *I) {
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
/*isDef=*/false));
MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
for (auto &MO : Ops)
MIB.add(MO);
@@ -1139,9 +1134,8 @@ bool FastISel::lowerCall(const CallInst *CI) {
bool IsTailCall = CI->isTailCall();
if (IsTailCall && !isInTailCallPosition(*CI, TM))
IsTailCall = false;
- if (IsTailCall && MF->getFunction()
- .getFnAttribute("disable-tail-calls")
- .getValueAsBool())
+ if (IsTailCall && !CI->isMustTailCall() &&
+ MF->getFunction().getFnAttribute("disable-tail-calls").getValueAsBool())
IsTailCall = false;
CallLoweringInfo CLI;
@@ -1171,7 +1165,7 @@ bool FastISel::selectCall(const User *I) {
ExtraInfo |= InlineAsm::Extra_IsConvergent;
ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::INLINEASM));
MIB.addExternalSymbol(IA->getAsmString().c_str());
MIB.addImm(ExtraInfo);
@@ -1229,7 +1223,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
return true;
- Optional<MachineOperand> Op;
+ std::optional<MachineOperand> Op;
if (Register Reg = lookUpRegForValue(Address))
Op = MachineOperand::CreateReg(Reg, false);
@@ -1251,24 +1245,24 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
false);
if (Op) {
- assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
"Expected inlined-at fields to agree");
- // A dbg.declare describes the address of a source variable, so lower it
- // into an indirect DBG_VALUE.
- auto Builder =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op,
- DI->getVariable(), DI->getExpression());
-
- // If using instruction referencing, mutate this into a DBG_INSTR_REF,
- // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto
- // the expression, we don't have an "indirect" flag in DBG_INSTR_REF.
- if (UseInstrRefDebugInfo && Op->isReg()) {
- Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
- Builder->getOperand(1).ChangeToImmediate(0);
- auto *NewExpr =
- DIExpression::prepend(DI->getExpression(), DIExpression::DerefBefore);
- Builder->getOperand(3).setMetadata(NewExpr);
+ if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) {
+ // If using instruction referencing, produce this as a DBG_INSTR_REF,
+ // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto
+ // the expression, since we don't have an "indirect" flag in DBG_INSTR_REF.
+ SmallVector<uint64_t, 3> Ops(
+ {dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_deref});
+ auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
+ TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, *Op,
+ DI->getVariable(), NewExpr);
+ } else {
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op,
+ DI->getVariable(), DI->getExpression());
}
} else {
// We can't yet handle anything else here because it would require
@@ -1283,12 +1277,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const DbgValueInst *DI = cast<DbgValueInst>(II);
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
const Value *V = DI->getValue();
- assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
"Expected inlined-at fields to agree");
if (!V || isa<UndefValue>(V) || DI->hasArgList()) {
// DI is either undef or cannot produce a valid DBG_VALUE, so produce an
// undef DBG_VALUE to terminate any prior location.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, false, 0U,
DI->getVariable(), DI->getExpression());
} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
// See if there's an expression to constant-fold.
@@ -1296,35 +1290,42 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (Expr)
std::tie(Expr, CI) = Expr->constantFold(CI);
if (CI->getBitWidth() > 64)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addCImm(CI)
.addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(Expr);
else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addImm(CI->getZExtValue())
.addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(Expr);
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addFPImm(CF)
.addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (Register Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
- bool IsIndirect = false;
- auto Builder =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
- DI->getVariable(), DI->getExpression());
-
- // If using instruction referencing, mutate this into a DBG_INSTR_REF,
- // to be later patched up by finalizeDebugInstrRefs.
- if (UseInstrRefDebugInfo) {
- Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
- Builder->getOperand(1).ChangeToImmediate(0);
+ if (!FuncInfo.MF->useDebugInstrRef()) {
+ bool IsIndirect = false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect,
+ Reg, DI->getVariable(), DI->getExpression());
+ } else {
+ // If using instruction referencing, produce this as a DBG_INSTR_REF,
+ // to be later patched up by finalizeDebugInstrRefs.
+ SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
+ /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true)});
+ SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
+ auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
+ TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, MOs,
+ DI->getVariable(), NewExpr);
}
} else {
// We don't know how to handle other cases, so we drop.
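
Both instruction-referencing paths above fold the operand reference (and, for dbg.declare, the implicit indirection) into the DIExpression, since DBG_INSTR_REF has no indirect flag. A hedged sketch of that rewrite with a hypothetical helper name; it compiles against LLVM headers rather than standalone:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/BinaryFormat/Dwarf.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    // Make the expression consume machine operand 0 of the DBG_INSTR_REF,
    // optionally adding the deref a dbg.declare's address would need.
    const DIExpression *makeInstrRefExpr(const DIExpression *Expr, bool Deref) {
      SmallVector<uint64_t, 3> Ops({dwarf::DW_OP_LLVM_arg, 0});
      if (Deref)
        Ops.push_back(dwarf::DW_OP_deref);
      return DIExpression::prependOpcodes(Expr, Ops);
    }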
@@ -1340,7 +1341,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
return true;
}
@@ -1448,7 +1449,7 @@ bool FastISel::selectFreeze(const User *I) {
MVT Ty = ETy.getSimpleVT();
const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty);
Register ResultReg = createResultReg(TyRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
updateValueMap(I, ResultReg);
@@ -1500,7 +1501,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
if (Call->getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
return false;
- DbgLoc = I->getDebugLoc();
+ MIMD = MIMetadata(*I);
SavedInsertPt = FuncInfo.InsertPt;
@@ -1525,7 +1526,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
if (!SkipTargetIndependentISel) {
if (selectOperator(I, I->getOpcode())) {
++NumFastIselSuccessIndependent;
- DbgLoc = DebugLoc();
+ MIMD = {};
return true;
}
// Remove dead code.
@@ -1537,7 +1538,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
// Next, try calling the target to attempt to handle the instruction.
if (fastSelectInstruction(I)) {
++NumFastIselSuccessTarget;
- DbgLoc = DebugLoc();
+ MIMD = {};
return true;
}
// Remove dead code.
@@ -1545,7 +1546,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
if (SavedInsertPt != FuncInfo.InsertPt)
removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
- DbgLoc = DebugLoc();
+ MIMD = {};
// Undo phi node updates, because they will be added again by SelectionDAG.
if (I->isTerminator()) {
// PHI node handling may have generated local value instructions.
@@ -1593,7 +1594,7 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB,
FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB);
}
- fastEmitBranch(FalseMBB, DbgLoc);
+ fastEmitBranch(FalseMBB, MIMD.getDL());
}
/// Emit an FNeg operation.
@@ -1906,7 +1907,7 @@ Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op,
// If it's not legal to COPY between the register classes, something
// has gone very wrong before we got here.
Register NewOp = createResultReg(RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::COPY), NewOp).addReg(Op);
return NewOp;
}
@@ -1919,7 +1920,7 @@ Register FastISel::fastEmitInst_(unsigned MachineInstOpcode,
Register ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg);
return ResultReg;
}
@@ -1931,13 +1932,14 @@ Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
@@ -1953,15 +1955,16 @@ Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0)
.addReg(Op1);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0)
.addReg(Op1);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -1977,17 +1980,18 @@ Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0)
.addReg(Op1)
.addReg(Op2);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0)
.addReg(Op1)
.addReg(Op2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2001,15 +2005,16 @@ Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0)
.addImm(Imm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0)
.addImm(Imm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2023,17 +2028,18 @@ Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0)
.addImm(Imm1)
.addImm(Imm2);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0)
.addImm(Imm1)
.addImm(Imm2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2046,13 +2052,14 @@ Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
Register ResultReg = createResultReg(RC);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addFPImm(FPImm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addFPImm(FPImm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2067,17 +2074,18 @@ Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addReg(Op0)
.addReg(Op1)
.addImm(Imm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addReg(Op0)
.addReg(Op1)
.addImm(Imm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2088,12 +2096,13 @@ Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
.addImm(Imm);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.implicit_defs()[0]);
}
return ResultReg;
}
@@ -2105,7 +2114,7 @@ Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
"Cannot yet extract from physregs");
const TargetRegisterClass *RC = MRI.getRegClass(Op0);
MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
ResultReg).addReg(Op0, 0, Idx);
return ResultReg;
}
@@ -2170,9 +2179,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Set the DebugLoc for the copy. Use the location of the operand if
// there is one; otherwise no location, and flushLocalValueMap will fix it.
- DbgLoc = DebugLoc();
+ MIMD = {};
if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
- DbgLoc = Inst->getDebugLoc();
+ MIMD = MIMetadata(*Inst);
Register Reg = getRegForValue(PHIOp);
if (!Reg) {
@@ -2180,7 +2189,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
return false;
}
FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg));
- DbgLoc = DebugLoc();
+ MIMD = {};
}
}
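
The DbgLoc → MIMD migration running through this file replaces the bare DebugLoc with MIMetadata, which bundles the debug location with other per-instruction metadata so that BuildMI can propagate both. A hedged sketch of the pattern with a hypothetical wrapper, assuming LLVM's CodeGen headers:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    void emitWithMetadata(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator IP,
                          const TargetInstrInfo &TII, const Instruction &I,
                          unsigned Opc) {
      MIMetadata MIMD(I); // captures I's DebugLoc plus attached metadata
      BuildMI(MBB, IP, MIMD, TII.get(Opc)); // both flow onto the new MI
    }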
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index aa9c77f9cabf..c18cd39ed296 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -119,10 +119,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
}
- if (Personality == EHPersonality::Wasm_CXX) {
- WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
- calculateWasmEHInfo(&fn, EHInfo);
- }
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
@@ -154,7 +150,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
(TFI->isStackRealignable() || (Alignment <= StackAlign))) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
uint64_t TySize =
- MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize();
+ MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinValue();
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
@@ -270,7 +266,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
// the first one should be marked.
if (BB.hasAddressTaken())
- MBB->setHasAddressTaken();
+ MBB->setAddressTakenIRBlock(const_cast<BasicBlock *>(&BB));
// Mark landing pad blocks.
if (BB.isEHPad())
@@ -323,10 +319,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
const auto *BB = CME.Handler.get<const BasicBlock *>();
CME.Handler = MBBMap[BB];
}
- }
-
- else if (Personality == EHPersonality::Wasm_CXX) {
+ } else if (Personality == EHPersonality::Wasm_CXX) {
WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
+ calculateWasmEHInfo(&fn, EHInfo);
+
// Map all BB references in the Wasm EH data to MBBs.
DenseMap<BBOrMBB, BBOrMBB> SrcToUnwindDest;
for (auto &KV : EHInfo.SrcToUnwindDest) {
@@ -369,8 +365,7 @@ void FunctionLoweringInfo::clear() {
/// CreateReg - Allocate a single virtual register for the given type.
Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
- return RegInfo->createVirtualRegister(
- MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
+ return RegInfo->createVirtualRegister(TLI->getRegClassFor(VT, isDivergent));
}
/// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -381,8 +376,6 @@ Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
/// will assign registers for each member or element.
///
Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
- const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
-
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
@@ -451,8 +444,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
Register DestReg = It->second;
if (DestReg == 0)
- return
- assert(Register::isVirtualRegister(DestReg) && "Expected a virtual reg");
+ return;
+ assert(DestReg.isVirtual() && "Expected a virtual reg");
LiveOutRegInfo.grow(DestReg);
LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
@@ -475,7 +468,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when its"
"CopyToReg node was created.");
Register SrcReg = ValueMap[V];
- if (!Register::isVirtualRegister(SrcReg)) {
+ if (!SrcReg.isVirtual()) {
DestLOI.IsValid = false;
return;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 3d3b504c6abd..338172e4e10a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -70,7 +70,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
continue;
if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
- if (Register::isPhysicalRegister(RN->getReg()))
+ if (RN->getReg().isPhysical())
continue;
NumImpUses = N - I;
break;
@@ -81,9 +81,9 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
/// implicit physical register output.
-void InstrEmitter::
-EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
- Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap) {
+void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
+ Register SrcReg,
+ DenseMap<SDValue, Register> &VRBaseMap) {
Register VRBase;
if (SrcReg.isVirtual()) {
// Just use the input register directly!
@@ -106,51 +106,50 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (TLI->isTypeLegal(VT))
UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
- if (!IsClone && !IsCloned)
- for (SDNode *User : Node->uses()) {
- bool Match = true;
- if (User->getOpcode() == ISD::CopyToReg &&
- User->getOperand(2).getNode() == Node &&
- User->getOperand(2).getResNo() == ResNo) {
- Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (DestReg.isVirtual()) {
- VRBase = DestReg;
- Match = false;
- } else if (DestReg != SrcReg)
- Match = false;
- } else {
- for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
- SDValue Op = User->getOperand(i);
- if (Op.getNode() != Node || Op.getResNo() != ResNo)
- continue;
- MVT VT = Node->getSimpleValueType(Op.getResNo());
- if (VT == MVT::Other || VT == MVT::Glue)
- continue;
- Match = false;
- if (User->isMachineOpcode()) {
- const MCInstrDesc &II = TII->get(User->getMachineOpcode());
- const TargetRegisterClass *RC = nullptr;
- if (i+II.getNumDefs() < II.getNumOperands()) {
- RC = TRI->getAllocatableClass(
- TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF));
- }
- if (!UseRC)
- UseRC = RC;
- else if (RC) {
- const TargetRegisterClass *ComRC =
+ for (SDNode *User : Node->uses()) {
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (DestReg.isVirtual()) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ MVT VT = Node->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Glue)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = nullptr;
+ if (i + II.getNumDefs() < II.getNumOperands()) {
+ RC = TRI->getAllocatableClass(
+ TII->getRegClass(II, i + II.getNumDefs(), TRI, *MF));
+ }
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC =
TRI->getCommonSubClass(UseRC, RC);
- // If multiple uses expect disjoint register classes, we emit
- // copies in AddRegisterOperand.
- if (ComRC)
- UseRC = ComRC;
- }
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC)
+ UseRC = ComRC;
}
}
}
- MatchReg &= Match;
- if (VRBase)
- break;
}
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr;
SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
@@ -219,7 +218,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
RC = VTRC;
}
- if (II.OpInfo != nullptr && II.OpInfo[i].isOptionalDef()) {
+ if (!II.operands().empty() && II.operands()[i].isOptionalDef()) {
// Optional def must be a physical register.
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
assert(VRBase.isPhysical());
@@ -231,8 +230,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == i) {
- unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (Register::isVirtualRegister(Reg)) {
+ Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
if (RegRC == RC) {
VRBase = Reg;
@@ -305,7 +304,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
const MCInstrDesc &MCID = MIB->getDesc();
bool isOptDef = IIOpNum < MCID.getNumOperands() &&
- MCID.OpInfo[IIOpNum].isOptionalDef();
+ MCID.operands()[IIOpNum].isOptionalDef();
// If the instruction requires a register in a different class, create
// a new virtual register and copy the value into it, but first attempt to
@@ -395,7 +394,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
(IIRC && TRI->isDivergentRegClass(IIRC)))
: nullptr;
- if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) {
+ if (OpRC && IIRC && OpRC != IIRC && VReg.isVirtual()) {
Register NewVReg = MRI->createVirtualRegister(IIRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
@@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
Register Reg;
MachineInstr *DefMI;
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
- if (R && Register::isPhysicalRegister(R->getReg())) {
+ if (R && R->getReg().isPhysical()) {
Reg = R->getReg();
DefMI = nullptr;
} else {
@@ -650,7 +649,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
// Skip physical registers as they don't have a vreg to get and we'll
// insert copies for them in TwoAddressInstructionPass anyway.
- if (!R || !Register::isPhysicalRegister(R->getReg())) {
+ if (!R || !R->getReg().isPhysical()) {
unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
@@ -678,43 +677,54 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap) {
- MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
- assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ assert(cast<DILocalVariable>(SD->getVariable())
+ ->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
SD->setIsEmitted();
- ArrayRef<SDDbgOperand> LocationOps = SD->getLocationOps();
- assert(!LocationOps.empty() && "dbg_value with no location operands?");
+ assert(!SD->getLocationOps().empty() &&
+ "dbg_value with no location operands?");
if (SD->isInvalidated())
return EmitDbgNoLocation(SD);
- // Emit variadic dbg_value nodes as DBG_VALUE_LIST.
- if (SD->isVariadic()) {
- // DBG_VALUE_LIST := "DBG_VALUE_LIST" var, expression, loc (, loc)*
- const MCInstrDesc &DbgValDesc = TII->get(TargetOpcode::DBG_VALUE_LIST);
- // Build the DBG_VALUE_LIST instruction base.
- auto MIB = BuildMI(*MF, DL, DbgValDesc);
- MIB.addMetadata(Var);
- MIB.addMetadata(Expr);
- AddDbgValueLocationOps(MIB, DbgValDesc, LocationOps, VRBaseMap);
- return &*MIB;
- }
-
// Attempt to produce a DBG_INSTR_REF if we've been asked to.
- // We currently exclude the possibility of instruction references for
- // variadic nodes; if at some point we enable them, this should be moved
- // above the variadic block.
if (EmitDebugInstrRefs)
if (auto *InstrRef = EmitDbgInstrRef(SD, VRBaseMap))
return InstrRef;
+ // Emit variadic dbg_value nodes as DBG_VALUE_LIST if they have not been
+ // emitted as instruction references.
+ if (SD->isVariadic())
+ return EmitDbgValueList(SD, VRBaseMap);
+
+ // Emit single-location dbg_value nodes as DBG_VALUE if they have not been
+ // emitted as instruction references.
return EmitDbgValueFromSingleOp(SD, VRBaseMap);
}
+MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) {
+ const Value *V = Op.getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ return MachineOperand::CreateCImm(CI);
+ return MachineOperand::CreateImm(CI->getSExtValue());
+ }
+ if (const ConstantFP *CF = dyn_cast<ConstantFP>(V))
+ return MachineOperand::CreateFPImm(CF);
+ // Note: This assumes that all nullptr constants are zero-valued.
+ if (isa<ConstantPointerNull>(V))
+ return MachineOperand::CreateImm(0);
+ // Undef or unhandled value type, so return an undef operand.
+ return MachineOperand::CreateReg(
+ /* Reg */ 0U, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true);
+}
+
void InstrEmitter::AddDbgValueLocationOps(
MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc,
ArrayRef<SDDbgOperand> LocationOps,
@@ -740,24 +750,9 @@ void InstrEmitter::AddDbgValueLocationOps(
AddOperand(MIB, V, (*MIB).getNumOperands(), &DbgValDesc, VRBaseMap,
/*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
} break;
- case SDDbgOperand::CONST: {
- const Value *V = Op.getConst();
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->getBitWidth() > 64)
- MIB.addCImm(CI);
- else
- MIB.addImm(CI->getSExtValue());
- } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- MIB.addFPImm(CF);
- } else if (isa<ConstantPointerNull>(V)) {
- // Note: This assumes that all nullptr constants are zero-valued.
- MIB.addImm(0);
- } else {
- // Could be an Undef. In any case insert an Undef so we can see what we
- // dropped.
- MIB.addReg(0U);
- }
- } break;
+ case SDDbgOperand::CONST:
+ MIB.add(GetMOForConstDbgOp(Op));
+ break;
}
}
}
@@ -765,116 +760,158 @@ void InstrEmitter::AddDbgValueLocationOps(
MachineInstr *
InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap) {
- assert(!SD->isVariadic());
- SDDbgOperand DbgOperand = SD->getLocationOps()[0];
MDNode *Var = SD->getVariable();
- DIExpression *Expr = (DIExpression*)SD->getExpression();
+ const DIExpression *Expr = (DIExpression *)SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
- // Handle variable locations that don't actually depend on the instructions
- // in the program: constants and stack locations.
- if (DbgOperand.getKind() == SDDbgOperand::FRAMEIX ||
- DbgOperand.getKind() == SDDbgOperand::CONST)
+ // Returns true if the given operand is not a legal debug operand for a
+ // DBG_INSTR_REF.
+ auto IsInvalidOp = [](SDDbgOperand DbgOp) {
+ return DbgOp.getKind() == SDDbgOperand::FRAMEIX;
+ };
+ // Returns true if the given operand is not itself an instruction reference
+ // but is a legal debug operand for a DBG_INSTR_REF.
+ auto IsNonInstrRefOp = [](SDDbgOperand DbgOp) {
+ return DbgOp.getKind() == SDDbgOperand::CONST;
+ };
+
+ // If this variable location does not depend on any instructions, or
+ // contains any stack locations, produce it as a standard debug value instead.
+ if (any_of(SD->getLocationOps(), IsInvalidOp) ||
+ all_of(SD->getLocationOps(), IsNonInstrRefOp)) {
+ if (SD->isVariadic())
+ return EmitDbgValueList(SD, VRBaseMap);
return EmitDbgValueFromSingleOp(SD, VRBaseMap);
+ }
// Immediately fold any indirectness from the LLVM-IR intrinsic into the
// expression:
- if (SD->isIndirect()) {
- std::vector<uint64_t> Elts = {dwarf::DW_OP_deref};
- Expr = DIExpression::append(Expr, Elts);
- }
+ if (SD->isIndirect())
+ Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
+ // If this is not already a variadic expression, it must be modified to become
+ // one.
+ if (!SD->isVariadic())
+ Expr = DIExpression::convertToVariadicExpression(Expr);
+
+ SmallVector<MachineOperand> MOs;
// It may not be immediately possible to identify the MachineInstr that
// defines a VReg; it can depend, for example, on the order blocks are
// emitted in. When this happens, or when further analysis is needed later,
// produce an instruction like this:
//
- // DBG_INSTR_REF %0:gr64, 0, !123, !456
+ // DBG_INSTR_REF !123, !456, %0:gr64
//
// i.e., point the instruction at the vreg, and patch it up later in
// MachineFunction::finalizeDebugInstrRefs.
- auto EmitHalfDoneInstrRef = [&](unsigned VReg) -> MachineInstr * {
- auto MIB = BuildMI(*MF, DL, RefII);
- MIB.addReg(VReg);
- MIB.addImm(0);
- MIB.addMetadata(Var);
- MIB.addMetadata(Expr);
- return MIB;
+ auto AddVRegOp = [&](unsigned VReg) {
+ MOs.push_back(MachineOperand::CreateReg(
+ /* Reg */ VReg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true));
};
+ unsigned OpCount = SD->getLocationOps().size();
+ for (unsigned OpIdx = 0; OpIdx < OpCount; ++OpIdx) {
+ SDDbgOperand DbgOperand = SD->getLocationOps()[OpIdx];
+
+ // Try to find both the defined register and the instruction defining it.
+ MachineInstr *DefMI = nullptr;
+ unsigned VReg;
- // Try to find both the defined register and the instruction defining it.
- MachineInstr *DefMI = nullptr;
- unsigned VReg;
+ if (DbgOperand.getKind() == SDDbgOperand::VREG) {
+ VReg = DbgOperand.getVReg();
- if (DbgOperand.getKind() == SDDbgOperand::VREG) {
- VReg = DbgOperand.getVReg();
+ // No definition means that block hasn't been emitted yet. Leave a vreg
+ // reference to be fixed later.
+ if (!MRI->hasOneDef(VReg)) {
+ AddVRegOp(VReg);
+ continue;
+ }
- // No definition means that block hasn't been emitted yet. Leave a vreg
- // reference to be fixed later.
- if (!MRI->hasOneDef(VReg))
- return EmitHalfDoneInstrRef(VReg);
+ DefMI = &*MRI->def_instr_begin(VReg);
+ } else if (DbgOperand.getKind() == SDDbgOperand::SDNODE) {
+ // Look up the corresponding VReg for the given SDNode, if any.
+ SDNode *Node = DbgOperand.getSDNode();
+ SDValue Op = SDValue(Node, DbgOperand.getResNo());
+ DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
+ // No VReg -> produce a DBG_VALUE $noreg instead.
+ if (I == VRBaseMap.end())
+ break;
- DefMI = &*MRI->def_instr_begin(VReg);
- } else {
- assert(DbgOperand.getKind() == SDDbgOperand::SDNODE);
- // Look up the corresponding VReg for the given SDNode, if any.
- SDNode *Node = DbgOperand.getSDNode();
- SDValue Op = SDValue(Node, DbgOperand.getResNo());
- DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
- // No VReg -> produce a DBG_VALUE $noreg instead.
- if (I==VRBaseMap.end())
- return EmitDbgNoLocation(SD);
-
- // Try to pick out a defining instruction at this point.
- VReg = getVR(Op, VRBaseMap);
-
- // Again, if there's no instruction defining the VReg right now, fix it up
- // later.
- if (!MRI->hasOneDef(VReg))
- return EmitHalfDoneInstrRef(VReg);
-
- DefMI = &*MRI->def_instr_begin(VReg);
- }
+ // Try to pick out a defining instruction at this point.
+ VReg = getVR(Op, VRBaseMap);
- // Avoid copy like instructions: they don't define values, only move them.
- // Leave a virtual-register reference until it can be fixed up later, to find
- // the underlying value definition.
- if (DefMI->isCopyLike() || TII->isCopyInstr(*DefMI))
- return EmitHalfDoneInstrRef(VReg);
+ // Again, if there's no instruction defining the VReg right now, fix it up
+ // later.
+ if (!MRI->hasOneDef(VReg)) {
+ AddVRegOp(VReg);
+ continue;
+ }
- auto MIB = BuildMI(*MF, DL, RefII);
+ DefMI = &*MRI->def_instr_begin(VReg);
+ } else {
+ assert(DbgOperand.getKind() == SDDbgOperand::CONST);
+ MOs.push_back(GetMOForConstDbgOp(DbgOperand));
+ continue;
+ }
- // Find the operand number which defines the specified VReg.
- unsigned OperandIdx = 0;
- for (const auto &MO : DefMI->operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg() == VReg)
- break;
- ++OperandIdx;
+ // Avoid copy-like instructions: they don't define values, only move them.
+ // Leave a virtual-register reference until it can be fixed up later, to
+ // find the underlying value definition.
+ if (DefMI->isCopyLike() || TII->isCopyInstr(*DefMI)) {
+ AddVRegOp(VReg);
+ continue;
+ }
+
+ // Find the operand number which defines the specified VReg.
+ unsigned OperandIdx = 0;
+ for (const auto &MO : DefMI->operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == VReg)
+ break;
+ ++OperandIdx;
+ }
+ assert(OperandIdx < DefMI->getNumOperands());
+
+ // Make the DBG_INSTR_REF refer to that instruction, and that operand.
+ unsigned InstrNum = DefMI->getDebugInstrNum();
+ MOs.push_back(MachineOperand::CreateDbgInstrRef(InstrNum, OperandIdx));
}
- assert(OperandIdx < DefMI->getNumOperands());
- // Make the DBG_INSTR_REF refer to that instruction, and that operand.
- unsigned InstrNum = DefMI->getDebugInstrNum();
- MIB.addImm(InstrNum);
- MIB.addImm(OperandIdx);
- MIB.addMetadata(Var);
- MIB.addMetadata(Expr);
- return &*MIB;
+ // If we haven't created a valid MachineOperand for every DbgOp, abort and
+ // produce an undef DBG_VALUE.
+ if (MOs.size() != OpCount)
+ return EmitDbgNoLocation(SD);
+
+ return BuildMI(*MF, DL, RefII, false, MOs, Var, Expr);
}
MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) {
// An invalidated SDNode must generate an undef DBG_VALUE: although the
// original value is no longer computed, earlier DBG_VALUEs' live ranges
// must not leak into later code.
+ DIVariable *Var = SD->getVariable();
+ const DIExpression *Expr =
+ DIExpression::convertToUndefExpression(SD->getExpression());
+ DebugLoc DL = SD->getDebugLoc();
+ const MCInstrDesc &Desc = TII->get(TargetOpcode::DBG_VALUE);
+ return BuildMI(*MF, DL, Desc, false, 0U, Var, Expr);
+}
+
+MachineInstr *
+InstrEmitter::EmitDbgValueList(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap) {
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+ DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
- auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
- MIB.addReg(0U);
- MIB.addReg(0U);
+ // DBG_VALUE_LIST := "DBG_VALUE_LIST" var, expression, loc (, loc)*
+ const MCInstrDesc &DbgValDesc = TII->get(TargetOpcode::DBG_VALUE_LIST);
+ // Build the DBG_VALUE_LIST instruction base.
+ auto MIB = BuildMI(*MF, DL, DbgValDesc);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
+ AddDbgValueLocationOps(MIB, DbgValDesc, SD->getLocationOps(), VRBaseMap);
return &*MIB;
}
@@ -984,8 +1021,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
II.isVariadic() && II.variadicOpsAreDefs();
- bool HasPhysRegOuts = NumResults > NumDefs &&
- II.getImplicitDefs() != nullptr && !HasVRegVariadicDefs;
+ bool HasPhysRegOuts = NumResults > NumDefs && !II.implicit_defs().empty() &&
+ !HasVRegVariadicDefs;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
if (II.isVariadic())
@@ -993,8 +1030,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
"Too few operands for a variadic node!");
else
assert(NumMIOperands >= II.getNumOperands() &&
- NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
- NumImpUses &&
+ NumMIOperands <=
+ II.getNumOperands() + II.implicit_defs().size() + NumImpUses &&
"#operands for dag node doesn't match .td file!");
#endif
@@ -1063,6 +1100,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// part of the function.
MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands());
+ // Set the CFI type.
+ MIB->setCFIType(*MF, Node->getCFIType());
+
// Insert the instruction into position in the block. This needs to
// happen before any custom inserter hook is called so that the
// hook knows where in the block to insert the replacement code.
@@ -1088,12 +1128,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = NumDefs; i < NumResults; ++i) {
- Register Reg = II.getImplicitDefs()[i - NumDefs];
+ Register Reg = II.implicit_defs()[i - NumDefs];
if (!Node->hasAnyUseOfValue(i))
continue;
// This implicitly defined physreg has a use.
UsedRegs.push_back(Reg);
- EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ EmitCopyFromReg(Node, i, IsClone, Reg, VRBaseMap);
}
}
@@ -1109,8 +1149,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
// Collect declared implicit uses.
const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
- UsedRegs.append(MCID.getImplicitUses(),
- MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ append_range(UsedRegs, MCID.implicit_uses());
// In addition to declared implicit uses, we must also check for
// direct RegisterSDNode operands.
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
@@ -1123,7 +1162,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
// Finally mark unused registers as dead.
- if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
+ if (!UsedRegs.empty() || !II.implicit_defs().empty() || II.hasOptionalDef())
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
// STATEPOINT is too 'dynamic' to have meaningful machine description.
@@ -1159,14 +1198,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
#endif
llvm_unreachable("This target-independent node should have been selected!");
case ISD::EntryToken:
- llvm_unreachable("EntryToken should have been excluded from the schedule!");
case ISD::MERGE_VALUES:
case ISD::TokenFactor: // fall thru
break;
case ISD::CopyToReg: {
Register DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
SDValue SrcVal = Node->getOperand(2);
- if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() &&
+ if (DestReg.isVirtual() && SrcVal.isMachineOpcode() &&
SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Instead building a COPY to that vreg destination, build an
// IMPLICIT_DEF instruction instead.
@@ -1189,7 +1227,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
case ISD::CopyFromReg: {
unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
- EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ EmitCopyFromReg(Node, 0, IsClone, SrcReg, VRBaseMap);
break;
}
case ISD::EH_LABEL:
@@ -1273,28 +1311,25 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
default: llvm_unreachable("Bad flags!");
case InlineAsm::Kind_RegDef:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
- unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
// FIXME: Add dead flags for physical and virtual registers defined.
// For now, mark physical register defs as implicit to help fast
// regalloc. This makes inline asm look a lot like calls.
- MIB.addReg(Reg,
- RegState::Define |
- getImplRegState(Register::isPhysicalRegister(Reg)));
+ MIB.addReg(Reg, RegState::Define | getImplRegState(Reg.isPhysical()));
}
break;
case InlineAsm::Kind_RegDefEarlyClobber:
case InlineAsm::Kind_Clobber:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
- unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- MIB.addReg(Reg,
- RegState::Define | RegState::EarlyClobber |
- getImplRegState(Register::isPhysicalRegister(Reg)));
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
+ getImplRegState(Reg.isPhysical()));
ECRegs.push_back(Reg);
}
break;
case InlineAsm::Kind_RegUse: // Use of register.
case InlineAsm::Kind_Imm: // Immediate.
- case InlineAsm::Kind_Mem: // Addressing mode.
+ case InlineAsm::Kind_Mem: // Non-function addressing mode.
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
for (unsigned j = 0; j != NumVals; ++j, ++i)
@@ -1312,6 +1347,21 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
break;
+ case InlineAsm::Kind_Func: // Function addressing mode.
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ SDValue Op = Node->getOperand(i);
+ AddOperand(MIB, Op, 0, nullptr, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Adjust Target Flags for function reference.
+ if (auto *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ unsigned NewFlags =
+ MF->getSubtarget().classifyGlobalFunctionReference(
+ TGA->getGlobal());
+ unsigned LastIdx = MIB.getInstr()->getNumOperands() - 1;
+ MIB.getInstr()->getOperand(LastIdx).setTargetFlags(NewFlags);
+ }
+ }
}
}
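// Sketch of the intent: for an inline-asm function operand (e.g. the callee
// of a "call *%0" template), the subtarget can re-classify the reference;
// an ELF PIC subtarget might, for instance, answer with its PLT flag so the
// operand prints as f@PLT. The concrete flag values are target-specific and
// named here only for illustration.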
@@ -1344,12 +1394,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
- MachineBasicBlock::iterator insertpos,
- bool UseInstrRefDebugInfo)
+ MachineBasicBlock::iterator insertpos)
: MF(mbb->getParent()), MRI(&MF->getRegInfo()),
TII(MF->getSubtarget().getInstrInfo()),
TRI(MF->getSubtarget().getRegisterInfo()),
TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
InsertPos(insertpos) {
- EmitDebugInstrRefs = UseInstrRefDebugInfo;
+ EmitDebugInstrRefs = mbb->getParent()->useDebugInstrRef();
}
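// Usage sketch (call-site names assumed from context): emitters are now
// constructed without threading the debug-info mode through, e.g.
//   InstrEmitter Emitter(DAG->getTarget(), MBB, InsertPos);
// and the instruction-referencing choice is read off the owning
// MachineFunction instead.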
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index ced8f064b9be..959bce31c8b2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -44,10 +44,8 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
/// implicit physical register output.
- void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
- bool IsClone, bool IsCloned,
- Register SrcReg,
- DenseMap<SDValue, Register> &VRBaseMap);
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
+ Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap);
void CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
@@ -128,6 +126,10 @@ public:
/// Emit a DBG_VALUE $noreg, indicating a variable has no location.
MachineInstr *EmitDbgNoLocation(SDDbgValue *SD);
+ /// Emit a DBG_VALUE_LIST from the operands to SDDbgValue.
+ MachineInstr *EmitDbgValueList(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
/// Emit a DBG_VALUE from the operands to SDDbgValue.
MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap);
@@ -154,8 +156,7 @@ public:
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
- MachineBasicBlock::iterator insertpos,
- bool UseInstrRefDebugInfo);
+ MachineBasicBlock::iterator insertpos);
private:
void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 56d35dfe8701..c3106216a060 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -142,10 +142,12 @@ private:
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128,
SmallVectorImpl<SDValue> &Results);
- SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
- RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
- RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128,
- RTLIB::Libcall Call_IEXT);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
void ExpandArgFPLibCall(SDNode *Node,
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -308,7 +310,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
// We don't want to shrink SNaNs. Converting the SNaN back to its real type
// can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ).
if (!APF.isSignaling()) {
- while (SVT != MVT::f32 && SVT != MVT::f16) {
+ while (SVT != MVT::f32 && SVT != MVT::f16 && SVT != MVT::bf16) {
SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
if (ConstantFPSDNode::isValueValidForType(SVT, APF) &&
// Only do this if the target has a native EXTLOAD instruction from
@@ -550,16 +552,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Promote to a byte-sized store with upper bits zero if not
// storing an integral number of bytes. For example, promote
// TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedSize());
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedValue());
Value = DAG.getZeroExtendInReg(Value, dl, StVT);
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
ST->getOriginalAlign(), MMOFlags, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
- } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedSize())) {
+ } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedValue())) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
- unsigned StWidthBits = StWidth.getFixedSize();
+ unsigned StWidthBits = StWidth.getFixedValue();
unsigned LogStWidth = Log2_32(StWidthBits);
assert(LogStWidth < 32);
unsigned RoundWidth = 1 << LogStWidth;
@@ -767,10 +769,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Value = Result;
Chain = Ch;
- } else if (!isPowerOf2_64(SrcWidth.getKnownMinSize())) {
+ } else if (!isPowerOf2_64(SrcWidth.getKnownMinValue())) {
// If not loading a power-of-2 number of bits, expand as two loads.
assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned SrcWidthBits = SrcWidth.getFixedSize();
+ unsigned SrcWidthBits = SrcWidth.getFixedValue();
unsigned LogSrcWidth = Log2_32(SrcWidthBits);
assert(LogSrcWidth < 32);
unsigned RoundWidth = 1 << LogSrcWidth;
@@ -850,7 +852,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TargetLowering::Legal:
Value = SDValue(Node, 0);
Chain = SDValue(Node, 1);
@@ -1035,12 +1037,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
case ISD::SETCC:
+ case ISD::SETCCCARRY:
case ISD::VP_SETCC:
case ISD::BR_CC: {
unsigned Opc = Node->getOpcode();
unsigned CCOperand = Opc == ISD::SELECT_CC ? 4
: Opc == ISD::STRICT_FSETCC ? 3
: Opc == ISD::STRICT_FSETCCS ? 3
+ : Opc == ISD::SETCCCARRY ? 3
: (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2
: 1;
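// (SETCCCARRY takes (LHS, RHS, CarryIn, CondCode), so its condition code
// sits at index 3, matching the strict-FP setcc forms.)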
unsigned CompareOperand = Opc == ISD::BR_CC ? 2
@@ -1074,7 +1078,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
SimpleFinishLegalizing = false;
break;
case ISD::EXTRACT_ELEMENT:
- case ISD::FLT_ROUNDS_:
+ case ISD::GET_ROUNDING:
case ISD::MERGE_VALUES:
case ISD::EH_RETURN:
case ISD::FRAME_TO_ARGS_OFFSET:
@@ -1317,11 +1321,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
}
LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TargetLowering::Expand:
if (ExpandNode(Node))
return;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TargetLowering::LibCall:
ConvertNodeToLibcall(Node);
return;
@@ -1717,8 +1721,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
DAG.getConstant(-Alignment.value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
- Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
- DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
+ Tmp2 = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
Results.push_back(Tmp1);
Results.push_back(Tmp2);
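// Sketch of the assumed overload semantics: the two-integer form builds the
// pair of target IntPtr constants internally, so
//   DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl)
// abbreviates the old explicit getIntPtrConstant(0, dl, /*isTarget=*/true)
// operands.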
@@ -2111,17 +2114,15 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
ExpandFPLibCall(Node, LC, Results);
}
-SDValue SelectionDAGLegalize::ExpandIntLibCall(
- SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
- RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
- RTLIB::Libcall Call_I128, RTLIB::Libcall Call_IEXT) {
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
-
- default:
- LC = Call_IEXT;
- break;
-
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
case MVT::i32: LC = Call_I32; break;
@@ -2156,11 +2157,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
-
- default:
- LC = isSigned ? RTLIB::SDIVREM_IEXT : RTLIB::UDIVREM_IEXT;
- break;
-
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2744,7 +2741,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
FA, Offset));
break;
}
- case ISD::FLT_ROUNDS_:
+ case ISD::GET_ROUNDING:
Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
Results.push_back(Node->getOperand(0));
break;
@@ -2911,13 +2908,44 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
case ISD::BF16_TO_FP: {
// Always expand bf16 to f32 casts, they lower to ext + shift.
- SDValue Op = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Node->getOperand(0));
- Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op);
+ //
+ // Note that the operand of this node can be bf16 or an integer type in
+ // case bf16 is not supported on the target and was softened.
+ SDValue Op = Node->getOperand(0);
+ if (Op.getValueType() == MVT::bf16) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i16, Op));
+ } else {
+ Op = DAG.getAnyExtOrTrunc(Op, dl, MVT::i32);
+ }
Op = DAG.getNode(
ISD::SHL, dl, MVT::i32, Op,
DAG.getConstant(16, dl,
TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op);
+ // Add fp_extend in case the output is bigger than f32.
+ if (Node->getValueType(0) != MVT::f32)
+ Op = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Op);
+ Results.push_back(Op);
+ break;
+ }
+ case ISD::FP_TO_BF16: {
+ SDValue Op = Node->getOperand(0);
+ if (Op.getValueType() != MVT::f32)
+ Op = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+ Op = DAG.getNode(
+ ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op),
+ DAG.getConstant(16, dl,
+ TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
+ // The result of this node can be bf16 or an integer type in case bf16 is
+ // not supported on the target and was softened to i16 for storage.
+ if (Node->getValueType(0) == MVT::bf16) {
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::bf16,
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Op));
+ } else {
+ Op = DAG.getAnyExtOrTrunc(Op, dl, Node->getValueType(0));
+ }
Results.push_back(Op);
break;
}
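// Sketch of the bit-level conversions above, as plain C++ on raw bit
// patterns (a hypothetical bf16 carried as its 16-bit encoding):
//   uint32_t f32_bits  = uint32_t(bf16_bits) << 16;  // BF16_TO_FP
//   uint16_t bf16_bits = uint16_t(f32_bits >> 16);   // FP_TO_BF16
// bf16 is the high half of an IEEE f32, so the extension is exact, and the
// FP_TO_BF16 expansion here simply truncates (no rounding step).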
@@ -2961,7 +2989,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp2);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
if ((Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2))) {
@@ -3112,7 +3140,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::EXTRACT_ELEMENT: {
EVT OpTy = Node->getOperand(0).getValueType();
- if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ if (Node->getConstantOperandVal(1)) {
// 1 -> Hi
Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
DAG.getConstant(OpTy.getSizeInBits() / 2, dl,
@@ -3251,8 +3279,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) {
// Under fastmath, we can expand this node into a fround followed by
// a float-half conversion.
- SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
- DAG.getIntPtrConstant(0, dl));
+ SDValue FloatVal =
+ DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
Results.push_back(
DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal));
}
@@ -4379,24 +4408,28 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::SUB_PPCF128, Results);
break;
case ISD::SREM:
- Results.push_back(ExpandIntLibCall(
- Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32,
- RTLIB::SREM_I64, RTLIB::SREM_I128, RTLIB::SREM_IEXT));
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128));
break;
case ISD::UREM:
- Results.push_back(ExpandIntLibCall(
- Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32,
- RTLIB::UREM_I64, RTLIB::UREM_I128, RTLIB::UREM_IEXT));
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128));
break;
case ISD::SDIV:
- Results.push_back(ExpandIntLibCall(
- Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
- RTLIB::SDIV_I64, RTLIB::SDIV_I128, RTLIB::SDIV_IEXT));
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128));
break;
case ISD::UDIV:
- Results.push_back(ExpandIntLibCall(
- Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
- RTLIB::UDIV_I64, RTLIB::UDIV_I128, RTLIB::UDIV_IEXT));
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128));
break;
case ISD::SDIVREM:
case ISD::UDIVREM:
@@ -4404,9 +4437,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandDivRemLibCall(Node, Results);
break;
case ISD::MUL:
- Results.push_back(ExpandIntLibCall(
- Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32,
- RTLIB::MUL_I64, RTLIB::MUL_I128, RTLIB::MUL_IEXT));
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128));
break;
case ISD::CTLZ_ZERO_UNDEF:
switch (Node->getSimpleValueType(0).SimpleTy) {
@@ -4696,7 +4730,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1);
else
Tmp1 = DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp1,
- DAG.getIntPtrConstant(0, dl));
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
Results.push_back(Tmp1);
break;
@@ -4756,8 +4790,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
Node->getFlags());
- Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
- Tmp3, DAG.getIntPtrConstant(0, dl)));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
break;
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
@@ -4787,7 +4822,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(
DAG.getNode(ISD::FP_ROUND, dl, OVT,
DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
- DAG.getIntPtrConstant(0, dl)));
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
break;
case ISD::STRICT_FMA:
Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
@@ -4817,8 +4852,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
// (fp_round (fpext a))
// which is a no-op. Mark it as a TRUNCating FP_ROUND.
const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN);
- Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
- Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3,
+ DAG.getIntPtrConstant(isTrunc, dl, /*isTarget=*/true)));
break;
}
case ISD::STRICT_FPOWI:
@@ -4850,8 +4886,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FEXP2:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
- Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
- Tmp2, DAG.getIntPtrConstant(0, dl)));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
break;
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FCEIL:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index b2df67f45c72..f1e80ce7e037 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1071,8 +1071,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
if (ST->isTruncatingStore())
// Do an FP_ROUND followed by a non-truncating store.
- Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
- Val, DAG.getIntPtrConstant(0, dl)));
+ Val = BitConvertToInteger(
+ DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), Val,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
else
Val = GetSoftenedFloat(Val);
@@ -2532,7 +2533,8 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) {
// Round the value to the desired precision (that of the source type).
return DAG.getNode(
ISD::FP_EXTEND, DL, NVT,
- DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL)));
+ DAG.getNode(ISD::FP_ROUND, DL, VT, NV,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)));
}
SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
@@ -2746,39 +2748,47 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
SDLoc dl(N);
// Promote to the larger FP type.
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
- Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
- Op2 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op2);
+ auto PromotionOpcode = GetPromotionOpcode(OVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
+ Op2 = DAG.getNode(PromotionOpcode, dl, NVT, Op2);
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2);
// Convert back to FP16 as an integer.
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
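// Sketch of what GetPromotionOpcode is assumed to select, which is what
// lets these soft-promote-half paths cover bf16 as well as f16:
//   static unsigned GetPromotionOpcode(EVT OpVT, EVT RetVT) {
//     if (OpVT == MVT::f16)   return ISD::FP16_TO_FP;
//     if (RetVT == MVT::f16)  return ISD::FP_TO_FP16;
//     if (OpVT == MVT::bf16)  return ISD::BF16_TO_FP;
//     if (RetVT == MVT::bf16) return ISD::FP_TO_BF16;
//     llvm_unreachable("Invalid promotion-related conversion");
//   }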
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+ // Promote to the larger FP type.
+ Op0 = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op0);
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1);
// Convert back to FP16 as an integer.
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT SVT = N->getOperand(0).getValueType();
+
if (N->isStrictFPOpcode()) {
+ assert(RVT == MVT::f16);
SDValue Res =
DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other},
{N->getOperand(0), N->getOperand(1)});
@@ -2786,7 +2796,8 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
return Res;
}
- return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), MVT::i16, N->getOperand(0));
+ return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), MVT::i16,
+ N->getOperand(0));
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) {
@@ -2821,13 +2832,14 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDLoc dl(N);
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
// Round the value to the softened type.
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) {
@@ -2835,33 +2847,36 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
SDLoc dl(N);
// Promote to the larger FP type.
- Op = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+ Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op);
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Convert back to FP16 as an integer.
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
SDLoc dl(N);
// Promote to the larger FP type.
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
- Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+ auto PromotionOpcode = GetPromotionOpcode(OVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1);
// Convert back to FP16 as an integer.
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE(SDNode *N) {
@@ -2945,22 +2960,27 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N,
unsigned OpNo) {
assert(OpNo == 1 && "Only Operand 1 must need promotion here");
SDValue Op1 = N->getOperand(1);
+ EVT RVT = Op1.getValueType();
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op1.getValueType());
Op1 = GetSoftPromotedHalf(Op1);
- Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+ Op1 = DAG.getNode(GetPromotionOpcode(RVT, NVT), dl, NVT, Op1);
return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), N->getOperand(0),
Op1);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
- SDValue Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
+ Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
if (IsStrict) {
+ assert(SVT == MVT::f16);
SDValue Res =
DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N),
{N->getValueType(0), MVT::Other}, {N->getOperand(0), Op});
@@ -2969,31 +2989,35 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
return SDValue();
}
- return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), Op);
+ return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
SDValue Op = N->getOperand(0);
+ EVT SVT = Op.getValueType();
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
- SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+ SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op);
return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res);
}
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
SDValue Op = N->getOperand(0);
+ EVT SVT = Op.getValueType();
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
- SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+ SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op);
return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res,
N->getOperand(1));
@@ -3006,14 +3030,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N,
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType());
+ EVT SVT = Op0.getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), SVT);
Op0 = GetSoftPromotedHalf(Op0);
Op1 = GetSoftPromotedHalf(Op1);
// Promote to the larger FP type.
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
- Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+ auto PromotionOpcode = GetPromotionOpcode(SVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), Op0, Op1,
N->getOperand(2), N->getOperand(3), N->getOperand(4));
@@ -3025,14 +3051,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SETCC(SDNode *N) {
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDLoc dl(N);
+ EVT SVT = Op0.getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType());
Op0 = GetSoftPromotedHalf(Op0);
Op1 = GetSoftPromotedHalf(Op1);
// Promote to the larger FP type.
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
- Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+ auto PromotionOpcode = GetPromotionOpcode(SVT, NVT);
+ Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0);
+ Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
return DAG.getSetCC(SDLoc(N), N->getValueType(0), Op0, Op1, CCCode);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index e2173879c218..c9ce9071a25d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -137,8 +137,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
- case ISD::VP_FPTOSI:
- case ISD::VP_FPTOUI:
+ case ISD::VP_FP_TO_SINT:
+ case ISD::VP_FP_TO_UINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
@@ -148,9 +148,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_UINT_SAT:
Res = PromoteIntRes_FP_TO_XINT_SAT(N); break;
- case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
+ case ISD::FP_TO_BF16:
+ case ISD::FP_TO_FP16:
+ Res = PromoteIntRes_FP_TO_FP16_BF16(N);
+ break;
- case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break;
+ case ISD::GET_ROUNDING: Res = PromoteIntRes_GET_ROUNDING(N); break;
case ISD::AND:
case ISD::OR:
@@ -165,11 +168,15 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_SUB:
case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+ case ISD::VP_SMIN:
+ case ISD::VP_SMAX:
case ISD::SDIV:
case ISD::SREM:
case ISD::VP_SDIV:
case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::VP_UMIN:
+ case ISD::VP_UMAX:
case ISD::UDIV:
case ISD::UREM:
case ISD::VP_UDIV:
@@ -673,10 +680,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
NewOpc = ISD::STRICT_FP_TO_SINT;
- if (N->getOpcode() == ISD::VP_FPTOUI &&
- !TLI.isOperationLegal(ISD::VP_FPTOUI, NVT) &&
- TLI.isOperationLegalOrCustom(ISD::VP_FPTOSI, NVT))
- NewOpc = ISD::VP_FPTOSI;
+ if (N->getOpcode() == ISD::VP_FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::VP_FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, NVT))
+ NewOpc = ISD::VP_FP_TO_SINT;
SDValue Res;
if (N->isStrictFPOpcode()) {
@@ -685,7 +692,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- } else if (NewOpc == ISD::VP_FPTOSI || NewOpc == ISD::VP_FPTOUI) {
+ } else if (NewOpc == ISD::VP_FP_TO_SINT || NewOpc == ISD::VP_FP_TO_UINT) {
Res = DAG.getNode(NewOpc, dl, NVT, {N->getOperand(0), N->getOperand(1),
N->getOperand(2)});
} else {
@@ -701,7 +708,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// after legalization: fp-to-sint32, 65534. -> 0x0000fffe
return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
N->getOpcode() == ISD::STRICT_FP_TO_UINT ||
- N->getOpcode() == ISD::VP_FPTOUI)
+ N->getOpcode() == ISD::VP_FP_TO_UINT)
? ISD::AssertZext
: ISD::AssertSext,
dl, NVT, Res,
@@ -716,14 +723,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) {
N->getOperand(1));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_GET_ROUNDING(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -836,7 +843,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
SDLoc dl(N);
SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT),
- makeArrayRef(Ops, NumOps));
+ ArrayRef(Ops, NumOps));
// Modified the sum result - switch anything that used the old sum to use
// the new one.
@@ -1555,7 +1562,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
+ const APInt &MulImm = N->getConstantOperandAPInt(0);
return DAG.getVScale(SDLoc(N), VT, MulImm.sext(VT.getSizeInBits()));
}
@@ -1648,7 +1655,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_SETCC:
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
- case ISD::VP_SITOFP:
+ case ISD::VP_SINT_TO_FP:
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
@@ -1663,8 +1670,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
OpNo); break;
case ISD::VP_TRUNCATE:
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::BF16_TO_FP:
case ISD::FP16_TO_FP:
- case ISD::VP_UITOFP:
+ case ISD::VP_UINT_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
@@ -1998,7 +2006,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
- if (N->getOpcode() == ISD::VP_SITOFP)
+ if (N->getOpcode() == ISD::VP_SINT_TO_FP)
return SDValue(DAG.UpdateNodeOperands(N,
SExtPromotedInteger(N->getOperand(0)),
N->getOperand(1), N->getOperand(2)),
@@ -2127,7 +2135,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
- if (N->getOpcode() == ISD::VP_UITOFP)
+ if (N->getOpcode() == ISD::VP_UINT_TO_FP)
return SDValue(DAG.UpdateNodeOperands(N,
ZExtPromotedInteger(N->getOperand(0)),
N->getOperand(1), N->getOperand(2)),
@@ -2420,17 +2428,21 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
- case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
+ case ISD::GET_ROUNDING: ExpandIntRes_GET_ROUNDING(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break;
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LRINT:
+ case ISD::LROUND:
+ case ISD::LRINT:
case ISD::STRICT_LLROUND:
case ISD::STRICT_LLRINT:
case ISD::LLROUND:
- case ISD::LLRINT: ExpandIntRes_LLROUND_LLRINT(N, Lo, Hi); break;
+ case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
@@ -2866,15 +2878,29 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
ISD::CondCode CondC;
std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
- GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
- GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
// Value types
EVT NVT = LHSL.getValueType();
EVT CCT = getSetCCResultType(NVT);
+ // If the upper halves are all sign bits, then we can perform the MINMAX on
+ // the lower half and sign-extend the result to the upper half.
+ unsigned NumHalfBits = NVT.getScalarSizeInBits();
+ if (DAG.ComputeNumSignBits(LHS) > NumHalfBits &&
+ DAG.ComputeNumSignBits(RHS) > NumHalfBits) {
+ Lo = DAG.getNode(N->getOpcode(), DL, NVT, LHSL, RHSL);
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, Lo,
+ DAG.getShiftAmountConstant(NumHalfBits - 1, NVT, DL));
+ return;
+ }
+
// Hi part is always the same op
Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
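// Worked example of the early-out above: expanding an i64 smax into i32
// halves, when both inputs are known sign extensions from i32 (more than 32
// sign bits), reduces to
//   Lo = smax(lo(a), lo(b));  Hi = Lo >>s 31;
// since the ordering of sign-extended values is decided entirely by their
// low halves.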
@@ -2913,13 +2939,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.computeKnownBits(HiOps[2]).isZero()
- ? DAG.getNode(ISD::UADDO, dl, VTList, makeArrayRef(HiOps, 2))
+ ? DAG.getNode(ISD::UADDO, dl, VTList, ArrayRef(HiOps, 2))
: DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.computeKnownBits(HiOps[2]).isZero()
- ? DAG.getNode(ISD::USUBO, dl, VTList, makeArrayRef(HiOps, 2))
+ ? DAG.getNode(ISD::USUBO, dl, VTList, ArrayRef(HiOps, 2))
: DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
}
return;
@@ -2962,18 +2988,18 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
if (N->getOpcode() == ISD::ADD) {
RevOpc = ISD::SUB;
Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2));
} else {
RevOpc = ISD::ADD;
Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
- Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2));
}
SDValue OVF = Lo.getValue(1);
switch (BoolType) {
case TargetLoweringBase::UndefinedBooleanContent:
OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TargetLoweringBase::ZeroOrOneBooleanContent:
OVF = DAG.getZExtOrTrunc(OVF, dl, NVT);
Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
@@ -2987,27 +3013,21 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
- SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
- ISD::SETULT);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2));
+ SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
- if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) {
- SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
- return;
- }
+ SDValue Carry;
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
+ Carry = DAG.getZExtOrTrunc(Cmp, dl, NVT);
+ else
+ Carry = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
- SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
- DAG.getConstant(1, dl, NVT),
- DAG.getConstant(0, dl, NVT));
- SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1],
- ISD::SETULT);
- SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2,
- DAG.getConstant(1, dl, NVT), Carry1);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
} else {
Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps);
- Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2));
SDValue Cmp =
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
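// Note on the add path above: one unsigned compare suffices because an
// unsigned addition wraps iff the sum is smaller than an addend:
//   Lo = a_lo + b_lo;  carry = (Lo <u a_lo);
// which is why the second compare of the previous expansion was redundant.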
@@ -3280,6 +3300,14 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
GetExpandedInteger(N0, Lo, Hi);
EVT NVT = Lo.getValueType();
+ // If the upper half is all sign bits, then we can perform the ABS on the
+ // lower half and zero-extend.
+ if (DAG.ComputeNumSignBits(N0) > NVT.getScalarSizeInBits()) {
+ Lo = DAG.getNode(ISD::ABS, dl, NVT, Lo);
+ Hi = DAG.getConstant(0, dl, NVT);
+ return;
+ }
+
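// Worked example: for abs(i64 %x) with %x known sign-extended from i32,
//   Lo = abs(lo(%x));  Hi = 0;
// is exact, since |%x| <= 2^31 always fits unsigned in the low 32 bits.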
// If we have SUBCARRY, use the expanded form of the sra+xor+sub sequence we
// use in LegalizeDAG. The SUB part of the expansion is based on
// ExpandIntRes_ADDSUB which also uses SUBCARRY/USUBO after checking that
@@ -3364,15 +3392,15 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
Hi = DAG.getConstant(0, dl, NVT);
}
-void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
+void DAGTypeLegalizer::ExpandIntRes_GET_ROUNDING(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
- Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0));
+ Lo = DAG.getNode(ISD::GET_ROUNDING, dl, {NVT, MVT::Other}, N->getOperand(0));
SDValue Chain = Lo.getValue(1);
- // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
+ // The high part is the sign of Lo, as -1 is a valid value for GET_ROUNDING
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getShiftAmountConstant(NBitWidth - 1, NVT, dl));
@@ -3450,17 +3478,57 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
SplitInteger(Res, Lo, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
+void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat &&
"Input type needs to be promoted!");
EVT VT = Op.getValueType();
+ if (VT == MVT::f16) {
+ VT = MVT::f32;
+ // Extend to f32.
+ if (IsStrict) {
+ Op = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {VT, MVT::Other},
+ {Chain, Op});
+ Chain = Op.getValue(1);
+ } else {
+ Op = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op);
+ }
+ }
+
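// Sketch of why f16 is pre-extended: the RTLIB tables below carry no
// half-precision entries, so the assumed source-level equivalent is
//   long lround_f16(_Float16 x) { return lroundf((float)x); }
// i.e. widen to float, then reuse the f32 libcall.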
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (N->getOpcode() == ISD::LLROUND ||
+ if (N->getOpcode() == ISD::LROUND ||
+ N->getOpcode() == ISD::STRICT_LROUND) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LROUND_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LROUND_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LROUND_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LROUND_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LROUND_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lround input type!");
+ } else if (N->getOpcode() == ISD::LRINT ||
+ N->getOpcode() == ISD::STRICT_LRINT) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LRINT_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LRINT_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LRINT_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LRINT_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LRINT_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lrint input type!");
+ } else if (N->getOpcode() == ISD::LLROUND ||
N->getOpcode() == ISD::STRICT_LLROUND) {
if (VT == MVT::f32)
LC = RTLIB::LLROUND_F32;
@@ -3489,9 +3557,7 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo,
} else
llvm_unreachable("Unexpected opcode!");
- SDLoc dl(N);
EVT RetVT = N->getValueType(0);
- SDValue Chain = N->isStrictFPOpcode() ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
@@ -4046,70 +4112,6 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
ReplaceValueWith(SDValue(Node, 1), Ovf);
}
-// Emit a call to __udivei4 and friends which require
-// the arguments be based on the stack
-// and extra argument that contains the number of bits of the operands.
-// Returns the result of the call operation.
-static SDValue ExpandExtIntRes_DIVREM(const TargetLowering &TLI,
- const RTLIB::Libcall &LC,
- SelectionDAG &DAG, SDNode *N,
- const SDLoc &DL, const EVT &VT) {
-
- SDValue InChain = DAG.getEntryNode();
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
-
- // The signature of __udivei4 is
- // void __udivei4(unsigned int *quo, unsigned int *a, unsigned int *b,
- // unsigned int bits)
- EVT ArgVT = N->op_begin()->getValueType();
- assert(ArgVT.isInteger() && ArgVT.getSizeInBits() > 128 &&
- "Unexpected argument type for lowering");
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-
- SDValue Output = DAG.CreateStackTemporary(ArgVT);
- Entry.Node = Output;
- Entry.Ty = ArgTy->getPointerTo();
- Entry.IsSExt = false;
- Entry.IsZExt = false;
- Args.push_back(Entry);
-
- for (const llvm::SDUse &Op : N->ops()) {
- SDValue StackPtr = DAG.CreateStackTemporary(ArgVT);
- InChain = DAG.getStore(InChain, DL, Op, StackPtr, MachinePointerInfo());
- Entry.Node = StackPtr;
- Entry.Ty = ArgTy->getPointerTo();
- Entry.IsSExt = false;
- Entry.IsZExt = false;
- Args.push_back(Entry);
- }
-
- int Bits = N->getOperand(0)
- .getValueType()
- .getTypeForEVT(*DAG.getContext())
- ->getIntegerBitWidth();
- Entry.Node = DAG.getConstant(Bits, DL, TLI.getPointerTy(DAG.getDataLayout()));
- Entry.Ty = Type::getInt32Ty(*DAG.getContext());
- Entry.IsSExt = false;
- Entry.IsZExt = true;
- Args.push_back(Entry);
-
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy(DAG.getDataLayout()));
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(DL)
- .setChain(InChain)
- .setLibCallee(TLI.getLibcallCallingConv(LC),
- Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args))
- .setDiscardResult();
-
- SDValue Chain = TLI.LowerCallTo(CLI).second;
-
- return DAG.getLoad(ArgVT, DL, Chain, Output, MachinePointerInfo());
-}
-
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -4131,14 +4133,6 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
LC = RTLIB::SDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::SDIV_I128;
-
- else {
- SDValue Result =
- ExpandExtIntRes_DIVREM(TLI, RTLIB::SDIV_IEXT, DAG, N, dl, VT);
- SplitInteger(Result, Lo, Hi);
- return;
- }
-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4146,6 +4140,111 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ SDValue Shiftee = N->getOperand(0);
+ EVT VT = Shiftee.getValueType();
+ SDValue ShAmt = N->getOperand(1);
+ EVT ShAmtVT = ShAmt.getValueType();
+
+ // This legalization is optimal when the shift is by a multiple of byte width,
+ // %x * 8 <-> %x << 3 so 3 low bits should be be known zero.
+ bool ShiftByByteMultiple =
+ DAG.computeKnownBits(ShAmt).countMinTrailingZeros() >= 3;
+
+ // If we can't do it as one step, we'll have two uses of shift amount,
+ // and thus must freeze it.
+ if (!ShiftByByteMultiple)
+ ShAmt = DAG.getFreeze(ShAmt);
+
+ unsigned VTBitWidth = VT.getScalarSizeInBits();
+ assert(VTBitWidth % 8 == 0 && "Shifting a value that is not a byte multiple?");
+ unsigned VTByteWidth = VTBitWidth / 8;
+ assert(isPowerOf2_32(VTByteWidth) &&
+ "Shiftee type size is not a power of two!");
+ unsigned StackSlotByteWidth = 2 * VTByteWidth;
+ unsigned StackSlotBitWidth = 8 * StackSlotByteWidth;
+ EVT StackSlotVT = EVT::getIntegerVT(*DAG.getContext(), StackSlotBitWidth);
+
+ // Get a temporary stack slot 2x the width of our VT.
+ // FIXME: reuse stack slots?
+ // FIXME: should we be more picky about alignment?
+ Align StackSlotAlignment(1);
+ SDValue StackPtr = DAG.CreateStackTemporary(
+ TypeSize::getFixed(StackSlotByteWidth), StackSlotAlignment);
+ EVT PtrTy = StackPtr.getValueType();
+ SDValue Ch = DAG.getEntryNode();
+
+ MachinePointerInfo StackPtrInfo = MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(),
+ cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex());
+
+ // Extend the value that is being shifted to the entire stack slot's width.
+ SDValue Init;
+ if (N->getOpcode() != ISD::SHL) {
+ unsigned WideningOpc =
+ N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Init = DAG.getNode(WideningOpc, dl, StackSlotVT, Shiftee);
+ } else {
+ // For left shifts, pad the Shiftee's low bits with zeros to twice its width.
+ SDValue AllZeros = DAG.getConstant(0, dl, VT);
+ Init = DAG.getNode(ISD::BUILD_PAIR, dl, StackSlotVT, AllZeros, Shiftee);
+ }
+ // And spill it into the stack slot.
+ Ch = DAG.getStore(Ch, dl, Init, StackPtr, StackPtrInfo, StackSlotAlignment);
+
+ // Now, compute the full-byte offset into the stack slot from which we can
+ // load. The shift amount is in bits but we need it in whole bytes,
+ // so just divide by CHAR_BIT.
+ SDNodeFlags Flags;
+ if (ShiftByByteMultiple)
+ Flags.setExact(true);
+ SDValue ByteOffset = DAG.getNode(ISD::SRL, dl, ShAmtVT, ShAmt,
+ DAG.getConstant(3, dl, ShAmtVT), Flags);
+ // And clamp it, because an out-of-bounds load is immediate UB,
+ // while a shift overflow would have *just* been poison.
+ ByteOffset = DAG.getNode(ISD::AND, dl, ShAmtVT, ByteOffset,
+ DAG.getConstant(VTByteWidth - 1, dl, ShAmtVT));
+ // We have exactly two strategies for indexing into the stack slot here:
+ // 1. upwards, starting from the beginning of the slot
+ // 2. downwards, starting from the middle of the slot
+ // On a little-endian machine we pick 1. for right shifts and 2. for left
+ // shifts, and vice versa on a big-endian machine.
+ bool WillIndexUpwards = N->getOpcode() != ISD::SHL;
+ if (DAG.getDataLayout().isBigEndian())
+ WillIndexUpwards = !WillIndexUpwards;
+
+ SDValue AdjStackPtr;
+ if (WillIndexUpwards) {
+ AdjStackPtr = StackPtr;
+ } else {
+ AdjStackPtr = DAG.getMemBasePlusOffset(
+ StackPtr, DAG.getConstant(VTByteWidth, dl, PtrTy), dl);
+ ByteOffset = DAG.getNegative(ByteOffset, dl, ShAmtVT);
+ }
+
+ // Compute the pointer into the stack slot from which we need to load.
+ ByteOffset = DAG.getSExtOrTrunc(ByteOffset, dl, PtrTy);
+ AdjStackPtr = DAG.getMemBasePlusOffset(AdjStackPtr, ByteOffset, dl);
+
+ // And load it! Even if this load is not itself legal, legalizing it is straightforward.
+ SDValue Res = DAG.getLoad(
+ VT, dl, Ch, AdjStackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), Align(1));
+ // We've now performed the shift by CHAR_BIT * floor(ShAmt / CHAR_BIT) bits.
+
+ // If a sub-CHAR_BIT amount may remain to shift by, do that shift now.
+ if (!ShiftByByteMultiple) {
+ SDValue ShAmtRem = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
+ DAG.getConstant(7, dl, ShAmtVT));
+ Res = DAG.getNode(N->getOpcode(), dl, VT, Res, ShAmtRem);
+ }
+
+ // Finally, split the computed value.
+ SplitInteger(Res, Lo, Hi);
+}
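// Sketch of the whole expansion as little-endian C, for a hypothetical i128
// left shift spilled into a 32-byte slot (mirrors the SHL, index-downward
// case above):
//   unsigned char slot[32] = {0};        // low half pre-zeroed
//   memcpy(slot + 16, &x, 16);           // spill the shiftee above it
//   unsigned off = (amt / 8) & 15;       // clamped full-byte offset
//   unsigned __int128 r;
//   memcpy(&r, slot + 16 - off, 16);     // byte-granular part of the shift
//   r <<= amt % 8;                       // leftover sub-byte part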
+
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -4181,7 +4280,24 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
(Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
Action == TargetLowering::Custom;
- if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) {
+ unsigned ExpansionFactor = 1;
+ // That VT->NVT expansion is one step. But will we re-expand NVT?
+ for (EVT TmpVT = NVT;;) {
+ EVT NewTMPVT = TLI.getTypeToTransformTo(*DAG.getContext(), TmpVT);
+ if (NewTMPVT == TmpVT)
+ break;
+ TmpVT = NewTMPVT;
+ ++ExpansionFactor;
+ }
+
+ TargetLowering::ShiftLegalizationStrategy S =
+ TLI.preferredShiftLegalizationStrategy(DAG, N, ExpansionFactor);
+
+ if (S == TargetLowering::ShiftLegalizationStrategy::ExpandThroughStack)
+ return ExpandIntRes_ShiftThroughStack(N, Lo, Hi);
+
+ if (LegalOrCustom &&
+ S != TargetLowering::ShiftLegalizationStrategy::LowerToLibcall) {
// Expand the subcomponents.
SDValue LHSL, LHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
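// Example of the ExpansionFactor computation: expanding an i256 shift on a
// 32-bit target yields NVT = i128, which itself re-expands i128 -> i64 ->
// i32, so ExpansionFactor == 3; the target may then prefer LowerToLibcall
// or ExpandThroughStack over stacking three rounds of shift-parts code.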
@@ -4330,14 +4446,6 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
LC = RTLIB::SREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::SREM_I128;
-
- else {
- SDValue Result =
- ExpandExtIntRes_DIVREM(TLI, RTLIB::SREM_IEXT, DAG, N, dl, VT);
- SplitInteger(Result, Lo, Hi);
- return;
- }
-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4507,6 +4615,22 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
return;
}
+ // Try to expand UDIV by constant.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Only if the new type is legal.
+ if (isTypeLegal(NVT)) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ SmallVector<SDValue> Result;
+ if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) {
+ Lo = Result[0];
+ Hi = Result[1];
+ return;
+ }
+ }
+ }
+
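// Sketch of the intent: a double-wide unsigned division by a constant can
// often be rewritten over the legal half type with half-width multiplies,
// shifts and adds (classic magic-number division applied per half), instead
// of the __udivti3-style libcall chosen below. expandDIVREMByConstant
// reports failure for divisors it cannot handle, in which case the libcall
// path is still taken.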
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::UDIV_I16;
@@ -4516,14 +4640,6 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
LC = RTLIB::UDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::UDIV_I128;
-
- else {
- SDValue Result =
- ExpandExtIntRes_DIVREM(TLI, RTLIB::UDIV_IEXT, DAG, N, dl, VT);
- SplitInteger(Result, Lo, Hi);
- return;
- }
-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4542,6 +4658,22 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
return;
}
+ // Try to expand UREM by constant.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ // Only if the new type is legal.
+ if (isTypeLegal(NVT)) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ SmallVector<SDValue> Result;
+ if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) {
+ Lo = Result[0];
+ Hi = Result[1];
+ return;
+ }
+ }
+ }
+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::UREM_I16;
@@ -4551,14 +4683,6 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
LC = RTLIB::UREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::UREM_I128;
-
- else {
- SDValue Result =
- ExpandExtIntRes_DIVREM(TLI, RTLIB::UREM_IEXT, DAG, N, dl, VT);
- SplitInteger(Result, Lo, Hi);
- return;
- }
-
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -5297,7 +5421,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask);
}
-
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
@@ -5355,7 +5478,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isScalableVector() &&
"Type must be promoted to a scalable vector type");
- APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
+ const APInt &StepVal = N->getConstantOperandAPInt(0);
return DAG.getStepVector(dl, NOutVT,
StepVal.sext(NOutVT.getScalarSizeInBits()));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 8fe9a83b9c3d..5e0349593139 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -722,9 +722,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
- assert(Result.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+#ifndef NDEBUG
+ EVT VT = Result.getValueType();
+ LLVMContext &Ctx = *DAG.getContext();
+ assert((VT == EVT::getIntegerVT(Ctx, 80) ||
+ VT == TLI.getTypeToTransformTo(Ctx, Op.getValueType())) &&
"Invalid type for softened float");
+#endif
AnalyzeNewValue(Result);
auto &OpIdEntry = SoftenedFloats[getTableId(Op)];
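// Note (assumed rationale): x86_fp80 values may be softened to a bare
// 80-bit integer, while getTypeToTransformTo(f80) reports the next legal
// power-of-two type, so the assert now also admits the exact-width i80
// result.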
@@ -759,7 +763,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
// a constant i8 operand.
// We don't currently support the scalarization of scalable vector types.
- assert(Result.getValueSizeInBits().getFixedSize() >=
+ assert(Result.getValueSizeInBits().getFixedValue() >=
Op.getScalarValueSizeInBits() &&
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 6696b79cf885..b97e44a01319 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -324,7 +324,7 @@ private:
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
- SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N);
SDValue PromoteIntRes_FREEZE(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
@@ -354,7 +354,7 @@ private:
SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N);
SDValue PromoteIntRes_MULFIX(SDNode *N);
SDValue PromoteIntRes_DIVFIX(SDNode *N);
- SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
+ SDValue PromoteIntRes_GET_ROUNDING(SDNode *N);
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
SDValue PromoteIntRes_VP_REDUCE(SDNode *N);
SDValue PromoteIntRes_ABS(SDNode *N);
@@ -437,11 +437,11 @@ private:
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_GET_ROUNDING (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XROUND_XRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -457,6 +457,7 @@ private:
void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ShiftThroughStack (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -862,6 +863,8 @@ private:
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
+ SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi,
bool SplitSETCC = false);
@@ -891,6 +894,7 @@ private:
SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo);
SDValue SplitVecOp_Gather(MemSDNode *MGT, unsigned OpNo);
@@ -947,6 +951,7 @@ private:
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
+ SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N);
@@ -958,6 +963,7 @@ private:
SDValue WidenVecRes_STRICT_FSETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VECTOR_REVERSE(SDNode *N);
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
@@ -984,6 +990,7 @@ private:
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
+ SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index c6885677d644..21b5255c8f72 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -197,8 +197,7 @@ void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
GetExpandedOp(N->getOperand(0), Lo, Hi);
- SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
- Hi : Lo;
+ SDValue Part = N->getConstantOperandVal(1) ? Hi : Lo;
assert(Part.getValueType() == N->getValueType(0) &&
"Type twice as big as expanded type not itself expanded!");
@@ -209,7 +208,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue OldVec = N->getOperand(0);
- unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ ElementCount OldEltCount = OldVec.getValueType().getVectorElementCount();
EVT OldEltVT = OldVec.getValueType().getVectorElementType();
SDLoc dl(N);
@@ -223,14 +222,13 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// the input vector. If so, extend the elements of the input vector to the
// same bitwidth as the result before expanding.
    assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!");
- EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldEltCount);
OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
}
- SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
- EVT::getVectorVT(*DAG.getContext(),
- NewVT, 2*OldElts),
- OldVec);
+ SDValue NewVec = DAG.getNode(
+ ISD::BITCAST, dl,
+ EVT::getVectorVT(*DAG.getContext(), NewVT, OldEltCount * 2), OldVec);
// Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
SDValue Idx = N->getOperand(1);
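Switching from a plain `unsigned OldElts` to an ElementCount lets the `OldEltCount * 2` arithmetic above serve scalable vectors too, since scaling an element count preserves its scalable flag. A minimal standalone model (a sketch, not the real llvm::ElementCount):

    #include <cassert>

    // Hypothetical stand-in for llvm::ElementCount.
    struct ElementCountModel {
      unsigned Min;   // minimum number of elements
      bool Scalable;  // real count is Min * vscale when true

      friend ElementCountModel operator*(ElementCountModel EC, unsigned N) {
        return {EC.Min * N, EC.Scalable};  // scaling keeps the flag
      }
    };

    int main() {
      ElementCountModel Fixed{4, false}, Scal{4, true};
      assert((Fixed * 2).Min == 8 && !(Fixed * 2).Scalable);  // v4 -> v8
      assert((Scal * 2).Min == 8 && (Scal * 2).Scalable);     // nxv4 -> nxv8
    }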
@@ -359,8 +357,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
SmallVector<SDValue, 8> Ops;
IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType());
- SDValue Vec =
- DAG.getBuildVector(NVT, dl, makeArrayRef(Ops.data(), NumElts));
+ SDValue Vec = DAG.getBuildVector(NVT, dl, ArrayRef(Ops.data(), NumElts));
return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
}
@@ -403,7 +400,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
SDValue Lo, Hi;
GetExpandedOp(N->getOperand(0), Lo, Hi);
- return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+ return N->getConstantOperandVal(1) ? Hi : Lo;
}
SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f5a1eae1e7fe..e245b3cb4c6d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -132,6 +132,7 @@ class VectorLegalizer {
SDValue ExpandVSELECT(SDNode *Node);
SDValue ExpandVP_SELECT(SDNode *Node);
SDValue ExpandVP_MERGE(SDNode *Node);
+ SDValue ExpandVP_REM(SDNode *Node);
SDValue ExpandSELECT(SDNode *Node);
std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
SDValue ExpandStore(SDNode *N);
@@ -492,7 +493,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (LowerOperationWrapper(Node, ResultVals))
break;
LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TargetLowering::Expand:
LLVM_DEBUG(dbgs() << "Expanding\n");
Expand(Node, ResultVals);
@@ -594,7 +595,8 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
- Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
else
Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
@@ -728,12 +730,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::BSWAP:
Results.push_back(ExpandBSWAP(Node));
return;
+ case ISD::VP_BSWAP:
+ Results.push_back(TLI.expandVPBSWAP(Node, DAG));
+ return;
case ISD::VSELECT:
Results.push_back(ExpandVSELECT(Node));
return;
case ISD::VP_SELECT:
Results.push_back(ExpandVP_SELECT(Node));
return;
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ if (SDValue Expanded = ExpandVP_REM(Node)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::SELECT:
Results.push_back(ExpandSELECT(Node));
return;
@@ -776,12 +788,24 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::BITREVERSE:
ExpandBITREVERSE(Node, Results);
return;
+ case ISD::VP_BITREVERSE:
+ if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::CTPOP:
if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
Results.push_back(Expanded);
return;
}
break;
+ case ISD::VP_CTPOP:
+ if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
@@ -789,6 +813,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::VP_CTLZ:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
@@ -796,8 +827,17 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::VP_CTTZ:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::FSHL:
+ case ISD::VP_FSHL:
case ISD::FSHR:
+ case ISD::VP_FSHR:
if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
Results.push_back(Expanded);
return;
@@ -847,6 +887,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::USHLSAT:
+ case ISD::SSHLSAT:
+ if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
    // Expand the FP_TO_XINT_SAT if it is scalable to prevent it from unrolling below.
@@ -954,10 +1001,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
DAG.getConstant(0, DL, BitTy));
// Broadcast the mask so that the entire vector is all one or all zero.
- if (VT.isFixedLengthVector())
- Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
- else
- Mask = DAG.getSplatVector(MaskTy, DL, Mask);
+ Mask = DAG.getSplat(MaskTy, DL, Mask);
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
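The two hand-written fixed/scalable branches are folded into a single DAG.getSplat call here. A sketch of the dispatch that helper is assumed to perform (the real member lives on llvm::SelectionDAG; this fragment only compiles inside an LLVM tree):

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Assumed behavior of getSplat: build-vector splat for fixed-length
    // types, SPLAT_VECTOR for scalable ones.
    static SDValue getSplatSketch(SelectionDAG &DAG, EVT VT, const SDLoc &DL,
                                  SDValue Op) {
      return VT.isScalableVector() ? DAG.getSplatVector(VT, DL, Op)
                                   : DAG.getSplatBuildVector(VT, DL, Op);
    }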
@@ -1300,8 +1344,7 @@ SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
return DAG.UnrollVectorOp(Node);
SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
- SDValue SplatEVL = IsFixedLen ? DAG.getSplatBuildVector(EVLVecVT, DL, EVL)
- : DAG.getSplatVector(EVLVecVT, DL, EVL);
+ SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
SDValue EVLMask =
DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);
@@ -1309,6 +1352,30 @@ SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
}
+SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
+ // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
+ EVT VT = Node->getValueType(0);
+
+  unsigned DivOpc =
+      Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
+
+ if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
+ return SDValue();
+
+ SDLoc DL(Node);
+
+ SDValue Dividend = Node->getOperand(0);
+ SDValue Divisor = Node->getOperand(1);
+ SDValue Mask = Node->getOperand(2);
+ SDValue EVL = Node->getOperand(3);
+
+ // X % Y -> X-X/Y*Y
+ SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
+ SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
+ return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
+}
+
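The expansion above leans on the integer identity X % Y == X - (X / Y) * Y, which holds for both signed and unsigned operands because division truncates toward zero. A scalar check of the identity, independent of the DAG machinery:

    #include <cassert>

    // Scalar model of the VP_REM expansion: X % Y == X - (X / Y) * Y.
    // C++ integer division truncates toward zero, matching srem/urem.
    long long remViaDiv(long long X, long long Y) {
      long long Div = X / Y;
      return X - Div * Y;
    }

    int main() {
      assert(remViaDiv(7, 3) == 7 % 3);
      assert(remViaDiv(-7, 3) == -7 % 3);  // signed case: -1
      assert(remViaDiv(7, -3) == 7 % -3);
    }

The legality checks before the expansion matter: if any of the three VP ops would itself be expanded, the rewrite could loop or pessimize, so the function returns an empty SDValue and lets the generic path handle it.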
void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
// Attempt to expand using TargetLowering.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 143abc08eeea..af5ea1ce5f45 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -27,6 +27,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
+#include <numeric>
+
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
@@ -975,6 +977,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_LOAD:
SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi);
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi);
+ break;
case ISD::MLOAD:
SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
break;
@@ -1006,23 +1011,34 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::ABS:
+ case ISD::VP_ABS:
case ISD::BITREVERSE:
+ case ISD::VP_BITREVERSE:
case ISD::BSWAP:
+ case ISD::VP_BSWAP:
case ISD::CTLZ:
+ case ISD::VP_CTLZ:
case ISD::CTTZ:
+ case ISD::VP_CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
- case ISD::FABS:
+ case ISD::VP_CTPOP:
+ case ISD::FABS: case ISD::VP_FABS:
case ISD::FCEIL:
+ case ISD::VP_FCEIL:
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FFLOOR:
+ case ISD::VP_FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
+ case ISD::VP_FNEARBYINT:
case ISD::FNEG: case ISD::VP_FNEG:
case ISD::FREEZE:
case ISD::ARITH_FENCE:
@@ -1031,21 +1047,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_ROUND:
case ISD::VP_FP_ROUND:
case ISD::FP_TO_SINT:
- case ISD::VP_FPTOSI:
+ case ISD::VP_FP_TO_SINT:
case ISD::FP_TO_UINT:
- case ISD::VP_FPTOUI:
+ case ISD::VP_FP_TO_UINT:
case ISD::FRINT:
+ case ISD::VP_FRINT:
case ISD::FROUND:
+ case ISD::VP_FROUND:
case ISD::FROUNDEVEN:
+ case ISD::VP_FROUNDEVEN:
case ISD::FSIN:
- case ISD::FSQRT:
+ case ISD::FSQRT: case ISD::VP_SQRT:
case ISD::FTRUNC:
+ case ISD::VP_FROUNDTOZERO:
case ISD::SINT_TO_FP:
- case ISD::VP_SITOFP:
+ case ISD::VP_SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::VP_TRUNCATE:
case ISD::UINT_TO_FP:
- case ISD::VP_UITOFP:
+ case ISD::VP_UINT_TO_FP:
case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
@@ -1066,8 +1086,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FADD: case ISD::VP_FADD:
case ISD::FSUB: case ISD::VP_FSUB:
case ISD::FMUL: case ISD::VP_FMUL:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
+ case ISD::FMINNUM: case ISD::VP_FMINNUM:
+ case ISD::FMAXNUM: case ISD::VP_FMAXNUM:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
case ISD::SDIV: case ISD::VP_SDIV:
@@ -1083,10 +1103,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UREM: case ISD::VP_UREM:
case ISD::SREM: case ISD::VP_SREM:
case ISD::FREM: case ISD::VP_FREM:
- case ISD::SMIN:
- case ISD::SMAX:
- case ISD::UMIN:
- case ISD::UMAX:
+ case ISD::SMIN: case ISD::VP_SMIN:
+ case ISD::SMAX: case ISD::VP_SMAX:
+ case ISD::UMIN: case ISD::VP_UMIN:
+ case ISD::UMAX: case ISD::VP_UMAX:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -1095,11 +1115,14 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
+ case ISD::VP_FCOPYSIGN:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA: case ISD::VP_FMA:
case ISD::FSHL:
+ case ISD::VP_FSHL:
case ISD::FSHR:
+ case ISD::VP_FSHR:
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
@@ -1143,13 +1166,13 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
MachinePointerInfo &MPI, SDValue &Ptr,
uint64_t *ScaledOffset) {
SDLoc DL(N);
- unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8;
+ unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinValue() / 8;
if (MemVT.isScalableVector()) {
SDNodeFlags Flags;
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
- APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
+ APInt(Ptr.getValueSizeInBits().getFixedValue(), IncrementSize));
MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
Flags.setNoUnsignedWrap(true);
if (ScaledOffset)
@@ -1465,7 +1488,11 @@ void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo,
SDLoc DL(N);
SDValue ArgLo, ArgHi;
SDValue Test = N->getOperand(1);
- GetSplitVector(N->getOperand(0), ArgLo, ArgHi);
+ SDValue FpValue = N->getOperand(0);
+ if (getTypeAction(FpValue.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(FpValue, ArgLo, ArgHi);
+ else
+ std::tie(ArgLo, ArgHi) = DAG.SplitVector(FpValue, SDLoc(FpValue));
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
@@ -1900,7 +1927,7 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace());
else
MPI = LD->getPointerInfo().getWithOffset(
- LoMemVT.getStoreSize().getFixedSize());
+ LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
@@ -1921,6 +1948,87 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
ReplaceValueWith(SDValue(LD, 1), Ch);
}
+void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD,
+ SDValue &Lo, SDValue &Hi) {
+ assert(SLD->isUnindexed() &&
+ "Indexed VP strided load during type legalization!");
+ assert(SLD->getOffset().isUndef() &&
+ "Unexpected indexed variable-length load offset");
+
+ SDLoc DL(SLD);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(SLD->getValueType(0));
+
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(SLD->getMemoryVT(), LoVT, &HiIsEmpty);
+
+ SDValue Mask = SLD->getMask();
+ SDValue LoMask, HiMask;
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, LoMask, HiMask);
+ else
+ std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
+ }
+
+ SDValue LoEVL, HiEVL;
+ std::tie(LoEVL, HiEVL) =
+ DAG.SplitEVL(SLD->getVectorLength(), SLD->getValueType(0), DL);
+
+ // Generate the low vp_strided_load
+ Lo = DAG.getStridedLoadVP(
+ SLD->getAddressingMode(), SLD->getExtensionType(), LoVT, DL,
+ SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(), SLD->getStride(),
+ LoMask, LoEVL, LoMemVT, SLD->getMemOperand(), SLD->isExpandingLoad());
+
+ if (HiIsEmpty) {
+ // The high vp_strided_load has zero storage size. We therefore simply set
+ // it to the low vp_strided_load and rely on subsequent removal from the
+ // chain.
+ Hi = Lo;
+ } else {
+ // Generate the high vp_strided_load.
+    // To calculate the high base address, we add to the low base address
+    // Stride bytes for each element already loaded by the low half, that is:
+    // Ptr = Ptr + (LoEVL * Stride)
+ EVT PtrVT = SLD->getBasePtr().getValueType();
+ SDValue Increment =
+ DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL,
+ DAG.getSExtOrTrunc(SLD->getStride(), DL, PtrVT));
+ SDValue Ptr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, SLD->getBasePtr(), Increment);
+
+ Align Alignment = SLD->getOriginalAlign();
+ if (LoMemVT.isScalableVector())
+ Alignment = commonAlignment(
+ Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(SLD->getPointerInfo().getAddrSpace()),
+ MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+ SLD->getAAInfo(), SLD->getRanges());
+
+ Hi = DAG.getStridedLoadVP(SLD->getAddressingMode(), SLD->getExtensionType(),
+ HiVT, DL, SLD->getChain(), Ptr, SLD->getOffset(),
+ SLD->getStride(), HiMask, HiEVL, HiMemVT, MMO,
+ SLD->isExpandingLoad());
+ }
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ SDValue Ch = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(SLD, 1), Ch);
+}
+
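The high-half base address computed above is plain byte arithmetic: the low half consumes LoEVL elements spaced Stride bytes apart. A scalar model of the offset calculation:

    #include <cassert>
    #include <cstdint>

    // Scalar model of the high-half base address for a split strided
    // load: the low half covers LoEVL elements, each Stride bytes apart.
    uint64_t highHalfBase(uint64_t Base, uint64_t LoEVL, int64_t Stride) {
      return Base + LoEVL * Stride;
    }

    int main() {
      // Base 0x1000, 4 elements in the low half, 16-byte stride:
      assert(highHalfBase(0x1000, 4, 16) == 0x1040);
    }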
void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue &Lo, SDValue &Hi) {
assert(MLD->isUnindexed() && "Indexed masked load during type legalization!");
@@ -1983,7 +2091,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace());
else
MPI = MLD->getPointerInfo().getWithOffset(
- LoMemVT.getStoreSize().getFixedSize());
+ LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
@@ -2286,13 +2394,13 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// If Lo or Hi uses elements from at most two of the four input vectors, then
// express it as a vector shuffle of those two inputs. Otherwise extract the
// input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
- SmallVector<int> OrigMask(N->getMask().begin(), N->getMask().end());
+ SmallVector<int> OrigMask(N->getMask());
// Try to pack incoming shuffles/inputs.
auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts,
&DL](SmallVectorImpl<int> &Mask) {
// Check if all inputs are shuffles of the same operands or non-shuffles.
MapVector<std::pair<SDValue, SDValue>, SmallVector<unsigned>> ShufflesIdxs;
- for (unsigned Idx = 0; Idx < array_lengthof(Inputs); ++Idx) {
+ for (unsigned Idx = 0; Idx < std::size(Inputs); ++Idx) {
SDValue Input = Inputs[Idx];
auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Input.getNode());
if (!Shuffle ||
@@ -2339,7 +2447,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear();
}
// Check if any concat_vectors can be simplified.
- SmallBitVector UsedSubVector(2 * array_lengthof(Inputs));
+ SmallBitVector UsedSubVector(2 * std::size(Inputs));
for (int &Idx : Mask) {
if (Idx == UndefMaskElem)
continue;
@@ -2359,7 +2467,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
}
if (UsedSubVector.count() > 1) {
SmallVector<SmallVector<std::pair<unsigned, int>, 2>> Pairs;
- for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ for (unsigned I = 0; I < std::size(Inputs); ++I) {
if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1))
continue;
if (Pairs.empty() || Pairs.back().size() == 2)
@@ -2403,7 +2511,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Try to remove extra shuffles (except broadcasts) and shuffles with the
// reused operands.
Changed = false;
- for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ for (unsigned I = 0; I < std::size(Inputs); ++I) {
auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode());
if (!Shuffle)
continue;
@@ -2495,15 +2603,15 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
NewElts](SmallVectorImpl<int> &Mask) {
SetVector<SDValue> UniqueInputs;
SetVector<SDValue> UniqueConstantInputs;
- for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
- if (IsConstant(Inputs[I]))
- UniqueConstantInputs.insert(Inputs[I]);
- else if (!Inputs[I].isUndef())
- UniqueInputs.insert(Inputs[I]);
+ for (const auto &I : Inputs) {
+ if (IsConstant(I))
+ UniqueConstantInputs.insert(I);
+ else if (!I.isUndef())
+ UniqueInputs.insert(I);
}
// Adjust mask in case of reused inputs. Also, need to insert constant
// inputs at first, otherwise it affects the final outcome.
- if (UniqueInputs.size() != array_lengthof(Inputs)) {
+ if (UniqueInputs.size() != std::size(Inputs)) {
auto &&UniqueVec = UniqueInputs.takeVector();
auto &&UniqueConstantVec = UniqueConstantInputs.takeVector();
unsigned ConstNum = UniqueConstantVec.size();
@@ -2541,8 +2649,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Build a shuffle mask for the output, discovering on the fly which
// input vectors to use as shuffle operands.
unsigned FirstMaskIdx = High * NewElts;
- SmallVector<int> Mask(NewElts * array_lengthof(Inputs), UndefMaskElem);
- copy(makeArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
+ SmallVector<int> Mask(NewElts * std::size(Inputs), UndefMaskElem);
+ copy(ArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
assert(!Output && "Expected default initialized initial value.");
TryPeekThroughShufflesInputs(Mask);
MakeUniqueInputs(Mask);
@@ -2561,7 +2669,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
return SecondIteration;
};
processShuffleMasks(
- Mask, array_lengthof(Inputs), array_lengthof(Inputs),
+ Mask, std::size(Inputs), std::size(Inputs),
/*NumOfUsedRegs=*/1,
[&Output, &DAG = DAG, NewVT]() { Output = DAG.getUNDEF(NewVT); },
[&Output, &DAG = DAG, NewVT, &DL, &Inputs,
@@ -2707,6 +2815,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_STORE:
Res = SplitVecOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo);
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Res = SplitVecOp_VP_STRIDED_STORE(cast<VPStridedStoreSDNode>(N), OpNo);
+ break;
case ISD::MSTORE:
Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
break;
@@ -2725,6 +2836,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::VP_SINT_TO_FP:
+ case ISD::VP_UINT_TO_FP:
if (N->getValueType(0).bitsLT(
N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
Res = SplitVecOp_TruncateHelper(N);
@@ -2737,6 +2850,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::VP_FP_TO_SINT:
+ case ISD::VP_FP_TO_UINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_FP_EXTEND:
@@ -2999,29 +3114,57 @@ SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N,
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
// We know that the extracted result type is legal.
EVT SubVT = N->getValueType(0);
-
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
SDValue Lo, Hi;
- if (SubVT.isScalableVector() !=
- N->getOperand(0).getValueType().isScalableVector())
- report_fatal_error("Extracting a fixed-length vector from an illegal "
- "scalable vector is not yet supported");
-
GetSplitVector(N->getOperand(0), Lo, Hi);
- uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
+ uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements();
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- if (IdxVal < LoElts) {
- assert(IdxVal + SubVT.getVectorMinNumElements() <= LoElts &&
+ if (IdxVal < LoEltsMin) {
+ assert(IdxVal + SubVT.getVectorMinNumElements() <= LoEltsMin &&
"Extracted subvector crosses vector split!");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
- } else {
+ } else if (SubVT.isScalableVector() ==
+ N->getOperand(0).getValueType().isScalableVector())
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
- DAG.getVectorIdxConstant(IdxVal - LoElts, dl));
- }
+ DAG.getVectorIdxConstant(IdxVal - LoEltsMin, dl));
+
+ // After this point the DAG node only permits extracting fixed-width
+ // subvectors from scalable vectors.
+ assert(SubVT.isFixedLengthVector() &&
+ "Extracting scalable subvector from fixed-width unsupported");
+
+ // If the element type is i1 and we're not promoting the result, then we may
+ // end up loading the wrong data since the bits are packed tightly into
+ // bytes. For example, if we extract a v4i1 (legal) from a nxv4i1 (legal)
+ // type at index 4, then we will load a byte starting at index 0.
+ if (SubVT.getScalarType() == MVT::i1)
+ report_fatal_error("Don't know how to extract fixed-width predicate "
+ "subvector from a scalable predicate vector");
+
+ // Spill the vector to the stack. We should use the alignment for
+ // the smallest part.
+ SDValue Vec = N->getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
+
+ // Extract the subvector by loading the correct part.
+ StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVT, Idx);
+
+ return DAG.getLoad(
+ SubVT, dl, Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
}
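The new fallback spills the whole vector to a stack temporary and reloads only the requested slice, which sidesteps reasoning about the runtime length of a scalable high half. A scalar model of the spill-and-reload idea (std::vector stands in for the stack slot):

    #include <cassert>
    #include <cstring>
    #include <vector>

    // Scalar model of extract-subvector-via-stack: store the whole
    // vector to memory, then load SubLen elements starting at Idx.
    std::vector<int> extractViaSpill(const std::vector<int> &Vec,
                                     size_t Idx, size_t SubLen) {
      std::vector<int> Slot(Vec);  // the "spill" to the stack slot
      std::vector<int> Sub(SubLen);
      std::memcpy(Sub.data(), Slot.data() + Idx, SubLen * sizeof(int));
      return Sub;
    }

    int main() {
      std::vector<int> V{0, 1, 2, 3, 4, 5, 6, 7};
      assert(extractViaSpill(V, 4, 2) == (std::vector<int>{4, 5}));
    }

The i1 bail-out above exists precisely because this byte-granular reload breaks down when elements are packed tighter than a byte.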
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -3029,8 +3172,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue Idx = N->getOperand(1);
EVT VecVT = Vec.getValueType();
- if (isa<ConstantSDNode>(Idx)) {
- uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (const ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = Index->getZExtValue();
SDValue Lo, Hi;
GetSplitVector(Vec, Lo, Hi);
@@ -3167,11 +3310,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) {
MachinePointerInfo MPI;
if (LoMemVT.isScalableVector()) {
Alignment = commonAlignment(Alignment,
- LoMemVT.getSizeInBits().getKnownMinSize() / 8);
+ LoMemVT.getSizeInBits().getKnownMinValue() / 8);
MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
} else
MPI = N->getPointerInfo().getWithOffset(
- LoMemVT.getStoreSize().getFixedSize());
+ LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
@@ -3186,6 +3329,80 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) {
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
+SDValue DAGTypeLegalizer::SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N,
+ unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed vp_strided_store of a vector?");
+ assert(N->getOffset().isUndef() && "Unexpected VP strided store offset");
+
+ SDLoc DL(N);
+
+ SDValue Data = N->getValue();
+ SDValue LoData, HiData;
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Data, LoData, HiData);
+ else
+ std::tie(LoData, HiData) = DAG.SplitVector(Data, DL);
+
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetDependentSplitDestVTs(
+ N->getMemoryVT(), LoData.getValueType(), &HiIsEmpty);
+
+ SDValue Mask = N->getMask();
+ SDValue LoMask, HiMask;
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC)
+ SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask);
+ else if (getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, LoMask, HiMask);
+ else
+ std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
+
+ SDValue LoEVL, HiEVL;
+ std::tie(LoEVL, HiEVL) =
+ DAG.SplitEVL(N->getVectorLength(), Data.getValueType(), DL);
+
+ // Generate the low vp_strided_store
+ SDValue Lo = DAG.getStridedStoreVP(
+ N->getChain(), DL, LoData, N->getBasePtr(), N->getOffset(),
+ N->getStride(), LoMask, LoEVL, LoMemVT, N->getMemOperand(),
+ N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore());
+
+ // If the high vp_strided_store has zero storage size, only the low
+ // vp_strided_store is needed.
+ if (HiIsEmpty)
+ return Lo;
+
+ // Generate the high vp_strided_store.
+  // To calculate the high base address, we add to the low base address
+  // Stride bytes for each element already stored by the low half, that is:
+  // Ptr = Ptr + (LoEVL * Stride)
+ EVT PtrVT = N->getBasePtr().getValueType();
+ SDValue Increment =
+ DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL,
+ DAG.getSExtOrTrunc(N->getStride(), DL, PtrVT));
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, N->getBasePtr(), Increment);
+
+ Align Alignment = N->getOriginalAlign();
+ if (LoMemVT.isScalableVector())
+ Alignment = commonAlignment(Alignment,
+ LoMemVT.getSizeInBits().getKnownMinValue() / 8);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(N->getPointerInfo().getAddrSpace()),
+ MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+ N->getAAInfo(), N->getRanges());
+
+ SDValue Hi = DAG.getStridedStoreVP(
+ N->getChain(), DL, HiData, Ptr, N->getOffset(), N->getStride(), HiMask,
+ HiEVL, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
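Both the strided-load and strided-store splits rely on DAG.SplitEVL to divide the explicit vector length between the halves. A scalar model under the assumed semantics (low half clamped to its element count, high half the saturated remainder):

    #include <algorithm>
    #include <cassert>
    #include <utility>

    // Assumed semantics of splitting an explicit vector length across
    // two halves of HalfElts elements each.
    std::pair<unsigned, unsigned> splitEVL(unsigned EVL, unsigned HalfElts) {
      unsigned Lo = std::min(EVL, HalfElts);
      unsigned Hi = EVL > HalfElts ? EVL - HalfElts : 0;  // usubsat
      return {Lo, Hi};
    }

    int main() {
      assert(splitEVL(3, 4) == std::make_pair(3u, 0u));  // low half only
      assert(splitEVL(6, 4) == std::make_pair(4u, 2u));  // spills into high
    }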
SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
assert(N->isUnindexed() && "Indexed masked store of vector?");
@@ -3243,11 +3460,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MachinePointerInfo MPI;
if (LoMemVT.isScalableVector()) {
Alignment = commonAlignment(
- Alignment, LoMemVT.getSizeInBits().getKnownMinSize() / 8);
+ Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8);
MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
} else
MPI = N->getPointerInfo().getWithOffset(
- LoMemVT.getStoreSize().getFixedSize());
+ LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
@@ -3593,7 +3810,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
// The result (and the first input) has a legal vector type, but the second
// input needs splitting.
- return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+
+ SDLoc DL(N);
+
+ EVT LHSLoVT, LHSHiVT;
+ std::tie(LHSLoVT, LHSHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ if (!isTypeLegal(LHSLoVT) || !isTypeLegal(LHSHiVT))
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+
+ SDValue LHSLo, LHSHi;
+ std::tie(LHSLo, LHSHi) =
+ DAG.SplitVector(N->getOperand(0), DL, LHSLoVT, LHSHiVT);
+
+ SDValue RHSLo, RHSHi;
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL);
+
+ SDValue Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLoVT, LHSLo, RHSLo);
+ SDValue Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHiVT, LHSHi, RHSHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
@@ -3683,6 +3919,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_LOAD:
Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N));
+ break;
case ISD::MLOAD:
Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
@@ -3692,6 +3931,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_GATHER:
Res = WidenVecRes_VP_GATHER(cast<VPGatherSDNode>(N));
break;
+ case ISD::VECTOR_REVERSE:
+ Res = WidenVecRes_VECTOR_REVERSE(N);
+ break;
case ISD::ADD: case ISD::VP_ADD:
case ISD::AND: case ISD::VP_AND:
@@ -3704,14 +3946,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SHL: case ISD::VP_SHL:
case ISD::SRA: case ISD::VP_ASHR:
case ISD::SRL: case ISD::VP_LSHR:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
+ case ISD::FMINNUM: case ISD::VP_FMINNUM:
+ case ISD::FMAXNUM: case ISD::VP_FMAXNUM:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
- case ISD::SMIN:
- case ISD::SMAX:
- case ISD::UMIN:
- case ISD::UMAX:
+ case ISD::SMIN: case ISD::VP_SMIN:
+ case ISD::SMAX: case ISD::VP_SMAX:
+ case ISD::UMIN: case ISD::VP_UMIN:
+ case ISD::UMAX: case ISD::VP_UMAX:
case ISD::UADDSAT:
case ISD::SADDSAT:
case ISD::USUBSAT:
@@ -3738,6 +3980,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_FMUL:
case ISD::VP_FDIV:
case ISD::VP_FREM:
+ case ISD::VP_FCOPYSIGN:
Res = WidenVecRes_Binary(N);
break;
@@ -3748,7 +3991,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
// If the target has custom/legal support for the scalar FP intrinsic ops
// (they are probably not destined to become libcalls), then widen those
// like any other binary ops.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::FADD:
case ISD::FMUL:
@@ -3809,17 +4052,17 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_ROUND:
case ISD::VP_FP_ROUND:
case ISD::FP_TO_SINT:
- case ISD::VP_FPTOSI:
+ case ISD::VP_FP_TO_SINT:
case ISD::FP_TO_UINT:
- case ISD::VP_FPTOUI:
+ case ISD::VP_FP_TO_UINT:
case ISD::SIGN_EXTEND:
case ISD::VP_SIGN_EXTEND:
case ISD::SINT_TO_FP:
- case ISD::VP_SITOFP:
+ case ISD::VP_SINT_TO_FP:
case ISD::VP_TRUNCATE:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
- case ISD::VP_UITOFP:
+ case ISD::VP_UINT_TO_FP:
case ISD::ZERO_EXTEND:
case ISD::VP_ZERO_EXTEND:
Res = WidenVecRes_Convert(N);
@@ -3851,17 +4094,34 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
// If the target has custom/legal support for the scalar FP intrinsic ops
// (they are probably not destined to become libcalls), then widen those
// like any other unary ops.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ABS:
+ case ISD::VP_ABS:
case ISD::BITREVERSE:
+ case ISD::VP_BITREVERSE:
case ISD::BSWAP:
+ case ISD::VP_BSWAP:
case ISD::CTLZ:
+ case ISD::VP_CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
+ case ISD::VP_CTPOP:
case ISD::CTTZ:
+ case ISD::VP_CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
case ISD::FNEG: case ISD::VP_FNEG:
+ case ISD::VP_FABS:
+ case ISD::VP_SQRT:
+ case ISD::VP_FCEIL:
+ case ISD::VP_FFLOOR:
+ case ISD::VP_FRINT:
+ case ISD::VP_FNEARBYINT:
+ case ISD::VP_FROUND:
+ case ISD::VP_FROUNDEVEN:
+ case ISD::VP_FROUNDTOZERO:
case ISD::FREEZE:
case ISD::ARITH_FENCE:
case ISD::FCANONICALIZE:
@@ -3869,7 +4129,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::FMA: case ISD::VP_FMA:
case ISD::FSHL:
+ case ISD::VP_FSHL:
case ISD::FSHR:
+ case ISD::VP_FSHR:
Res = WidenVecRes_Ternary(N);
break;
}
@@ -4005,7 +4267,7 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
ConcatOps[j] = UndefVal;
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- makeArrayRef(ConcatOps.data(), NumOps));
+ ArrayRef(ConcatOps.data(), NumOps));
}
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
@@ -4480,8 +4742,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
+ SDValue FpValue = N->getOperand(0);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Arg = GetWidenedVector(N->getOperand(0));
+ if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+ SDValue Arg = GetWidenedVector(FpValue);
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)},
N->getFlags());
}
@@ -4585,33 +4850,43 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
unsigned WidenSize = WidenVT.getSizeInBits();
unsigned InSize = InVT.getSizeInBits();
+ unsigned InScalarSize = InVT.getScalarSizeInBits();
// x86mmx is not an acceptable vector element type, so don't try.
- if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
+ if (WidenSize % InScalarSize == 0 && InVT != MVT::x86mmx) {
// Determine new input vector type. The new input vector type will use
// the same element type (if its a vector) or use the input type as a
// vector. It is the same size as the type to widen to.
EVT NewInVT;
- unsigned NewNumElts = WidenSize / InSize;
+ unsigned NewNumParts = WidenSize / InSize;
if (InVT.isVector()) {
EVT InEltVT = InVT.getVectorElementType();
NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
WidenSize / InEltVT.getSizeInBits());
} else {
- NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumParts);
}
if (TLI.isTypeLegal(NewInVT)) {
SDValue NewVec;
if (InVT.isVector()) {
// Because the result and the input are different vector types, widening
- // the result could create a legal type but widening the input might make
- // it an illegal type that might lead to repeatedly splitting the input
- // and then widening it. To avoid this, we widen the input only if
+ // the result could create a legal type but widening the input might
+ // make it an illegal type that might lead to repeatedly splitting the
+ // input and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
-      SmallVector<SDValue, 16> Ops(NewNumElts, DAG.getUNDEF(InVT));
-      Ops[0] = InOp;
-      NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
+      if (WidenSize % InSize == 0) {
+        SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT));
+        Ops[0] = InOp;
+        NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
+ } else {
+ SmallVector<SDValue, 16> Ops;
+ DAG.ExtractVectorElements(InOp, Ops);
+ Ops.append(WidenSize / InScalarSize - Ops.size(),
+ DAG.getUNDEF(InVT.getVectorElementType()));
+
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
+ }
} else {
NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp);
}
@@ -4768,7 +5043,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
// nxv2i64 extract_subvector(nxv16i64, 8)
// nxv2i64 extract_subvector(nxv16i64, 10)
// undef)
- unsigned GCD = greatestCommonDivisor(VTNumElts, WidenNumElts);
+ unsigned GCD = std::gcd(VTNumElts, WidenNumElts);
assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
"down type's element count");
EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
@@ -4915,6 +5190,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
return Res;
}
+SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) {
+ SDLoc DL(N);
+
+ // The mask should be widened as well
+ SDValue Mask = N->getMask();
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP strided load");
+ Mask = GetWidenedVector(Mask);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(Mask.getValueType().getVectorElementCount() ==
+ WidenVT.getVectorElementCount() &&
+ "Data and mask vectors should have the same number of elements");
+
+ SDValue Res = DAG.getStridedLoadVP(
+ N->getAddressingMode(), N->getExtensionType(), WidenVT, DL, N->getChain(),
+ N->getBasePtr(), N->getOffset(), N->getStride(), Mask,
+ N->getVectorLength(), N->getMemoryVT(), N->getMemOperand(),
+ N->isExpandingLoad());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
@@ -5316,6 +5618,61 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
}
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue OpValue = GetWidenedVector(N->getOperand(0));
+ assert(WidenVT == OpValue.getValueType() && "Unexpected widened vector type");
+
+ SDValue ReverseVal = DAG.getNode(ISD::VECTOR_REVERSE, dl, WidenVT, OpValue);
+ unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+ unsigned VTNumElts = VT.getVectorMinNumElements();
+ unsigned IdxVal = WidenNumElts - VTNumElts;
+
+ if (VT.isScalableVector()) {
+ // Try to split the 'Widen ReverseVal' into smaller extracts and concat the
+  // results together, e.g. (nxv6i64 -> nxv8i64)
+ // nxv8i64 vector_reverse
+ // <->
+ // nxv8i64 concat(
+ // nxv2i64 extract_subvector(nxv8i64, 2)
+ // nxv2i64 extract_subvector(nxv8i64, 4)
+ // nxv2i64 extract_subvector(nxv8i64, 6)
+ // nxv2i64 undef)
+
+ unsigned GCD = std::gcd(VTNumElts, WidenNumElts);
+ EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ ElementCount::getScalable(GCD));
+ assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
+ "down type's element count");
+ SmallVector<SDValue> Parts;
+ unsigned i = 0;
+ for (; i < VTNumElts / GCD; ++i)
+ Parts.push_back(
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, ReverseVal,
+ DAG.getVectorIdxConstant(IdxVal + i * GCD, dl)));
+ for (; i < WidenNumElts / GCD; ++i)
+ Parts.push_back(DAG.getUNDEF(PartVT));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
+ }
+
+  // Use a VECTOR_SHUFFLE to build the new vector from 'ReverseVal' for
+  // fixed-length vectors.
+ SmallVector<int, 16> Mask;
+  for (unsigned i = 0; i != VTNumElts; ++i)
+    Mask.push_back(IdxVal + i);
+ for (unsigned i = VTNumElts; i != WidenNumElts; ++i)
+ Mask.push_back(-1);
+
+ return DAG.getVectorShuffle(WidenVT, dl, ReverseVal, DAG.getUNDEF(WidenVT),
+ Mask);
+}
+
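The index math above works because reversing the widened vector moves the original elements to its tail, beginning at WidenNumElts - VTNumElts. A scalar model of the same calculation:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Scalar model: reverse a widened vector (original VTNumElts
    // elements padded to WidenNumElts), then take VTNumElts elements
    // starting at IdxVal = WidenNumElts - VTNumElts.
    std::vector<int> reverseViaWiden(std::vector<int> V,
                                     size_t WidenNumElts) {
      size_t VTNumElts = V.size();
      V.resize(WidenNumElts, -1);        // pad with "undef"
      std::reverse(V.begin(), V.end());  // reverse the widened vector
      size_t IdxVal = WidenNumElts - VTNumElts;
      return {V.begin() + IdxVal, V.begin() + IdxVal + VTNumElts};
    }

    int main() {
      assert((reverseViaWiden({0, 1, 2, 3, 4, 5}, 8) ==
              std::vector<int>{5, 4, 3, 2, 1, 0}));
    }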
SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
@@ -5432,6 +5789,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo);
+ break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
@@ -5910,6 +6270,38 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
ST->isCompressingStore());
}
+SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N,
+ unsigned OpNo) {
+ assert((OpNo == 1 || OpNo == 4) &&
+ "Can widen only data or mask operand of vp_strided_store");
+ VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
+ SDValue Mask = SST->getMask();
+ SDValue StVal = SST->getValue();
+ SDLoc DL(N);
+
+ if (OpNo == 1)
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP strided store");
+ else
+ assert(getTypeAction(StVal.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP strided store");
+
+ StVal = GetWidenedVector(StVal);
+ Mask = GetWidenedVector(Mask);
+
+ assert(StVal.getValueType().getVectorElementCount() ==
+ Mask.getValueType().getVectorElementCount() &&
+ "Data and mask vectors should have the same number of elements");
+
+ return DAG.getStridedStoreVP(
+ SST->getChain(), DL, StVal, SST->getBasePtr(), SST->getOffset(),
+ SST->getStride(), Mask, SST->getVectorLength(), SST->getMemoryVT(),
+ SST->getMemOperand(), SST->getAddressingMode(), SST->isTruncatingStore(),
+ SST->isCompressingStore());
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
assert((OpNo == 1 || OpNo == 3) &&
"Can widen only data or mask operand of mstore");
@@ -6127,7 +6519,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
unsigned WideElts = WideVT.getVectorMinNumElements();
if (WideVT.isScalableVector()) {
- unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+ unsigned GCD = std::gcd(OrigElts, WideElts);
EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
ElementCount::getScalable(GCD));
SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
@@ -6164,7 +6556,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
unsigned WideElts = WideVT.getVectorMinNumElements();
if (WideVT.isScalableVector()) {
- unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+ unsigned GCD = std::gcd(OrigElts, WideElts);
EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
ElementCount::getScalable(GCD));
SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
@@ -6223,12 +6615,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
// Align: If 0, don't allow use of a wider type
// WidenEx: If Align is not 0, the amount additional we can load/store from.
-static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,
- unsigned Width, EVT WidenVT,
- unsigned Align = 0, unsigned WidenEx = 0) {
+static std::optional<EVT> findMemType(SelectionDAG &DAG,
+ const TargetLowering &TLI, unsigned Width,
+ EVT WidenVT, unsigned Align = 0,
+ unsigned WidenEx = 0) {
EVT WidenEltVT = WidenVT.getVectorElementType();
const bool Scalable = WidenVT.isScalableVector();
- unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize();
+ unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinValue();
unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
unsigned AlignInBits = Align*8;
@@ -6266,7 +6659,7 @@ static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,
// Skip vector MVTs which don't match the scalable property of WidenVT.
if (Scalable != MemVT.isScalableVector())
continue;
- unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinSize();
+ unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinValue();
auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
if ((Action == TargetLowering::TypeLegal ||
Action == TargetLowering::TypePromoteInteger) &&
@@ -6283,7 +6676,7 @@ static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,
// Using element-wise loads and stores for widening operations is not
// supported for scalable vectors
if (Scalable)
- return None;
+ return std::nullopt;
return RetVT;
}
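This hunk is part of the llvm::Optional to std::optional migration; the spellings map one-to-one, with None becoming std::nullopt. A minimal standalone equivalent of the return-empty-on-scalable pattern (pickMemWidth and its body are illustrative, not part of the patch):

    #include <optional>

    std::optional<int> pickMemWidth(unsigned Width, bool Scalable) {
      if (Scalable)
        return std::nullopt;  // formerly `return None;`
      return Width / 2;       // placeholder computation
    }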
@@ -6348,9 +6741,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
(!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value();
// Find the vector type that can load from.
- Optional<EVT> FirstVT =
- findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
- WidthDiff.getKnownMinSize());
+ std::optional<EVT> FirstVT =
+ findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinValue());
if (!FirstVT)
return SDValue();
@@ -6361,15 +6754,15 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
// Unless we're able to load in one instruction we must work out how to load
// the remainder.
if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {
- Optional<EVT> NewVT = FirstVT;
+ std::optional<EVT> NewVT = FirstVT;
TypeSize RemainingWidth = LdWidth;
TypeSize NewVTWidth = FirstVTWidth;
do {
RemainingWidth -= NewVTWidth;
if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {
// The current type we are using is too large. Find a better size.
- NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT,
- LdAlign, WidthDiff.getKnownMinSize());
+ NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinValue(),
+ WidenVT, LdAlign, WidthDiff.getKnownMinValue());
if (!NewVT)
return SDValue();
NewVTWidth = NewVT->getSizeInBits();
@@ -6387,7 +6780,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
if (!FirstVT->isVector()) {
unsigned NumElts =
- WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
+ WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
@@ -6396,9 +6789,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
return LdOp;
// TODO: We don't currently have any tests that exercise this code path.
- assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0);
+ assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
unsigned NumConcat =
- WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
+ WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
SmallVector<SDValue, 16> ConcatOps(NumConcat);
SDValue UndefVal = DAG.getUNDEF(*FirstVT);
ConcatOps[0] = LdOp;
@@ -6461,9 +6854,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
TypeSize LdTySize = LdTy.getSizeInBits();
TypeSize NewLdTySize = NewLdTy.getSizeInBits();
assert(NewLdTySize.isScalable() == LdTySize.isScalable() &&
- NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinSize()));
+ NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinValue()));
unsigned NumOps =
- NewLdTySize.getKnownMinSize() / LdTySize.getKnownMinSize();
+ NewLdTySize.getKnownMinValue() / LdTySize.getKnownMinValue();
SmallVector<SDValue, 16> WidenOps(NumOps);
unsigned j = 0;
for (; j != End-Idx; ++j)
@@ -6481,11 +6874,11 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
if (WidenWidth == LdTy.getSizeInBits() * (End - Idx))
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- makeArrayRef(&ConcatOps[Idx], End - Idx));
+ ArrayRef(&ConcatOps[Idx], End - Idx));
// We need to fill the rest with undefs to build the vector.
unsigned NumOps =
- WidenWidth.getKnownMinSize() / LdTy.getSizeInBits().getKnownMinSize();
+ WidenWidth.getKnownMinValue() / LdTy.getSizeInBits().getKnownMinValue();
SmallVector<SDValue, 16> WidenOps(NumOps);
SDValue UndefVal = DAG.getUNDEF(LdTy);
{
@@ -6584,8 +6977,8 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
while (StWidth.isNonZero()) {
// Find the largest vector type we can store with.
- Optional<EVT> NewVT =
- findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ std::optional<EVT> NewVT =
+ findMemType(DAG, TLI, StWidth.getKnownMinValue(), ValVT);
if (!NewVT)
return false;
MemVTs.push_back({*NewVT, 0});
@@ -6620,11 +7013,11 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
} while (--Count);
} else {
// Cast the vector to the scalar type we can store.
- unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize();
+ unsigned NumElts = ValWidth.getFixedValue() / NewVTWidth.getFixedValue();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
// Readjust index position based on new vector type.
- Idx = Idx * ValEltWidth / NewVTWidth.getFixedSize();
+ Idx = Idx * ValEltWidth / NewVTWidth.getFixedValue();
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getVectorIdxConstant(Idx++, dl));
@@ -6636,7 +7029,7 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
} while (--Count);
// Restore index back to be relative to the original widen element type.
- Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth;
+ Idx = Idx * NewVTWidth.getFixedValue() / ValEltWidth;
}
}
@@ -6685,7 +7078,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
unsigned InNumElts = InEC.getFixedValue();
unsigned WidenNumElts = WidenEC.getFixedValue();
- // Fall back to extract and build.
+ // Fall back to extract and build (+ mask, if padding with zeros).
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = NVT.getVectorElementType();
unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
@@ -6694,9 +7087,21 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getVectorIdxConstant(Idx, dl));
- SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
- DAG.getUNDEF(EltVT);
- for ( ; Idx < WidenNumElts; ++Idx)
- Ops[Idx] = FillVal;
- return DAG.getBuildVector(NVT, dl, Ops);
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+
+ SDValue Widened = DAG.getBuildVector(NVT, dl, Ops);
+ if (!FillWithZeroes)
+ return Widened;
+
+ assert(NVT.isInteger() &&
+ "We expect to never want to FillWithZeroes for non-integral types.");
+
+ SmallVector<SDValue, 16> MaskOps;
+ MaskOps.append(MinNumElts, DAG.getAllOnesConstant(dl, EltVT));
+ MaskOps.append(WidenNumElts - MinNumElts, DAG.getConstant(0, dl, EltVT));
+
+ return DAG.getNode(ISD::AND, dl, NVT, Widened,
+ DAG.getBuildVector(NVT, dl, MaskOps));
}
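Rather than materializing zeros during the build, the rewritten path pads with undef and, only when zero-fill is requested, ANDs with an all-ones/zero mask, which presumably folds better downstream. A scalar model of the mask step (the 0xDEADBEEF padding stands in for undef):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Scalar model of the new FillWithZeroes path: pad with arbitrary
    // values, then AND with a mask that keeps the first MinNumElts
    // lanes and zeroes the padding.
    std::vector<uint32_t> widenAndMask(std::vector<uint32_t> V,
                                       size_t WidenNumElts) {
      size_t MinNumElts = V.size();
      V.resize(WidenNumElts, 0xDEADBEEF);  // "undef" padding
      for (size_t I = 0; I < WidenNumElts; ++I)
        V[I] &= (I < MinNumElts) ? 0xFFFFFFFFu : 0u;
      return V;
    }

    int main() {
      assert((widenAndMask({7, 9}, 4) ==
              std::vector<uint32_t>{7, 9, 0, 0}));
    }

The assert that NVT is an integer type guards this: an AND with an all-ones pattern is only an identity on integer lanes.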
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 3ac2a7bddc5a..2d93adea6b9b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -426,10 +426,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
NumRes = 1;
} else {
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ assert(!MCID.implicit_defs().empty() &&
+ "Physical reg def must be in implicit def list!");
NumRes = MCID.getNumDefs();
- for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
- if (Reg == *ImpDef)
+ for (MCPhysReg ImpDef : MCID.implicit_defs()) {
+ if (Reg == ImpDef)
break;
++NumRes;
}
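implicit_defs() returns an ArrayRef, so the loop no longer walks a null-terminated MCPhysReg array with a sentinel test. A standalone model of the updated counting loop (MCPhysRegModel and resultIndex are illustrative names, not LLVM API):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    using MCPhysRegModel = uint16_t;

    // Count explicit defs plus the position of Reg within the
    // implicit-def list; range-for over a sized range needs no sentinel.
    unsigned resultIndex(unsigned NumExplicitDefs,
                         const std::vector<MCPhysRegModel> &ImplicitDefs,
                         MCPhysRegModel Reg) {
      unsigned NumRes = NumExplicitDefs;
      for (MCPhysRegModel ImpDef : ImplicitDefs) {
        if (Reg == ImpDef)
          break;
        ++NumRes;
      }
      return NumRes;
    }

    int main() {
      assert(resultIndex(1, {10, 20, 30}, 20) == 2);
    }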
@@ -526,11 +527,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
if (!Node->isMachineOpcode())
continue;
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
- if (!MCID.ImplicitDefs)
- continue;
- for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
- CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
- }
+ for (MCPhysReg Reg : MCID.implicit_defs())
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
return !LRegs.empty();
}
@@ -777,8 +775,7 @@ void ScheduleDAGLinearize::Schedule() {
MachineBasicBlock*
ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
- InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos,
- DAG->getUseInstrRefDebugInfo());
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
DenseMap<SDValue, Register> VRBaseMap;
LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 8a04ce7535a1..c252046ef10b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -24,7 +24,7 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
@@ -302,6 +302,8 @@ private:
} // end anonymous namespace
+static constexpr unsigned RegSequenceCost = 1;
+
/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
@@ -321,7 +323,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
// Special handling for CopyFromReg of untyped values.
if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
- unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
RegClass = RC->getID();
Cost = 1;
@@ -333,13 +335,14 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
RegClass = RC->getID();
- Cost = 1;
+ Cost = RegSequenceCost;
return;
}
unsigned Idx = RegDefPos.GetIdx();
- const MCInstrDesc Desc = TII->get(Opcode);
+ const MCInstrDesc &Desc = TII->get(Opcode);
const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
+ assert(RC && "Not a valid register class");
RegClass = RC->getID();
// FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
// better way to determine it.
@@ -1089,7 +1092,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
RemovePred(SU, Pred);
AddPredQueued(NewSU, Pred);
}
- for (SDep D : NodeSuccs) {
+ for (SDep &D : NodeSuccs) {
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
@@ -1100,7 +1103,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
!D.isCtrl() && NewSU->NumRegDefsLeft > 0)
--NewSU->NumRegDefsLeft;
}
- for (SDep D : ChainSuccs) {
+ for (SDep &D : ChainSuccs) {
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
@@ -1204,11 +1207,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
D.setSUnit(NewSU);
AddPredQueued(SuccSU, D);
D.setSUnit(SU);
- DelDeps.push_back(std::make_pair(SuccSU, D));
+ DelDeps.emplace_back(SuccSU, D);
}
}
- for (auto &DelDep : DelDeps)
- RemovePred(DelDep.first, DelDep.second);
+ for (const auto &[DelSU, DelD] : DelDeps)
+ RemovePred(DelSU, DelD);
AvailableQueue->updateNode(SU);
AvailableQueue->addNode(NewSU);
@@ -1242,17 +1245,17 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
SDep D = Succ;
D.setSUnit(CopyToSU);
AddPredQueued(SuccSU, D);
- DelDeps.push_back(std::make_pair(SuccSU, Succ));
+ DelDeps.emplace_back(SuccSU, Succ);
}
else {
- // Avoid scheduling the def-side copy before other successors. Otherwise
+ // Avoid scheduling the def-side copy before other successors. Otherwise,
// we could introduce another physreg interference on the copy and
// continue inserting copies indefinitely.
AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
}
}
- for (auto &DelDep : DelDeps)
- RemovePred(DelDep.first, DelDep.second);
+ for (const auto &[DelSU, DelD] : DelDeps)
+ RemovePred(DelSU, DelD);
SDep FromDep(SU, SDep::Data, Reg);
FromDep.setLatency(SU->Latency);
@@ -1281,10 +1284,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
NumRes = 1;
} else {
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ assert(!MCID.implicit_defs().empty() &&
+ "Physical reg def must be in implicit def list!");
NumRes = MCID.getNumDefs();
- for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
- if (Reg == *ImpDef)
+ for (MCPhysReg ImpDef : MCID.implicit_defs()) {
+ if (Reg == ImpDef)
break;
++NumRes;
}
@@ -1381,8 +1385,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
InlineAsm::isClobberKind(Flags)) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
- unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- if (Register::isPhysicalRegister(Reg))
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (Reg.isPhysical())
CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
} else
@@ -1419,7 +1423,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
}
if (const uint32_t *RegMask = getNodeRegMask(Node))
CheckForLiveRegDefMasked(SU, RegMask,
- makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
+ ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
RegAdded, LRegs);
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
@@ -1429,16 +1433,14 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
// of %noreg. When the OptionalDef is set to a valid register, we need to
// handle it in the same way as an ImplicitDef.
for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
- if (MCID.OpInfo[i].isOptionalDef()) {
+ if (MCID.operands()[i].isOptionalDef()) {
const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
- unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
+ Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
}
- if (!MCID.ImplicitDefs)
- continue;
- for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
- CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ for (MCPhysReg Reg : MCID.implicit_defs())
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
return !LRegs.empty();
@@ -1484,16 +1486,15 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
else dbgs() << printReg(LRegs[0], TRI);
dbgs() << " SU #" << CurSU->NodeNum << '\n');
- std::pair<LRegsMapT::iterator, bool> LRegsPair =
- LRegsMap.insert(std::make_pair(CurSU, LRegs));
- if (LRegsPair.second) {
+ auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs);
+ if (LRegsInserted) {
CurSU->isPending = true; // This SU is not in AvailableQueue right now.
Interferences.push_back(CurSU);
}
else {
assert(CurSU->isPending && "Interferences are pending");
// Update the interference with current live regs.
- LRegsPair.first->second = LRegs;
+ LRegsIter->second = LRegs;
}
CurSU = AvailableQueue->pop();
}
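
The insert(std::make_pair(...)) call above becomes try_emplace plus a structured binding, avoiding the pair construction when the key is already present. The idiom in miniature (plain STL; DenseMap::try_emplace behaves the same way for this purpose):

    #include <map>
    #include <vector>

    void recordInterference(std::map<int, std::vector<unsigned>> &LRegsMap,
                            int NodeNum,
                            const std::vector<unsigned> &LRegs) {
      auto [It, Inserted] = LRegsMap.try_emplace(NodeNum, LRegs);
      if (!Inserted)
        It->second = LRegs; // already pending: refresh the live-reg set
    }
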
@@ -2302,6 +2303,16 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
continue;
}
+ if (POpc == TargetOpcode::REG_SEQUENCE) {
+ unsigned DstRCIdx =
+ cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ unsigned RCId = RC->getID();
+ // REG_SEQUENCE is untyped, so getRepRegClassCostFor cannot be used
+ // here. Instead use the same constant as in GetCostForDef.
+ RegPressure[RCId] += RegSequenceCost;
+ continue;
+ }
unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
MVT VT = PN->getSimpleValueType(i);
@@ -2376,9 +2387,9 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) {
const SUnit *PredSU = Pred.getSUnit();
if (PredSU->getNode() &&
PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
- unsigned Reg =
- cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
- if (Register::isVirtualRegister(Reg)) {
+ Register Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
RetVal = true;
continue;
}
@@ -2397,9 +2408,9 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) {
if (Succ.isCtrl()) continue;
const SUnit *SuccSU = Succ.getSUnit();
if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
- unsigned Reg =
- cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
- if (Register::isVirtualRegister(Reg)) {
+ Register Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
RetVal = true;
continue;
}
@@ -2854,10 +2865,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
ScheduleDAGRRList *scheduleDAG,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- const MCPhysReg *ImpDefs
- = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
+ ArrayRef<MCPhysReg> ImpDefs =
+ TII->get(SU->getNode()->getMachineOpcode()).implicit_defs();
const uint32_t *RegMask = getNodeRegMask(SU->getNode());
- if(!ImpDefs && !RegMask)
+ if (ImpDefs.empty() && !RegMask)
return false;
for (const SDep &Succ : SU->Succs) {
@@ -2871,14 +2882,14 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
return true;
- if (ImpDefs)
- for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
- // Return true if SU clobbers this physical register use and the
- // definition of the register reaches from DepSU. IsReachable queries
- // a topological forward sort of the DAG (following the successors).
- if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
- scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
- return true;
+ for (MCPhysReg ImpDef : ImpDefs) {
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) &&
+ scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
+ return true;
+ }
}
}
return false;
@@ -2891,16 +2902,16 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const TargetRegisterInfo *TRI) {
SDNode *N = SuccSU->getNode();
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
- assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs();
+ assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
- const MCPhysReg *SUImpDefs =
- TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ ArrayRef<MCPhysReg> SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).implicit_defs();
const uint32_t *SURegMask = getNodeRegMask(SUNode);
- if (!SUImpDefs && !SURegMask)
+ if (SUImpDefs.empty() && !SURegMask)
continue;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
MVT VT = N->getSimpleValueType(i);
@@ -2908,13 +2919,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
continue;
if (!N->hasAnyUseOfValue(i))
continue;
- unsigned Reg = ImpDefs[i - NumDefs];
+ MCPhysReg Reg = ImpDefs[i - NumDefs];
if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
return true;
- if (!SUImpDefs)
- continue;
- for (;*SUImpDefs; ++SUImpDefs) {
- unsigned SUReg = *SUImpDefs;
+ for (MCPhysReg SUReg : SUImpDefs) {
if (TRI->regsOverlap(Reg, SUReg))
return true;
}
@@ -2968,8 +2976,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyToReg &&
- Register::isVirtualRegister(
- cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
continue;
SDNode *PredFrameSetup = nullptr;
@@ -3015,8 +3022,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
- Register::isVirtualRegister(
- cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
continue;
// Perform checks on the successors of PredSU.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 5166db033c62..2e1fd1e8a758 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -110,11 +110,15 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
+ const TargetLowering &TLI,
unsigned &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
return;
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost))
+ return;
+
if (Register::isVirtualRegister(Reg))
return;
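
CheckForPhysRegDependency now gives the target the first say via a new TargetLowering hook before the generic CopyToReg/physreg logic runs. A hypothetical override (fragment; MyTargetLowering, MYTGT_SPECIAL, and the register number are illustrative — only the hook signature is taken from TargetLowering):

    bool MyTargetLowering::checkForPhysRegDependency(
        SDNode *Def, SDNode *User, unsigned Op,
        const TargetRegisterInfo *TRI, const TargetInstrInfo *TII,
        unsigned &PhysReg, int &Cost) const {
      // Claim the dependency for one illustrative machine opcode.
      if (Def->isMachineOpcode() &&
          Def->getMachineOpcode() == MYTGT_SPECIAL) {
        PhysReg = 42; // stand-in for a real physical register number
        Cost = 1;
        return true;  // classified by the target; generic code is skipped
      }
      return false;   // fall back to the generic handling
    }
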
@@ -188,7 +192,7 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
"expected an unused glue value");
CloneNodeWithValues(N, DAG,
- makeArrayRef(N->value_begin(), N->getNumValues() - 1));
+ ArrayRef(N->value_begin(), N->getNumValues() - 1));
}
/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
@@ -460,7 +464,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
// Find all predecessors and successors of the group.
for (SDNode *N = SU.getNode(); N; N = N->getGluedNode()) {
if (N->isMachineOpcode() &&
- TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+ !TII->get(N->getMachineOpcode()).implicit_defs().empty()) {
SU.hasPhysRegClobbers = true;
unsigned NumUsed = InstrEmitter::CountResults(N);
while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
@@ -485,7 +489,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
unsigned PhysReg = 0;
int Cost = 1;
// Determine if this is a physical register dependency.
- CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost);
assert((PhysReg == 0 || !isChain) &&
"Chain dependence via physreg data?");
// FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
@@ -843,8 +848,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
/// not necessarily refer to returned BB. The emitter may split blocks.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
- InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos,
- DAG->getUseInstrRefDebugInfo());
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
DenseMap<SDValue, Register> VRBaseMap;
DenseMap<SUnit*, Register> CopyVRBaseMap;
SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
@@ -890,6 +894,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
MI->setFlag(MachineInstr::MIFlag::NoMerge);
}
+ if (MDNode *MD = DAG->getPCSections(Node))
+ MI->setPCSections(MF, MD);
+
return MI;
};
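
EmitSchedule above now copies !pcsections metadata from the SDNode onto the emitted MachineInstr. The metadata reaches the DAG through the per-node extra-info table (the SDEI member touched elsewhere in this file's diff); roughly, assuming the LLVM 16-era SelectionDAG API:

    // Recording side (SelectionDAGBuilder, sketched):
    if (MDNode *MD = I.getMetadata(LLVMContext::MD_pcsections))
      DAG.addPCSections(Node, MD);
    // Replay side (the hunk above): attach it to the machine instruction.
    if (MDNode *MD = DAG->getPCSections(Node))
      MI->setPCSections(MF, MD);
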
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 195c0e6a836f..9a3609bc183b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -27,6 +26,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -45,6 +45,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -92,6 +93,7 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}
void SelectionDAG::DAGNodeDeletedListener::anchor() {}
+void SelectionDAG::DAGNodeInsertedListener::anchor() {}
#define DEBUG_TYPE "selectiondag"
@@ -291,6 +293,43 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
return true;
}
+bool ISD::isVectorShrinkable(const SDNode *N, unsigned NewEltSize,
+ bool Signed) {
+ assert(N->getValueType(0).isVector() && "Expected a vector!");
+
+ unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
+ if (EltSize <= NewEltSize)
+ return false;
+
+ if (N->getOpcode() == ISD::ZERO_EXTEND) {
+ return (N->getOperand(0).getValueType().getScalarSizeInBits() <=
+ NewEltSize) &&
+ !Signed;
+ }
+ if (N->getOpcode() == ISD::SIGN_EXTEND) {
+ return (N->getOperand(0).getValueType().getScalarSizeInBits() <=
+ NewEltSize) &&
+ Signed;
+ }
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ if (!isa<ConstantSDNode>(Op))
+ return false;
+
+ APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().trunc(EltSize);
+ if (Signed && C.trunc(NewEltSize).sext(EltSize) != C)
+ return false;
+ if (!Signed && C.trunc(NewEltSize).zext(EltSize) != C)
+ return false;
+ }
+
+ return true;
+}
+
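
The new isVectorShrinkable answers: can every element of this extension or BUILD_VECTOR be represented in NewEltSize bits with the given signedness? For the constant path, an element survives iff truncating and re-extending round-trips. A self-contained C++ model of that check for an 8-bit target width:

    #include <cstdint>

    bool shrinkableToI8(int32_t C, bool Signed) {
      if (Signed)
        return static_cast<int32_t>(static_cast<int8_t>(C)) == C;  // sext
      return static_cast<int32_t>(static_cast<uint8_t>(C)) == C;   // zext
    }
    // shrinkableToI8(100, true)  -> true;  shrinkableToI8(200, true) -> false
    // shrinkableToI8(200, false) -> true;  shrinkableToI8(-1, false) -> false
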
bool ISD::allOperandsUndef(const SDNode *N) {
// Return false if the node has no operands.
// This is "logically inconsistent" with the definition of "all" but
@@ -300,6 +339,10 @@ bool ISD::allOperandsUndef(const SDNode *N) {
return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); });
}
+bool ISD::isFreezeUndef(const SDNode *N) {
+ return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef();
+}
+
bool ISD::matchUnaryPredicate(SDValue Op,
std::function<bool(ConstantSDNode *)> Match,
bool AllowUndefs) {
@@ -450,10 +493,10 @@ bool ISD::isVPReduction(unsigned Opcode) {
}
/// The operand position of the vector mask.
-Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
+std::optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
switch (Opcode) {
default:
- return None;
+ return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \
case ISD::VPSD: \
return MASKPOS;
@@ -462,10 +505,10 @@ Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
}
/// The operand position of the explicit vector length parameter.
-Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
+std::optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
switch (Opcode) {
default:
- return None;
+ return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \
case ISD::VPSD: \
return EVLPOS;
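
These hunks are part of the tree-wide migration from llvm::Optional to std::optional; llvm::None becomes std::nullopt with no behavior change. The shape of the migrated accessor, reduced to standard C++ (the opcode and position constants are illustrative):

    #include <optional>

    std::optional<unsigned> getMaskIdxSketch(unsigned Opcode) {
      switch (Opcode) {
      default:
        return std::nullopt; // was: return None;
      case 1:                // stand-in for a VP opcode
        return 2;            // stand-in for its mask operand position
      }
    }
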
@@ -618,7 +661,7 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
}
}
-static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
+static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned OpC,
SDVTList VTList, ArrayRef<SDValue> OpList) {
AddNodeIDOpcode(ID, OpC);
AddNodeIDValueTypes(ID, VTList);
@@ -1018,6 +1061,9 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
// If any of the SDDbgValue nodes refer to this SDNode, invalidate
// them and forget about that node.
DbgInfo->erase(N);
+
+ // Invalidate extra info.
+ SDEI.erase(N);
}
#ifndef NDEBUG
@@ -1230,18 +1276,18 @@ Align SelectionDAG::getEVTAlign(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), OptLevel(OL),
- EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
+ EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other, MVT::Glue)),
Root(getEntryNode()) {
InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
}
void SelectionDAG::init(MachineFunction &NewMF,
- OptimizationRemarkEmitter &NewORE,
- Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- LegacyDivergenceAnalysis * Divergence,
- ProfileSummaryInfo *PSIin,
- BlockFrequencyInfo *BFIin) {
+ OptimizationRemarkEmitter &NewORE, Pass *PassPtr,
+ const TargetLibraryInfo *LibraryInfo,
+ LegacyDivergenceAnalysis *Divergence,
+ ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin,
+ FunctionVarLocs const *VarLocs) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
@@ -1252,6 +1298,7 @@ void SelectionDAG::init(MachineFunction &NewMF,
DA = Divergence;
PSI = PSIin;
BFI = BFIin;
+ FnVarLocs = VarLocs;
}
SelectionDAG::~SelectionDAG() {
@@ -1326,7 +1373,7 @@ void SelectionDAG::clear() {
ExternalSymbols.clear();
TargetExternalSymbols.clear();
MCSymbols.clear();
- SDCallSiteDbgInfo.clear();
+ SDEI.clear();
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
static_cast<CondCodeSDNode*>(nullptr));
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
@@ -1341,7 +1388,8 @@ void SelectionDAG::clear() {
SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType())
? getNode(ISD::FP_EXTEND, DL, VT, Op)
- : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
+ : getNode(ISD::FP_ROUND, DL, VT, Op,
+ getIntPtrConstant(0, DL, /*isTarget=*/true));
}
std::pair<SDValue, SDValue>
@@ -1415,6 +1463,10 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
return getZeroExtendInReg(Op, DL, VT);
}
+SDValue SelectionDAG::getNegative(SDValue Val, const SDLoc &DL, EVT VT) {
+ return getNode(ISD::SUB, DL, VT, getConstant(0, DL, VT), Val);
+}
+
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT));
@@ -1431,6 +1483,20 @@ SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val,
return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL);
}
+SDValue SelectionDAG::getVPPtrExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op,
+ SDValue Mask, SDValue EVL) {
+ return getVPZExtOrTrunc(DL, VT, Op, Mask, EVL);
+}
+
+SDValue SelectionDAG::getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op,
+ SDValue Mask, SDValue EVL) {
+ if (VT.bitsGT(Op.getValueType()))
+ return getNode(ISD::VP_ZERO_EXTEND, DL, VT, Op, Mask, EVL);
+ if (VT.bitsLT(Op.getValueType()))
+ return getNode(ISD::VP_TRUNCATE, DL, VT, Op, Mask, EVL);
+ return Op;
+}
+
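
getVPZExtOrTrunc mirrors the existing getZExtOrTrunc but emits the VP (vector-predicated) node forms, threading the mask and explicit vector length through; getVPPtrExtOrTrunc forwards to it because pointers are zero-extended. The dispatch, modeled on bit widths alone:

    enum class VPCast { ZExt, Trunc, None };

    VPCast classify(unsigned DstBits, unsigned SrcBits) {
      if (DstBits > SrcBits) return VPCast::ZExt;  // ISD::VP_ZERO_EXTEND
      if (DstBits < SrcBits) return VPCast::Trunc; // ISD::VP_TRUNCATE
      return VPCast::None;                         // already the right type
    }
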
SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT,
EVT OpVT) {
if (!V)
@@ -1544,7 +1610,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
"APInt size does not match type size!");
unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt);
ID.AddPointer(Elt);
ID.AddBoolean(isO);
void *IP = nullptr;
@@ -1561,11 +1627,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
}
SDValue Result(N, 0);
- if (VT.isScalableVector())
- Result = getSplatVector(VT, DL, Result);
- else if (VT.isVector())
- Result = getSplatBuildVector(VT, DL, Result);
-
+ if (VT.isVector())
+ Result = getSplat(VT, DL, Result);
return Result;
}
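
Both constant builders now funnel through getSplat, which hides the scalable/fixed distinction the callers used to spell out. Its body is roughly the old branch moved behind one entry point (sketch of the SelectionDAG.h helper from the same release):

    SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op) {
      assert(VT.isVector() && "Can't splat to non-vector type");
      return VT.isScalableVector() ? getSplatVector(VT, DL, Op)
                                   : getSplatBuildVector(VT, DL, Op);
    }
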
@@ -1602,7 +1665,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
// we don't have issues with SNANs.
unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt);
ID.AddPointer(&V);
void *IP = nullptr;
SDNode *N = nullptr;
@@ -1617,10 +1680,8 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
}
SDValue Result(N, 0);
- if (VT.isScalableVector())
- Result = getSplatVector(VT, DL, Result);
- else if (VT.isVector())
- Result = getSplatBuildVector(VT, DL, Result);
+ if (VT.isVector())
+ Result = getSplat(VT, DL, Result);
NewSDValueDbgMsg(Result, "Creating fp constant: ", this);
return Result;
}
@@ -1661,7 +1722,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
@@ -1679,7 +1740,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddInteger(FI);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
@@ -1697,7 +1758,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
"Cannot set target flags on target-independent jump tables");
unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddInteger(JTI);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
@@ -1721,7 +1782,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
: getDataLayout().getPrefTypeAlign(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
ID.AddPointer(C);
@@ -1748,7 +1809,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
Alignment = getDataLayout().getPrefTypeAlign(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
C->addSelectionDAGCSEId(ID);
@@ -1767,7 +1828,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
unsigned TargetFlags) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None);
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), std::nullopt);
ID.AddInteger(Index);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
@@ -1783,7 +1844,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None);
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), std::nullopt);
ID.AddPointer(MBB);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
@@ -1894,7 +1955,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
"Index out of range");
// Copy the mask so we can do any needed cleanup.
- SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end());
+ SmallVector<int, 8> MaskVec(Mask);
// Canonicalize shuffle v, v -> v, undef
if (N1 == N2) {
@@ -2050,7 +2111,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
EVT VT = SV.getValueType(0);
- SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
+ SmallVector<int, 8> MaskVec(SV.getMask());
ShuffleVectorSDNode::commuteMask(MaskVec);
SDValue Op0 = SV.getOperand(0);
@@ -2060,7 +2121,7 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::Register, getVTList(VT), None);
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), std::nullopt);
ID.AddInteger(RegNo);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
@@ -2075,7 +2136,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None);
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), std::nullopt);
ID.AddPointer(RegMask);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
@@ -2117,7 +2178,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt);
ID.AddPointer(BA);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
@@ -2133,7 +2194,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
SDValue SelectionDAG::getSrcValue(const Value *V) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), std::nullopt);
ID.AddPointer(V);
void *IP = nullptr;
@@ -2148,7 +2209,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
SDValue SelectionDAG::getMDNode(const MDNode *MD) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None);
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), std::nullopt);
ID.AddPointer(MD);
void *IP = nullptr;
@@ -2287,7 +2348,7 @@ SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) {
StackID = TFI->getStackIDForScalableVectors();
// The stack id gives an indication of whether the object is scalable or
// not, so it's safe to pass in the minimum size here.
- int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinSize(), Alignment,
+ int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinValue(), Alignment,
false, nullptr, StackID);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
@@ -2305,8 +2366,9 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
assert(VT1Size.isScalable() == VT2Size.isScalable() &&
"Don't know how to choose the maximum size when creating a stack "
"temporary");
- TypeSize Bytes =
- VT1Size.getKnownMinSize() > VT2Size.getKnownMinSize() ? VT1Size : VT2Size;
+ TypeSize Bytes = VT1Size.getKnownMinValue() > VT2Size.getKnownMinValue()
+ ? VT1Size
+ : VT2Size;
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
@@ -2380,34 +2442,34 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
default: break;
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
OpVT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpLessThan, dl, VT,
OpVT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
OpVT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
VT, OpVT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
R==APFloat::cmpEqual, dl, VT,
OpVT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpEqual, dl, VT, OpVT);
case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
@@ -2459,48 +2521,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
return SDValue();
}
-/// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by DemandedBits are used.
-/// TODO: really we should be making this into the DAG equivalent of
-/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
-SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
- EVT VT = V.getValueType();
-
- if (VT.isScalableVector())
- return SDValue();
-
- switch (V.getOpcode()) {
- default:
- return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, *this);
- case ISD::Constant: {
- const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
- APInt NewVal = CVal & DemandedBits;
- if (NewVal != CVal)
- return getConstant(NewVal, SDLoc(V), V.getValueType());
- break;
- }
- case ISD::SRL:
- // Only look at single-use SRLs.
- if (!V.getNode()->hasOneUse())
- break;
- if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
- // See if we can recursively simplify the LHS.
- unsigned Amt = RHSC->getZExtValue();
-
- // Watch out for shift count overflow though.
- if (Amt >= DemandedBits.getBitWidth())
- break;
- APInt SrcDemandedBits = DemandedBits << Amt;
- if (SDValue SimplifyLHS = TLI->SimplifyMultipleUseDemandedBits(
- V.getOperand(0), SrcDemandedBits, *this))
- return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
- V.getOperand(1));
- }
- break;
- }
- return SDValue();
-}
-
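
GetDemandedBits is deleted rather than fixed: its own TODO noted it should not create new nodes, and TargetLowering::SimplifyMultipleUseDemandedBits already does the same job by returning an existing operand that is equivalent on the demanded bits. A former caller migrates roughly like this (fragment; the demanded-bits mask is an example):

    APInt DemandedBits = APInt::getLowBitsSet(BitWidth, 8); // only low byte
    if (SDValue V2 =
            TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DAG))
      V = V2; // reuses an existing node; nothing new is built
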
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
@@ -2538,17 +2558,40 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
return Mask.isSubsetOf(computeKnownBits(V, Depth).One);
}
+APInt SelectionDAG::computeVectorKnownZeroElements(SDValue Op,
+ const APInt &DemandedElts,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isVector() && !VT.isScalableVector() && "Only for fixed vectors!");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(DemandedElts.getBitWidth() == NumElts && "Unexpected demanded mask.");
+
+ APInt KnownZeroElements = APInt::getZero(NumElts);
+ for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) {
+ if (!DemandedElts[EltIdx])
+ continue; // Don't query elements that are not demanded.
+ APInt Mask = APInt::getOneBitSet(NumElts, EltIdx);
+ if (MaskedVectorIsZero(Op, Mask, Depth))
+ KnownZeroElements.setBit(EltIdx);
+ }
+ return KnownZeroElements;
+}
+
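
The new computeVectorKnownZeroElements returns an APInt lane mask with a bit set for every demanded element proven zero. One way a combine might consume it (sketch; A and B are arbitrary fixed-vector SDValues):

    APInt Demanded = APInt::getAllOnes(NumElts);
    APInt ZeroA = DAG.computeVectorKnownZeroElements(A, Demanded);
    APInt ZeroB = DAG.computeVectorKnownZeroElements(B, Demanded);
    // (or A, B) is a lane blend if every lane is zero on at least one side.
    bool IsBlend = (ZeroA | ZeroB).isAllOnes();
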
/// isSplatValue - Return true if the vector V has the same value
-/// across all DemandedElts. For scalable vectors it does not make
-/// sense to specify which elements are demanded or undefined, therefore
-/// they are simply ignored.
+/// across all DemandedElts. For scalable vectors, we don't know the
+/// number of lanes at compile time. Instead, we use a 1 bit APInt
+/// to represent a conservative value for all lanes; that is, that
+/// one bit value is implicitly splatted across all lanes.
bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
APInt &UndefElts, unsigned Depth) const {
unsigned Opcode = V.getOpcode();
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
+ assert((!VT.isScalableVector() || DemandedElts.getBitWidth() == 1) &&
+ "scalable demanded bits are ignored");
- if (!VT.isScalableVector() && !DemandedElts)
+ if (!DemandedElts)
return false; // No demanded elts, better to assume we don't know anything.
if (Depth >= MaxRecursionDepth)
@@ -2585,7 +2628,8 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
default:
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID)
- return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, Depth);
+ return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, *this,
+ Depth);
break;
}
@@ -2730,11 +2774,11 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) const {
assert(VT.isVector() && "Vector type expected");
APInt UndefElts;
- APInt DemandedElts;
-
- // For now we don't support this with scalable vectors.
- if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts
+ = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements());
return isSplatValue(V, DemandedElts, UndefElts) &&
(AllowUndefs || !UndefElts);
}
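
This one-bit convention is the backbone of the scalable-vector enablement in the rest of this file: fixed-length vectors get a full per-lane mask, scalable vectors a single bit that stands for every lane at once. The recurring construction, factored out as a sketch:

    APInt makeDemandedElts(EVT VT) {
      assert(VT.isVector() && "lane mask only makes sense for vectors");
      return APInt::getAllOnes(
          VT.isScalableVector() ? 1 // one bit, implicitly splat to all lanes
                                : VT.getVectorNumElements());
    }
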
@@ -2747,10 +2791,11 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
switch (Opcode) {
default: {
APInt UndefElts;
- APInt DemandedElts;
-
- if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts
+ = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements());
if (isSplatValue(V, DemandedElts, UndefElts)) {
if (VT.isScalableVector()) {
@@ -2773,9 +2818,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
SplatIdx = 0;
return V;
case ISD::VECTOR_SHUFFLE: {
- if (VT.isScalableVector())
- return SDValue();
-
+ assert(!VT.isScalableVector());
// Check if this is a shuffle node doing a splat.
// TODO - remove this and rely purely on SelectionDAG::isSplatValue,
// getTargetVShiftNode currently struggles without the splat source.
@@ -2890,14 +2933,10 @@ const APInt *SelectionDAG::getValidMaximumShiftAmountConstant(
KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
- // TOOD: Until we have a plan for how to represent demanded elements for
- // scalable vectors, we can just bail out for now.
- if (Op.getValueType().isScalableVector()) {
- unsigned BitWidth = Op.getScalarValueSizeInBits();
- return KnownBits(BitWidth);
- }
-
- APInt DemandedElts = VT.isVector()
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return computeKnownBits(Op, DemandedElts, Depth);
@@ -2912,11 +2951,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
KnownBits Known(BitWidth); // Don't know anything.
- // TOOD: Until we have a plan for how to represent demanded elements for
- // scalable vectors, we can just bail out for now.
- if (Op.getValueType().isScalableVector())
- return Known;
-
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
// We know all of the bits for a constant!
return KnownBits::makeConstant(C->getAPIntValue());
@@ -2931,7 +2965,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
KnownBits Known2;
unsigned NumElts = DemandedElts.getBitWidth();
- assert((!Op.getValueType().isVector() ||
+ assert((!Op.getValueType().isFixedLengthVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
"Unexpected vector size");
@@ -2943,7 +2977,17 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::MERGE_VALUES:
return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts,
Depth + 1);
+ case ISD::SPLAT_VECTOR: {
+ SDValue SrcOp = Op.getOperand(0);
+ assert(SrcOp.getValueSizeInBits() >= BitWidth &&
+ "Expected SPLAT_VECTOR implicit truncation");
+ // Implicitly truncate the bits to match the official semantics of
+ // SPLAT_VECTOR.
+ Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth);
+ break;
+ }
case ISD::BUILD_VECTOR:
+ assert(!Op.getValueType().isScalableVector());
// Collect the known bits that are shared by every demanded vector element.
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
@@ -2969,32 +3013,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
break;
case ISD::VECTOR_SHUFFLE: {
+ assert(!Op.getValueType().isScalableVector());
// Collect the known bits that are shared by every vector element referenced
// by the shuffle.
- APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
- Known.Zero.setAllBits(); Known.One.setAllBits();
+ APInt DemandedLHS, DemandedRHS;
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
- for (unsigned i = 0; i != NumElts; ++i) {
- if (!DemandedElts[i])
- continue;
-
- int M = SVN->getMaskElt(i);
- if (M < 0) {
- // For UNDEF elements, we don't know anything about the common state of
- // the shuffle result.
- Known.resetAll();
- DemandedLHS.clearAllBits();
- DemandedRHS.clearAllBits();
- break;
- }
+ if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts,
+ DemandedLHS, DemandedRHS))
+ break;
- if ((unsigned)M < NumElts)
- DemandedLHS.setBit((unsigned)M % NumElts);
- else
- DemandedRHS.setBit((unsigned)M % NumElts);
- }
// Known bits are the values that are shared by every demanded element.
+ Known.Zero.setAllBits(); Known.One.setAllBits();
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
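
The open-coded mask walk here (and in the matching ComputeNumSignBits hunk below) is replaced by getShuffleDemandedElts, the shared helper behind the new llvm/Analysis/VectorUtils.h include earlier in this file's diff. Its contract, modeled in self-contained C++: split the demanded lanes between the two shuffle inputs, failing if a demanded lane's mask element is undef.

    #include <vector>

    bool splitShuffleDemand(int NumElts, const std::vector<int> &Mask,
                            const std::vector<bool> &Demanded,
                            std::vector<bool> &LHS, std::vector<bool> &RHS) {
      LHS.assign(NumElts, false);
      RHS.assign(NumElts, false);
      for (int I = 0; I != NumElts; ++I) {
        if (!Demanded[I])
          continue;
        int M = Mask[I];
        if (M < 0)
          return false; // undef lane demanded: caller must give up
        if (M < NumElts)
          LHS[M % NumElts] = true;  // lane comes from the first input
        else
          RHS[M % NumElts] = true;  // lane comes from the second input
      }
      return true;
    }
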
@@ -3011,6 +3041,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::CONCAT_VECTORS: {
+ if (Op.getValueType().isScalableVector())
+ break;
// Split DemandedElts and test each of the demanded subvectors.
Known.Zero.setAllBits(); Known.One.setAllBits();
EVT SubVectorVT = Op.getOperand(0).getValueType();
@@ -3031,6 +3063,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::INSERT_SUBVECTOR: {
+ if (Op.getValueType().isScalableVector())
+ break;
// Demand any elements from the subvector and the remainder from the src it's
// inserted into.
SDValue Src = Op.getOperand(0);
@@ -3058,7 +3092,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
// Bail until we can represent demanded elements for scalable vectors.
- if (Src.getValueType().isScalableVector())
+ if (Op.getValueType().isScalableVector() ||
+ Src.getValueType().isScalableVector())
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
@@ -3067,6 +3101,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::SCALAR_TO_VECTOR: {
+ if (Op.getValueType().isScalableVector())
+ break;
// We know about scalar_to_vector as much as we know about it source,
// which becomes the first element of otherwise unknown vector.
if (DemandedElts != 1)
@@ -3080,6 +3116,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::BITCAST: {
+ if (Op.getValueType().isScalableVector())
+ break;
+
SDValue N0 = Op.getOperand(0);
EVT SubVT = N0.getValueType();
unsigned SubBitWidth = SubVT.getScalarSizeInBits();
@@ -3335,13 +3374,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
// Collect lo/hi source values and concatenate.
- // TODO: Would a KnownBits::concatBits helper be useful?
unsigned LoBits = Op.getOperand(0).getScalarValueSizeInBits();
unsigned HiBits = Op.getOperand(1).getScalarValueSizeInBits();
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = Known.anyext(LoBits + HiBits);
- Known.insertBits(Known2, LoBits);
+ Known = Known2.concat(Known);
// Collect shift amount.
Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
@@ -3372,7 +3409,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleTZ = Known2.countMaxTrailingZeros();
- unsigned LowBits = Log2_32(PossibleTZ) + 1;
+ unsigned LowBits = llvm::bit_width(PossibleTZ);
Known.Zero.setBitsFrom(LowBits);
break;
}
@@ -3381,7 +3418,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleLZ = Known2.countMaxLeadingZeros();
- unsigned LowBits = Log2_32(PossibleLZ) + 1;
+ unsigned LowBits = llvm::bit_width(PossibleLZ);
Known.Zero.setBitsFrom(LowBits);
break;
}
@@ -3389,7 +3426,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we know some of the bits are zero, they can't be one.
unsigned PossibleOnes = Known2.countMaxPopulation();
- Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
+ Known.Zero.setBitsFrom(llvm::bit_width(PossibleOnes));
break;
}
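
Log2_32(x) + 1 becomes llvm::bit_width(x) (llvm/ADT/bit.h, matching C++20 std::bit_width): both compute the number of bits needed to represent x, but bit_width(0) == 0 by definition, whereas the old spelling only produced 0 through unsigned wraparound of Log2_32(0). For instance:

    #include "llvm/ADT/bit.h"

    static_assert(llvm::bit_width(5u) == 3);
    static_assert(llvm::bit_width(255u) == 8);
    static_assert(llvm::bit_width(0u) == 0); // Log2_32(0)+1 wrapped to 0
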
case ISD::PARITY: {
@@ -3403,7 +3440,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (ISD::isNON_EXTLoad(LD) && Cst) {
// Determine any common known bits from the loaded constant pool value.
Type *CstTy = Cst->getType();
- if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) {
+ if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits() &&
+ !Op.getValueType().isScalableVector()) {
// If its a vector splat, then we can (quickly) reuse the scalar path.
// NOTE: We assume all elements match and none are UNDEF.
if (CstTy->isVectorTy()) {
@@ -3453,12 +3491,32 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero.setBitsFrom(MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
- if (LD->getExtensionType() == ISD::NON_EXTLOAD)
- computeKnownBitsFromRangeMetadata(*Ranges, Known);
+ EVT VT = LD->getValueType(0);
+
+ // TODO: Handle for extending loads
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ if (VT.isVector()) {
+ // Handle truncation to the first demanded element.
+ // TODO: Figure out which demanded elements are covered
+ if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
+ break;
+
+ // Handle the case where a load has a vector type, but scalar memory
+ // with an attached range.
+ EVT MemVT = LD->getMemoryVT();
+ KnownBits KnownFull(MemVT.getSizeInBits());
+
+ computeKnownBitsFromRangeMetadata(*Ranges, KnownFull);
+ Known = KnownFull.trunc(BitWidth);
+ } else
+ computeKnownBitsFromRangeMetadata(*Ranges, Known);
+ }
}
break;
}
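
The new branch handles a load whose result type is a vector but whose memory type is scalar and carries !range metadata: the range is evaluated at the memory width, then truncated to the element width. That is only sound for the lowest element on little-endian targets, hence the two guards. A worked instance (fragment using llvm::KnownBits; the range is an example):

    #include "llvm/Support/KnownBits.h"

    llvm::KnownBits elementBits() {
      llvm::KnownBits Full(64); // i64 in memory
      Full.Zero.setBitsFrom(8); // !range [0, 256): bits 8..63 known zero
      return Full.trunc(32);    // element 0 (i32): bits 8..31 known zero
    }
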
case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ if (Op.getValueType().isScalableVector())
+ break;
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3471,6 +3529,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ if (Op.getValueType().isScalableVector())
+ break;
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3487,6 +3547,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::ANY_EXTEND_VECTOR_INREG: {
+ if (Op.getValueType().isScalableVector())
+ break;
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3506,7 +3568,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
- Known = computeKnownBits(Op.getOperand(0), Depth+1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.Zero |= (~InMask);
Known.One &= (~Known.Zero);
break;
@@ -3538,7 +3600,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SUB:
case ISD::SUBC: {
assert(Op.getResNo() == 0 &&
@@ -3566,7 +3628,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE: {
@@ -3652,6 +3714,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::INSERT_VECTOR_ELT: {
+ if (Op.getValueType().isScalableVector())
+ break;
+
// If we know the element index, split the demand between the
// source vector and the inserted element, otherwise assume we need
// the original demanded vector elements and the value.
@@ -3781,7 +3846,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
@@ -3814,10 +3879,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
default:
if (Opcode < ISD::BUILTIN_OP_END)
break;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
+ // TODO: Probably okay to remove after audit; here to reduce change size
+ // in initial enablement patch for scalable vectors
+ if (Op.getValueType().isScalableVector())
+ break;
+
// Allow the target to implement this method for its nodes.
TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
break;
@@ -3914,11 +3984,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
- // TODO: Assume we don't know anything for now.
- if (VT.isScalableVector())
- return 1;
-
- APInt DemandedElts = VT.isVector()
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ComputeNumSignBits(Op, DemandedElts, Depth);
@@ -3941,7 +4010,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (Depth >= MaxRecursionDepth)
return 1; // Limit search depth.
- if (!DemandedElts || VT.isScalableVector())
+ if (!DemandedElts)
return 1; // No demanded elts, better to assume we don't know anything.
unsigned Opcode = Op.getOpcode();
@@ -3956,7 +4025,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::MERGE_VALUES:
return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts,
Depth + 1);
+ case ISD::SPLAT_VECTOR: {
+ // Check if the sign bits of source go down as far as the truncated value.
+ unsigned NumSrcBits = Op.getOperand(0).getValueSizeInBits();
+ unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (NumSrcSignBits > (NumSrcBits - VTBits))
+ return NumSrcSignBits - (NumSrcBits - VTBits);
+ break;
+ }
case ISD::BUILD_VECTOR:
+ assert(!VT.isScalableVector());
Tmp = VTBits;
for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
if (!DemandedElts[i])
@@ -3979,22 +4057,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::VECTOR_SHUFFLE: {
// Collect the minimum number of sign bits that are shared by every vector
// element referenced by the shuffle.
- APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ APInt DemandedLHS, DemandedRHS;
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
- for (unsigned i = 0; i != NumElts; ++i) {
- int M = SVN->getMaskElt(i);
- if (!DemandedElts[i])
- continue;
- // For UNDEF elements, we don't know anything about the common state of
- // the shuffle result.
- if (M < 0)
- return 1;
- if ((unsigned)M < NumElts)
- DemandedLHS.setBit((unsigned)M % NumElts);
- else
- DemandedRHS.setBit((unsigned)M % NumElts);
- }
+ if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts,
+ DemandedLHS, DemandedRHS))
+ return 1;
+
Tmp = std::numeric_limits<unsigned>::max();
if (!!DemandedLHS)
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
@@ -4010,6 +4079,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::BITCAST: {
+ if (VT.isScalableVector())
+ break;
SDValue N0 = Op.getOperand(0);
EVT SrcVT = N0.getValueType();
unsigned SrcBits = SrcVT.getScalarSizeInBits();
@@ -4067,6 +4138,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
return std::max(Tmp, Tmp2);
case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ if (VT.isScalableVector())
+ break;
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
@@ -4284,6 +4357,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::EXTRACT_ELEMENT: {
+ if (VT.isScalableVector())
+ break;
const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
const int BitWidth = Op.getValueSizeInBits();
const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth;
@@ -4294,9 +4369,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If the sign portion ends in our element the subtraction gives correct
// result. Otherwise it gives either negative or > bitwidth result
- return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
+ return std::clamp(KnownSign - rIndex * BitWidth, 0, BitWidth);
}
case ISD::INSERT_VECTOR_ELT: {
+ if (VT.isScalableVector())
+ break;
// If we know the element index, split the demand between the
// source vector and the inserted element, otherwise assume we need
// the original demanded vector elements and the value.
@@ -4327,6 +4404,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
}
case ISD::EXTRACT_VECTOR_ELT: {
+ assert(!VT.isScalableVector());
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
@@ -4365,6 +4443,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
}
case ISD::CONCAT_VECTORS: {
+ if (VT.isScalableVector())
+ break;
// Determine the minimum number of sign bits across all demanded
// elts of the input vectors. Early out if the result is already 1.
Tmp = std::numeric_limits<unsigned>::max();
@@ -4383,6 +4463,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
}
case ISD::INSERT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ break;
// Demand any elements from the subvector and the remainder from the src
// it's inserted into.
SDValue Src = Op.getOperand(0);
@@ -4406,6 +4488,34 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ if (const MDNode *Ranges = LD->getRanges()) {
+ if (DemandedElts != 1)
+ break;
+
+ ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
+ if (VTBits > CR.getBitWidth()) {
+ switch (LD->getExtensionType()) {
+ case ISD::SEXTLOAD:
+ CR = CR.signExtend(VTBits);
+ break;
+ case ISD::ZEXTLOAD:
+ CR = CR.zeroExtend(VTBits);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (VTBits != CR.getBitWidth())
+ break;
+ return std::min(CR.getSignedMin().getNumSignBits(),
+ CR.getSignedMax().getNumSignBits());
+ }
+
+ break;
+ }
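A worked example of the new !range path (values assumed, not from the patch):

  // SEXTLOAD of i8 with !range [0, 32), result type i32:
  ConstantRange CR(APInt(8, 0), APInt(8, 32)); // from the metadata
  CR = CR.signExtend(32);                      // now [0, 31] as i32
  // min(getSignedMin().getNumSignBits(), getSignedMax().getNumSignBits())
  //   = min(32, 27) = 27 known sign bits for the load.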
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
case ISD::ATOMIC_SWAP:
@@ -4453,7 +4563,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// We only need to handle vectors - computeKnownBits should handle
// scalar cases.
Type *CstTy = Cst->getType();
- if (CstTy->isVectorTy() &&
+ if (CstTy->isVectorTy() && !VT.isScalableVector() &&
(NumElts * VTBits) == CstTy->getPrimitiveSizeInBits() &&
VTBits == CstTy->getScalarSizeInBits()) {
Tmp = VTBits;
@@ -4488,10 +4598,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Opcode == ISD::INTRINSIC_WO_CHAIN ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::INTRINSIC_VOID) {
- unsigned NumBits =
+ // TODO: This can probably be removed once target code is audited. This
+ // is here purely to reduce patch size and review complexity.
+ if (!VT.isScalableVector()) {
+ unsigned NumBits =
TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
- if (NumBits > 1)
- FirstAnswer = std::max(FirstAnswer, NumBits);
+ if (NumBits > 1)
+ FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
@@ -4547,6 +4661,11 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
return true;
switch (Opcode) {
+ case ISD::VALUETYPE:
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ return true;
+
case ISD::UNDEF:
return PoisonOnly;
@@ -4562,9 +4681,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
}
return true;
- // TODO: Search for noundef attributes from library functions.
+ // TODO: Search for noundef attributes from library functions.
- // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
+ // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
default:
// Allow the target to implement this method for its nodes.
@@ -4575,7 +4694,94 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
break;
}
- return false;
+  // If Op can't create undef/poison and none of its operands are
+  // undef/poison, then Op is never undef/poison.
+  // NOTE: TargetNodes should handle this themselves in
+  // isGuaranteedNotToBeUndefOrPoisonForTargetNode.
+ return !canCreateUndefOrPoison(Op, PoisonOnly, /*ConsiderFlags*/ true,
+ Depth) &&
+ all_of(Op->ops(), [&](SDValue V) {
+ return isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly, Depth + 1);
+ });
+}
+
+bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
+ bool ConsiderFlags,
+ unsigned Depth) const {
+ // TODO: Assume we don't know anything for now.
+ EVT VT = Op.getValueType();
+ if (VT.isScalableVector())
+ return true;
+
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnes(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags,
+ Depth);
+}
+
+bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
+ bool PoisonOnly, bool ConsiderFlags,
+ unsigned Depth) const {
+ // TODO: Assume we don't know anything for now.
+ EVT VT = Op.getValueType();
+ if (VT.isScalableVector())
+ return true;
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ case ISD::FREEZE:
+ case ISD::INSERT_SUBVECTOR:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::FSHL:
+ case ISD::FSHR:
+ case ISD::BSWAP:
+ case ISD::CTPOP:
+ case ISD::BITREVERSE:
+ case ISD::PARITY:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::BITCAST:
+ case ISD::BUILD_VECTOR:
+ return false;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ // Matches hasPoisonGeneratingFlags().
+ return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
+ Op->getFlags().hasNoUnsignedWrap());
+
+ case ISD::SHL:
+ // If the max shift amount isn't in range, then the shift can create poison.
+ if (!getValidMaximumShiftAmountConstant(Op, DemandedElts))
+ return true;
+
+ // Matches hasPoisonGeneratingFlags().
+ return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
+ Op->getFlags().hasNoUnsignedWrap());
+
+ default:
+ // Allow the target to implement this method for its nodes.
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID)
+ return TLI->canCreateUndefOrPoisonForTargetNode(
+ Op, DemandedElts, *this, PoisonOnly, ConsiderFlags, Depth);
+ break;
+ }
+
+ // Be conservative and return true.
+ return true;
}
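As a usage sketch (X and DL are placeholders, and the Depth parameter is
assumed to default to 0 in the header declaration):

  SDValue Amt = DAG.getConstant(3, DL, MVT::i32);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Amt);
  bool MayPoison = DAG.canCreateUndefOrPoison(Shl, /*PoisonOnly=*/true,
                                              /*ConsiderFlags=*/true);
  // MayPoison == false: the shift amount is provably < 32 and no nsw/nuw
  // flags are set. With Amt == 35 the query would return true, since an
  // out-of-range shift amount yields poison.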
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
@@ -4598,7 +4804,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
if (Depth >= MaxRecursionDepth)
return false; // Limit search depth.
- // TODO: Handle vectors.
// If the value is a constant, we can obviously see if it is a NaN or not.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
return !C->getValueAPF().isNaN() ||
@@ -4613,7 +4818,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FDIV:
case ISD::FREM:
case ISD::FSIN:
- case ISD::FCOS: {
+ case ISD::FCOS:
+ case ISD::FMA:
+ case ISD::FMAD: {
if (SNaN)
return true;
// TODO: Need isKnownNeverInfinity
@@ -4650,14 +4857,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return true;
- case ISD::FMA:
- case ISD::FMAD: {
- if (SNaN)
- return true;
- return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
- isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
- }
case ISD::FSQRT: // Need to know the operand is positive
case ISD::FLOG:
case ISD::FLOG2:
@@ -4696,6 +4895,12 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::EXTRACT_VECTOR_ELT: {
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
+ case ISD::BUILD_VECTOR: {
+ for (const SDValue &Opnd : Op->ops())
+ if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))
+ return false;
+ return true;
+ }
default:
if (Opcode >= ISD::BUILTIN_OP_END ||
Opcode == ISD::INTRINSIC_WO_CHAIN ||
@@ -4938,7 +5143,7 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
/// Gets or creates the specified node.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opcode, getVTList(VT), None);
+ AddNodeIDNode(ID, Opcode, getVTList(VT), std::nullopt);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
@@ -4980,7 +5185,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::TRUNCATE:
if (C->isOpaque())
break;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ZERO_EXTEND:
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
@@ -5166,7 +5371,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::FREEZE:
assert(VT == Operand.getValueType() && "Unexpected VT!");
- if (isGuaranteedNotToBeUndefOrPoison(Operand))
+ if (isGuaranteedNotToBeUndefOrPoison(Operand, /*PoisonOnly*/ false,
+ /*Depth*/ 1))
return Operand;
break;
case ISD::TokenFactor:
@@ -5428,8 +5634,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
}
-static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
- const APInt &C2) {
+static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
+ const APInt &C2) {
switch (Opcode) {
case ISD::ADD: return C1 + C2;
case ISD::SUB: return C1 - C2;
@@ -5505,7 +5711,23 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
}
}
- return llvm::None;
+ return std::nullopt;
+}
+
+// Handle constant folding with UNDEF.
+// TODO: Handle more cases.
+static std::optional<APInt> FoldValueWithUndef(unsigned Opcode, const APInt &C1,
+ bool IsUndef1, const APInt &C2,
+ bool IsUndef2) {
+ if (!(IsUndef1 || IsUndef2))
+ return FoldValue(Opcode, C1, C2);
+
+ // Fold and(x, undef) -> 0
+ // Fold mul(x, undef) -> 0
+ if (Opcode == ISD::AND || Opcode == ISD::MUL)
+ return APInt::getZero(C1.getBitWidth());
+
+ return std::nullopt;
}
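A small illustration of the new undef handling (values invented): an undef
operand of AND or MUL can always be chosen as zero, so the folded element is
known-zero, while other opcodes still give up.

  std::optional<APInt> R =
      FoldValueWithUndef(ISD::AND, APInt(8, 0x5A), /*IsUndef1=*/false,
                         APInt(8, 0), /*IsUndef2=*/true);
  // R == APInt(8, 0); the same call with ISD::ADD returns std::nullopt.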
SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
@@ -5581,7 +5803,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (C1->isOpaque() || C2->isOpaque())
return SDValue();
- Optional<APInt> FoldAttempt =
+ std::optional<APInt> FoldAttempt =
FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
if (!FoldAttempt)
return SDValue();
@@ -5608,7 +5830,6 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
ElementCount NumElts = VT.getVectorElementCount();
// See if we can fold through bitcasted integer ops.
- // TODO: Can we handle undef elements?
if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
Ops[0].getOpcode() == ISD::BITCAST &&
@@ -5624,11 +5845,11 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
SmallVector<APInt> RawBits1, RawBits2;
BitVector UndefElts1, UndefElts2;
if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
- BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) &&
- UndefElts1.none() && UndefElts2.none()) {
+ BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2)) {
SmallVector<APInt> RawBits;
for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
- Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);
+ std::optional<APInt> Fold = FoldValueWithUndef(
+ Opcode, RawBits1[I], UndefElts1[I], RawBits2[I], UndefElts2[I]);
if (!Fold)
break;
RawBits.push_back(*Fold);
@@ -5823,7 +6044,7 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true))
if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef())
return getUNDEF(VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::FADD:
case ISD::FMUL:
@@ -5882,11 +6103,11 @@ void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1,
// Canonicalize:
// binop(const, nonconst) -> binop(nonconst, const)
- bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
- bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
- bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
- bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
- if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
+ SDNode *N1C = isConstantIntBuildVectorOrConstantInt(N1);
+ SDNode *N2C = isConstantIntBuildVectorOrConstantInt(N2);
+ SDNode *N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ SDNode *N2CFP = isConstantFPBuildVectorOrConstantFP(N2);
+ if ((N1C && !N2C) || (N1CFP && !N2CFP))
std::swap(N1, N2);
// Canonicalize:
@@ -5995,6 +6216,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT));
}
break;
+ case ISD::ABDS:
+ case ISD::ABDU:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
case ISD::SMIN:
case ISD::UMAX:
assert(VT.isInteger() && "This operator does not apply to FP types!");
@@ -6034,12 +6261,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
const APInt &ShiftImm = N2C->getAPIntValue();
return getVScale(DL, VT, MulImm << ShiftImm);
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SRA:
case ISD::SRL:
if (SDValue V = simplifyShift(N1, N2))
return V;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ROTL:
case ISD::ROTR:
assert(VT == N1.getValueType() &&
@@ -6329,7 +6556,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
return getConstant(0, DL, VT);
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ADD:
case ISD::SUB:
case ISD::UDIV:
@@ -6484,6 +6711,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Dest and insert subvector source types must match!");
assert(VT.isVector() && N2VT.isVector() &&
"Insert subvector VTs must be vectors!");
+ assert(VT.getVectorElementType() == N2VT.getVectorElementType() &&
+ "Insert subvector VTs must have the same element type!");
assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) &&
"Cannot insert a scalable vector into a fixed length vector!");
assert((VT.isScalableVector() != N2VT.isScalableVector() ||
@@ -6674,10 +6903,10 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset,
if (Offset.isScalable())
Index = getVScale(DL, Base.getValueType(),
- APInt(Base.getValueSizeInBits().getFixedSize(),
- Offset.getKnownMinSize()));
+ APInt(Base.getValueSizeInBits().getFixedValue(),
+ Offset.getKnownMinValue()));
else
- Index = getConstant(Offset.getFixedSize(), DL, VT);
+ Index = getConstant(Offset.getFixedValue(), DL, VT);
return getMemBasePlusOffset(Base, Index, DL, Flags);
}
@@ -6794,7 +7023,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Align NewAlign = DL.getABITypeAlign(Ty);
// Don't promote to an alignment that would require dynamic stack
- // realignment.
+  // realignment, which may conflict with optimizations such as tail call
+ // optimization.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->hasStackRealignment(MF))
while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
@@ -6986,6 +7216,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(C);
Align NewAlign = DL.getABITypeAlign(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+  // realignment, which may conflict with optimizations such as tail call
+ // optimization.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign.previous();
+
if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
@@ -7094,7 +7333,17 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(Ty);
+ const DataLayout &DL = DAG.getDataLayout();
+ Align NewAlign = DL.getABITypeAlign(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+  // realignment, which may conflict with optimizations such as tail call
+ // optimization.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign.previous();
+
if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
@@ -7562,6 +7811,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
Opcode == ISD::ATOMIC_LOAD_FSUB ||
Opcode == ISD::ATOMIC_LOAD_FMAX ||
Opcode == ISD::ATOMIC_LOAD_FMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UINC_WRAP ||
+ Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP ||
Opcode == ISD::ATOMIC_SWAP ||
Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
@@ -8816,12 +9067,12 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
return CondC->isZero() ? F : T;
- // TODO: This should simplify VSELECT with constant condition using something
- // like this (but check boolean contents to be complete?):
- // if (ISD::isBuildVectorAllOnes(Cond.getNode()))
- // return T;
- // if (ISD::isBuildVectorAllZeros(Cond.getNode()))
- // return F;
+  // TODO: This should also simplify VSELECT with a non-zero constant
+  // condition using something like this (but check boolean contents to be
+  // complete?):
+ if (ConstantSDNode *CondC = isConstOrConstSplat(Cond, /*AllowUndefs*/ false,
+ /*AllowTruncation*/ true))
+ if (CondC->isZero())
+ return F;
// select ?, T, T --> T
if (T == F)
@@ -9177,7 +9428,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
SDVTList VTList) {
- return getNode(Opcode, DL, VTList, None);
+ return getNode(Opcode, DL, VTList, std::nullopt);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
@@ -9444,7 +9695,7 @@ void SelectionDAG::setNodeMemRefs(MachineSDNode *N,
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT) {
SDVTList VTs = getVTList(VT);
- return SelectNodeTo(N, MachineOpc, VTs, None);
+ return SelectNodeTo(N, MachineOpc, VTs, std::nullopt);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
@@ -9485,7 +9736,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2) {
SDVTList VTs = getVTList(VT1, VT2);
- return SelectNodeTo(N, MachineOpc, VTs, None);
+ return SelectNodeTo(N, MachineOpc, VTs, std::nullopt);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
@@ -9652,7 +9903,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT) {
SDVTList VTs = getVTList(VT);
- return getMachineNode(Opcode, dl, VTs, None);
+ return getMachineNode(Opcode, dl, VTs, std::nullopt);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
@@ -10091,6 +10342,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
// Preserve Debug Values
transferDbgValues(FromN, To);
+ // Preserve extra info.
+ copyExtraInfo(From, To.getNode());
// Iterate over all the existing uses of From. New uses will be added
// to the beginning of the use list, which we avoid visiting.
@@ -10152,6 +10405,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
assert((i < To->getNumValues()) && "Invalid To location");
transferDbgValues(SDValue(From, i), SDValue(To, i));
}
+ // Preserve extra info.
+ copyExtraInfo(From, To);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -10194,9 +10449,12 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
if (From->getNumValues() == 1) // Handle the simple case efficiently.
return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
- // Preserve Debug Info.
- for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) {
+ // Preserve Debug Info.
transferDbgValues(SDValue(From, i), To[i]);
+ // Preserve extra info.
+ copyExtraInfo(From, To[i].getNode());
+ }
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -10249,6 +10507,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
// Preserve Debug Info.
transferDbgValues(From, To);
+ copyExtraInfo(From.getNode(), To.getNode());
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -10402,6 +10661,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
return ReplaceAllUsesOfValueWith(*From, *To);
transferDbgValues(*From, *To);
+ copyExtraInfo(From->getNode(), To->getNode());
// Read up all the uses and make records of them. This helps
// processing new uses that are introduced during the
@@ -10636,6 +10896,67 @@ bool llvm::isMinSignedConstant(SDValue V) {
return Const != nullptr && Const->isMinSignedValue();
}
+bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V,
+ unsigned OperandNo) {
+  // NOTE: The cases should match IR's ConstantExpr::getBinOpIdentity().
+ // TODO: Target-specific opcodes could be added.
+ if (auto *Const = isConstOrConstSplat(V)) {
+ switch (Opcode) {
+ case ISD::ADD:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::UMAX:
+ return Const->isZero();
+ case ISD::MUL:
+ return Const->isOne();
+ case ISD::AND:
+ case ISD::UMIN:
+ return Const->isAllOnes();
+ case ISD::SMAX:
+ return Const->isMinSignedValue();
+ case ISD::SMIN:
+ return Const->isMaxSignedValue();
+ case ISD::SUB:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return OperandNo == 1 && Const->isZero();
+ case ISD::UDIV:
+ case ISD::SDIV:
+ return OperandNo == 1 && Const->isOne();
+ }
+ } else if (auto *ConstFP = isConstOrConstSplatFP(V)) {
+ switch (Opcode) {
+ case ISD::FADD:
+ return ConstFP->isZero() &&
+ (Flags.hasNoSignedZeros() || ConstFP->isNegative());
+ case ISD::FSUB:
+ return OperandNo == 1 && ConstFP->isZero() &&
+ (Flags.hasNoSignedZeros() || !ConstFP->isNegative());
+ case ISD::FMUL:
+ return ConstFP->isExactlyValue(1.0);
+ case ISD::FDIV:
+ return OperandNo == 1 && ConstFP->isExactlyValue(1.0);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF.
+ EVT VT = V.getValueType();
+ const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT);
+ APFloat NeutralAF = !Flags.hasNoNaNs()
+ ? APFloat::getQNaN(Semantics)
+ : !Flags.hasNoInfs()
+ ? APFloat::getInf(Semantics)
+ : APFloat::getLargest(Semantics);
+ if (Opcode == ISD::FMAXNUM)
+ NeutralAF.changeSign();
+
+ return ConstFP->isExactlyValue(NeutralAF);
+ }
+ }
+ }
+ return false;
+}
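A sketch of the intended use (X, DL, and VT are placeholders): a caller can
drop a binop whose constant operand is the identity for that opcode.

  SDValue Zero = DAG.getConstant(0, DL, VT);
  if (isNeutralConstant(ISD::OR, SDNodeFlags(), Zero, /*OperandNo=*/1))
    return X; // (or X, 0) --> X
  // For FADD, +0.0 only counts as neutral under nsz, since (-0.0) + (+0.0)
  // is +0.0; -0.0 is the unconditional identity, as encoded above.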
+
SDValue llvm::peekThroughBitcasts(SDValue V) {
while (V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
@@ -10666,6 +10987,16 @@ bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
bool AllowTruncation) {
+ EVT VT = N.getValueType();
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorMinNumElements())
+ : APInt(1, 1);
+ return isConstOrConstSplat(N, DemandedElts, AllowUndefs, AllowTruncation);
+}
+
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
+ bool AllowUndefs,
+ bool AllowTruncation) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
@@ -10683,34 +11014,11 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
- ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
-
- // BuildVectors can truncate their operands. Ignore that case here unless
- // AllowTruncation is set.
- if (CN && (UndefElements.none() || AllowUndefs)) {
- EVT CVT = CN->getValueType(0);
- EVT NSVT = N.getValueType().getScalarType();
- assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
- if (AllowTruncation || (CVT == NSVT))
- return CN;
- }
- }
-
- return nullptr;
-}
-
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
- bool AllowUndefs,
- bool AllowTruncation) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
- return CN;
-
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- BitVector UndefElements;
ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements);
// BuildVectors can truncate their operands. Ignore that case here unless
// AllowTruncation is set.
+ // TODO: Look into whether we should allow UndefElements in non-DemandedElts
if (CN && (UndefElements.none() || AllowUndefs)) {
EVT CVT = CN->getValueType(0);
EVT NSVT = N.getValueType().getScalarType();
@@ -10724,21 +11032,11 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
}
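A hedged example of the demanded-lane form (BV stands for a hypothetical
BUILD_VECTOR of <4 x i32>): only demanded lanes must agree, so a vector that
is 7 in lanes 0-2 can still report a splat of 7 when lane 3 is ignored.

  APInt Demanded = APInt::getLowBitsSet(4, 3); // lanes 0, 1, 2
  ConstantSDNode *C =
      isConstOrConstSplat(BV, Demanded, /*AllowUndefs=*/false,
                          /*AllowTruncation=*/false);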
ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
- if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
- return CN;
-
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- BitVector UndefElements;
- ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
- if (CN && (UndefElements.none() || AllowUndefs))
- return CN;
- }
-
- if (N.getOpcode() == ISD::SPLAT_VECTOR)
- if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0)))
- return CN;
-
- return nullptr;
+ EVT VT = N.getValueType();
+ APInt DemandedElts = VT.isFixedLengthVector()
+ ? APInt::getAllOnes(VT.getVectorMinNumElements())
+ : APInt(1, 1);
+ return isConstOrConstSplatFP(N, DemandedElts, AllowUndefs);
}
ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N,
@@ -10751,10 +11049,15 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N,
BitVector UndefElements;
ConstantFPSDNode *CN =
BV->getConstantFPSplatNode(DemandedElts, &UndefElements);
+ // TODO: Look into whether we should allow UndefElements in non-DemandedElts
if (CN && (UndefElements.none() || AllowUndefs))
return CN;
}
+ if (N.getOpcode() == ISD::SPLAT_VECTOR)
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0)))
+ return CN;
+
return nullptr;
}
@@ -10808,7 +11111,7 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
// the MMO. This is because the MMO might indicate only a possible address
// range instead of specifying the affected memory addresses precisely.
// TODO: Make MachineMemOperands aware of scalable vectors.
- assert(memvt.getStoreSize().getKnownMinSize() <= MMO->getSize() &&
+ assert(memvt.getStoreSize().getKnownMinValue() <= MMO->getSize() &&
"Size mismatch!");
}
@@ -11221,7 +11524,7 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
return false;
if (LD->getChain() != Base->getChain())
return false;
- EVT VT = LD->getValueType(0);
+ EVT VT = LD->getMemoryVT();
if (VT.getSizeInBits() / 8 != Bytes)
return false;
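(The switch to getMemoryVT() matters for extending loads: a zextload from i8
to i32 has value type i32 but touches only one byte of memory, so sizing the
consecutive-load check by getValueType(0) would overstate the access width.)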
@@ -11234,8 +11537,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
return false;
}
-/// InferPtrAlignment - Infer alignment of a load / store address. Return None
-/// if it cannot be inferred.
+/// InferPtrAlign - Infer alignment of a load / store address. Return
+/// std::nullopt if it cannot be inferred.
MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
const GlobalValue *GV = nullptr;
@@ -11267,7 +11570,7 @@ MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const {
return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset);
}
- return None;
+ return std::nullopt;
}
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
@@ -11690,30 +11993,30 @@ bool BuildVectorSDNode::isConstant() const {
return true;
}
-Optional<std::pair<APInt, APInt>>
+std::optional<std::pair<APInt, APInt>>
BuildVectorSDNode::isConstantSequence() const {
unsigned NumOps = getNumOperands();
if (NumOps < 2)
- return None;
+ return std::nullopt;
if (!isa<ConstantSDNode>(getOperand(0)) ||
!isa<ConstantSDNode>(getOperand(1)))
- return None;
+ return std::nullopt;
unsigned EltSize = getValueType(0).getScalarSizeInBits();
APInt Start = getConstantOperandAPInt(0).trunc(EltSize);
APInt Stride = getConstantOperandAPInt(1).trunc(EltSize) - Start;
if (Stride.isZero())
- return None;
+ return std::nullopt;
for (unsigned i = 2; i < NumOps; ++i) {
if (!isa<ConstantSDNode>(getOperand(i)))
- return None;
+ return std::nullopt;
APInt Val = getConstantOperandAPInt(i).trunc(EltSize);
if (Val != (Start + (Stride * i)))
- return None;
+ return std::nullopt;
}
return std::make_pair(Start, Stride);
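Worked examples (hypothetical nodes): build_vector <0, 3, 6, 9> is the
sequence Start = 0, Stride = 3, so the call returns the pair {0, 3};
<5, 5, 5, 5> fails because the stride is zero, and <0, 3, 7, 9> fails at
element 2 since 7 != 0 + 3 * 2.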
@@ -11847,6 +12150,18 @@ SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL,
}
}
+void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
+ assert(From && To && "Invalid SDNode; empty source SDValue?");
+ auto I = SDEI.find(From);
+ if (I == SDEI.end())
+ return;
+
+ // Use of operator[] on the DenseMap may cause an insertion, which invalidates
+ // the iterator, hence the need to make a copy to prevent a use-after-free.
+ NodeExtraInfo Copy = I->second;
+ SDEI[To] = std::move(Copy);
+}
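The pattern being avoided, in isolation (illustrative, not in-tree code):

  // auto I = SDEI.find(From);
  // SDEI[To] = I->second; // UB: operator[] may grow the DenseMap and
  //                       // invalidate I before I->second is read.

Copying the mapped value into a local first, as above, makes the read happen
before any possible rehash.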
+
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index d236433f6fb4..a432d8e92bca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -85,9 +85,9 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
}
bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
- const Optional<int64_t> NumBytes0,
+ const std::optional<int64_t> NumBytes0,
const SDNode *Op1,
- const Optional<int64_t> NumBytes1,
+ const std::optional<int64_t> NumBytes1,
const SelectionDAG &DAG, bool &IsAlias) {
BaseIndexOffset BasePtr0 = match(Op0, DAG);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ecdaef0442da..0bdfdac6a65f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -15,8 +15,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -27,10 +25,12 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -63,6 +63,7 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -99,6 +100,7 @@
#include <cstddef>
#include <iterator>
#include <limits>
+#include <optional>
#include <tuple>
using namespace llvm;
@@ -148,18 +150,18 @@ static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- Optional<CallingConv::ID> CC);
+ std::optional<CallingConv::ID> CC);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
- const SDValue *Parts, unsigned NumParts,
- MVT PartVT, EVT ValueVT, const Value *V,
- Optional<CallingConv::ID> CC = None,
- Optional<ISD::NodeType> AssertOp = None) {
+static SDValue
+getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
+ unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V,
+ std::optional<CallingConv::ID> CC = std::nullopt,
+ std::optional<ISD::NodeType> AssertOp = std::nullopt) {
// Let the target assemble the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
@@ -180,8 +182,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
unsigned ValueBits = ValueVT.getSizeInBits();
// Assemble the power of 2 part.
- unsigned RoundParts =
- (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
+ unsigned RoundParts = llvm::bit_floor(NumParts);
unsigned RoundBits = PartBits * RoundParts;
EVT RoundVT = RoundBits == ValueBits ?
ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
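The replacement is behavior-preserving (a quick check, assuming llvm/ADT/bit.h
is included and its constexpr llvm::bit_floor):

  static_assert(llvm::bit_floor(7u) == 4u, "non-power-of-two rounds down");
  static_assert(llvm::bit_floor(8u) == 8u, "powers of two are unchanged");

which matches (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts.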
@@ -320,7 +321,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- Optional<CallingConv::ID> CallConv) {
+ std::optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const bool IsABIRegCopy = CallConv.has_value();
@@ -397,10 +398,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- // If the element type of the source/dest vectors are the same, but the
- // parts vector has more elements than the value vector, then we have a
- // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
- // elements we want.
+ // If the parts vector has more elements than the value vector, then we
+ // have a vector widening case (e.g. <2 x float> -> <4 x float>).
+ // Extract the elements we want.
if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
assert((PartEVT.getVectorElementCount().getKnownMinValue() >
ValueVT.getVectorElementCount().getKnownMinValue()) &&
@@ -414,6 +414,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
DAG.getVectorIdxConstant(0, DL));
if (PartEVT == ValueVT)
return Val;
+ if (PartEVT.isInteger() && ValueVT.isFloatingPoint())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
}
// Promoted vector extract
@@ -447,12 +449,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Handle cases such as i8 -> <1 x i1>
EVT ValueSVT = ValueVT.getVectorElementType();
if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
- if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits())
+ unsigned ValueSize = ValueSVT.getSizeInBits();
+ if (ValueSize == PartEVT.getSizeInBits()) {
Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
- else
+ } else if (ValueSVT.isFloatingPoint() && PartEVT.isInteger()) {
+ // It's possible a scalar floating point type gets softened to integer and
+      // then promoted to a larger integer. If PartEVT is the larger integer,
+ // we need to truncate it and then bitcast to the FP type.
+ assert(ValueSVT.bitsLT(PartEVT) && "Unexpected types");
+ EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
+ Val = DAG.getBitcast(ValueSVT, Val);
+ } else {
Val = ValueVT.isFloatingPoint()
? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
: DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
+ }
}
return DAG.getBuildVector(ValueVT, DL, Val);
@@ -461,16 +473,16 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
- Optional<CallingConv::ID> CallConv);
+ std::optional<CallingConv::ID> CallConv);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
- SDValue *Parts, unsigned NumParts, MVT PartVT,
- const Value *V,
- Optional<CallingConv::ID> CallConv = None,
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+static void
+getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+ unsigned NumParts, MVT PartVT, const Value *V,
+ std::optional<CallingConv::ID> CallConv = std::nullopt,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
// Let the target split the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
@@ -555,7 +567,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
// The number of parts is not a power of 2. Split off and copy the tail.
assert(PartVT.isInteger() && ValueVT.isInteger() &&
"Do not know what to expand to!");
- unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundParts = llvm::bit_floor(NumParts);
unsigned RoundBits = RoundParts * PartBits;
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
@@ -643,7 +655,7 @@ static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
- Optional<CallingConv::ID> CallConv) {
+ std::optional<CallingConv::ID> CallConv) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -678,7 +690,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
} else {
- if (ValueVT.getVectorElementCount().isScalar()) {
+ // Don't extract an integer from a float vector. This can happen if the
+ // FP type gets softened to integer and then promoted. The promotion
+ // prevents it from being picked up by the earlier bitcast case.
+ if (ValueVT.getVectorElementCount().isScalar() &&
+ (!ValueVT.isFloatingPoint() || !PartVT.isInteger())) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getVectorIdxConstant(0, DL));
} else {
@@ -703,8 +719,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
- *DAG.getContext(), CallConv.value(), ValueVT, IntermediateVT,
- NumIntermediates, RegisterVT);
+ *DAG.getContext(), *CallConv, ValueVT, IntermediateVT, NumIntermediates,
+ RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
@@ -718,7 +734,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
"Mixing scalable and fixed vectors when copying in parts");
- Optional<ElementCount> DestEltCnt;
+ std::optional<ElementCount> DestEltCnt;
if (IntermediateVT.isVector())
DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
@@ -786,13 +802,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
}
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
- EVT valuevt, Optional<CallingConv::ID> CC)
+ EVT valuevt, std::optional<CallingConv::ID> CC)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
RegCount(1, regs.size()), CallConv(CC) {}
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
- Optional<CallingConv::ID> CC) {
+ std::optional<CallingConv::ID> CC) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
CallConv = CC;
@@ -800,11 +816,11 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
for (EVT ValueVT : ValueVTs) {
unsigned NumRegs =
isABIMangled()
- ? TLI.getNumRegistersForCallingConv(Context, CC.value(), ValueVT)
+ ? TLI.getNumRegistersForCallingConv(Context, *CC, ValueVT)
: TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT =
isABIMangled()
- ? TLI.getRegisterTypeForCallingConv(Context, CC.value(), ValueVT)
+ ? TLI.getRegisterTypeForCallingConv(Context, *CC, ValueVT)
: TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
@@ -831,10 +847,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
- MVT RegisterVT =
- isABIMangled() ? TLI.getRegisterTypeForCallingConv(
- *DAG.getContext(), CallConv.value(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT = isABIMangled()
+ ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(), *CallConv, RegVTs[Value])
+ : RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -914,10 +930,10 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumParts = RegCount[Value];
- MVT RegisterVT =
- isABIMangled() ? TLI.getRegisterTypeForCallingConv(
- *DAG.getContext(), CallConv.value(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT = isABIMangled()
+ ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(), *CallConv, RegVTs[Value])
+ : RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
@@ -1025,8 +1041,10 @@ RegsForValue::getRegsAndSizes() const {
}
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
+ AssumptionCache *ac,
const TargetLibraryInfo *li) {
AA = aa;
+ AC = ac;
GFI = gfi;
LibInfo = li;
Context = DAG.getContext();
@@ -1117,18 +1135,57 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
+ // Add SDDbgValue nodes for any var locs here. Do so before updating
+ // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}.
+ if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) {
+ for (auto It = FnVarLocs->locs_begin(&I), End = FnVarLocs->locs_end(&I);
+ It != End; ++It) {
+ auto *Var = FnVarLocs->getDILocalVariable(It->VariableID);
+ dropDanglingDebugInfo(Var, It->Expr);
+ if (!handleDebugValue(It->V, Var, It->Expr, It->DL, SDNodeOrder,
+ /*IsVariadic=*/false))
+ addDanglingDebugInfo(It, SDNodeOrder);
+ }
+ }
+
// Increase the SDNodeOrder if dealing with a non-debug instruction.
if (!isa<DbgInfoIntrinsic>(I))
++SDNodeOrder;
CurInst = &I;
+ // Set inserted listener only if required.
+ bool NodeInserted = false;
+ std::unique_ptr<SelectionDAG::DAGNodeInsertedListener> InsertedListener;
+ MDNode *PCSectionsMD = I.getMetadata(LLVMContext::MD_pcsections);
+ if (PCSectionsMD) {
+ InsertedListener = std::make_unique<SelectionDAG::DAGNodeInsertedListener>(
+ DAG, [&](SDNode *) { NodeInserted = true; });
+ }
+
visit(I.getOpcode(), I);
if (!I.isTerminator() && !HasTailCall &&
!isa<GCStatepointInst>(I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
+ // Handle metadata.
+ if (PCSectionsMD) {
+ auto It = NodeMap.find(&I);
+ if (It != NodeMap.end()) {
+ DAG.addPCSections(It->second.getNode(), PCSectionsMD);
+ } else if (NodeInserted) {
+ // This should not happen; if it does, don't let it go unnoticed so we can
+ // fix it. Relevant visit*() function is probably missing a setValue().
errs() << "warning: losing !pcsections metadata ["
+ << I.getModule()->getName() << "]\n";
+ LLVM_DEBUG(I.dump());
+ assert(false);
+ }
+ }
+
CurInst = nullptr;
}
@@ -1148,8 +1205,13 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
}
}
+void SelectionDAGBuilder::addDanglingDebugInfo(const VarLocInfo *VarLoc,
+ unsigned Order) {
+ DanglingDebugInfoMap[VarLoc->V].emplace_back(VarLoc, Order);
+}
+
void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
- DebugLoc DL, unsigned Order) {
+ unsigned Order) {
// We treat variadic dbg_values differently at this stage.
if (DI->hasArgList()) {
// For variadic dbg_values we will now insert an undef.
@@ -1161,7 +1223,7 @@ void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
}
SDDbgValue *SDV = DAG.getDbgValueList(
DI->getVariable(), DI->getExpression(), Locs, {},
- /*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true);
+ /*IsIndirect=*/false, DI->getDebugLoc(), Order, /*IsVariadic=*/true);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
} else {
// TODO: Dangling debug info will eventually either be resolved or produce
@@ -1171,18 +1233,18 @@ void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
assert(DI->getNumVariableLocationOps() == 1 &&
"DbgValueInst without an ArgList should have a single location "
"operand.");
- DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order);
+ DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, Order);
}
}
void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
const DIExpression *Expr) {
auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
- const DbgValueInst *DI = DDI.getDI();
- DIVariable *DanglingVariable = DI->getVariable();
- DIExpression *DanglingExpr = DI->getExpression();
+ DIVariable *DanglingVariable = DDI.getVariable(DAG.getFunctionVarLocs());
+ DIExpression *DanglingExpr = DDI.getExpression();
if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
- LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << printDDI(DDI)
+ << "\n");
return true;
}
return false;
@@ -1211,15 +1273,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
for (auto &DDI : DDIV) {
- const DbgValueInst *DI = DDI.getDI();
- assert(!DI->hasArgList() && "Not implemented for variadic dbg_values");
- assert(DI && "Ill-formed DanglingDebugInfo");
- DebugLoc dl = DDI.getdl();
+ DebugLoc DL = DDI.getDebugLoc();
unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
- DILocalVariable *Variable = DI->getVariable();
- DIExpression *Expr = DI->getExpression();
- assert(Variable->isValidLocationForIntrinsic(dl) &&
+ DILocalVariable *Variable = DDI.getVariable(DAG.getFunctionVarLocs());
+ DIExpression *Expr = DDI.getExpression();
+ assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
SDDbgValue *SDV;
if (Val.getNode()) {
@@ -1229,10 +1288,10 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
// in the first place we should not be more successful here). Unless we
// have some test case that prove this to be correct we should avoid
// calling EmitFuncArgumentDbgValue here.
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl,
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL,
FuncArgumentDbgValueKind::Value, Val)) {
- LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
- << DbgSDNodeOrder << "] for:\n " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Resolve dangling debug info for " << printDDI(DDI)
+ << "\n");
LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
// Increase the SDNodeOrder for the DbgValue here to make sure it is
// inserted after the definition of Val when emitting the instructions
@@ -1241,17 +1300,17 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
<< "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
<< ValSDNodeOrder << "\n");
- SDV = getDbgValue(Val, Variable, Expr, dl,
+ SDV = getDbgValue(Val, Variable, Expr, DL,
std::max(DbgSDNodeOrder, ValSDNodeOrder));
DAG.AddDbgValue(SDV, false);
} else
- LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
- << "in EmitFuncArgumentDbgValue\n");
+ LLVM_DEBUG(dbgs() << "Resolved dangling debug info for "
+ << printDDI(DDI) << " in EmitFuncArgumentDbgValue\n");
} else {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
- auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(DDI) << "\n");
+ auto Undef = UndefValue::get(V->getType());
auto SDV =
- DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
+ DAG.getConstantDbgValue(Variable, Expr, Undef, DL, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, false);
}
}
@@ -1263,21 +1322,19 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// state of `handleDebugValue`, we need to know specifically which values were
// invalid, so that we attempt to salvage only those values when processing
// a DIArgList.
- assert(!DDI.getDI()->hasArgList() &&
- "Not implemented for variadic dbg_values");
- Value *V = DDI.getDI()->getValue(0);
- DILocalVariable *Var = DDI.getDI()->getVariable();
- DIExpression *Expr = DDI.getDI()->getExpression();
- DebugLoc DL = DDI.getdl();
- DebugLoc InstDL = DDI.getDI()->getDebugLoc();
+ Value *V = DDI.getVariableLocationOp(0);
+ Value *OrigV = V;
+ DILocalVariable *Var = DDI.getVariable(DAG.getFunctionVarLocs());
+ DIExpression *Expr = DDI.getExpression();
+ DebugLoc DL = DDI.getDebugLoc();
unsigned SDOrder = DDI.getSDNodeOrder();
+
// Currently we consider only dbg.value intrinsics -- we tell the salvager
// that DW_OP_stack_value is desired.
- assert(isa<DbgValueInst>(DDI.getDI()));
bool StackValue = true;
// Can this Value be encoded without any further work?
- if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false))
+ if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false))
return;
// Attempt to salvage back through as many instructions as possible. Bail if
@@ -1306,10 +1363,10 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// Some kind of simplification occurred: check whether the operand of the
// salvaged debug expression can be encoded in this DAG.
- if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder,
- /*IsVariadic=*/false)) {
- LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
- << *DDI.getDI() << "\nBy stripping back to:\n " << *V);
+ if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false)) {
+ LLVM_DEBUG(
+ dbgs() << "Salvaged debug location info for:\n " << *Var << "\n"
+ << *OrigV << "\nBy stripping back to:\n " << *V << "\n");
return;
}
}
@@ -1317,21 +1374,18 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// This was the final opportunity to salvage this debug information, and it
// couldn't be done. Place an undef DBG_VALUE at this location to terminate
// any earlier variable location.
- auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
- auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
+ assert(OrigV && "V shouldn't be null");
+ auto *Undef = UndefValue::get(OrigV->getType());
+ auto *SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
-
- LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << *DDI.getDI()
- << "\n");
- LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
+ LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << printDDI(DDI)
<< "\n");
}
bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
DILocalVariable *Var,
- DIExpression *Expr, DebugLoc dl,
- DebugLoc InstDL, unsigned Order,
- bool IsVariadic) {
+ DIExpression *Expr, DebugLoc DbgLoc,
+ unsigned Order, bool IsVariadic) {
if (Values.empty())
return true;
SmallVector<SDDbgOperand> LocationOps;
@@ -1344,6 +1398,13 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
continue;
}
+ // Look through IntToPtr constants.
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::IntToPtr) {
+ LocationOps.emplace_back(SDDbgOperand::fromConst(CE->getOperand(0)));
+ continue;
+ }
+
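(An input this now handles, sketched: for
   call void @llvm.dbg.value(metadata ptr inttoptr (i64 1234 to ptr), ...)
the location operand becomes the integer constant 1234 instead of the value
being dropped, since the DAG has no node for the constant pointer itself.)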
// If the Value is a frame index, we can create a FrameIndex debug value
// without relying on the DAG at all.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
@@ -1362,7 +1423,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
if (N.getNode()) {
// Only emit func arg dbg value for non-variadic dbg.values for now.
if (!IsVariadic &&
- EmitFuncArgumentDbgValue(V, Var, Expr, dl,
+ EmitFuncArgumentDbgValue(V, Var, Expr, DbgLoc,
FuncArgumentDbgValueKind::Value, N))
return true;
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
@@ -1391,7 +1452,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
// they're parameters, and they are parameters of the current function. We
// need to let them dangle until they get an SDNode.
bool IsParamOfFunc =
- isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt();
+ isa<Argument>(V) && Var->isParameter() && !DbgLoc.getInlinedAt();
if (IsParamOfFunc)
return false;
@@ -1404,7 +1465,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
// If this is a PHI node, it may be split up into several MI PHI nodes
// (in FunctionLoweringInfo::set).
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
- V->getType(), None);
+ V->getType(), std::nullopt);
if (RFV.occupiesMultipleRegs()) {
// FIXME: We could potentially support variadic dbg_values here.
if (IsVariadic)
@@ -1429,7 +1490,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
if (!FragmentExpr)
continue;
SDDbgValue *SDV = DAG.getVRegDbgValue(
- Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder);
+ Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
Offset += RegisterSize;
}
@@ -1446,9 +1507,9 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
// We have created a SDDbgOperand for each Value in Values.
// Should use Order instead of SDNodeOrder?
assert(!LocationOps.empty());
- SDDbgValue *SDV =
- DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
- /*IsIndirect=*/false, dl, SDNodeOrder, IsVariadic);
+ SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
+ /*IsIndirect=*/false, DbgLoc,
+ SDNodeOrder, IsVariadic);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
return true;
}
@@ -1472,7 +1533,7 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), InReg, Ty,
- None); // This is not an ABI copy.
+ std::nullopt); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
V);
@@ -1647,12 +1708,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
else
Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
- if (isa<ScalableVectorType>(VecTy))
- return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);
-
- SmallVector<SDValue, 16> Ops;
- Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
- return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ return NodeMap[V] = DAG.getSplat(VT, getCurSDLoc(), Op);
}
llvm_unreachable("Unknown vector constant");
@@ -1664,16 +1720,16 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
- return DAG.getFrameIndex(SI->second,
- TLI.getFrameIndexTy(DAG.getDataLayout()));
+ return DAG.getFrameIndex(
+ SI->second, TLI.getValueType(DAG.getDataLayout(), AI->getType()));
}
// If this is an instruction which fast-isel has deferred, select it now.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
- unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+ Register InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
- Inst->getType(), None);
+ Inst->getType(), std::nullopt);
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
@@ -2082,7 +2138,7 @@ void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
// Already exported?
if (FuncInfo.isExportedInst(V)) return;
- unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ Register Reg = FuncInfo.InitializeRegForValue(V);
CopyValueToVirtualRegister(V, Reg);
}
@@ -2536,6 +2592,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
+ setValue(CurInst, BrCond);
+
// Insert the false branch. Do this even if it's a fall-through branch;
// this makes it easier to do DAG optimizations which require inverting
// the branch condition.
@@ -2746,7 +2804,8 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
CallOptions.setDiscardResult(true);
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
- None, CallOptions, getCurSDLoc()).second;
+ std::nullopt, CallOptions, getCurSDLoc())
+ .second;
// On PS4/PS5, the "return address" must still be within the calling
// function, even if it's at the very end, so emit an explicit TRAP here.
// Passing 'true' for doesNotReturn above won't generate the trap for us.
@@ -2835,7 +2894,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MVT VT = BB.RegVT;
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
SDValue Cmp;
- unsigned PopCount = countPopulation(B.Mask);
+ unsigned PopCount = llvm::popcount(B.Mask);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
@@ -3000,7 +3059,8 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
BasicBlock *Dest = I.getIndirectDest(i);
MachineBasicBlock *Target = FuncInfo.MBBMap[Dest];
Target->setIsInlineAsmBrIndirectTarget();
- Target->setHasAddressTaken();
+ Target->setMachineBlockAddressTaken();
+ Target->setLabelMustBeEmitted();
// Don't add duplicate machine successors.
if (Dests.insert(Dest).second)
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
@@ -3279,7 +3339,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
Flags.copyFMF(*FPOp);
// Min/max matching is only viable if all output VTs are the same.
- if (is_splat(ValueVTs)) {
+ if (all_equal(ValueVTs)) {
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
@@ -3339,7 +3399,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
break;
case SPF_NABS:
Negate = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case SPF_ABS:
IsUnaryAbs = true;
Opc = ISD::ABS;
@@ -3375,8 +3435,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
Values[i] =
DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i));
if (Negate)
- Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
- Values[i]);
+ Values[i] = DAG.getNegative(Values[i], dl, VT);
}
} else {
for (unsigned i = 0; i != NumValues; ++i) {
@@ -3537,7 +3596,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InVal = getValue(I.getOperand(1));
- SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
+ SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
@@ -3547,7 +3606,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) {
void SelectionDAGBuilder::visitExtractElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
- SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
+ SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
@@ -3716,7 +3775,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
}
// Calculate new mask.
- SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
+ SmallVector<int, 8> MappedOps(Mask);
for (int &Idx : MappedOps) {
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
@@ -3856,10 +3915,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
- if (VectorElementCount.isScalable())
- N = DAG.getSplatVector(VT, dl, N);
- else
- N = DAG.getSplatBuildVector(VT, dl, N);
+ N = DAG.getSplat(VT, dl, N);
}
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
@@ -3891,7 +3947,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
// We intentionally mask away the high bits here; ElementSize may not
// fit in IdxTy.
- APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
+ APInt ElementMul(IdxSize, ElementSize.getKnownMinValue());
bool ElementScalable = ElementSize.isScalable();
// If this is a scalar constant or a splat vector of constants,
@@ -3931,10 +3987,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (!IdxN.getValueType().isVector() && IsVectorGEP) {
EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
VectorElementCount);
- if (VectorElementCount.isScalable())
- IdxN = DAG.getSplatVector(VT, dl, IdxN);
- else
- IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
+ IdxN = DAG.getSplat(VT, dl, IdxN);
}
// If the index is smaller or larger than intptr_t, truncate or extend
@@ -4000,7 +4053,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
SDValue AllocSize = getValue(I.getArraySize());
- EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
+ EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), I.getAddressSpace());
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
@@ -4019,7 +4072,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
if (*Alignment <= StackAlign)
- Alignment = None;
+ Alignment = std::nullopt;
const uint64_t StackAlignMask = StackAlign.value() - 1U;
// Round the size of the allocation up to the stack alignment size
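The rounding described here is the usual align-up to a power of two; a minimal
scalar sketch of the arithmetic applied with StackAlignMask (names assumed):

    #include <cstdint>

    // Round Size up to the next multiple of StackAlign (a power of two).
    static uint64_t roundUpToStackAlign(uint64_t Size, uint64_t StackAlign) {
      const uint64_t Mask = StackAlign - 1; // e.g. 16 -> 0xF
      return (Size + Mask) & ~Mask;
    }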
@@ -4068,11 +4121,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Ptr = getValue(SV);
Type *Ty = I.getType();
- Align Alignment = I.getAlign();
-
- AAMDNodes AAInfo = I.getAAMetadata();
- const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
-
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
@@ -4080,9 +4128,12 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (NumValues == 0)
return;
+ Align Alignment = I.getAlign();
+ AAMDNodes AAInfo = I.getAAMetadata();
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
bool isVolatile = I.isVolatile();
MachineMemOperand::Flags MMOFlags =
- TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+ TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
SDValue Root;
bool ConstantMemory = false;
@@ -4100,11 +4151,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Root = DAG.getEntryNode();
ConstantMemory = true;
MMOFlags |= MachineMemOperand::MOInvariant;
-
- // FIXME: pointsToConstantMemory probably does not imply dereferenceable,
- // but the previous usage implied it did. Probably should check
- // isDereferenceableAndAlignedPointer.
- MMOFlags |= MachineMemOperand::MODereferenceable;
} else {
// Do not serialize non-volatile loads against each other.
Root = DAG.getRoot();
@@ -4135,7 +4181,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (ChainI == MaxParallelChains) {
assert(PendingLoads.empty() && "PendingLoads must be serialized first");
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(Chains.data(), ChainI));
+ ArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
@@ -4157,7 +4203,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (!ConstantMemory) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(Chains.data(), ChainI));
+ ArrayRef(Chains.data(), ChainI));
if (isVolatile)
DAG.setRoot(Chain);
else
@@ -4278,7 +4324,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
// See visitLoad comments.
if (ChainI == MaxParallelChains) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(Chains.data(), ChainI));
+ ArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
@@ -4294,7 +4340,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
}
SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(Chains.data(), ChainI));
+ ArrayRef(Chains.data(), ChainI));
+ setValue(&I, StoreNode);
DAG.setRoot(StoreNode);
}
@@ -4316,7 +4363,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Mask = I.getArgOperand(2);
- Alignment = None;
+ Alignment = std::nullopt;
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
@@ -4400,17 +4447,17 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
return false;
+ uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
+
+ // Target may not support the required addressing mode.
+ if (ScaleVal != 1 &&
+ !TLI.isLegalScaleForGatherScatter(ScaleVal, ElemSize))
+ return false;
+
Base = SDB->getValue(BasePtr);
Index = SDB->getValue(IndexVal);
IndexType = ISD::SIGNED_SCALED;
- // MGATHER/MSCATTER are only required to support scaling by one or by the
- // element size. Other scales may be produced using target-specific DAG
- // combines.
- uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
- if (ScaleVal != ElemSize && ScaleVal != 1)
- return false;
-
Scale =
DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
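For reference, a hedged sketch of the target hook used above, assuming its
default keeps the behavior of the removed check (scaling by one or by the
element size, with other scales left to target-specific DAG combines):

    #include <cstdint>

    // Sketch of the assumed TargetLowering default; targets may override it
    // to accept more gather/scatter addressing modes.
    static bool isLegalScaleForGatherScatterSketch(uint64_t ScaleVal,
                                                   uint64_t ElemSize) {
      return ScaleVal == 1 || ScaleVal == ElemSize;
    }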
@@ -4478,7 +4525,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
MaybeAlign &Alignment) {
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
Ptr = I.getArgOperand(0);
- Alignment = None;
+ Alignment = std::nullopt;
Mask = I.getArgOperand(1);
Src0 = I.getArgOperand(2);
};
@@ -4624,6 +4671,12 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
+ case AtomicRMWInst::UIncWrap:
+ NT = ISD::ATOMIC_LOAD_UINC_WRAP;
+ break;
+ case AtomicRMWInst::UDecWrap:
+ NT = ISD::ATOMIC_LOAD_UDEC_WRAP;
+ break;
}
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
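For the two new wrapping operations, a hedged scalar model of their semantics
as described for atomicrmw uinc_wrap/udec_wrap in the LangRef:

    #include <cstdint>

    // uinc_wrap: increment, wrapping to 0 once the bound Val is reached.
    static uint32_t uincWrap(uint32_t Old, uint32_t Val) {
      return (Old >= Val) ? 0 : Old + 1;
    }

    // udec_wrap: decrement, wrapping back up to Val when Old is 0 or
    // already above Val.
    static uint32_t udecWrap(uint32_t Old, uint32_t Val) {
      return (Old == 0 || Old > Val) ? Val : Old - 1;
    }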
@@ -4659,7 +4712,9 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
TLI.getFenceOperandTy(DAG.getDataLayout()));
Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
- DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
+ SDValue N = DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
+ setValue(&I, N);
+ DAG.setRoot(N);
}
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
@@ -4677,7 +4732,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
- auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+ auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
@@ -4726,7 +4781,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
- if (I.getAlign().value() < MemVT.getSizeInBits() / 8)
+ if (!TLI.supportsUnalignedAtomics() &&
+ I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
@@ -4745,13 +4801,14 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for stores to prevent future divergence.
SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
+ setValue(&I, S);
DAG.setRoot(S);
return;
}
SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
Ptr, Val, MMO);
-
+ setValue(&I, OutChain);
DAG.setRoot(OutChain);
}
@@ -4826,13 +4883,21 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Create the node.
SDValue Result;
+ // In some cases, custom collection of operands from CallInst I may be needed.
+ TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
if (IsTgtIntrinsic) {
// This is a target intrinsic that touches memory
- Result =
- DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
- MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align, Info.flags, Info.size,
- I.getAAMetadata());
+ //
+ // TODO: We currently just fall back to address space 0 if getTgtMemIntrinsic
+ // didn't yield anything useful.
+ MachinePointerInfo MPI;
+ if (Info.ptrVal)
+ MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
+ else if (Info.fallbackAddressSpace)
+ MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
+ Info.memVT, MPI, Info.align, Info.flags,
+ Info.size, I.getAAMetadata());
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -5515,17 +5580,20 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
// For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
// pointing at the VReg, which will be patched up later.
auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
- auto MIB = BuildMI(MF, DL, Inst);
- MIB.addReg(Reg);
- MIB.addImm(0);
- MIB.addMetadata(Variable);
+ SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
+ /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true)});
+
auto *NewDIExpr = FragExpr;
// We don't have an "Indirect" field in DBG_INSTR_REF; fold that into
// the DIExpression.
if (Indirect)
NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore);
- MIB.addMetadata(NewDIExpr);
- return MIB;
+ SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
+ NewDIExpr = DIExpression::prependOpcodes(NewDIExpr, Ops);
+ return BuildMI(MF, DL, Inst, false, MOs, Variable, NewDIExpr);
} else {
// Create a completely standard DBG_VALUE.
auto &Inst = TII->get(TargetOpcode::DBG_VALUE);
@@ -5599,7 +5667,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
bool IsIndirect = false;
- Optional<MachineOperand> Op;
+ std::optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
if (FI != std::numeric_limits<int>::max())
@@ -5680,7 +5748,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
- V->getType(), None);
+ V->getType(), std::nullopt);
if (RFV.occupiesMultipleRegs()) {
splitMultiRegDbgValue(RFV.getRegsAndSizes());
return true;
@@ -6026,6 +6094,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
+ // Debug intrinsics are handled separately in assignment tracking mode.
+ if (isAssignmentTrackingEnabled(*I.getFunction()->getParent()))
+ return;
// Assume dbg.addr and dbg.declare can not currently use DIArgList, i.e.
// they are non-variadic.
const auto &DI = cast<DbgVariableIntrinsic>(I);
@@ -6125,7 +6196,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.AddDbgLabel(SDV);
return;
}
+ case Intrinsic::dbg_assign: {
+ // Debug intrinsics are handled separately in assignment tracking mode.
+ assert(isAssignmentTrackingEnabled(*I.getFunction()->getParent()) &&
+ "expected assignment tracking to be enabled");
+ return;
+ }
case Intrinsic::dbg_value: {
+ // Debug intrinsics are handled separately in assignment tracking mode.
+ if (isAssignmentTrackingEnabled(*I.getFunction()->getParent()))
+ return;
const DbgValueInst &DI = cast<DbgValueInst>(I);
assert(DI.getVariable() && "Missing variable");
@@ -6140,9 +6220,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
bool IsVariadic = DI.hasArgList();
- if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(),
+ if (!handleDebugValue(Values, Variable, Expression, DI.getDebugLoc(),
SDNodeOrder, IsVariadic))
- addDanglingDebugInfo(&DI, dl, SDNodeOrder);
+ addDanglingDebugInfo(&DI, SDNodeOrder);
return;
}
@@ -6358,7 +6438,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Get the last argument (the metadata) and convert it to an integer in
// the call.
Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
- Optional<RoundingMode> RoundMode =
+ std::optional<RoundingMode> RoundMode =
convertStrToRoundingMode(cast<MDString>(MD)->getString());
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -6750,8 +6830,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
- case Intrinsic::flt_rounds:
- Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
+ case Intrinsic::get_rounding:
+ Res = DAG.getNode(ISD::GET_ROUNDING, sdl, {MVT::i32, MVT::Other}, getRoot());
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
@@ -6846,7 +6926,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Result = DAG.getMemIntrinsicNode(
ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
- /* align */ None, Flags);
+ /* align */ std::nullopt, Flags);
// Chain the prefetch in parallel with any pending loads, to stay out of
// the way of later optimizations.
@@ -7178,6 +7258,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
return;
}
+ case Intrinsic::threadlocal_address: {
+ setValue(&I, getValue(I.getOperand(0)));
+ return;
+ }
case Intrinsic::get_active_lane_mask: {
EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Index = getValue(I.getOperand(0));
@@ -7191,14 +7275,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue TripCount = getValue(I.getOperand(1));
auto VecTy = CCVT.changeVectorElementType(ElementVT);
- SDValue VectorIndex, VectorTripCount;
- if (VecTy.isScalableVector()) {
- VectorIndex = DAG.getSplatVector(VecTy, sdl, Index);
- VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount);
- } else {
- VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index);
- VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount);
- }
+ SDValue VectorIndex = DAG.getSplat(VecTy, sdl, Index);
+ SDValue VectorTripCount = DAG.getSplat(VecTy, sdl, TripCount);
SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
SDValue VectorInduction = DAG.getNode(
ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep);
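A hedged scalar model of this lowering: lane i of the mask is active iff
Index + i is unsigned-less-than TripCount, and the saturating add keeps an
overflowing Index + i from wrapping around and spuriously re-activating lanes:

    #include <cstdint>

    // Scalar model (sketch) of one lane of the resulting mask.
    static bool laneActive(uint64_t Index, uint64_t Lane, uint64_t TripCount) {
      uint64_t Sum = Index + Lane;
      if (Sum < Index)        // UADDSAT: clamp on unsigned overflow
        Sum = UINT64_MAX;
      return Sum < TripCount; // unsigned compare against the splatted count
    }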
@@ -7253,11 +7331,6 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
const ConstrainedFPIntrinsic &FPI) {
SDLoc sdl = getCurSDLoc();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
- ValueVTs.push_back(MVT::Other); // Out chain
-
// We do not need to serialize constrained FP intrinsics against
// each other or against (nonvolatile) loads, so they can be
// chained like loads.
@@ -7286,7 +7359,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
// The only reason why ebIgnore nodes still need to be chained is that
// they might depend on the current rounding mode, and therefore must
// not be moved across instructions that may change that mode.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case fp::ExceptionBehavior::ebMayTrap:
// These must not be moved across calls or instructions that may change
// floating-point exception masks.
@@ -7301,7 +7374,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
}
};
- SDVTList VTs = DAG.getVTList(ValueVTs);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType());
+ SDVTList VTs = DAG.getVTList(VT, MVT::Other);
fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
SDNodeFlags Flags;
@@ -7323,8 +7398,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
Opcode = ISD::STRICT_FMA;
// Break fmuladd into fmul and fadd.
if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
- !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(),
- ValueVTs[0])) {
+ !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
Opers.pop_back();
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
pushOutChain(Mul, EB);
@@ -7365,8 +7439,18 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
}
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
- Optional<unsigned> ResOPC;
+ std::optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
+ case Intrinsic::vp_ctlz: {
+ bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne();
+ ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ;
+ break;
+ }
+ case Intrinsic::vp_cttz: {
+ bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne();
+ ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ;
+ break;
+ }
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
case Intrinsic::VPID: \
ResOPC = ISD::VPSD; \
@@ -7388,118 +7472,133 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
return *ResOPC;
}
-void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues,
- bool IsGather) {
+void SelectionDAGBuilder::visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues) {
SDLoc DL = getCurSDLoc();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
SDValue LD;
bool AddToChain = true;
- if (!IsGather) {
- // Do not serialize variable-length loads of constant memory with
- // anything.
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT);
- MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
- AddToChain = !AA || !AA->pointsToConstantMemory(ML);
- SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
- LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
- MMO, false /*IsExpanding */);
- } else {
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT.getScalarType());
- unsigned AS =
- PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
- SDValue Base, Index, Scale;
- ISD::MemIndexType IndexType;
- bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
- this, VPIntrin.getParent(),
- VT.getScalarStoreSize());
- if (!UniformBase) {
- Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
- Index = getValue(PtrOperand);
- IndexType = ISD::SIGNED_SCALED;
- Scale =
- DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
- }
- EVT IdxVT = Index.getValueType();
- EVT EltTy = IdxVT.getVectorElementType();
- if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
- EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
- Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
- }
- LD = DAG.getGatherVP(
- DAG.getVTList(VT, MVT::Other), VT, DL,
- {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
- IndexType);
- }
+ // Do not serialize variable-length loads of constant memory with
+ // anything.
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+ AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
+ MMO, false /*IsExpanding */);
if (AddToChain)
PendingLoads.push_back(LD.getValue(1));
setValue(&VPIntrin, LD);
}
-void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues,
- bool IsScatter) {
+void SelectionDAGBuilder::visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ SDValue LD;
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent(),
+ VT.getScalarStoreSize());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ LD = DAG.getGatherVP(
+ DAG.getVTList(VT, MVT::Other), VT, DL,
+ {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
+ IndexType);
+ PendingLoads.push_back(LD.getValue(1));
+ setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPStore(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues) {
+ SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
SDValue ST;
- if (!IsScatter) {
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT);
- SDValue Ptr = OpValues[1];
- SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
- MemoryLocation::UnknownSize, *Alignment, AAInfo);
- ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
- OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
- /* IsTruncating */ false, /*IsCompressing*/ false);
- } else {
- if (!Alignment)
- Alignment = DAG.getEVTAlign(VT.getScalarType());
- unsigned AS =
- PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOStore,
- MemoryLocation::UnknownSize, *Alignment, AAInfo);
- SDValue Base, Index, Scale;
- ISD::MemIndexType IndexType;
- bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
- this, VPIntrin.getParent(),
- VT.getScalarStoreSize());
- if (!UniformBase) {
- Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
- Index = getValue(PtrOperand);
- IndexType = ISD::SIGNED_SCALED;
- Scale =
- DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
- }
- EVT IdxVT = Index.getValueType();
- EVT EltTy = IdxVT.getVectorElementType();
- if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
- EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
- Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
- }
- ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
- {getMemoryRoot(), OpValues[0], Base, Index, Scale,
- OpValues[2], OpValues[3]},
- MMO, IndexType);
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+ SDValue Ptr = OpValues[1];
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
+ OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
+ /* IsTruncating */ false, /*IsCompressing*/ false);
+ DAG.setRoot(ST);
+ setValue(&VPIntrin, ST);
+}
+
+void SelectionDAGBuilder::visitVPScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(1);
+ EVT VT = OpValues[0].getValueType();
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ SDValue ST;
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent(),
+ VT.getScalarStoreSize());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale =
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
}
+ ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
+ {getMemoryRoot(), OpValues[0], Base, Index, Scale,
+ OpValues[2], OpValues[3]},
+ MMO, IndexType);
DAG.setRoot(ST);
setValue(&VPIntrin, ST);
}
@@ -7626,20 +7725,78 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
break;
}
case ISD::VP_LOAD:
+ visitVPLoad(VPIntrin, ValueVTs[0], OpValues);
+ break;
case ISD::VP_GATHER:
- visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
- Opcode == ISD::VP_GATHER);
+ visitVPGather(VPIntrin, ValueVTs[0], OpValues);
break;
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues);
break;
case ISD::VP_STORE:
+ visitVPStore(VPIntrin, OpValues);
+ break;
case ISD::VP_SCATTER:
- visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
+ visitVPScatter(VPIntrin, OpValues);
break;
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
visitVPStridedStore(VPIntrin, OpValues);
break;
+ case ISD::VP_FMULADD: {
+ assert(OpValues.size() == 5 && "Unexpected number of operands");
+ SDNodeFlags SDFlags;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin))
+ SDFlags.copyFMF(*FPMO);
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), ValueVTs[0])) {
+ setValue(&VPIntrin, DAG.getNode(ISD::VP_FMA, DL, VTs, OpValues, SDFlags));
+ } else {
+ SDValue Mul = DAG.getNode(
+ ISD::VP_FMUL, DL, VTs,
+ {OpValues[0], OpValues[1], OpValues[3], OpValues[4]}, SDFlags);
+ SDValue Add =
+ DAG.getNode(ISD::VP_FADD, DL, VTs,
+ {Mul, OpValues[2], OpValues[3], OpValues[4]}, SDFlags);
+ setValue(&VPIntrin, Add);
+ }
+ break;
+ }
+ case ISD::VP_INTTOPTR: {
+ SDValue N = OpValues[0];
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), VPIntrin.getType());
+ EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), VPIntrin.getType());
+ N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1],
+ OpValues[2]);
+ N = DAG.getVPZExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1],
+ OpValues[2]);
+ setValue(&VPIntrin, N);
+ break;
+ }
+ case ISD::VP_PTRTOINT: {
+ SDValue N = OpValues[0];
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ VPIntrin.getType());
+ EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(),
+ VPIntrin.getOperand(0)->getType());
+ N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1],
+ OpValues[2]);
+ N = DAG.getVPZExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1],
+ OpValues[2]);
+ setValue(&VPIntrin, N);
+ break;
+ }
+ case ISD::VP_ABS:
+ case ISD::VP_CTLZ:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ:
+ case ISD::VP_CTTZ_ZERO_UNDEF: {
+ // Pop the is_zero_poison operand for vp.ctlz/vp.cttz, or the
+ // is_int_min_poison operand for vp.abs.
+ OpValues.pop_back();
+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ setValue(&VPIntrin, Result);
+ break;
+ }
}
}
@@ -7820,6 +7977,17 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
if (TLI.supportSwiftError() && SwiftErrorVal)
isTailCall = false;
+ ConstantInt *CFIType = nullptr;
+ if (CB.isIndirectCall()) {
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi)) {
+ if (!TLI.supportKCFIBundles())
+ report_fatal_error(
+ "Target doesn't support calls with kcfi operand bundles.");
+ CFIType = cast<ConstantInt>(Bundle->Inputs[0]);
+ assert(CFIType->getType()->isIntegerTy(32) && "Invalid CFI type");
+ }
+ }
+
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
@@ -7827,7 +7995,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
.setTailCall(isTailCall)
.setConvergent(CB.isConvergent())
.setIsPreallocated(
- CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
+ CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
+ .setCFIType(CFIType);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
@@ -8200,9 +8369,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
- if (Function *F = I.getCalledFunction()) {
- diagnoseDontCall(I);
+ diagnoseDontCall(I);
+ if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
unsigned IID = F->getIntrinsicID();
@@ -8371,7 +8540,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
- LLVMContext::OB_clang_arc_attachedcall}) &&
+ LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledOperand());
@@ -8499,7 +8668,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
-static llvm::Optional<unsigned>
+static std::optional<unsigned>
getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &RefOpInfo) {
@@ -8513,7 +8682,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No work to do for memory/address operands.
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
OpInfo.ConstraintType == TargetLowering::C_Address)
- return None;
+ return std::nullopt;
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
@@ -8523,7 +8692,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
&TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
// RC is unset only on failure. Return immediately.
if (!RC)
- return None;
+ return std::nullopt;
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
@@ -8568,7 +8737,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No need to allocate a matching input constraint since the constraint it's
// matching to has already been allocated.
if (OpInfo.isMatchingInputConstraint())
- return None;
+ return std::nullopt;
EVT ValueVT = OpInfo.ConstraintVT;
if (OpInfo.ConstraintVT == MVT::Other)
@@ -8606,7 +8775,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- return None;
+ return std::nullopt;
}
static unsigned
@@ -8665,6 +8834,23 @@ public:
} // end anonymous namespace
+static bool isFunction(SDValue Op) {
+ if (Op && Op.getOpcode() == ISD::GlobalAddress) {
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ auto Fn = dyn_cast_or_null<Function>(GA->getGlobal());
+
+ // A normal "call dllimport func" instruction (non-inline-asm) forces
+ // indirect access via its call opcode, and the asm printer usually
+ // prints an indirect-symbol marker (e.g. "*") based on that opcode.
+ // Inline asm cannot do this today (in fact, this is similar to a
+ // "Data Access" action), so we ignore dllimport functions here.
+ if (Fn && !Fn->hasDLLImportStorageClass())
+ return true;
+ }
+ }
+ return false;
+}
+
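A hedged illustration of the dllimport exclusion above (Windows-style
declaration shown only for illustration; isFunction is the helper just
defined):

    __declspec(dllimport) void imported_fn(void); // reached via an import slot
    void local_fn(void);                          // plain Function symbol

    // isFunction(<GlobalAddress of local_fn>)    -> true
    // isFunction(<GlobalAddress of imported_fn>) -> false (dllimport ignored)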
/// visitInlineAsm - Handle a call to an InlineAsm object.
void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
const BasicBlock *EHPadBB) {
@@ -8713,7 +8899,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// memory and is nonvolatile.
SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
- bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow();
+ bool EmitEHLabels = isa<InvokeInst>(Call);
if (EmitEHLabels) {
assert(EHPadBB && "InvokeInst must have an EHPadBB");
}
@@ -8731,8 +8917,15 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
}
+ int OpNo = -1;
+ SmallVector<StringRef> AsmStrs;
+ IA->collectAsmStrs(AsmStrs);
+
// Second pass over the constraints: compute which constraint option to use.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ if (OpInfo.hasArg() || OpInfo.Type == InlineAsm::isOutput)
+ OpNo++;
+
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
// other is floating point, or their sizes are different, flag it as an
@@ -8750,6 +8943,32 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
OpInfo.ConstraintType == TargetLowering::C_Address)
continue;
+ // In the Linux PIC model, there are four cases of value/label addressing:
+ //
+ // 1: Function calls or label jumps inside the module.
+ // 2: Data access (such as global or static variables) inside the module.
+ // 3: Function calls or label jumps outside the module.
+ // 4: Data access (such as global variables) outside the module.
+ //
+ // Because the current LLVM inline asm architecture is designed not to
+ // "recognize" the asm code, it is hard to treat memory addressing
+ // differently for the same value/address used in different instructions.
+ // For example, in the PIC model a function call may go through the PLT or
+ // be directly PC-relative, while a lea/mov of a function address may go
+ // through the GOT.
+ //
+ // Here we try to "recognize" function calls for cases 1 and 3 in inline
+ // asm and adjust their constraints accordingly (see the illustration
+ // after this block).
+ //
+ // TODO: Since current inline asm discourages jumping to labels outside
+ // the module, we do not handle jumps to function labels for now, but this
+ // should be enhanced (especially in the PIC model) if meaningful
+ // requirements arise.
+ if (OpInfo.isIndirect && isFunction(OpInfo.CallOperand) &&
+ TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) &&
+ TM.getCodeModel() != CodeModel::Large) {
+ OpInfo.isIndirect = false;
+ OpInfo.ConstraintType = TargetLowering::C_Address;
+ }
+
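A hedged illustration of the cases above under typical x86-64 PIC lowering
(exact relocations are target- and linker-dependent):

    extern void callee(void);

    void do_call(void) {
      callee();        // cases 1/3: usually emitted as "call callee@PLT"
    }

    void (*take_address(void))(void) {
      return &callee;  // data-style access (cases 2/4): usually loaded via
                       // "mov callee@GOTPCREL(%rip), %rax"
    }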
// If this is a memory input, and if the operand is not indirect, do what we
// need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
@@ -8800,7 +9019,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
if (RegError) {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- const char *RegName = TRI.getName(RegError.value());
+ const char *RegName = TRI.getName(*RegError);
emitInlineAsmError(Call, "register '" + Twine(RegName) +
"' allocated for constraint '" +
Twine(OpInfo.ConstraintCode) +
@@ -8959,8 +9178,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
break;
}
- if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
- OpInfo.ConstraintType == TargetLowering::C_Address) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
assert((OpInfo.isIndirect ||
OpInfo.ConstraintType != TargetLowering::C_Memory) &&
"Operand must be indirect to be a mem!");
@@ -8983,6 +9201,37 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
break;
}
+ if (OpInfo.ConstraintType == TargetLowering::C_Address) {
+ assert(InOperandVal.getValueType() ==
+ TLI.getPointerTy(DAG.getDataLayout()) &&
+ "Address operands expect pointer values");
+
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+
+ SDValue AsmOp = InOperandVal;
+ if (isFunction(InOperandVal)) {
+ auto *GA = cast<GlobalAddressSDNode>(InOperandVal);
+ ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Func, 1);
+ AsmOp = DAG.getTargetGlobalAddress(GA->getGlobal(), getCurSDLoc(),
+ InOperandVal.getValueType(),
+ GA->getOffset());
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+
+ AsmNodeOperands.push_back(
+ DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32));
+
+ AsmNodeOperands.push_back(AsmOp);
+ break;
+ }
+
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
OpInfo.ConstraintType == TargetLowering::C_Register) &&
"Unknown constraint type!");
@@ -9047,7 +9296,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
if (StructType *StructResult = dyn_cast<StructType>(CallResultType))
ResultTypes = StructResult->elements();
else if (!CallResultType->isVoidTy())
- ResultTypes = makeArrayRef(CallResultType);
+ ResultTypes = ArrayRef(CallResultType);
auto CurResultType = ResultTypes.begin();
auto handleRegAssign = [&](SDValue V) {
@@ -9327,12 +9576,11 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
- SDValue Chain, InFlag, Callee, NullPtr;
+ SDValue Chain, InFlag, Callee;
SmallVector<SDValue, 32> Ops;
SDLoc DL = getCurSDLoc();
Callee = getValue(CI.getCalledOperand());
- NullPtr = DAG.getIntPtrConstant(0, DL, true);
// The stackmap intrinsic only records the live variables (the arguments
// passed to it) and emits NOPS (if requested). Unlike the patchpoint
@@ -9375,7 +9623,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops);
InFlag = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, DL);
// Stackmaps don't generate values, so nothing goes into the NodeMap.
@@ -9898,7 +10146,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ISD::OutputArg MyFlags(
Flags, Parts[j].getValueType().getSimpleVT(), VT,
i < CLI.NumFixedArgs, i,
- j * Parts[j].getValueType().getStoreSize().getKnownMinSize());
+ j * Parts[j].getValueType().getStoreSize().getKnownMinValue());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
@@ -9986,7 +10234,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
} else {
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
- Optional<ISD::NodeType> AssertOp;
+ std::optional<ISD::NodeType> AssertOp;
if (CLI.RetSExt)
AssertOp = ISD::AssertSext;
else if (CLI.RetZExt)
@@ -10064,7 +10312,7 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
// notional registers required by the type.
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
- None); // This is not an ABI copy.
+ std::nullopt); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
if (ExtendType == ISD::ANY_EXTEND) {
@@ -10425,8 +10673,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// For scalable vectors, use the minimum size; individual targets
// are responsible for handling scalable vector arguments and
// return values.
- ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
- ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize());
+ ISD::InputArg MyFlags(
+ Flags, RegisterVT, VT, isArgValueUsed, ArgNo,
+ PartBase + i * RegisterVT.getStoreSize().getKnownMinValue());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
@@ -10439,7 +10688,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (NeedsRegBlock && Value == NumValues - 1)
Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
- PartBase += VT.getStoreSize().getKnownMinSize();
+ PartBase += VT.getStoreSize().getKnownMinValue();
}
}
@@ -10477,7 +10726,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
- Optional<ISD::NodeType> AssertOp = None;
+ std::optional<ISD::NodeType> AssertOp;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
nullptr, F.getCallingConv(), AssertOp);
@@ -10539,7 +10788,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// we do generate a copy for it that can be used on return from the
// function.
if (ArgHasUses || isSwiftErrorArg) {
- Optional<ISD::NodeType> AssertOp;
+ std::optional<ISD::NodeType> AssertOp;
if (Arg.hasAttribute(Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (Arg.hasAttribute(Attribute::ZExt))
@@ -10562,7 +10811,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
- SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
+ SDValue Res = DAG.getMergeValues(ArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
SDB->setValue(&Arg, Res);
@@ -10645,14 +10894,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
// Check PHI nodes in successors that expect a value to be available from this
// block.
- for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
- const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ for (const BasicBlock *SuccBB : successors(LLVMBB->getTerminator())) {
if (!isa<PHINode>(SuccBB->begin())) continue;
MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
@@ -10678,7 +10925,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
unsigned Reg;
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
- if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+ if (const auto *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
RegOut = FuncInfo.CreateRegs(C);
@@ -10709,10 +10956,9 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
- for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
- EVT VT = ValueVTs[vti];
- unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
- for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ for (EVT VT : ValueVTs) {
+ const unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+ for (unsigned i = 0; i != NumRegisters; ++i)
FuncInfo.PHINodesToUpdate.push_back(
std::make_pair(&*MBBI++, Reg + i));
Reg += NumRegisters;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index d1915fd4e7ae..bf2111013461 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -33,6 +34,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <optional>
#include <utility>
#include <vector>
@@ -42,6 +44,7 @@ class AAResults;
class AllocaInst;
class AtomicCmpXchgInst;
class AtomicRMWInst;
+class AssumptionCache;
class BasicBlock;
class BranchInst;
class CallInst;
@@ -103,20 +106,68 @@ class SelectionDAGBuilder {
/// Helper type for DanglingDebugInfoMap.
class DanglingDebugInfo {
- const DbgValueInst* DI = nullptr;
- DebugLoc dl;
+ using DbgValTy = const DbgValueInst *;
+ using VarLocTy = const VarLocInfo *;
+ PointerUnion<DbgValTy, VarLocTy> Info;
unsigned SDNodeOrder = 0;
public:
DanglingDebugInfo() = default;
- DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO)
- : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {}
-
- const DbgValueInst* getDI() { return DI; }
- DebugLoc getdl() { return dl; }
- unsigned getSDNodeOrder() { return SDNodeOrder; }
+ DanglingDebugInfo(const DbgValueInst *DI, unsigned SDNO)
+ : Info(DI), SDNodeOrder(SDNO) {}
+ DanglingDebugInfo(const VarLocInfo *VarLoc, unsigned SDNO)
+ : Info(VarLoc), SDNodeOrder(SDNO) {}
+
+ DILocalVariable *getVariable(const FunctionVarLocs *Locs) const {
+ if (Info.is<VarLocTy>())
+ return Locs->getDILocalVariable(Info.get<VarLocTy>()->VariableID);
+ return Info.get<DbgValTy>()->getVariable();
+ }
+ DIExpression *getExpression() const {
+ if (Info.is<VarLocTy>())
+ return Info.get<VarLocTy>()->Expr;
+ return Info.get<DbgValTy>()->getExpression();
+ }
+ Value *getVariableLocationOp(unsigned Idx) const {
+ assert(Idx == 0 && "Dangling variadic debug values not supported yet");
+ if (Info.is<VarLocTy>())
+ return Info.get<VarLocTy>()->V;
+ return Info.get<DbgValTy>()->getVariableLocationOp(Idx);
+ }
+ DebugLoc getDebugLoc() const {
+ if (Info.is<VarLocTy>())
+ return Info.get<VarLocTy>()->DL;
+ return Info.get<DbgValTy>()->getDebugLoc();
+ }
+ unsigned getSDNodeOrder() const { return SDNodeOrder; }
+
+ /// Helper for printing DanglingDebugInfo. This hoop-jumping is to
+ /// accommodate the fact that an argument is required for getVariable.
+ /// Call SelectionDAGBuilder::printDDI instead of using this directly.
+ struct Print {
+ Print(const DanglingDebugInfo &DDI, const FunctionVarLocs *VarLocs)
+ : DDI(DDI), VarLocs(VarLocs) {}
+ const DanglingDebugInfo &DDI;
+ const FunctionVarLocs *VarLocs;
+ friend raw_ostream &operator<<(raw_ostream &OS,
+ const DanglingDebugInfo::Print &P) {
+ OS << "DDI(var=" << *P.DDI.getVariable(P.VarLocs)
+ << ", val= " << *P.DDI.getVariableLocationOp(0)
+ << ", expr=" << *P.DDI.getExpression()
+ << ", order=" << P.DDI.getSDNodeOrder()
+ << ", loc=" << P.DDI.getDebugLoc() << ")";
+ return OS;
+ }
+ };
};
+ /// Returns an object that defines `raw_ostream &operator<<` for printing.
+ /// Usage example:
/// errs() << printDDI(MyDanglingInfo) << " is dangling\n";
+ DanglingDebugInfo::Print printDDI(const DanglingDebugInfo &DDI) {
+ return DanglingDebugInfo::Print(DDI, DAG.getFunctionVarLocs());
+ }
+
/// Helper type for DanglingDebugInfoMap.
typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector;
@@ -191,6 +242,7 @@ public:
SelectionDAG &DAG;
AAResults *AA = nullptr;
+ AssumptionCache *AC = nullptr;
const TargetLibraryInfo *LibInfo;
class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
@@ -244,7 +296,7 @@ public:
SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
SwiftError(swifterror) {}
- void init(GCFunctionInfo *gfi, AAResults *AA,
+ void init(GCFunctionInfo *gfi, AAResults *AA, AssumptionCache *AC,
const TargetLibraryInfo *li);
/// Clear out the current SelectionDAG and the associated state and prepare
@@ -296,8 +348,8 @@ public:
SDValue getCopyFromRegs(const Value *V, Type *Ty);
/// Register a dbg_value which relies on a Value which we have not yet seen.
- void addDanglingDebugInfo(const DbgValueInst *DI, DebugLoc DL,
- unsigned Order);
+ void addDanglingDebugInfo(const DbgValueInst *DI, unsigned Order);
+ void addDanglingDebugInfo(const VarLocInfo *VarLoc, unsigned Order);
/// If we have dangling debug info that describes \p Variable, or an
/// overlapping part of variable considering the \p Expr, then this method
@@ -317,8 +369,8 @@ public:
/// For a given list of Values, attempt to create and record a SDDbgValue in
/// the SelectionDAG.
bool handleDebugValue(ArrayRef<const Value *> Values, DILocalVariable *Var,
- DIExpression *Expr, DebugLoc CurDL, DebugLoc InstDL,
- unsigned Order, bool IsVariadic);
+ DIExpression *Expr, DebugLoc DbgLoc, unsigned Order,
+ bool IsVariadic);
/// Evict any dangling debug information, attempting to salvage it first.
void resolveOrClearDbgInfo();
@@ -567,10 +619,14 @@ private:
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
- void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues, bool IsGather);
- void visitVPStoreScatter(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues, bool IsScatter);
+ void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues);
+ void visitVPStore(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues);
+ void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues);
+ void visitVPScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues);
void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT,
SmallVectorImpl<SDValue> &OpValues);
void visitVPStridedStore(const VPIntrinsic &VPIntrin,
@@ -680,14 +736,14 @@ struct RegsForValue {
/// Records if this value needs to be treated in an ABI-dependent manner,
/// different from normal type legalization.
- Optional<CallingConv::ID> CallConv;
+ std::optional<CallingConv::ID> CallConv;
RegsForValue() = default;
RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
- Optional<CallingConv::ID> CC = None);
+ std::optional<CallingConv::ID> CC = std::nullopt);
RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
- Optional<CallingConv::ID> CC);
+ std::optional<CallingConv::ID> CC);
bool isABIMangled() const { return CallConv.has_value(); }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 6ba01664e756..fe4261291fc5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -79,6 +79,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::DELETED_NODE: return "<<Deleted Node!>>";
#endif
case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
case ISD::ATOMIC_FENCE: return "AtomicFence";
case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess";
@@ -95,6 +96,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd";
+ case ISD::ATOMIC_LOAD_UINC_WRAP:
+ return "AtomicLoadUIncWrap";
+ case ISD::ATOMIC_LOAD_UDEC_WRAP:
+ return "AtomicLoadUDecWrap";
case ISD::ATOMIC_LOAD: return "AtomicLoad";
case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
@@ -422,7 +427,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
return "call_alloc";
// Floating point environment manipulation
- case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::GET_ROUNDING: return "get_rounding";
case ISD::SET_ROUNDING: return "set_rounding";
// Bit manipulation
@@ -1059,6 +1064,9 @@ LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const {
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
printr(OS, G);
+ // Under VerboseDAGDumping, divergence is always printed.
+ if (isDivergent() && !VerboseDAGDumping)
+ OS << " # D:1";
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
if (i) OS << ", "; else OS << " ";
printOperand(OS, G, getOperand(i));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index d46a0a23cca3..902f46115557 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
@@ -31,6 +32,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -61,6 +63,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -101,6 +104,7 @@
#include <iterator>
#include <limits>
#include <memory>
+#include <optional>
#include <string>
#include <utility>
#include <vector>
@@ -309,7 +313,8 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL)
+SelectionDAGISel::SelectionDAGISel(char &ID, TargetMachine &tm,
+ CodeGenOpt::Level OL)
: MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()),
SwiftError(new SwiftErrorValueTracking()),
CurDAG(new SelectionDAG(tm, OL)),
@@ -336,9 +341,14 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ // AssignmentTrackingAnalysis only runs if assignment tracking is enabled for
+ // the module.
+ AU.addRequired<AssignmentTrackingAnalysis>();
+ AU.addPreserved<AssignmentTrackingAnalysis>();
if (OptLevel != CodeGenOpt::None)
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
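Under the legacy pass manager, the addRequired<> declarations above are what make a later getAnalysis<> call legal, and addPreserved<> tells the manager the result survives this pass. A compressed sketch of that contract for a hypothetical pass (names illustrative, not a real target):

    // Hypothetical MachineFunctionPass showing the declare/fetch pairing.
    void MyISel::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<AssumptionCacheTracker>();      // schedule it before this pass
      AU.addPreserved<AssignmentTrackingAnalysis>(); // this pass won't invalidate it
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool MyISel::runOnMachineFunction(MachineFunction &MF) {
      // Legal only because of the addRequired<> above.
      AssumptionCache &AC =
          getAnalysis<AssumptionCacheTracker>().getAssumptionCache(MF.getFunction());
      (void)AC; // ...consult AC while lowering...
      return true;
    }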
@@ -382,8 +392,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Decide what flavour of variable location debug-info will be used, before
// we change the optimisation level.
- UseInstrRefDebugInfo = mf.useDebugInstrRef();
- CurDAG->useInstrRefDebugInfo(UseInstrRefDebugInfo);
+ bool InstrRef = mf.shouldUseDebugInstrRef();
+ mf.setUseDebugInstrRef(InstrRef);
// Reset the target options before resetting the optimization
// level below.
@@ -403,15 +413,21 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(mf.getFunction());
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
BlockFrequencyInfo *BFI = nullptr;
if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None)
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
+ FunctionVarLocs const *FnVarLocs = nullptr;
+ if (isAssignmentTrackingEnabled(*Fn.getParent()))
+ FnVarLocs = getAnalysis<AssignmentTrackingAnalysis>().getResults();
+
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
CurDAG->init(*MF, *ORE, this, LibInfo,
- getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI);
+ getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI,
+ FnVarLocs);
FuncInfo->set(Fn, *MF, CurDAG);
SwiftError->setFunction(*MF);
@@ -430,7 +446,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
else
AA = nullptr;
- SDB->init(GFI, AA, LibInfo);
+ SDB->init(GFI, AA, AC, LibInfo);
MF->setHasInlineAsm(false);
@@ -488,7 +504,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
To = J->second;
}
// Make sure the new register has a sufficiently constrained register class.
- if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
+ if (From.isVirtual() && To.isVirtual())
MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
@@ -530,15 +546,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LiveInMap.insert(LI);
// Insert DBG_VALUE instructions for function arguments to the entry block.
- bool InstrRef = MF->useDebugInstrRef();
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1];
assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
"Function parameters should not be described by DBG_VALUE_LIST.");
- bool hasFI = MI->getOperand(0).isFI();
+ bool hasFI = MI->getDebugOperand(0).isFI();
Register Reg =
- hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
- if (Register::isPhysicalRegister(Reg))
+ hasFI ? TRI.getFrameRegister(*MF) : MI->getDebugOperand(0).getReg();
+ if (Reg.isPhysical())
EntryMBB->insert(EntryMBB->begin(), MI);
else {
MachineInstr *Def = RegInfo->getVRegDef(Reg);
@@ -567,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DebugLoc DL = MI->getDebugLoc();
bool IsIndirect = MI->isIndirectDebugValue();
if (IsIndirect)
- assert(MI->getOperand(1).getImm() == 0 &&
+ assert(MI->getDebugOffset().getImm() == 0 &&
"DBG_VALUE with nonzero offset");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
@@ -608,7 +623,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// For debug-info, in instruction referencing mode, we need to perform some
// post-isel maintenance.
- if (UseInstrRefDebugInfo)
+ if (MF->useDebugInstrRef())
MF->finalizeDebugInstrRefs();
// Determine if there are any calls in this machine function.
@@ -997,6 +1012,15 @@ public:
if (ISelPosition == SelectionDAG::allnodes_iterator(N))
++ISelPosition;
}
+
+ /// NodeInserted - Handle new nodes inserted into the graph: propagate
+ /// metadata from root nodes that also applies to new nodes, in case the root
+ /// is later deleted.
+ void NodeInserted(SDNode *N) override {
+ SDNode *CurNode = &*ISelPosition;
+ if (MDNode *MD = DAG.getPCSections(CurNode))
+ DAG.addPCSections(N, MD);
+ }
};
} // end anonymous namespace
@@ -1073,7 +1097,7 @@ void SelectionDAGISel::DoInstructionSelection() {
++ISelPosition;
// Make sure that ISelPosition gets properly updated when nodes are deleted
- // in calls made from this function.
+ // in calls made from this function. New nodes inherit relevant metadata.
ISelUpdater ISU(*CurDAG, ISelPosition);
// The AllNodes list is now topological-sorted. Visit the
@@ -1181,11 +1205,11 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
// In case of single catch (...), we don't emit LSDA, so we don't need
// this information.
bool IsSingleCatchAllClause =
- CPI->getNumArgOperands() == 1 &&
+ CPI->arg_size() == 1 &&
cast<Constant>(CPI->getArgOperand(0))->isNullValue();
// catchpads for longjmp use an empty type list, e.g. catchpad within %0 []
// and they don't need LSDA info
- bool IsCatchLongjmp = CPI->getNumArgOperands() == 0;
+ bool IsCatchLongjmp = CPI->arg_size() == 0;
if (!IsSingleCatchAllClause && !IsCatchLongjmp) {
// Create a mapping from landing pad label to landing pad index.
bool IntrFound = false;
@@ -1279,56 +1303,75 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo.isExportedInst(I); // Exported instrs must be computed.
}
+static void processDbgDeclare(FunctionLoweringInfo &FuncInfo,
+ const Value *Address, DIExpression *Expr,
+ DILocalVariable *Var, DebugLoc DbgLoc) {
+ MachineFunction *MF = FuncInfo.MF;
+ const DataLayout &DL = MF->getDataLayout();
+
+ assert(Var && "Missing variable");
+ assert(DbgLoc && "Missing location");
+
+ // Look through casts and constant offset GEPs. These mostly come from
+ // inalloca.
+ APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0);
+ Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ // Check if the variable is a static alloca or a byval or inalloca
+ // argument passed in memory. If it is not, then we will ignore this
+ // intrinsic and handle this during isel like dbg.value.
+ int FI = std::numeric_limits<int>::max();
+ if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ FI = SI->second;
+ } else if (const auto *Arg = dyn_cast<Argument>(Address))
+ FI = FuncInfo.getArgumentFrameIndex(Arg);
+
+ if (FI == std::numeric_limits<int>::max())
+ return;
+
+ if (Offset.getBoolValue())
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
+ Offset.getZExtValue());
+
+ LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var
+ << ", Expr=" << *Expr << ", FI=" << FI
+ << ", DbgLoc=" << DbgLoc << "\n");
+ MF->setVariableDbgInfo(Var, Expr, FI, DbgLoc);
+}
+
/// Collect llvm.dbg.declare information. This is done after argument lowering
/// in case the declarations refer to arguments.
static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
- MachineFunction *MF = FuncInfo.MF;
- const DataLayout &DL = MF->getDataLayout();
for (const BasicBlock &BB : *FuncInfo.Fn) {
for (const Instruction &I : BB) {
- const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I);
- if (!DI)
- continue;
-
- assert(DI->getVariable() && "Missing variable");
- assert(DI->getDebugLoc() && "Missing location");
- const Value *Address = DI->getAddress();
- if (!Address) {
- LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI
- << " (bad address)\n");
- continue;
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) {
+ Value *Address = DI->getAddress();
+ if (!Address) {
+ LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI
+ << " (bad address)\n");
+ continue;
+ }
+ processDbgDeclare(FuncInfo, Address, DI->getExpression(),
+ DI->getVariable(), DI->getDebugLoc());
}
-
- // Look through casts and constant offset GEPs. These mostly come from
- // inalloca.
- APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0);
- Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
-
- // Check if the variable is a static alloca or a byval or inalloca
- // argument passed in memory. If it is not, then we will ignore this
- // intrinsic and handle this during isel like dbg.value.
- int FI = std::numeric_limits<int>::max();
- if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
- auto SI = FuncInfo.StaticAllocaMap.find(AI);
- if (SI != FuncInfo.StaticAllocaMap.end())
- FI = SI->second;
- } else if (const auto *Arg = dyn_cast<Argument>(Address))
- FI = FuncInfo.getArgumentFrameIndex(Arg);
-
- if (FI == std::numeric_limits<int>::max())
- continue;
-
- DIExpression *Expr = DI->getExpression();
- if (Offset.getBoolValue())
- Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
- Offset.getZExtValue());
- LLVM_DEBUG(dbgs() << "processDbgDeclares: setVariableDbgInfo FI=" << FI
- << ", " << *DI << "\n");
- MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc());
}
}
}
+/// Collect single location variable information generated with assignment
+/// tracking. This is done after argument lowering in case the declarations
+/// refer to arguments.
+static void processSingleLocVars(FunctionLoweringInfo &FuncInfo,
+ FunctionVarLocs const *FnVarLocs) {
+ for (auto It = FnVarLocs->single_locs_begin(),
+ End = FnVarLocs->single_locs_end();
+ It != End; ++It)
+ processDbgDeclare(FuncInfo, It->V, It->Expr,
+ FnVarLocs->getDILocalVariable(It->VariableID), It->DL);
+}
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastISelFailed = false;
// Initialize the Fast-ISel state, if needed.
@@ -1336,8 +1379,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (TM.Options.EnableFastISel) {
LLVM_DEBUG(dbgs() << "Enabling fast-isel\n");
FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
- if (FastIS)
- FastIS->useInstrRefDebugInfo(UseInstrRefDebugInfo);
}
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
@@ -1391,7 +1432,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (FastIS && Inserted)
FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
- processDbgDeclares(*FuncInfo);
+ if (isAssignmentTrackingEnabled(*Fn.getParent())) {
+ assert(CurDAG->getFunctionVarLocs() &&
+ "expected AssignmentTrackingAnalysis pass results");
+ processSingleLocVars(*FuncInfo, CurDAG->getFunctionVarLocs());
+ } else {
+ processDbgDeclares(*FuncInfo);
+ }
// Iterate over all basic blocks in the function.
StackProtector &SP = getAnalysis<StackProtector>();
@@ -1957,7 +2004,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
while (i != e) {
unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
- if (!InlineAsm::isMemKind(Flags)) {
+ if (!InlineAsm::isMemKind(Flags) && !InlineAsm::isFuncKind(Flags)) {
// Just skip over this operand, copying the operands verbatim.
Ops.insert(Ops.end(), InOps.begin()+i,
InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
@@ -1986,7 +2033,9 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
// Add this to the output node.
unsigned NewFlags =
- InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+ InlineAsm::isMemKind(Flags)
+ ? InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size())
+ : InlineAsm::getFlagWord(InlineAsm::Kind_Func, SelOps.size());
NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID);
Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32));
llvm::append_range(Ops, SelOps);
@@ -2193,6 +2242,11 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) {
N->getOperand(0));
}
+void SelectionDAGISel::Select_MEMBARRIER(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::MEMBARRIER, N->getValueType(0),
+ N->getOperand(0));
+}
+
void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops,
SDValue OpVal, SDLoc DL) {
SDNode *OpNode = OpVal.getNode();
@@ -2249,7 +2303,7 @@ void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) {
// Cache arguments that will be moved to the end in the target node.
SDValue Chain = *It++;
- Optional<SDValue> Glue;
+ std::optional<SDValue> Glue;
if (It->getValueType() == MVT::Glue)
Glue = *It++;
SDValue RegMask = *It++;
@@ -2287,7 +2341,7 @@ void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) {
Ops.push_back(RegMask);
Ops.push_back(Chain);
if (Glue.has_value())
- Ops.push_back(Glue.value());
+ Ops.push_back(*Glue);
SDVTList NodeTys = N->getVTList();
CurDAG->SelectNodeTo(N, TargetOpcode::PATCHPOINT, NodeTys, Ops);
@@ -2847,6 +2901,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::ARITH_FENCE:
Select_ARITH_FENCE(NodeToMatch);
return;
+ case ISD::MEMBARRIER:
+ Select_MEMBARRIER(NodeToMatch);
+ return;
case ISD::STACKMAP:
Select_STACKMAP(NodeToMatch);
return;
@@ -3764,5 +3821,3 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
}
report_fatal_error(Twine(Msg.str()));
}
-
-char SelectionDAGISel::ID = 0;
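Taken together, the new char &ID constructor parameter at the top of this file and the removal of char SelectionDAGISel::ID here move pass identity out of the shared base class: each target's ISel pass now supplies its own ID, so two targets' passes no longer alias one registration. A sketch of what a target provides after this change (hypothetical target name):

    class MyTargetDAGToDAGISel : public SelectionDAGISel {
    public:
      static char ID; // pass identity now lives in the derived pass

      explicit MyTargetDAGToDAGISel(MyTargetMachine &TM, CodeGenOpt::Level OL)
          : SelectionDAGISel(ID, TM, OL) {}
    };

    char MyTargetDAGToDAGISel::ID = 0;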
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index c5c093ae228f..57bfe344dbab 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -14,8 +14,6 @@
#include "StatepointLowering.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -160,12 +158,12 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
/// Utility function for reservePreviousStackSlotForValue. Tries to find
/// stack slot index to which we have spilled value for previous statepoints.
/// LookUpDepth specifies maximum DFS depth this function is allowed to look.
-static Optional<int> findPreviousSpillSlot(const Value *Val,
- SelectionDAGBuilder &Builder,
- int LookUpDepth) {
+static std::optional<int> findPreviousSpillSlot(const Value *Val,
+ SelectionDAGBuilder &Builder,
+ int LookUpDepth) {
// Can not look any further - give up now
if (LookUpDepth <= 0)
- return None;
+ return std::nullopt;
// Spill location is known for gc relocates
if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
@@ -173,18 +171,18 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) &&
"GetStatepoint must return one of two types");
if (isa<UndefValue>(Statepoint))
- return None;
+ return std::nullopt;
const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps
[cast<GCStatepointInst>(Statepoint)];
auto It = RelocationMap.find(Relocate);
if (It == RelocationMap.end())
- return None;
+ return std::nullopt;
auto &Record = It->second;
if (Record.type != RecordType::Spill)
- return None;
+ return std::nullopt;
return Record.payload.FI;
}
@@ -197,16 +195,16 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
// All incoming values should have same known stack slot, otherwise result
// is unknown.
if (const PHINode *Phi = dyn_cast<PHINode>(Val)) {
- Optional<int> MergedResult = None;
+ std::optional<int> MergedResult;
for (const auto &IncomingValue : Phi->incoming_values()) {
- Optional<int> SpillSlot =
+ std::optional<int> SpillSlot =
findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
if (!SpillSlot)
- return None;
+ return std::nullopt;
if (MergedResult && *MergedResult != *SpillSlot)
- return None;
+ return std::nullopt;
MergedResult = SpillSlot;
}
@@ -241,7 +239,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
// which we visit values is unspecified.
// Don't know any information about this instruction
- return None;
+ return std::nullopt;
}
/// Return true if-and-only-if the given SDValue can be lowered as either a
@@ -284,7 +282,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
return;
const int LookUpDepth = 6;
- Optional<int> Index =
+ std::optional<int> Index =
findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth);
if (!Index)
return;
@@ -321,7 +319,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
/// reference lowered call result
static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
SelectionDAGBuilder::StatepointLoweringInfo &SI,
- SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) {
+ SelectionDAGBuilder &Builder) {
SDValue ReturnValue, CallEndVal;
std::tie(ReturnValue, CallEndVal) =
Builder.lowerInvokable(SI.CLI, SI.EHPadBB);
@@ -526,34 +524,6 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
// Lower the deopt and gc arguments for this statepoint. Layout will be:
// deopt argument length, deopt arguments.., gc arguments...
-#ifndef NDEBUG
- if (auto *GFI = Builder.GFI) {
- // Check that each of the gc pointer and bases we've gotten out of the
- // safepoint is something the strategy thinks might be a pointer (or vector
- // of pointers) into the GC heap. This is basically just here to help catch
- // errors during statepoint insertion. TODO: This should actually be in the
- // Verifier, but we can't get to the GCStrategy from there (yet).
- GCStrategy &S = GFI->getStrategy();
- for (const Value *V : SI.Bases) {
- auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
- if (Opt) {
- assert(Opt.value() &&
- "non gc managed base pointer found in statepoint");
- }
- }
- for (const Value *V : SI.Ptrs) {
- auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
- if (Opt) {
- assert(Opt.value() &&
- "non gc managed derived pointer found in statepoint");
- }
- }
- assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!");
- } else {
- assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!");
- assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!");
- }
-#endif
// Figure out what lowering strategy we're going to use for each part
// Note: It is conservatively correct to lower both "live-in" and "live-out"
@@ -742,7 +712,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
NumOfStatepoints++;
// Clear state
StatepointLowering.startNewStatepoint(*this);
- assert(SI.Bases.size() == SI.Ptrs.size());
+ assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!");
+ assert((GFI || SI.Bases.empty()) &&
+ "No gc specified, so cannot relocate pointers!");
LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n");
#ifndef NDEBUG
@@ -770,8 +742,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
// Get call node, we will replace it later with statepoint
SDValue ReturnVal;
SDNode *CallNode;
- std::tie(ReturnVal, CallNode) =
- lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports);
+ std::tie(ReturnVal, CallNode) = lowerCallFromStatepointLoweringInfo(SI, *this);
// Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
// nodes with all the appropriate arguments and return values.
@@ -921,7 +892,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
auto *RetTy = Relocate->getType();
Register Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), Reg, RetTy, None);
+ DAG.getDataLayout(), Reg, RetTy, std::nullopt);
SDValue Chain = DAG.getRoot();
RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr);
PendingExports.push_back(Chain);
@@ -1148,7 +1119,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
// TODO: To eliminate this problem we can remove gc.result intrinsics
// completely and make statepoint call to return a tuple.
Type *RetTy = GCResultLocality.second->getType();
- unsigned Reg = FuncInfo.CreateRegs(RetTy);
+ Register Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), Reg, RetTy,
I.getCallingConv());
@@ -1239,10 +1210,6 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
if (cast<GCStatepointInst>(Statepoint)->getParent() == Relocate.getParent())
StatepointLowering.relocCallVisited(Relocate);
-
- auto *Ty = Relocate.getType()->getScalarType();
- if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
- assert(*IsManaged && "Non gc managed pointer relocated!");
#endif
const Value *DerivedPtr = Relocate.getDerivedPtr();
@@ -1266,7 +1233,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
Register InReg = Record.payload.Reg;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), InReg, Relocate.getType(),
- None); // This is not an ABI copy.
+ std::nullopt); // This is not an ABI copy.
// We generate copy to/from regs even for local uses, hence we must
// chain with current root to ensure proper ordering of copies w.r.t.
// statepoint.
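The findPreviousSpillSlot rewrite above keeps its "merge across PHI inputs or give up" shape while switching to std::optional. That shape is worth seeing in isolation; a self-contained sketch with illustrative names:

    #include <optional>
    #include <vector>

    // One candidate slot per incoming value; any unknown or conflicting input
    // makes the merged result unknown, as in the PHI case above.
    std::optional<int> mergeSlots(const std::vector<std::optional<int>> &Incoming) {
      std::optional<int> Merged;
      for (const auto &Slot : Incoming) {
        if (!Slot)
          return std::nullopt;
        if (Merged && *Merged != *Slot)
          return std::nullopt;
        Merged = Slot;
      }
      return Merged;
    }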
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6205e74837c0..8d4c8802f71c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -260,7 +261,7 @@ bool TargetLowering::findOptimalMemOpLowering(
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
- bool Fast;
+ unsigned Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(
VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
@@ -351,7 +352,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
break;
case ISD::SETO:
ShouldInvertCC = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETUO:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
@@ -360,7 +361,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
case ISD::SETONE:
// SETONE = O && UNE
ShouldInvertCC = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
@@ -397,7 +398,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
}
}
- // Use the target specific return value for comparions lib calls.
+ // Use the target specific return value for comparison lib calls.
EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = {NewLHS, NewRHS};
TargetLowering::MakeLibCallOptions CallOptions;
@@ -633,35 +634,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
- // TODO: We can probably do more work on calculating the known bits and
- // simplifying the operations for scalable vectors, but for now we just
- // bail out.
- if (VT.isScalableVector()) {
- // Pretend we don't know anything for now.
- Known = KnownBits(DemandedBits.getBitWidth());
- return false;
- }
-
- APInt DemandedElts = VT.isVector()
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
AssumeSingleUse);
}
-// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
EVT VT = Op.getValueType();
- // Pretend we don't know anything about scalable vectors for now.
- // TODO: We can probably do more work on simplifying the operations for
- // scalable vectors, but for now we just bail out.
- if (VT.isScalableVector())
- return SDValue();
-
// Limit search depth.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return SDValue();
@@ -680,6 +668,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
KnownBits LHSKnown, RHSKnown;
switch (Op.getOpcode()) {
case ISD::BITCAST: {
+ if (VT.isScalableVector())
+ return SDValue();
+
SDValue Src = peekThroughBitcasts(Op.getOperand(0));
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
@@ -825,6 +816,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ if (VT.isScalableVector())
+ return SDValue();
+
// If we only want the lowest element and none of extended bits, then we can
// return the bitcasted source vector.
SDValue Src = Op.getOperand(0);
@@ -838,6 +832,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
case ISD::INSERT_VECTOR_ELT: {
+ if (VT.isScalableVector())
+ return SDValue();
+
// If we don't demand the inserted element, return the base vector.
SDValue Vec = Op.getOperand(0);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -848,6 +845,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
case ISD::INSERT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ return SDValue();
+
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
@@ -857,6 +857,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
if (DemandedSubElts == 0)
return Vec;
// If this simply widens the lowest subvector, see if we can do it earlier.
+ // TODO: REMOVE ME - SimplifyMultipleUseDemandedBits shouldn't be creating
+ // general nodes like this.
if (Idx == 0 && Vec.isUndef()) {
if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
@@ -866,6 +868,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
case ISD::VECTOR_SHUFFLE: {
+ assert(!VT.isScalableVector());
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// If all the demanded elts are from one operand and are inline,
@@ -889,6 +892,11 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
default:
+ // TODO: Probably okay to remove after audit; here to reduce change size
+ // in initial enablement patch for scalable vectors
+ if (VT.isScalableVector())
+ return SDValue();
+
if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth))
@@ -902,14 +910,10 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
unsigned Depth) const {
EVT VT = Op.getValueType();
-
- // Pretend we don't know anything about scalable vectors for now.
- // TODO: We can probably do more work on simplifying the operations for
- // scalable vectors, but for now we just bail out.
- if (VT.isScalableVector())
- return SDValue();
-
- APInt DemandedElts = VT.isVector()
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
+ APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
@@ -1068,16 +1072,10 @@ bool TargetLowering::SimplifyDemandedBits(
// Don't know anything.
Known = KnownBits(BitWidth);
- // TODO: We can probably do more work on calculating the known bits and
- // simplifying the operations for scalable vectors, but for now we just
- // bail out.
EVT VT = Op.getValueType();
- if (VT.isScalableVector())
- return false;
-
bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
unsigned NumElts = OriginalDemandedElts.getBitWidth();
- assert((!VT.isVector() || NumElts == VT.getVectorNumElements()) &&
+ assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
"Unexpected vector size");
APInt DemandedBits = OriginalDemandedBits;
@@ -1089,6 +1087,10 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op.isUndef())
return false;
+ // We can't simplify target constants.
+ if (Op.getOpcode() == ISD::TargetConstant)
+ return false;
+
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
@@ -1103,17 +1105,16 @@ bool TargetLowering::SimplifyDemandedBits(
}
// Other users may use these bits.
- if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
- if (Depth != 0) {
- // If not at the root, Just compute the Known bits to
- // simplify things downstream.
- Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ bool HasMultiUse = false;
+ if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
+ if (Depth >= SelectionDAG::MaxRecursionDepth) {
+ // Limit search depth.
return false;
}
- // If this is the root being simplified, allow it to have multiple uses,
- // just set the DemandedBits/Elts to all bits.
+ // Allow multiple uses, just set the DemandedBits/Elts to all bits.
DemandedBits = APInt::getAllOnes(BitWidth);
DemandedElts = APInt::getAllOnes(NumElts);
+ HasMultiUse = true;
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -1124,9 +1125,9 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits Known2;
switch (Op.getOpcode()) {
- case ISD::TargetConstant:
- llvm_unreachable("Can't simplify this node");
case ISD::SCALAR_TO_VECTOR: {
+ if (VT.isScalableVector())
+ return false;
if (!DemandedElts[0])
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -1164,6 +1165,8 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::INSERT_VECTOR_ELT: {
+ if (VT.isScalableVector())
+ return false;
SDValue Vec = Op.getOperand(0);
SDValue Scl = Op.getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -1200,6 +1203,8 @@ bool TargetLowering::SimplifyDemandedBits(
return false;
}
case ISD::INSERT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ return false;
// Demand any elements from the subvector and the remainder from the src it's
// inserted into.
SDValue Src = Op.getOperand(0);
@@ -1243,6 +1248,8 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::EXTRACT_SUBVECTOR: {
+ if (VT.isScalableVector())
+ return false;
// Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
if (Src.getValueType().isScalableVector())
@@ -1268,6 +1275,8 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::CONCAT_VECTORS: {
+ if (VT.isScalableVector())
+ return false;
Known.Zero.setAllBits();
Known.One.setAllBits();
EVT SubVT = Op.getOperand(0).getValueType();
@@ -1286,28 +1295,14 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::VECTOR_SHUFFLE: {
+ assert(!VT.isScalableVector());
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// Collect demanded elements from shuffle operands..
- APInt DemandedLHS(NumElts, 0);
- APInt DemandedRHS(NumElts, 0);
- for (unsigned i = 0; i != NumElts; ++i) {
- if (!DemandedElts[i])
- continue;
- int M = ShuffleMask[i];
- if (M < 0) {
- // For UNDEF elements, we don't know anything about the common state of
- // the shuffle result.
- DemandedLHS.clearAllBits();
- DemandedRHS.clearAllBits();
- break;
- }
- assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
- if (M < (int)NumElts)
- DemandedLHS.setBit(M);
- else
- DemandedRHS.setBit(M - NumElts);
- }
+ APInt DemandedLHS, DemandedRHS;
+ if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
+ DemandedRHS))
+ break;
if (!!DemandedLHS || !!DemandedRHS) {
SDValue Op0 = Op.getOperand(0);
@@ -1378,7 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits(
// AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
// iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
- if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
(Op0.getOperand(0).isUndef() ||
ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
Op0->hasOneUse()) {
@@ -1745,7 +1740,7 @@ bool TargetLowering::SimplifyDemandedBits(
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
// TODO - support non-uniform vector amounts.
- if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
+ if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
InnerOp.hasOneUse()) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
@@ -1879,6 +1874,16 @@ bool TargetLowering::SimplifyDemandedBits(
Known.One.lshrInPlace(ShAmt);
// High bits known zero.
Known.Zero.setHighBits(ShAmt);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
}
break;
}
@@ -2081,10 +2086,10 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umin(Known0, Known1);
- if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
- return TLO.CombineTo(Op, IsULE.value() ? Op0 : Op1);
- if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
- return TLO.CombineTo(Op, IsULT.value() ? Op0 : Op1);
+ if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
+ return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
+ if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
+ return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
break;
}
case ISD::UMAX: {
@@ -2094,10 +2099,10 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umax(Known0, Known1);
- if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
- return TLO.CombineTo(Op, IsUGE.value() ? Op0 : Op1);
- if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
- return TLO.CombineTo(Op, IsUGT.value() ? Op0 : Op1);
+ if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
+ return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
+ if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
+ return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
break;
}
case ISD::BITREVERSE: {
@@ -2225,19 +2230,18 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
return true;
- Known.Zero = KnownLo.Zero.zext(BitWidth) |
- KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
-
- Known.One = KnownLo.One.zext(BitWidth) |
- KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
+ Known = KnownHi.concat(KnownLo);
break;
}
- case ISD::ZERO_EXTEND:
- case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ if (VT.isScalableVector())
+ return false;
+ [[fallthrough]];
+ case ISD::ZERO_EXTEND: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
- unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
@@ -2269,12 +2273,15 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
- case ISD::SIGN_EXTEND:
- case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ if (VT.isScalableVector())
+ return false;
+ [[fallthrough]];
+ case ISD::SIGN_EXTEND: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
- unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
@@ -2321,12 +2328,15 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
- case ISD::ANY_EXTEND:
- case ISD::ANY_EXTEND_VECTOR_INREG: {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ if (VT.isScalableVector())
+ return false;
+ [[fallthrough]];
+ case ISD::ANY_EXTEND: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
- unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
// If we only need the bottom element then we can just bitcast.
@@ -2369,18 +2379,18 @@ bool TargetLowering::SimplifyDemandedBits(
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
- if (Src.getNode()->hasOneUse()) {
- switch (Src.getOpcode()) {
- default:
+ switch (Src.getOpcode()) {
+ default:
+ break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
+ if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
+ // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+ // undesirable.
break;
- case ISD::SRL:
- // Shrink SRL by a constant if none of the high bits shifted in are
- // demanded.
- if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
- // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
- // undesirable.
- break;
+ if (Src.getNode()->hasOneUse()) {
const APInt *ShAmtC =
TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
if (!ShAmtC || ShAmtC->uge(BitWidth))
@@ -2402,8 +2412,8 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
}
- break;
}
+ break;
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -2420,6 +2430,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero |= ~InMask;
+ Known.One &= (~Known.Zero);
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
@@ -2464,6 +2475,8 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::BITCAST: {
+ if (VT.isScalableVector())
+ return false;
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
@@ -2576,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
return TLO.CombineTo(Op, And1);
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ADD:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
@@ -2601,6 +2614,11 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
}
+ // neg x with only low bit demanded is simply x.
+ if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
+ isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero())
+ return TLO.CombineTo(Op, Op1);
+
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
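The "neg x" fold a few lines above relies on negation preserving the low bit: (0 - x) and x always agree in bit 0, so when only that bit is demanded the subtraction can be dropped. A standalone check of the identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 1u << 16; ++X)
        assert(((0u - X) & 1u) == (X & 1u)); // neg keeps the low bit unchanged
    }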
@@ -2679,10 +2697,16 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
default:
- if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ // We also ask the target about intrinsics (which could be specific to it).
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ // TODO: Probably okay to remove after audit; here to reduce change size
+ // in initial enablement patch for scalable vectors
+ if (Op.getValueType().isScalableVector())
+ break;
if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
Known, TLO, Depth))
return true;
@@ -2715,6 +2739,12 @@ bool TargetLowering::SimplifyDemandedBits(
APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
}
+ // A multi-use 'all demanded elts' simplify failed to find any known bits.
+ // Try again just for the original demanded elts.
+ // Ensure we do this AFTER constant folding above.
+ if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
+ Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
+
return false;
}
@@ -2746,7 +2776,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
"Vector binop only");
EVT EltVT = VT.getVectorElementType();
- unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
assert(UndefOp0.getBitWidth() == NumElts &&
UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
@@ -2814,7 +2844,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
// If Op has other users, assume that all elements are needed.
- if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
+ if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
DemandedElts.setAllBits();
// Not demanding any elements from Op.
@@ -3176,6 +3206,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::VECTOR_SHUFFLE: {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// Collect demanded elements from shuffle operands..
@@ -3195,17 +3227,17 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// See if we can simplify either shuffle operand.
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
- ZeroLHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
- ZeroRHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
return true;
// Simplify mask using undef elements from LHS/RHS.
bool Updated = false;
bool IdentityLHS = true, IdentityRHS = true;
- SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
+ SmallVector<int, 32> NewMask(ShuffleMask);
for (unsigned i = 0; i != NumElts; ++i) {
int &M = NewMask[i];
if (M < 0)
@@ -3223,8 +3255,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// to Identity which can cause premature removal of the shuffle mask.
if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
SDValue LegalShuffle =
- buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
- NewMask, TLO.DAG);
+ buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
if (LegalShuffle)
return TLO.CombineTo(Op, LegalShuffle);
}
@@ -3307,7 +3338,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
Depth + 1, /*AssumeSingleUse*/ true))
return true;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case ISD::OR:
case ISD::XOR:
@@ -3367,6 +3398,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::MUL:
+ case ISD::MULHU:
+ case ISD::MULHS:
case ISD::AND: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -3375,10 +3408,16 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
+ // If we know that a demanded element was zero in Op1 we don't need to
+ // demand it in Op0 - it's guaranteed to be zero.
+ APInt DemandedElts0 = DemandedElts & ~SrcZero;
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
TLO, Depth + 1))
return true;
+ KnownUndef &= DemandedElts0;
+ KnownZero &= DemandedElts0;
+
// If every element pair has a zero/undef then just fold to zero.
// fold (and x, undef) -> 0 / (and x, 0) -> 0
// fold (mul x, undef) -> 0 / (mul x, 0) -> 0
@@ -3566,6 +3605,19 @@ bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
return false;
}
+bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use canCreateUndefOrPoison if you don't know whether Op"
+ " is a target node!");
+ // Be conservative and return true.
+ return true;
+}
+
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
@@ -3582,6 +3634,7 @@ bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
const APInt &DemandedElts,
APInt &UndefElts,
+ const SelectionDAG &DAG,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
@@ -3692,6 +3745,26 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
}
+ // Try to eliminate a power-of-2 mask constant by converting to a signbit
+ // test in a narrow type that we can truncate to with no cost. Examples:
+ // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
+ // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
+ // TODO: This conservatively checks for type legality on the source and
+ // destination types. That may inhibit optimizations, but it also
+ // allows setcc->shift transforms that may be more beneficial.
+ auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
+ isTypeLegal(OpVT) && N0.hasOneUse()) {
+ EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
+ AndC->getAPIntValue().getActiveBits());
+ if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
+ SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
+ SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
+ return DAG.getSetCC(DL, VT, Trunc, Zero,
+ Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
+ }
+ }
+
// Match these patterns in any of their permutations:
// (X & Y) == Y
// (X & Y) != Y
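The power-of-2 mask fold added above is easy to sanity-check outside the compiler; for the worked case in the comment, (X & 32768) == 0 matches the truncated i16 value being non-negative (two's-complement truncation assumed):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 1u << 17; ++X) {
        bool MaskTest = (X & 32768u) == 0; // original form: test bit 15
        bool SignTest = (int16_t)X >= 0;   // narrowed form: sign of the i16 trunc
        assert(MaskTest == SignTest);
      }
    }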
@@ -3968,14 +4041,14 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
EVT CTVT = CTPOP.getValueType();
SDValue CTOp = CTPOP.getOperand(0);
- // If this is a vector CTPOP, keep the CTPOP if it is legal.
- // TODO: Should we check if CTPOP is legal(or custom) for scalars?
- if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
- return SDValue();
-
+ // Expand a power-of-2-or-zero comparison based on ctpop:
// (ctpop x) u< 2 -> (x & x-1) == 0
// (ctpop x) u> 1 -> (x & x-1) != 0
if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
+ // Keep the CTPOP if it is a legal vector op.
+ if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
+ return SDValue();
+
unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
return SDValue();
@@ -3994,16 +4067,14 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
}
- // If ctpop is not supported, expand a power-of-2 comparison based on it.
+ // Expand a power-of-2 comparison based on ctpop:
+ // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
+ // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
- // For scalars, keep CTPOP if it is legal or custom.
- if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
+ // Keep the CTPOP if it is legal.
+ if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
return SDValue();
- // This is based on X86's custom lowering for CTPOP which produces more
- // instructions than the expansion here.
- // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
- // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
SDValue Zero = DAG.getConstant(0, dl, CTVT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
assert(CTVT.isInteger());
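Both expansions above lean on the classic x & (x - 1) trick, which clears the lowest set bit: popcount(x) == 1 is exactly "x is nonzero and clearing one bit gives zero". A standalone check using C++20 std::popcount:

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 1u << 16; ++X) {
        bool IsPow2 = X != 0 && (X & (X - 1)) == 0; // (ctpop x) == 1 expansion
        assert(IsPow2 == (std::popcount(X) == 1));
        bool AtMostOneBit = (X & (X - 1)) == 0;     // (ctpop x) u< 2 expansion
        assert(AtMostOneBit == (std::popcount(X) < 2));
      }
    }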
@@ -4137,6 +4208,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SelectionDAG &DAG = DCI.DAG;
const DataLayout &Layout = DAG.getDataLayout();
EVT OpVT = N0.getValueType();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
// Constant fold or commute setcc.
if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
@@ -4181,6 +4253,23 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
return V;
+ // For equality to 0 of a no-wrap multiply, decompose and test each op:
+ // X * Y == 0 --> (X == 0) || (Y == 0)
+ // X * Y != 0 --> (X != 0) && (Y != 0)
+ // TODO: This bails out if minsize is set, but if the target doesn't have a
+ // single instruction multiply for this type, it would likely be
+ // smaller to decompose.
+ if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
+ (N0->getFlags().hasNoUnsignedWrap() ||
+ N0->getFlags().hasNoSignedWrap()) &&
+ !Attr.hasFnAttr(Attribute::MinSize)) {
+ SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
+ SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
+ unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
+ return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
+ }
+
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
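The no-wrap requirement in the fold above is essential: with a plain wrapping multiply, two nonzero operands can produce zero, so X * Y == 0 would not imply X == 0 || Y == 0. A standalone counterexample:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 1u << 16, Y = 1u << 16;
      assert(X != 0 && Y != 0);
      assert(X * Y == 0); // wraps mod 2^32; the decomposition would be wrong here
    }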
@@ -4970,8 +5059,6 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Fold remainder of division by a constant.
if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
- AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
@@ -5221,6 +5308,12 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
+void TargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
+ SmallVectorImpl<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ return;
+}
+
std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
StringRef Constraint,
@@ -5334,11 +5427,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
break;
case InlineAsm::isLabel:
- OpInfo.CallOperandVal =
- cast<CallBrInst>(&Call)->getBlockAddressForIndirectDest(LabelNo);
- OpInfo.ConstraintVT =
- getAsmOperandValueType(DL, OpInfo.CallOperandVal->getType())
- .getSimpleVT();
+ OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
++LabelNo;
continue;
case InlineAsm::isClobber:
@@ -5944,54 +6033,68 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
- bool UseNPQ = false;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Try to use leading zeros of the dividend to reduce the multiplier and
+ // avoid expensive fixups.
+ // TODO: Support vectors.
+ unsigned LeadingZeros = 0;
+ if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
+ assert(!isOneConstant(N1) && "Unexpected divisor");
+ LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
+ // UnsignedDivisionByConstantInfo doesn't work correctly if the leading
+ // zeros in the dividend exceed the leading zeros of the divisor.
+ LeadingZeros =
+ std::min(LeadingZeros,
+ cast<ConstantSDNode>(N1)->getAPIntValue().countLeadingZeros());
+ }
+
+ bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
- // FIXME: We should use a narrower constant when the upper
- // bits are known to be zero.
const APInt& Divisor = C->getAPIntValue();
- UnsignedDivisionByConstantInfo magics =
- UnsignedDivisionByConstantInfo::get(Divisor);
- unsigned PreShift = 0, PostShift = 0;
-
- // If the divisor is even, we can avoid using the expensive fixup by
- // shifting the divided value upfront.
- if (magics.IsAdd && !Divisor[0]) {
- PreShift = Divisor.countTrailingZeros();
- // Get magic number for the shifted divisor.
- magics =
- UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
- assert(!magics.IsAdd && "Should use cheap fixup now");
- }
-
- unsigned SelNPQ;
- if (!magics.IsAdd || Divisor.isOne()) {
- assert(magics.ShiftAmount < Divisor.getBitWidth() &&
- "We shouldn't generate an undefined shift!");
- PostShift = magics.ShiftAmount;
- SelNPQ = false;
+
+ SDValue PreShift, MagicFactor, NPQFactor, PostShift;
+
+ // Magic algorithm doesn't work for division by 1. We need to emit a select
+ // at the end.
+ if (Divisor.isOne()) {
+ PreShift = PostShift = DAG.getUNDEF(ShSVT);
+ MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
} else {
- PostShift = magics.ShiftAmount - 1;
- SelNPQ = true;
- }
+ UnsignedDivisionByConstantInfo magics =
+ UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
- PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
- MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
- NPQFactors.push_back(
- DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
- : APInt::getZero(EltBits),
- dl, SVT));
- PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
- UseNPQ |= SelNPQ;
+ MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
+
+ assert(magics.PreShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert(magics.PostShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert((!magics.IsAdd || magics.PreShift == 0) &&
+ "Unexpected pre-shift");
+ PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
+ PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
+ NPQFactor = DAG.getConstant(
+ magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getZero(EltBits),
+ dl, SVT);
+ UseNPQ |= magics.IsAdd;
+ UsePreShift |= magics.PreShift != 0;
+ UsePostShift |= magics.PostShift != 0;
+ }
+
+ PreShifts.push_back(PreShift);
+ MagicFactors.push_back(MagicFactor);
+ NPQFactors.push_back(NPQFactor);
+ PostShifts.push_back(PostShift);
return true;
};
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
// Collect the shifts/magic values from each element.
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
return SDValue();
@@ -6018,8 +6121,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
}
SDValue Q = N0;
- Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
- Created.push_back(Q.getNode());
+ if (UsePreShift) {
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
+ Created.push_back(Q.getNode());
+ }
// FIXME: We should support doing a MUL in a wider type.
auto GetMULHU = [&](SDValue X, SDValue Y) {
@@ -6068,8 +6173,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
Created.push_back(Q.getNode());
}
- Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
- Created.push_back(Q.getNode());
+ if (UsePostShift) {
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
+ Created.push_back(Q.getNode());
+ }
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
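For readers unfamiliar with the magic-number scheme this hunk reworks, a scalar sketch of the no-fixup path; the constants are the classic divide-by-3 values, chosen purely for illustration:

#include <cstdint>
// Unsigned x / 3 via multiply-high plus a post-shift: the shape BuildUDIV
// emits when no pre-shift and no NPQ fixup are needed.
uint32_t udiv3(uint32_t x) {
  uint64_t prod = static_cast<uint64_t>(x) * 0xAAAAAAABu; // magic factor
  return static_cast<uint32_t>(prod >> 33);               // post-shift
}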
@@ -6921,6 +7028,41 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
OptForSize, Cost, Depth))
return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
break;
+ case ISD::SELECT:
+ case ISD::VSELECT: {
+ // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
+ // iff at least one cost is cheaper and the other is neutral/cheaper
+ SDValue LHS = Op.getOperand(1);
+ NegatibleCost CostLHS = NegatibleCost::Expensive;
+ SDValue NegLHS =
+ getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
+ if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
+ RemoveDeadNode(NegLHS);
+ break;
+ }
+
+ // Prevent this node from being deleted by the next call.
+ Handles.emplace_back(NegLHS);
+
+ SDValue RHS = Op.getOperand(2);
+ NegatibleCost CostRHS = NegatibleCost::Expensive;
+ SDValue NegRHS =
+ getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
+
+ // We're done with the handles.
+ Handles.clear();
+
+ if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
+ (CostLHS != NegatibleCost::Cheaper &&
+ CostRHS != NegatibleCost::Cheaper)) {
+ RemoveDeadNode(NegLHS);
+ RemoveDeadNode(NegRHS);
+ break;
+ }
+
+ Cost = std::min(CostLHS, CostRHS);
+ return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
+ }
}
return SDValue();
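A small concrete instance of the new select fold (hypothetical values, purely illustrative):

// fneg(select c, 2.0, fneg x) --> select c, -2.0, x
// Negating both arms pays off when at least one negation is cheaper (here
// the constant) and the other is neutral or cheaper (an existing fneg).
float fneg_select_model(bool c, float x) {
  return c ? -2.0f : x;
}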
@@ -7002,8 +7144,8 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
}
if (!VT.isVector() && Opcode == ISD::MUL &&
- DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
- DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
+ DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
+ DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
// The input values are both sign-extended.
// TODO non-MUL case?
if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
@@ -7014,8 +7156,7 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
}
unsigned ShiftAmount = OuterBitSize - InnerBitSize;
- EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
- SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
+ SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
if (!LH.getNode() && !RH.getNode() &&
isOperationLegalOrCustom(ISD::SRL, VT) &&
@@ -7122,6 +7263,190 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
return Ok;
}
+// Optimize unsigned division or remainder by constants for types twice as large
+// as a legal VT.
+//
+// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder can be
+// computed as:
+//   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
+//   Remainder = Sum % Constant
+// This is based on "Remainder by Summing Digits" from Hacker's Delight.
+//
+// For division, we can compute the remainder using the algorithm described
+// above, then subtract it from the dividend to get an exact multiple of
+// Constant. Multiplying that exact multiple by the multiplicative inverse of
+// Constant modulo (1 << BitWidth) yields the quotient.
+
+// If Constant is even, we can shift right the dividend and the divisor by the
+// number of trailing zeros in Constant before applying the remainder algorithm.
+// If we're after the quotient, we can subtract this value from the shifted
+// dividend and multiply by the multiplicative inverse of the shifted divisor.
+// If we want the remainder, we shift the value left by the number of trailing
+// zeros and add the bits that were shifted out of the dividend.
+bool TargetLowering::expandDIVREMByConstant(SDNode *N,
+ SmallVectorImpl<SDValue> &Result,
+ EVT HiLoVT, SelectionDAG &DAG,
+ SDValue LL, SDValue LH) const {
+ unsigned Opcode = N->getOpcode();
+ EVT VT = N->getValueType(0);
+
+ // TODO: Support signed division/remainder.
+ if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
+ return false;
+ assert(
+ (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
+ "Unexpected opcode");
+
+ auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!CN)
+ return false;
+
+ APInt Divisor = CN->getAPIntValue();
+ unsigned BitWidth = Divisor.getBitWidth();
+ unsigned HBitWidth = BitWidth / 2;
+ assert(VT.getScalarSizeInBits() == BitWidth &&
+ HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
+
+ // The divisor needs to be less than (1 << HBitWidth).
+ APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
+ if (Divisor.uge(HalfMaxPlus1))
+ return false;
+
+ // We depend on the UREM-by-constant optimization in DAGCombiner, which
+ // requires a high multiply.
+ if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
+ !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
+ return false;
+
+ // Don't expand if optimizing for size.
+ if (DAG.shouldOptForSize())
+ return false;
+
+ // Early out for 0 or 1 divisors.
+ if (Divisor.ule(1))
+ return false;
+
+ // If the divisor is even, shift it until it becomes odd.
+ unsigned TrailingZeros = 0;
+ if (!Divisor[0]) {
+ TrailingZeros = Divisor.countTrailingZeros();
+ Divisor.lshrInPlace(TrailingZeros);
+ }
+
+ SDLoc dl(N);
+ SDValue Sum;
+ SDValue PartialRem;
+
+ // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
+ // then add in the carry.
+ // TODO: If we can't split it in half, we might be able to split into 3 or
+ // more pieces using a smaller bit width.
+ if (HalfMaxPlus1.urem(Divisor).isOne()) {
+ assert(!LL == !LH && "Expected both input halves or no input halves!");
+ if (!LL) {
+ LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0, dl));
+ LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
+ DAG.getIntPtrConstant(1, dl));
+ }
+
+ // Shift the input by the number of TrailingZeros in the divisor. The
+ // shifted out bits will be added to the remainder later.
+ if (TrailingZeros) {
+ // Save the shifted off bits if we need the remainder.
+ if (Opcode != ISD::UDIV) {
+ APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
+ PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
+ DAG.getConstant(Mask, dl, HiLoVT));
+ }
+
+ LL = DAG.getNode(
+ ISD::OR, dl, HiLoVT,
+ DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
+ DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
+ DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
+ DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
+ HiLoVT, dl)));
+ LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
+ DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
+ }
+
+ // Use addcarry if we can, otherwise use a compare to detect overflow.
+ EVT SetCCType =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
+ if (isOperationLegalOrCustom(ISD::ADDCARRY, HiLoVT)) {
+ SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
+ Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
+ Sum = DAG.getNode(ISD::ADDCARRY, dl, VTList, Sum,
+ DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
+ } else {
+ Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
+ SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
+ // If the boolean for the target is 0 or 1, we can add the setcc result
+ // directly.
+ if (getBooleanContents(HiLoVT) ==
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
+ else
+ Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
+ DAG.getConstant(0, dl, HiLoVT));
+ Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
+ }
+ }
+
+ // If we didn't find a sum, we can't do the expansion.
+ if (!Sum)
+ return false;
+
+ // Perform a HiLoVT urem on the Sum using truncated divisor.
+ SDValue RemL =
+ DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
+ DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
+ SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
+
+ if (Opcode != ISD::UREM) {
+ // Subtract the remainder from the shifted dividend.
+ SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
+ SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
+
+ Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
+
+ // Multiply by the multiplicative inverse of the divisor modulo
+ // (1 << BitWidth).
+ APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
+ APInt MulFactor = Divisor.zext(BitWidth + 1);
+ MulFactor = MulFactor.multiplicativeInverse(Mod);
+ MulFactor = MulFactor.trunc(BitWidth);
+
+ SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
+ DAG.getConstant(MulFactor, dl, VT));
+
+ // Split the quotient into low and high parts.
+ SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
+ DAG.getIntPtrConstant(0, dl));
+ SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
+ DAG.getIntPtrConstant(1, dl));
+ Result.push_back(QuotL);
+ Result.push_back(QuotH);
+ }
+
+ if (Opcode != ISD::UDIV) {
+ // If we shifted the input, shift the remainder left and add the bits we
+ // shifted off the input.
+ if (TrailingZeros) {
+ APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
+ RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
+ DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
+ RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
+ }
+ Result.push_back(RemL);
+ Result.push_back(DAG.getConstant(0, dl, HiLoVT));
+ }
+
+ return true;
+}
+
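To make the "Remainder by Summing Digits" machinery concrete, a standalone scalar model of a 64-bit operation split into 32-bit halves, with divisor 3 (the helper names are illustrative, not from the patch):

#include <cstdint>
// Since (1 << 32) % 3 == 1, (Hi * 2^32 + Lo) % 3 == (Hi + Lo + carry) % 3.
uint32_t urem3(uint32_t Lo, uint32_t Hi) {
  uint32_t Sum = Lo + Hi;
  Sum += (Sum < Lo); // fold the carry of Lo + Hi back in
  return Sum % 3;    // a HiLoVT UREM by constant, optimized elsewhere
}
// Quotient: subtract the remainder to get an exact multiple of 3, then
// multiply by the inverse of 3 modulo 2^64 (0xAAAAAAAAAAAAAAAB).
uint64_t udiv3(uint64_t X) {
  uint64_t R = urem3(static_cast<uint32_t>(X), static_cast<uint32_t>(X >> 32));
  return (X - R) * 0xAAAAAAAAAAAAAAABull;
}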
// Check that (every element of) Z is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
return ISD::matchUnaryPredicate(
@@ -7130,8 +7455,68 @@ static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
true);
}
+static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
+ EVT VT = Node->getValueType(0);
+ SDValue ShX, ShY;
+ SDValue ShAmt, InvShAmt;
+ SDValue X = Node->getOperand(0);
+ SDValue Y = Node->getOperand(1);
+ SDValue Z = Node->getOperand(2);
+ SDValue Mask = Node->getOperand(3);
+ SDValue VL = Node->getOperand(4);
+
+ unsigned BW = VT.getScalarSizeInBits();
+ bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Z.getValueType();
+ if (isNonZeroModBitWidthOrUndef(Z, BW)) {
+ // fshl: X << C | Y >> (BW - C)
+ // fshr: X << (BW - C) | Y >> C
+ // where C = Z % BW is not zero
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
+ InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
+ ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
+ VL);
+ ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
+ VL);
+ } else {
+ // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
+ // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
+ SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
+ if (isPowerOf2_32(BW)) {
+ // Z % BW -> Z & (BW - 1)
+ ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
+ // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
+ SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
+ DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
+ InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
+ } else {
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
+ InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
+ }
+
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ if (IsFSHL) {
+ ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
+ SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL);
+ ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL);
+ } else {
+ SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
+ ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
+ ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL);
+ }
+ }
+ return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
+}
+
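A scalar model of the expansion strategy (illustrative only): for power-of-two BW, the general path uses the double-shift form so a shift amount of zero never produces an undefined BW-bit shift.

#include <cstdint>
// fshl(X, Y, Z) for BW == 32: X << (Z % 32) | Y >> 1 >> (31 - (Z % 32)).
uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  unsigned ShAmt = Z & 31;     // Z % BW  ->  Z & (BW - 1)
  unsigned InvShAmt = ~Z & 31; // (BW - 1) - (Z % BW)
  return (X << ShAmt) | ((Y >> 1) >> InvShAmt);
}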
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
SelectionDAG &DAG) const {
+ if (Node->isVPOpcode())
+ return expandVPFunnelShift(Node, DAG);
+
EVT VT = Node->getValueType(0);
if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
@@ -7919,6 +8304,63 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
DAG.getConstant(Len - 8, dl, ShVT));
}
+SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue VL = Node->getOperand(2);
+ unsigned Len = VT.getScalarSizeInBits();
+ assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
+
+ // TODO: Add support for irregular type lengths.
+ if (!(Len <= 128 && Len % 8 == 0))
+ return SDValue();
+
+ // This is the same algorithm as expandCTPOP, from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ SDValue Mask55 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
+ SDValue Mask33 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
+ SDValue Mask0F =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
+
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
+ DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+ DAG.getConstant(1, dl, ShVT), Mask, VL),
+ Mask55, Mask, VL);
+ Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
+
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
+ DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+ DAG.getConstant(2, dl, ShVT), Mask, VL),
+ Mask33, Mask, VL);
+ Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
+
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
+ Mask, VL);
+ Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
+ Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
+
+ if (Len <= 8)
+ return Op;
+
+ // v = (v * 0x01010101...) >> (Len - 8)
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ return DAG.getNode(ISD::VP_LSHR, dl, VT,
+ DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
+ DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
+}
+
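The scalar original of the bit-count being rebuilt here with VP nodes, from the cited bithacks page:

#include <cstdint>
uint32_t popcount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555);                // pairwise sums
  v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // nibble sums
  v = (v + (v >> 4)) & 0x0F0F0F0F;                // byte sums
  return (v * 0x01010101) >> 24;                  // add bytes; Len - 8 == 24
}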
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
@@ -7969,6 +8411,77 @@ SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
+SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue VL = Node->getOperand(2);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
+ SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
+ Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
+ DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask,
+ VL);
+ }
+ Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
+ VL);
+ return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
+}
+
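The same smear-then-count idea in scalar form (popcount32 as sketched above; illustrative only):

#include <cstdint>
uint32_t popcount32(uint32_t); // as above
uint32_t ctlz32(uint32_t x) {
  x |= x >> 1;  x |= x >> 2;  x |= x >> 4;
  x |= x >> 8;  x |= x >> 16; // smear the top set bit down
  return popcount32(~x);      // ~x keeps exactly the leading-zero positions
}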
+SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
+ const SDLoc &DL, EVT VT, SDValue Op,
+ unsigned BitWidth) const {
+ if (BitWidth != 32 && BitWidth != 64)
+ return SDValue();
+ APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
+ : APInt(64, 0x0218A392CD3D5DBFULL);
+ const DataLayout &TD = DAG.getDataLayout();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+ unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
+ SDValue Lookup = DAG.getNode(
+ ISD::SRL, DL, VT,
+ DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
+ DAG.getConstant(DeBruijn, DL, VT)),
+ DAG.getConstant(ShiftAmt, DL, VT));
+ Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));
+
+ SmallVector<uint8_t> Table(BitWidth, 0);
+ for (unsigned i = 0; i < BitWidth; i++) {
+ APInt Shl = DeBruijn.shl(i);
+ APInt Lshr = Shl.lshr(ShiftAmt);
+ Table[Lshr.getZExtValue()] = i;
+ }
+
+ // Create a ConstantDataArray in the constant pool.
+ auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
+ SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
+ TD.getPrefTypeAlign(CA->getType()));
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
+ DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
+ PtrInfo, MVT::i8);
+ if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+ return ExtLoad;
+
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
+ return DAG.getSelect(DL, VT, SrcIsZero,
+ DAG.getConstant(BitWidth, DL, VT), ExtLoad);
+}
+
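The table-lookup trick in scalar form, using the same 32-bit de Bruijn constant as the code; the table is the standard one for 0x077CB531:

#include <cstdint>
uint32_t cttz32(uint32_t v) {
  static const uint8_t Table[32] = {
      0,  1,  28, 2,  29, 14, 24, 3,  30, 22, 20, 15, 25, 17, 4,  8,
      31, 27, 13, 23, 21, 19, 16, 7,  26, 12, 18, 6,  11, 5,  10, 9};
  if (v == 0)
    return 32; // the SrcIsZero select emitted for plain CTTZ
  return Table[((v & -v) * 0x077CB531u) >> 27]; // isolate low bit, hash, index
}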
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
@@ -8002,6 +8515,12 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return SDValue();
+ // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
+ if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
+ !isOperationLegal(ISD::CTLZ, VT))
+ if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
+ return V;
+
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
// { return 32 - nlz(~x & (x-1)); }
@@ -8019,6 +8538,22 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
+SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
+ SDValue Op = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+ SDValue VL = Node->getOperand(2);
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+
+ // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
+ SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
+ DAG.getConstant(-1, dl, VT), Mask, VL);
+ SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
+ DAG.getConstant(1, dl, VT), Mask, VL);
+ SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
+ return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
+}
+
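And the identity itself, checked in scalar form (popcount32 as sketched earlier):

#include <cstdint>
uint32_t popcount32(uint32_t); // as above
// ~x & (x - 1) sets exactly the bit positions below the lowest set bit of x
// (all 32 positions when x == 0), so counting them counts trailing zeros.
uint32_t cttz_via_popcount(uint32_t x) {
  return popcount32(~x & (x - 1));
}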
SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
bool IsNegative) const {
SDLoc dl(N);
@@ -8092,36 +8627,36 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
case MVT::i32:
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(0xFF00, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
- DAG.getConstant(0xFF0000, dl, VT));
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
case MVT::i64:
Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
- Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
- Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(255ULL<<8, dl, VT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(255ULL<<16, dl, VT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
+ DAG.getConstant(255ULL<<24, dl, VT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
- Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
- Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
- DAG.getConstant(255ULL<<48, dl, VT));
- Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
- DAG.getConstant(255ULL<<40, dl, VT));
- Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
- DAG.getConstant(255ULL<<32, dl, VT));
Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
DAG.getConstant(255ULL<<24, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
DAG.getConstant(255ULL<<16, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
- DAG.getConstant(255ULL<<8 , dl, VT));
+ DAG.getConstant(255ULL<<8, dl, VT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
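The point of this rework is to mask *before* shifting so the AND constants stay small and shareable; in scalar form, the new i32 sequence reads:

#include <cstdint>
uint32_t bswap32(uint32_t x) {
  uint32_t t4 = x << 24;            // byte 0 -> byte 3
  uint32_t t3 = (x & 0xFF00u) << 8; // byte 1 -> byte 2 (mask first)
  uint32_t t2 = (x >> 8) & 0xFF00u; // byte 2 -> byte 1
  uint32_t t1 = x >> 24;            // byte 3 -> byte 0
  return (t4 | t3) | (t2 | t1);
}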
@@ -8132,6 +8667,82 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
}
}
+SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+
+ if (!VT.isSimple())
+ return SDValue();
+
+ EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().getScalarType().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::i16:
+ Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
+ case MVT::i32:
+ Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
+ Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
+ return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
+ Mask, EVL);
+ Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
+ DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
+ Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
+ Mask, EVL);
+ Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
+ DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
+ Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
+ DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
+ Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
+ Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
+ DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
+ DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
+ Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
+ Mask, EVL);
+ Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
+ Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
+ Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
+ Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
+ return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
+ }
+}
+
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
@@ -8194,6 +8805,68 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
return Tmp;
}
+SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
+ assert(N->getOpcode() == ISD::VP_BITREVERSE);
+
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ SDValue Tmp, Tmp2, Tmp3;
+
+ // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the
+ // i2 pairs, and finally the i1 pairs.
+ // TODO: We can easily support i4/i2 legal types if any target ever does.
+ if (Sz >= 8 && isPowerOf2_32(Sz)) {
+ // Create the masks - repeating the pattern every byte.
+ APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
+
+ // BSWAP if the type is wider than a single byte.
+ Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
+
+ // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(Mask4, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
+ Mask, EVL);
+ Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
+
+ // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(Mask2, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
+ Mask, EVL);
+ Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
+
+ // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
+ Mask, EVL);
+ Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
+ DAG.getConstant(Mask1, dl, VT), Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
+ Mask, EVL);
+ Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
+ Mask, EVL);
+ Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
+ return Tmp;
+ }
+ return SDValue();
+}
+
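A scalar model of the staged reversal (bswap32 as sketched above; illustrative only):

#include <cstdint>
uint32_t bswap32(uint32_t); // as above
uint32_t bitreverse32(uint32_t v) {
  v = bswap32(v);                                          // reverse bytes
  v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4); // swap nibbles
  v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2); // swap bit pairs
  v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1); // swap single bits
  return v;
}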
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SelectionDAG &DAG) const {
@@ -8671,7 +9344,7 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
} else if (DataVT.isScalableVector()) {
Increment = DAG.getVScale(DL, AddrVT,
APInt(AddrVT.getFixedSizeInBits(),
- DataVT.getStoreSize().getKnownMinSize()));
+ DataVT.getStoreSize().getKnownMinValue()));
} else
Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
@@ -8957,9 +9630,13 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
assert(VT == RHS.getValueType() && "Expected operands to be the same type");
assert(VT.isInteger() && "Expected operands to be integers");
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(Node);
+
// If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
unsigned BW = VT.getScalarSizeInBits();
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
SDValue Orig =
DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
@@ -8968,14 +9645,14 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
if (IsSigned) {
SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
- SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
- SatMin, SatMax, ISD::SETLT);
+ SDValue Cond =
+ DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
+ SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
} else {
SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
}
- Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);
-
- return Result;
+ SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
+ return DAG.getSelect(dl, VT, Cond, SatVal, Result);
}
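A scalar model of the overflow test this lowering relies on (an assumed 8-bit example, not from the patch):

#include <cstdint>
int8_t sshl_sat8(int8_t L, unsigned R) {
  int8_t Res = static_cast<int8_t>(static_cast<uint8_t>(L) << R);
  if ((Res >> R) != L)                  // (L << R) >> R != L => bits were lost
    return L < 0 ? INT8_MIN : INT8_MAX; // e.g. 100 << 2 saturates to 127
  return Res;
}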
SDValue
@@ -9665,7 +10342,7 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
// Store the hi part of CONCAT_VECTORS(V1, V2)
SDValue OffsetToV2 = DAG.getVScale(
DL, PtrVT,
- APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
+ APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
@@ -9686,9 +10363,10 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
if (TrailingElts > VT.getVectorMinNumElements()) {
- SDValue VLBytes = DAG.getVScale(
- DL, PtrVT,
- APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
+ SDValue VLBytes =
+ DAG.getVScale(DL, PtrVT,
+ APInt(PtrVT.getFixedSizeInBits(),
+ VT.getStoreSize().getKnownMinValue()));
TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
}
@@ -9757,7 +10435,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
"If SETUE is expanded, SETOEQ or SETUNE must be legal!");
NeedInvert = true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETO:
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
"If SETO is expanded, SETOEQ must be legal!");
@@ -9781,7 +10459,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
NeedInvert = ((unsigned)CCCode & 0x8U);
break;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
@@ -9802,7 +10480,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
break;
}
// Fallthrough if we are unsigned integer.
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::SETLE:
case ISD::SETGT:
case ISD::SETGE:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 5f9ade18f15c..153fe77b8b4a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
#include <cassert>
+#include <optional>
#include <string>
#include <utility>
#include <vector>
@@ -305,7 +306,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
if (Roots.empty())
return false;
- Optional<DomTreeUpdater> DTU;
+ std::optional<DomTreeUpdater> DTU;
if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy);
@@ -320,9 +321,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
Instruction *StackEntry =
AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame");
- while (isa<AllocaInst>(IP))
- ++IP;
- AtEntry.SetInsertPoint(IP->getParent(), IP);
+ AtEntry.SetInsertPointPastAllocas(&F);
+ IP = AtEntry.GetInsertPoint();
// Initialize the map pointer and load the current head of the shadow stack.
Instruction *CurrentHead =
@@ -361,7 +361,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
// For each instruction that escapes...
EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true,
- DTU ? DTU.getPointer() : nullptr);
+ DTU ? &*DTU : nullptr);
while (IRBuilder<> *AtExit = EE.Next()) {
// Pop the entry from the shadow stack. Don't reuse CurrentHead from
// AtEntry, since that would make the value live for the entire function.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
index f6ad2b50abcd..2411b1ad5203 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -284,7 +284,7 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
Register PhysReg = MO.getReg();
if (!PhysReg)
continue;
- assert(Register::isPhysicalRegister(PhysReg) && "Unallocated register?!");
+ assert(PhysReg.isPhysical() && "Unallocated register?!");
// The stack pointer is not normally described as a callee-saved register
// in calling convention definitions, so we need to watch for it
// separately. An SP mentioned by a call instruction, we can ignore,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 1fcee02184a9..3fed707a9eb1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -164,7 +164,7 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
// There are still some uses of LPI. Construct an aggregate with the exception
// values and replace the LPI with that aggregate.
Type *LPadType = LPI->getType();
- Value *LPadVal = UndefValue::get(LPadType);
+ Value *LPadVal = PoisonValue::get(LPadType);
auto *SelI = cast<Instruction>(SelVal);
IRBuilder<> Builder(SelI->getParent(), std::next(SelI->getIterator()));
LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
@@ -183,7 +183,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// that needs to be restored on all exits from the function. This is an alloca
// because the value needs to be added to the global context list.
auto &DL = F.getParent()->getDataLayout();
- const Align Alignment(DL.getPrefTypeAlignment(FunctionContextTy));
+ const Align Alignment = DL.getPrefTypeAlign(FunctionContextTy);
FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(), nullptr,
Alignment, "fn_context", &EntryBB->front());
@@ -391,7 +391,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
lowerAcrossUnwindEdges(F, Invokes);
Value *FuncCtx =
- setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
+ setupFunctionContext(F, ArrayRef(LPads.begin(), LPads.end()));
BasicBlock *EntryBB = &F.front();
IRBuilder<> Builder(EntryBB->getTerminator());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
index 94149f56e703..92e820c9d3d8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "SplitKit.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -323,7 +322,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
}
bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
- unsigned OrigReg = VRM.getOriginal(CurLI->reg());
+ Register OrigReg = VRM.getOriginal(CurLI->reg());
const LiveInterval &Orig = LIS.getInterval(OrigReg);
assert(!Orig.empty() && "Splitting empty interval?");
LiveInterval::const_iterator I = Orig.find(Idx);
@@ -590,7 +589,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, const VNInfo *ParentVNI,
bool Late = RegIdx != 0;
// Attempt cheap-as-a-copy rematerialization.
- unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
+ Register Original = VRM.getOriginal(Edit->get(RegIdx));
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
@@ -1450,7 +1449,7 @@ void SplitEditor::deleteRematVictims() {
if (Dead.empty())
return;
- Edit->eliminateDeadDefs(Dead, None);
+ Edit->eliminateDeadDefs(Dead, std::nullopt);
}
void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
index 556b022b93fb..5a3428a5e91f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
@@ -32,7 +32,6 @@
namespace llvm {
-class AAResults;
class LiveInterval;
class LiveRange;
class LiveIntervals;
@@ -488,7 +487,7 @@ public:
/// overlapIntv - Indicate that all instructions in range should use the open
/// interval if End does not have tied-def usage of the register and in this
- /// case compliment interval is used. Let the complement interval be live.
+ /// case complement interval is used. Let the complement interval be live.
///
/// This doubles the register pressure, but is sometimes required to deal with
/// register uses after the last valid split point.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
new file mode 100644
index 000000000000..3a48dd5b0a03
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -0,0 +1,253 @@
+//===-- StackFrameLayoutAnalysisPass.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// StackFrameLayoutAnalysisPass implementation. Outputs information about the
+// layout of the stack frame, using the remarks interface. On the CLI it prints
+// a textual representation of the stack frame. When possible it prints the
+// values that occupy a stack slot using any available debug information. Since
+// the output is remarks-based, it is also available in a machine-readable file
+// format, such as YAML.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/PrintPasses.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-frame-layout"
+
+namespace {
+
+/// StackFrameLayoutAnalysisPass - This is a pass to dump the stack frame of a
+/// MachineFunction.
+///
+struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
+ using SlotDbgMap = SmallDenseMap<int, SetVector<const DILocalVariable *>>;
+ static char ID;
+
+ enum SlotType {
+ Spill, // a Spill slot
+ StackProtector, // Stack Protector slot
+ Variable, // a slot used to store local data (could be a temporary)
+ Invalid // It's an error for a slot to have this type
+ };
+
+ struct SlotData {
+ int Slot;
+ int Size;
+ int Align;
+ int Offset;
+ SlotType SlotTy;
+
+ SlotData(const MachineFrameInfo &MFI, const int ValOffset, const int Idx)
+ : Slot(Idx), Size(MFI.getObjectSize(Idx)),
+ Align(MFI.getObjectAlign(Idx).value()),
+ Offset(MFI.getObjectOffset(Idx) - ValOffset), SlotTy(Invalid) {
+ if (MFI.isSpillSlotObjectIndex(Idx))
+ SlotTy = SlotType::Spill;
+ else if (Idx == MFI.getStackProtectorIndex())
+ SlotTy = SlotType::StackProtector;
+ else
+ SlotTy = SlotType::Variable;
+ }
+
+ // We use this to sort in reverse order, so that the layout is displayed
+ // correctly.
+ bool operator<(const SlotData &Rhs) const { return Offset > Rhs.Offset; }
+ };
+
+ StackFrameLayoutAnalysisPass() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "Stack Frame Layout Analysis";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // TODO: We should implement a similar filter for remarks:
+ // -Rpass-func-filter=<regex>
+ if (!isFunctionInPrintList(MF.getName()))
+ return false;
+
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(DEBUG_TYPE))
+ return false;
+
+ MachineOptimizationRemarkAnalysis Rem(DEBUG_TYPE, "StackLayout",
+ MF.getFunction().getSubprogram(),
+ &MF.front());
+ Rem << ("\nFunction: " + MF.getName()).str();
+ emitStackFrameLayoutRemarks(MF, Rem);
+ getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE().emit(Rem);
+ return false;
+ }
+
+ std::string getTypeString(SlotType Ty) {
+ switch (Ty) {
+ case SlotType::Spill:
+ return "Spill";
+ case SlotType::StackProtector:
+ return "Protector";
+ case SlotType::Variable:
+ return "Variable";
+ default:
+ llvm_unreachable("bad slot type for stack layout");
+ }
+ }
+
+ void emitStackSlotRemark(const MachineFunction &MF, const SlotData &D,
+ MachineOptimizationRemarkAnalysis &Rem) {
+ // To make it easy to understand the stack layout from the CLI, we want to
+ // print each slot like the following:
+ //
+ // Offset: [SP+8], Type: Spill, Align: 8, Size: 16
+ // foo @ /path/to/file.c:25
+ // bar @ /path/to/file.c:35
+ //
+ // Which prints the size, alignment, and offset from the SP at function
+ // entry.
+ //
+ // But we also want the machine-readable remark data to be nicely
+ // organized. So we print some additional data as strings for the CLI
+ // output, but maintain more structured data for the YAML.
+ //
+ // For example we store the Offset in YAML as:
+ // ...
+ // - Offset: -8
+ //
+ // But we print it to the CLI as
+ // Offset: [SP-8]
+
+ // Negative offsets will print a leading `-`, so only add `+`
+ std::string Prefix =
+ formatv("\nOffset: [SP{0}", (D.Offset < 0) ? "" : "+").str();
+ Rem << Prefix << ore::NV("Offset", D.Offset)
+ << "], Type: " << ore::NV("Type", getTypeString(D.SlotTy))
+ << ", Align: " << ore::NV("Align", D.Align)
+ << ", Size: " << ore::NV("Size", D.Size);
+ }
+
+ void emitSourceLocRemark(const MachineFunction &MF, const DILocalVariable *N,
+ MachineOptimizationRemarkAnalysis &Rem) {
+ std::string Loc =
+ formatv("{0} @ {1}:{2}", N->getName(), N->getFilename(), N->getLine())
+ .str();
+ Rem << "\n " << ore::NV("DataLoc", Loc);
+ }
+
+ void emitStackFrameLayoutRemarks(MachineFunction &MF,
+ MachineOptimizationRemarkAnalysis &Rem) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.hasStackObjects())
+ return;
+
+ // ValOffset is the offset to the local area from the SP at function entry.
+ // To display the true offset from SP, we need to subtract ValOffset from
+ // MFI's ObjectOffset.
+ const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
+ const int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ LLVM_DEBUG(dbgs() << "getStackProtectorIndex =="
+ << MFI.getStackProtectorIndex() << "\n");
+
+ std::vector<SlotData> SlotInfo;
+
+ const unsigned int NumObj = MFI.getNumObjects();
+ SlotInfo.reserve(NumObj);
+ // initialize slot info
+ for (int Idx = MFI.getObjectIndexBegin(), EndIdx = MFI.getObjectIndexEnd();
+ Idx != EndIdx; ++Idx) {
+ if (MFI.isDeadObjectIndex(Idx))
+ continue;
+ SlotInfo.emplace_back(MFI, ValOffset, Idx);
+ }
+
+ // Sort the slots to match the actual layout in memory.
+ llvm::sort(SlotInfo);
+
+ SlotDbgMap SlotMap = genSlotDbgMapping(MF);
+
+ for (const SlotData &Info : SlotInfo) {
+ emitStackSlotRemark(MF, Info, Rem);
+ for (const DILocalVariable *N : SlotMap[Info.Slot])
+ emitSourceLocRemark(MF, N, Rem);
+ }
+ }
+
+ // We need to generate a mapping of slots to the values that are stored to
+ // them. This information is lost by the time we need to print out the frame,
+ // so we reconstruct it here by walking the CFG and generating the mapping.
+ SlotDbgMap genSlotDbgMapping(MachineFunction &MF) {
+ SlotDbgMap SlotDebugMap;
+
+ // add variables to the map
+ for (MachineFunction::VariableDbgInfo &DI : MF.getVariableDbgInfo())
+ SlotDebugMap[DI.Slot].insert(DI.Var);
+
+ // Then add all the spills that have debug data
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ for (MachineMemOperand *MO : MI.memoperands()) {
+ if (!MO->isStore())
+ continue;
+ auto *FI = dyn_cast_or_null<FixedStackPseudoSourceValue>(
+ MO->getPseudoValue());
+ if (!FI)
+ continue;
+ int FrameIdx = FI->getFrameIndex();
+ SmallVector<MachineInstr *> Dbg;
+ MI.collectDebugValues(Dbg);
+
+ for (MachineInstr *MI : Dbg)
+ SlotDebugMap[FrameIdx].insert(MI->getDebugVariable());
+ }
+ }
+ }
+
+ return SlotDebugMap;
+ }
+};
+
+char StackFrameLayoutAnalysisPass::ID = 0;
+} // namespace
+
+char &llvm::StackFrameLayoutAnalysisPassID = StackFrameLayoutAnalysisPass::ID;
+INITIALIZE_PASS(StackFrameLayoutAnalysisPass, "stack-frame-layout",
+ "Stack Frame Layout", false, false)
+
+namespace llvm {
+/// Returns a newly-created StackFrameLayout pass.
+MachineFunctionPass *createStackFrameLayoutAnalysisPass() {
+ return new StackFrameLayoutAnalysisPass();
+}
+
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
index ccaff862fa3f..bb7a51e49edb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
@@ -146,6 +146,23 @@ unsigned StatepointOpers::getGCPointerMap(
return GCMapSize;
}
+bool StatepointOpers::isFoldableReg(Register Reg) const {
+ unsigned FoldableAreaStart = getVarIdx();
+ for (const MachineOperand &MO : MI->uses()) {
+ if (MI->getOperandNo(&MO) >= FoldableAreaStart)
+ break;
+ if (MO.isReg() && MO.getReg() == Reg)
+ return false;
+ }
+ return true;
+}
+
+bool StatepointOpers::isFoldableReg(const MachineInstr *MI, Register Reg) {
+ if (MI->getOpcode() != TargetOpcode::STATEPOINT)
+ return false;
+ return StatepointOpers(MI).isFoldableReg(Reg);
+}
+
StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
if (StackMapVersion != 3)
llvm_unreachable("Unsupported stackmap version!");
@@ -240,7 +257,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
return ++MOI;
}
- assert(Register::isPhysicalRegister(MOI->getReg()) &&
+ assert(MOI->getReg().isPhysical() &&
"Virtreg operands should have been rewritten before now.");
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg());
assert(!MOI->getSubReg() && "Physical subreg still around.");
@@ -688,7 +705,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
}
// Emit alignment to 8 byte.
- OS.emitValueToAlignment(8);
+ OS.emitValueToAlignment(Align(8));
// Num live-out registers and padding to align to 4 byte.
OS.emitInt16(0);
@@ -700,7 +717,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
OS.emitIntValue(LO.Size, 1);
}
// Emit alignment to 8 byte.
- OS.emitValueToAlignment(8);
+ OS.emitValueToAlignment(Align(8));
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
index 510a8e3e4ba2..46685f7b8208 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
@@ -46,6 +46,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <optional>
#include <utility>
using namespace llvm;
@@ -58,10 +60,12 @@ STATISTIC(NumAddrTaken, "Number of local variables that have their address"
static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
cl::init(true), cl::Hidden);
+static cl::opt<bool> DisableCheckNoReturn("disable-check-noreturn-call",
+ cl::init(false), cl::Hidden);
char StackProtector::ID = 0;
-StackProtector::StackProtector() : FunctionPass(ID), SSPBufferSize(8) {
+StackProtector::StackProtector() : FunctionPass(ID) {
initializeStackProtectorPass(*PassRegistry::getPassRegistry());
}
@@ -82,20 +86,16 @@ void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const {
bool StackProtector::runOnFunction(Function &Fn) {
F = &Fn;
M = F->getParent();
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy);
TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
Trip = TM->getTargetTriple();
TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
HasPrologue = false;
HasIRCheck = false;
- Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size");
- if (Attr.isStringAttribute() &&
- Attr.getValueAsString().getAsInteger(10, SSPBufferSize))
- return false; // Invalid integer string
-
+ SSPBufferSize = Fn.getFnAttributeAsParsedInteger(
+ "stack-protector-buffer-size", DefaultSSPBufferSize);
if (!RequiresStackProtector())
return false;
@@ -108,7 +108,14 @@ bool StackProtector::runOnFunction(Function &Fn) {
}
++NumFunProtected;
- return InsertStackProtectors();
+ bool Changed = InsertStackProtectors();
+#ifdef EXPENSIVE_CHECKS
+ assert((!DTU ||
+ DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full)) &&
+ "Failed to maintain validity of domtree!");
+#endif
+ DTU.reset();
+ return Changed;
}
/// \param [out] IsLarge is set to true if a protectable array is found and
@@ -166,7 +173,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
const auto *I = cast<Instruction>(U);
// If this instruction accesses memory make sure it doesn't access beyond
// the bounds of the allocated object.
- Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
+ std::optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
if (MemLoc && MemLoc->Size.hasValue() &&
!TypeSize::isKnownGE(AllocSize,
TypeSize::getFixed(MemLoc->Size.getValue())))
@@ -414,11 +421,11 @@ static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
///
/// Returns true if the platform/triple supports the stackprotectorcreate pseudo
/// node.
-static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
+static bool CreatePrologue(Function *F, Module *M, Instruction *CheckLoc,
const TargetLoweringBase *TLI, AllocaInst *&AI) {
bool SupportsSelectionDAGSP = false;
IRBuilder<> B(&F->getEntryBlock().front());
- PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+ PointerType *PtrTy = Type::getInt8PtrTy(CheckLoc->getContext());
AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot");
Value *GuardSlot = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP);
@@ -441,16 +448,33 @@ bool StackProtector::InsertStackProtectors() {
TLI->useStackGuardXorFP() ||
(EnableSelectionDAGSP && !TM->Options.EnableFastISel);
AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
+ BasicBlock *FailBB = nullptr;
for (BasicBlock &BB : llvm::make_early_inc_range(*F)) {
- ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
- if (!RI)
+ // This is the auto-generated stack protector check BB; skip it.
+ if (&BB == FailBB)
+ continue;
+ Instruction *CheckLoc = dyn_cast<ReturnInst>(BB.getTerminator());
+ if (!CheckLoc && !DisableCheckNoReturn) {
+ for (auto &Inst : BB) {
+ auto *CB = dyn_cast<CallBase>(&Inst);
+ if (!CB)
+ continue;
+ if (!CB->doesNotReturn())
+ continue;
+ // Do the stack check before noreturn calls (e.g. __cxa_throw).
+ CheckLoc = CB;
+ break;
+ }
+ }
+
+ if (!CheckLoc)
continue;
// Generate prologue instrumentation if not already generated.
if (!HasPrologue) {
HasPrologue = true;
- SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, AI);
+ SupportsSelectionDAGSP &= CreatePrologue(F, M, CheckLoc, TLI, AI);
}
// SelectionDAG based code generation. Nothing else needs to be done here.
@@ -471,18 +495,17 @@ bool StackProtector::InsertStackProtectors() {
// instrumentation has already been generated.
HasIRCheck = true;
- // If we're instrumenting a block with a musttail call, the check has to be
+ // If we're instrumenting a block with a tail call, the check has to be
// inserted before the call rather than between it and the return. The
- // verifier guarantees that a musttail call is either directly before the
+ // verifier guarantees that a tail call is either directly before the
// return or with a single correct bitcast of the return value in between so
// we don't need to worry about many situations here.
- Instruction *CheckLoc = RI;
- Instruction *Prev = RI->getPrevNonDebugInstruction();
- if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
+ Instruction *Prev = CheckLoc->getPrevNonDebugInstruction();
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isTailCall())
CheckLoc = Prev;
else if (Prev) {
Prev = Prev->getPrevNonDebugInstruction();
- if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isTailCall())
CheckLoc = Prev;
}
@@ -514,8 +537,8 @@ bool StackProtector::InsertStackProtectors() {
// ...
// %1 = <stack guard>
// %2 = load StackGuardSlot
- // %3 = cmp i1 %1, %2
- // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+ // %3 = icmp ne i1 %1, %2
+ // br i1 %3, label %CallStackCheckFailBlk, label %SP_return
//
// SP_return:
// ret ...
@@ -527,38 +550,33 @@ bool StackProtector::InsertStackProtectors() {
// Create the FailBB. We duplicate the BB every time since the MI tail
// merge pass will merge together all of the various BBs into one,
// including the fail BB generated by the stack protector pseudo
// instruction.
- BasicBlock *FailBB = CreateFailBB();
-
- // Split the basic block before the return instruction.
- BasicBlock *NewBB =
- BB.splitBasicBlock(CheckLoc->getIterator(), "SP_return");
-
- // Update the dominator tree if we need to.
- if (DT && DT->isReachableFromEntry(&BB)) {
- DT->addNewBlock(NewBB, &BB);
- DT->addNewBlock(FailBB, &BB);
- }
-
- // Remove default branch instruction to the new BB.
- BB.getTerminator()->eraseFromParent();
+ if (!FailBB)
+ FailBB = CreateFailBB();
- // Move the newly created basic block to the point right after the old
- // basic block so that it's in the "fall through" position.
- NewBB->moveAfter(&BB);
-
- // Generate the stack protector instructions in the old basic block.
- IRBuilder<> B(&BB);
+ IRBuilder<> B(CheckLoc);
Value *Guard = getStackGuard(TLI, M, B);
LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true);
- Value *Cmp = B.CreateICmpEQ(Guard, LI2);
+ auto *Cmp = cast<ICmpInst>(B.CreateICmpNE(Guard, LI2));
auto SuccessProb =
BranchProbabilityInfo::getBranchProbStackProtector(true);
auto FailureProb =
BranchProbabilityInfo::getBranchProbStackProtector(false);
MDNode *Weights = MDBuilder(F->getContext())
- .createBranchWeights(SuccessProb.getNumerator(),
- FailureProb.getNumerator());
- B.CreateCondBr(Cmp, NewBB, FailBB, Weights);
+ .createBranchWeights(FailureProb.getNumerator(),
+ SuccessProb.getNumerator());
+
+ SplitBlockAndInsertIfThen(Cmp, CheckLoc,
+ /*Unreachable=*/false, Weights,
+ DTU ? &*DTU : nullptr,
+ /*LI=*/nullptr, /*ThenBlock=*/FailBB);
+
+ auto *BI = cast<BranchInst>(Cmp->getParent()->getTerminator());
+ BasicBlock *NewBB = BI->getSuccessor(1);
+ NewBB->setName("SP_return");
+ NewBB->moveAfter(&BB);
+
+ Cmp->setPredicate(Cmp->getInversePredicate());
+ BI->swapSuccessors();
}
}
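// Editor's note: a sketch (assumptions marked) of the
// SplitBlockAndInsertIfThen utility that replaces the manual
// splitBasicBlock/branch rewiring above. It splits the block at Inst,
// branches to the "then" block when Cond is true, and keeps the
// dominator tree current through the optional DTU. Passing an existing
// ThenBlock, as the patch does with FailBB, reuses one shared fail block.
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

static void sketchSplitIfThen(llvm::Value *Cond, llvm::Instruction *Inst,
                              llvm::DomTreeUpdater *DTU,
                              llvm::BasicBlock *FailBB) {
  // Branch weights and LoopInfo are optional and omitted in this sketch.
  llvm::SplitBlockAndInsertIfThen(Cond, Inst, /*Unreachable=*/false,
                                  /*BranchWeights=*/nullptr, DTU,
                                  /*LI=*/nullptr, /*ThenBlock=*/FailBB);
}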
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
index 2282d53e8ffd..83a7063de112 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -202,11 +202,10 @@ void SwiftErrorValueTracking::propagateVRegs() {
// downward defs.
bool needPHI =
VRegs.size() >= 1 &&
- llvm::find_if(
+ llvm::any_of(
VRegs,
[&](const std::pair<const MachineBasicBlock *, Register> &V)
- -> bool { return V.second != VRegs[0].second; }) !=
- VRegs.end();
+ -> bool { return V.second != VRegs[0].second; });
// If there is no upwards exposed use and we don't need a phi, just
// forward the swifterror vreg from the predecessor(s).
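// Editor's note: the change above is the general llvm::any_of cleanup --
// it returns the boolean directly instead of comparing find_if against
// end(). A tiny illustrative sketch with hypothetical data:
#include "llvm/ADT/STLExtras.h"
#include <vector>

static bool sketchAnyNonZero(const std::vector<int> &Values) {
  return llvm::any_of(Values, [](int V) { return V != 0; });
}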
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
index 18507b8fa84f..865add28f781 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -370,8 +370,10 @@ void TailDuplicator::processPHI(
// Remove PredBB from the PHI node.
MI->removeOperand(SrcOpIdx + 1);
MI->removeOperand(SrcOpIdx);
- if (MI->getNumOperands() == 1)
+ if (MI->getNumOperands() == 1 && !TailBB->hasAddressTaken())
MI->eraseFromParent();
+ else if (MI->getNumOperands() == 1)
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
}
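// Editor's note: a minimal sketch of the pattern introduced above. When a
// PHI in a block whose address is taken degenerates to its def operand
// alone, the patch does not simply erase it (the def is preserved instead);
// the instruction is rewritten in place into an IMPLICIT_DEF.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include <cassert>

static void sketchDegradePHI(llvm::MachineInstr &MI,
                             const llvm::TargetInstrInfo &TII) {
  assert(MI.isPHI() && MI.getNumOperands() == 1 && "expected a def-only PHI");
  MI.setDesc(TII.get(llvm::TargetOpcode::IMPLICIT_DEF));
}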
/// Duplicate a TailBB instruction to PredBB and update
@@ -395,7 +397,7 @@ void TailDuplicator::duplicateInstruction(
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
if (MO.isDef()) {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
@@ -716,8 +718,7 @@ bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
bool TailDuplicator::duplicateSimpleBB(
MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs,
- const DenseSet<Register> &UsedByPhi,
- SmallVectorImpl<MachineInstr *> &Copies) {
+ const DenseSet<Register> &UsedByPhi) {
SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(),
TailBB->succ_end());
SmallVector<MachineBasicBlock *, 8> Preds(TailBB->predecessors());
@@ -799,6 +800,15 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
return false;
if (!PredCond.empty())
return false;
+ // FIXME: This is overly conservative; it may be ok to relax this in the
+ // future under more specific conditions. If TailBB is an INLINEASM_BR
+ // indirect target, we need to see if the edge from PredBB to TailBB is from
+ // an INLINEASM_BR in PredBB, and then also if that edge was from the
+ // indirect target list, fallthrough/default target, or potentially both. If
+ // it's both, TailDuplicator::tailDuplicate will remove the edge, corrupting
+ // the successor list in PredBB and predecessor list in TailBB.
+ if (TailBB->isInlineAsmBrIndirectTarget())
+ return false;
return true;
}
@@ -826,7 +836,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
getRegsUsedByPHIs(*TailBB, &UsedByPhi);
if (IsSimple)
- return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+ return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi);
// Iterate through all the unique predecessors and tail-duplicate this
// block into them, if possible. Copying the list ahead of time also
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 4116231c005f..0f6cf11ca9d1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -48,8 +49,8 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
if (OpNum >= MCID.getNumOperands())
return nullptr;
- short RegClass = MCID.OpInfo[OpNum].RegClass;
- if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
+ short RegClass = MCID.operands()[OpNum].RegClass;
+ if (MCID.operands()[OpNum].isLookupPtrRegClass())
return TRI->getPointerRegClass(MF, RegClass);
// Instructions like INSERT_SUBREG do not have fixed register classes.
@@ -193,12 +194,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead();
// Avoid calling isRenamable for virtual registers since we assert that
// renamable property is only queried/set for physical registers.
- bool Reg1IsRenamable = Register::isPhysicalRegister(Reg1)
- ? MI.getOperand(Idx1).isRenamable()
- : false;
- bool Reg2IsRenamable = Register::isPhysicalRegister(Reg2)
- ? MI.getOperand(Idx2).isRenamable()
- : false;
+ bool Reg1IsRenamable =
+ Reg1.isPhysical() ? MI.getOperand(Idx1).isRenamable() : false;
+ bool Reg2IsRenamable =
+ Reg2.isPhysical() ? MI.getOperand(Idx2).isRenamable() : false;
// If destination is tied to either of the commuted source register, then
// it must be updated.
if (HasDef && Reg0 == Reg1 &&
@@ -238,9 +237,9 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal);
// Avoid calling setIsRenamable for virtual registers since we assert that
// renamable property is only queried/set for physical registers.
- if (Register::isPhysicalRegister(Reg1))
+ if (Reg1.isPhysical())
CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable);
- if (Register::isPhysicalRegister(Reg2))
+ if (Reg2.isPhysical())
CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable);
return CommutedMI;
}
@@ -338,7 +337,7 @@ bool TargetInstrInfo::PredicateInstruction(
return false;
for (unsigned j = 0, i = 0, e = MI.getNumOperands(); i != e; ++i) {
- if (MCID.OpInfo[i].isPredicate()) {
+ if (MCID.operands()[i].isPredicate()) {
MachineOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
MO.setReg(Pred[j].getReg());
@@ -455,12 +454,12 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
Register FoldReg = FoldOp.getReg();
Register LiveReg = LiveOp.getReg();
- assert(Register::isVirtualRegister(FoldReg) && "Cannot fold physregs");
+ assert(FoldReg.isVirtual() && "Cannot fold physregs");
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
- if (Register::isPhysicalRegister(LiveOp.getReg()))
+ if (LiveOp.getReg().isPhysical())
return RC->contains(LiveOp.getReg()) ? RC : nullptr;
if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
@@ -641,9 +640,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
MachineBasicBlock::iterator Pos = MI;
if (Flags == MachineMemOperand::MOStore)
- storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
+ storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI,
+ Register());
else
- loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
+ loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI, Register());
return &*--Pos;
}
@@ -705,13 +705,18 @@ bool TargetInstrInfo::hasReassociableOperands(
// reassociate.
MachineInstr *MI1 = nullptr;
MachineInstr *MI2 = nullptr;
- if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg()))
+ if (Op1.isReg() && Op1.getReg().isVirtual())
MI1 = MRI.getUniqueVRegDef(Op1.getReg());
- if (Op2.isReg() && Register::isVirtualRegister(Op2.getReg()))
+ if (Op2.isReg() && Op2.getReg().isVirtual())
MI2 = MRI.getUniqueVRegDef(Op2.getReg());
- // And they need to be in the trace (otherwise, they won't have a depth).
- return MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB;
+ // And at least one operand must be defined in MBB.
+ return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);
+}
+
+bool TargetInstrInfo::areOpcodesEqualOrInverse(unsigned Opcode1,
+ unsigned Opcode2) const {
+ return Opcode1 == Opcode2 || getInverseOpcode(Opcode1) == Opcode2;
}
bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
@@ -720,33 +725,39 @@ bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
- unsigned AssocOpcode = Inst.getOpcode();
+ unsigned Opcode = Inst.getOpcode();
- // If only one operand has the same opcode and it's the second source operand,
- // the operands must be commuted.
- Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
+ // If only one operand has the same or inverse opcode and it's the second
+ // source operand, the operands must be commuted.
+ Commuted = !areOpcodesEqualOrInverse(Opcode, MI1->getOpcode()) &&
+ areOpcodesEqualOrInverse(Opcode, MI2->getOpcode());
if (Commuted)
std::swap(MI1, MI2);
// 1. The previous instruction must be the same type as Inst.
- // 2. The previous instruction must also be associative/commutative (this can
- // be different even for instructions with the same opcode if traits like
- // fast-math-flags are included).
+ // 2. The previous instruction must also be associative/commutative or be the
+ // inverse of such an operation (this can be different even for
+ // instructions with the same opcode if traits like fast-math-flags are
+ // included).
// 3. The previous instruction must have virtual register definitions for its
// operands in the same basic block as Inst.
// 4. The previous instruction's result must only be used by Inst.
- return MI1->getOpcode() == AssocOpcode && isAssociativeAndCommutative(*MI1) &&
+ return areOpcodesEqualOrInverse(Opcode, MI1->getOpcode()) &&
+ (isAssociativeAndCommutative(*MI1) ||
+ isAssociativeAndCommutative(*MI1, /* Invert */ true)) &&
hasReassociableOperands(*MI1, MBB) &&
MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
}
-// 1. The operation must be associative and commutative.
+// 1. The operation must be associative and commutative or be the inverse of
+// such an operation.
// 2. The instruction must have virtual register definitions for its
// operands in the same basic block.
// 3. The instruction must have a reassociable sibling.
bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
bool &Commuted) const {
- return isAssociativeAndCommutative(Inst) &&
+ return (isAssociativeAndCommutative(Inst) ||
+ isAssociativeAndCommutative(Inst, /* Invert */ true)) &&
hasReassociableOperands(Inst, Inst.getParent()) &&
hasReassociableSibling(Inst, Commuted);
}
@@ -800,6 +811,111 @@ TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
return false;
}
+std::pair<unsigned, unsigned>
+TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern,
+ const MachineInstr &Root,
+ const MachineInstr &Prev) const {
+ bool AssocCommutRoot = isAssociativeAndCommutative(Root);
+ bool AssocCommutPrev = isAssociativeAndCommutative(Prev);
+
+ // Early exit if both opcodes are associative and commutative. It's a trivial
+ // reassociation where we only change the operand order. In this case the
+ // opcodes are not required to have inverse versions.
+ if (AssocCommutRoot && AssocCommutPrev) {
+ assert(Root.getOpcode() == Prev.getOpcode() && "Expected to be equal");
+ return std::make_pair(Root.getOpcode(), Root.getOpcode());
+ }
+
+ // At least one instruction is not associative or commutative.
+ // Since we have matched one of the reassociation patterns, we expect that the
+ // instructions' opcodes are equal or one of them is the inversion of the
+ // other.
+ assert(areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode()) &&
+ "Incorrectly matched pattern");
+ unsigned AssocCommutOpcode = Root.getOpcode();
+ unsigned InverseOpcode = *getInverseOpcode(Root.getOpcode());
+ if (!AssocCommutRoot)
+ std::swap(AssocCommutOpcode, InverseOpcode);
+
+ // The transformation rule (`+` is any associative and commutative binary
+ // operation, `-` is the inverse):
+ // REASSOC_AX_BY:
+ // (A + X) + Y => A + (X + Y)
+ // (A + X) - Y => A + (X - Y)
+ // (A - X) + Y => A - (X - Y)
+ // (A - X) - Y => A - (X + Y)
+ // REASSOC_XA_BY:
+ // (X + A) + Y => (X + Y) + A
+ // (X + A) - Y => (X - Y) + A
+ // (X - A) + Y => (X + Y) - A
+ // (X - A) - Y => (X - Y) - A
+ // REASSOC_AX_YB:
+ // Y + (A + X) => (Y + X) + A
+ // Y - (A + X) => (Y - X) - A
+ // Y + (A - X) => (Y - X) + A
+ // Y - (A - X) => (Y + X) - A
+ // REASSOC_XA_YB:
+ // Y + (X + A) => (Y + X) + A
+ // Y - (X + A) => (Y - X) - A
+ // Y + (X - A) => (Y + X) - A
+ // Y - (X - A) => (Y - X) + A
+ switch (Pattern) {
+ default:
+ llvm_unreachable("Unexpected pattern");
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ break;
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ break;
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ break;
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ break;
+ }
+ llvm_unreachable("Unhandled combination");
+}
+
+// Return a pair of boolean flags showing whether the new root and new prev
+// operands must be swapped. See the visual example of the rules in
+// TargetInstrInfo::getReassociationOpcodes.
+static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) {
+ switch (Pattern) {
+ default:
+ llvm_unreachable("Unexpected pattern");
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ return {false, false};
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ return {true, false};
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ return {true, true};
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ return {true, true};
+ }
+}
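// Editor's note: a compile-time spot check (illustrative only) of one
// rewrite from the rule table above -- REASSOC_AX_BY with inverse
// operations over plain integers: (A - X) - Y == A - (X + Y).
static_assert((10 - 3) - 4 == 10 - (3 + 4),
              "REASSOC_AX_BY inverse-op rule holds for sample values");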
+
/// Attempt the reassociation transformation to reduce critical path length.
/// See the above comments before getMachineCombinerPatterns().
void TargetInstrInfo::reassociateOps(
@@ -845,15 +961,15 @@ void TargetInstrInfo::reassociateOps(
Register RegY = OpY.getReg();
Register RegC = OpC.getReg();
- if (Register::isVirtualRegister(RegA))
+ if (RegA.isVirtual())
MRI.constrainRegClass(RegA, RC);
- if (Register::isVirtualRegister(RegB))
+ if (RegB.isVirtual())
MRI.constrainRegClass(RegB, RC);
- if (Register::isVirtualRegister(RegX))
+ if (RegX.isVirtual())
MRI.constrainRegClass(RegX, RC);
- if (Register::isVirtualRegister(RegY))
+ if (RegY.isVirtual())
MRI.constrainRegClass(RegY, RC);
- if (Register::isVirtualRegister(RegC))
+ if (RegC.isVirtual())
MRI.constrainRegClass(RegC, RC);
// Create a new virtual register for the result of (X op Y) instead of
@@ -862,21 +978,35 @@ void TargetInstrInfo::reassociateOps(
Register NewVR = MRI.createVirtualRegister(RC);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- unsigned Opcode = Root.getOpcode();
+ auto [NewRootOpc, NewPrevOpc] = getReassociationOpcodes(Pattern, Root, Prev);
bool KillA = OpA.isKill();
bool KillX = OpX.isKill();
bool KillY = OpY.isKill();
+ bool KillNewVR = true;
+
+ auto [SwapRootOperands, SwapPrevOperands] = mustSwapOperands(Pattern);
+
+ if (SwapPrevOperands) {
+ std::swap(RegX, RegY);
+ std::swap(KillX, KillY);
+ }
// Create new instructions for insertion.
MachineInstrBuilder MIB1 =
- BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
+ BuildMI(*MF, MIMetadata(Prev), TII->get(NewPrevOpc), NewVR)
.addReg(RegX, getKillRegState(KillX))
.addReg(RegY, getKillRegState(KillY))
.setMIFlags(Prev.getFlags());
+
+ if (SwapRootOperands) {
+ std::swap(RegA, NewVR);
+ std::swap(KillA, KillNewVR);
+ }
+
MachineInstrBuilder MIB2 =
- BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
+ BuildMI(*MF, MIMetadata(Root), TII->get(NewRootOpc), RegC)
.addReg(RegA, getKillRegState(KillA))
- .addReg(NewVR, getKillRegState(true))
+ .addReg(NewVR, getKillRegState(KillNewVR))
.setMIFlags(Root.getFlags());
setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);
@@ -910,6 +1040,10 @@ void TargetInstrInfo::genAlternativeCodeSequence(
break;
}
+ // Don't reassociate if Prev and Root are in different blocks.
+ if (Prev->getParent() != Root.getParent())
+ return;
+
assert(Prev && "Unknown pattern for machine combiner");
reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
@@ -929,7 +1063,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
// doesn't read the other parts of the register. Otherwise it is really a
// read-modify-write operation on the full virtual register which cannot be
// moved safely.
- if (Register::isVirtualRegister(DefReg) && MI.getOperand(0).getSubReg() &&
+ if (DefReg.isVirtual() && MI.getOperand(0).getSubReg() &&
MI.readsVirtualRegister(DefReg))
return false;
@@ -964,7 +1098,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
continue;
// Check for a well-behaved physical register.
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
@@ -1170,7 +1304,7 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
return (DefCycle != -1 && DefCycle <= 1);
}
-Optional<ParamLoadedValue>
+std::optional<ParamLoadedValue>
TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
Register Reg) const {
const MachineFunction *MF = MI.getMF();
@@ -1200,7 +1334,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
assert(!TRI->isSuperOrSubRegisterEq(Reg, DestReg) &&
"TargetInstrInfo::describeLoadedValue can't describe super- or "
"sub-regs for copy instructions");
- return None;
+ return std::nullopt;
} else if (auto RegImm = isAddImmediate(MI, Reg)) {
Register SrcReg = RegImm->Reg;
Offset = RegImm->Imm;
@@ -1218,16 +1352,16 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
// If the address points to "special" memory (e.g. a spill slot), it's
// sufficient to check that it isn't aliased by any high-level IR value.
if (!PSV || PSV->mayAlias(&MFI))
- return None;
+ return std::nullopt;
const MachineOperand *BaseOp;
if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable,
TRI))
- return None;
+ return std::nullopt;
// FIXME: Scalable offsets are not yet handled in the offset code below.
if (OffsetIsScalable)
- return None;
+ return std::nullopt;
// TODO: Can currently only handle mem instructions with a single define.
// An example from the x86 target:
@@ -1236,7 +1370,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
// ...
//
if (MI.getNumExplicitDefs() != 1)
- return None;
+ return std::nullopt;
// TODO: In what way do we need to take Reg into consideration here?
@@ -1248,7 +1382,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
return ParamLoadedValue(*BaseOp, Expr);
}
- return None;
+ return std::nullopt;
}
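// Editor's note: the None/Optional changes in this function are the
// mechanical migration to the C++17 standard vocabulary. Illustrative
// pattern, with a hypothetical helper:
#include <optional>

static std::optional<int> sketchParseDigit(char C) {
  if (C < '0' || C > '9')
    return std::nullopt; // previously: return None;
  return C - '0';
}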
/// Both DefMI and UseMI must be valid. By default, call directly to the
@@ -1411,6 +1545,8 @@ void TargetInstrInfo::mergeOutliningCandidateAttributes(
const Function &ParentFn = FirstCand.getMF()->getFunction();
if (ParentFn.hasFnAttribute("target-features"))
F.addFnAttr(ParentFn.getFnAttribute("target-features"));
+ if (ParentFn.hasFnAttribute("target-cpu"))
+ F.addFnAttr(ParentFn.getFnAttribute("target-cpu"));
// Set nounwind, so we don't generate eh_frame.
if (llvm::all_of(Candidates, [](const outliner::Candidate &C) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 9b965109745c..b62374320d75 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -724,6 +724,10 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
// with the Target-specific changes necessary.
MaxAtomicSizeInBitsSupported = 1024;
+ MaxDivRemBitWidthSupported = llvm::IntegerType::MAX_INT_BITS;
+
+ MaxLargeFPConvertBitWidthSupported = llvm::IntegerType::MAX_INT_BITS;
+
MinCmpXchgSizeInBits = 0;
SupportsUnalignedAtomics = false;
@@ -868,6 +872,11 @@ void TargetLoweringBase::initActions() {
// Named vector shuffles default to expand.
setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);
+
+ // VP_SREM/UREM default to expand.
+ // TODO: Expand all VP intrinsics.
+ setOperationAction(ISD::VP_SREM, VT, Expand);
+ setOperationAction(ISD::VP_UREM, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
@@ -950,7 +959,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// If this is a simple type, use the ComputeRegisterProp mechanism.
if (VT.isSimple()) {
MVT SVT = VT.getSimpleVT();
- assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType));
+ assert((unsigned)SVT.SimpleTy < std::size(TransformToType));
MVT NVT = TransformToType[SVT.SimpleTy];
LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
@@ -1342,6 +1351,15 @@ void TargetLoweringBase::computeRegisterProperties(
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
}
+ // Decide how to handle f80. If the target does not have native f80 support,
+ // expand it to i96 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f80)) {
+ NumRegistersForVT[MVT::f80] = 3*NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f80] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f80] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f80, TypeSoftenFloat);
+ }
+
// Decide how to handle f64. If the target does not have native f64 support,
// expand it to i64 and we will be generating soft float library calls.
if (!isTypeLegal(MVT::f64)) {
@@ -1385,7 +1403,7 @@ void TargetLoweringBase::computeRegisterProperties(
NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32];
RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32];
TransformToType[MVT::bf16] = MVT::f32;
- ValueTypeActions.setTypeAction(MVT::bf16, TypePromoteFloat);
+ ValueTypeActions.setTypeAction(MVT::bf16, TypeSoftPromoteHalf);
}
// Loop over all of the vector value types to see which need transformations.
@@ -1424,7 +1442,7 @@ void TargetLoweringBase::computeRegisterProperties(
}
if (IsLegalWiderType)
break;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case TypeWidenVector:
@@ -1458,7 +1476,7 @@ void TargetLoweringBase::computeRegisterProperties(
break;
}
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case TypeSplitVector:
case TypeScalarizeVector: {
@@ -1609,7 +1627,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
if (EVT(DestVT).bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16.
TypeSize NewVTSize = NewVT.getSizeInBits();
// Convert sizes such as i33 to i64.
- if (!isPowerOf2_32(NewVTSize.getKnownMinSize()))
+ if (!isPowerOf2_32(NewVTSize.getKnownMinValue()))
NewVTSize = NewVTSize.coefficientNextPowerOf2();
return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
}
@@ -1709,7 +1727,7 @@ uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty,
bool TargetLoweringBase::allowsMemoryAccessForAlignment(
LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
- Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
+ Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
// Check if the specified alignment is sufficient based on the data layout.
// TODO: While using the data layout works in practice, a better solution
// would be to implement this check directly (make this a virtual function).
@@ -1719,7 +1737,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment(
if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) {
// Assume that an access that meets the ABI-specified alignment is fast.
if (Fast != nullptr)
- *Fast = true;
+ *Fast = 1;
return true;
}
@@ -1729,7 +1747,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment(
bool TargetLoweringBase::allowsMemoryAccessForAlignment(
LLVMContext &Context, const DataLayout &DL, EVT VT,
- const MachineMemOperand &MMO, bool *Fast) const {
+ const MachineMemOperand &MMO, unsigned *Fast) const {
return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),
MMO.getAlign(), MMO.getFlags(), Fast);
}
@@ -1738,7 +1756,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags,
- bool *Fast) const {
+ unsigned *Fast) const {
return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment,
Flags, Fast);
}
@@ -1746,7 +1764,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,
- bool *Fast) const {
+ unsigned *Fast) const {
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
MMO.getFlags(), Fast);
}
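// Editor's note: a sketch of the caller-side idiom for the bool ->
// unsigned migration of the Fast out-parameter in these hunks. Zero still
// means "not fast"; any nonzero value now carries a target-defined degree
// of fastness and converts to true in boolean contexts.
static bool sketchFastCheck() {
  unsigned Fast = 0; // previously: bool Fast = false;
  // ... a call such as allowsMemoryAccess(..., &Fast) would go here ...
  return Fast != 0;
}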
@@ -1754,7 +1772,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, LLT Ty,
const MachineMemOperand &MMO,
- bool *Fast) const {
+ unsigned *Fast) const {
EVT VT = getApproximateEVTForLLT(Ty, DL, Context);
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
MMO.getFlags(), Fast);
@@ -1843,41 +1861,6 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
llvm_unreachable("Unknown instruction type encountered!");
}
-std::pair<InstructionCost, MVT>
-TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
- Type *Ty) const {
- LLVMContext &C = Ty->getContext();
- EVT MTy = getValueType(DL, Ty);
-
- InstructionCost Cost = 1;
- // We keep legalizing the type until we find a legal kind. We assume that
- // the only operation that costs anything is the split. After splitting
- // we need to handle two types.
- while (true) {
- LegalizeKind LK = getTypeConversion(C, MTy);
-
- if (LK.first == TypeScalarizeScalableVector) {
- // Ensure we return a sensible simple VT here, since many callers of this
- // function require it.
- MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
- return std::make_pair(InstructionCost::getInvalid(), VT);
- }
-
- if (LK.first == TypeLegal)
- return std::make_pair(Cost, MTy.getSimpleVT());
-
- if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
- Cost *= 2;
-
- // Do not loop with f128 type.
- if (MTy == LK.second)
- return std::make_pair(Cost, MTy.getSimpleVT());
-
- // Keep legalizing the type.
- MTy = LK.second;
- }
-}
-
Value *
TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
bool UseTLS) const {
@@ -2231,13 +2214,41 @@ int TargetLoweringBase::getDivRefinementSteps(EVT VT,
return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
}
+bool TargetLoweringBase::isLoadBitCastBeneficial(
+ EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const {
+ // Single-element vectors are scalarized, so we should generally avoid having
+ // any memory operations on such types, as they would get scalarized too.
+ if (LoadVT.isFixedLengthVector() && BitcastVT.isFixedLengthVector() &&
+ BitcastVT.getVectorNumElements() == 1)
+ return false;
+
+ // Don't do this if we could do an indexed load on the original type, but
+ // not on the new one.
+ if (!LoadVT.isSimple() || !BitcastVT.isSimple())
+ return true;
+
+ MVT LoadMVT = LoadVT.getSimpleVT();
+
+ // Don't bother doing this if it's just going to be promoted again later, as
+ // doing so might interfere with other combines.
+ if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
+ getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
+ return false;
+
+ unsigned Fast = 0;
+ return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
+ MMO, &Fast) &&
+ Fast;
+}
+
void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
MF.getRegInfo().freezeReservedRegs(MF);
}
-MachineMemOperand::Flags
-TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI,
- const DataLayout &DL) const {
+MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags(
+ const LoadInst &LI, const DataLayout &DL, AssumptionCache *AC,
+ const TargetLibraryInfo *LibInfo) const {
MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad;
if (LI.isVolatile())
Flags |= MachineMemOperand::MOVolatile;
@@ -2248,7 +2259,9 @@ TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI,
if (LI.hasMetadata(LLVMContext::MD_invariant_load))
Flags |= MachineMemOperand::MOInvariant;
- if (isDereferenceablePointer(LI.getPointerOperand(), LI.getType(), DL))
+ if (isDereferenceableAndAlignedPointer(LI.getPointerOperand(), LI.getType(),
+ LI.getAlign(), DL, &LI, AC,
+ /*DT=*/nullptr, LibInfo))
Flags |= MachineMemOperand::MODereferenceable;
Flags |= getTargetMMOFlags(LI);
@@ -2325,7 +2338,7 @@ bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI,
auto maxUses = [](unsigned RematCost) {
// A cost of 1 means remats are basically free.
if (RematCost == 1)
- return UINT_MAX;
+ return std::numeric_limits<unsigned>::max();
if (RematCost == 2)
return 2U;
@@ -2335,18 +2348,6 @@ bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI,
llvm_unreachable("Unexpected remat cost");
};
- // Helper to walk through uses and terminate if we've reached a limit. Saves
- // us spending time traversing uses if all we want to know is if it's >= min.
- auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
- unsigned NumUses = 0;
- auto UI = MRI.use_instr_nodbg_begin(Reg), UE = MRI.use_instr_nodbg_end();
- for (; UI != UE && NumUses < MaxUses; ++UI) {
- NumUses++;
- }
- // If we haven't reached the end yet then there are more than MaxUses users.
- return UI == UE;
- };
-
switch (MI.getOpcode()) {
default:
return false;
@@ -2363,8 +2364,7 @@ bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI,
unsigned MaxUses = maxUses(RematCost);
if (MaxUses == UINT_MAX)
return true; // Remats are "free" so always localize.
- bool B = isUsesAtMost(Reg, MaxUses);
- return B;
+ return MRI.hasAtMostUserInstrs(Reg, MaxUses);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 2badbe34ae6a..e760564779c2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -58,6 +58,7 @@
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Base64.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
@@ -127,7 +128,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
if (Ctx.getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM)
break;
// Fallthrough if not using EHABI
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Triple::ppc:
case Triple::ppcle:
case Triple::x86:
@@ -289,6 +290,14 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
break;
+ case Triple::loongarch32:
+ case Triple::loongarch64:
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ break;
default:
break;
}
@@ -358,6 +367,31 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
}
}
+ if (NamedMDNode *LLVMStats = M.getNamedMetadata("llvm.stats")) {
+ // Emit the metadata for llvm statistics into the .llvm_stats section. It is
+ // formatted as a list of key/value pairs; each value is Base64-encoded.
+ auto *S = C.getObjectFileInfo()->getLLVMStatsSection();
+ Streamer.switchSection(S);
+ for (const auto *Operand : LLVMStats->operands()) {
+ const auto *MD = cast<MDNode>(Operand);
+ assert(MD->getNumOperands() % 2 == 0 &&
+ "Operand num should be even for a list of key/value pairs");
+ for (size_t I = 0; I < MD->getNumOperands(); I += 2) {
+ // Encode the key string size.
+ auto *Key = cast<MDString>(MD->getOperand(I));
+ Streamer.emitULEB128IntValue(Key->getString().size());
+ Streamer.emitBytes(Key->getString());
+ // Encode the value into a Base64 string.
+ std::string Value = encodeBase64(
+ Twine(mdconst::dyn_extract<ConstantInt>(MD->getOperand(I + 1))
+ ->getZExtValue())
+ .str());
+ Streamer.emitULEB128IntValue(Value.size());
+ Streamer.emitBytes(Value);
+ }
+ }
+ }
+
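// Editor's note: a sketch of the per-pair record layout emitted above:
// a ULEB128 key length, the raw key bytes, a ULEB128 length of the
// Base64-encoded value, then the Base64 bytes. Key and value here are
// hypothetical.
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Base64.h"

static void sketchEmitStatsPair(llvm::MCStreamer &Streamer) {
  llvm::StringRef Key = "instcount"; // hypothetical statistic name
  uint64_t RawValue = 42;            // hypothetical value
  std::string Value = llvm::encodeBase64(llvm::Twine(RawValue).str());
  Streamer.emitULEB128IntValue(Key.size());
  Streamer.emitBytes(Key);
  Streamer.emitULEB128IntValue(Value.size());
  Streamer.emitBytes(Value);
}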
unsigned Version = 0;
unsigned Flags = 0;
StringRef Section;
@@ -400,7 +434,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
ELF::SHT_PROGBITS, Flags, 0);
unsigned Size = DL.getPointerSize();
Streamer.switchSection(Sec);
- Streamer.emitValueToAlignment(DL.getPointerABIAlignment(0).value());
+ Streamer.emitValueToAlignment(DL.getPointerABIAlignment(0));
Streamer.emitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::create(Size, getContext());
Streamer.emitELFSize(Label, E);
@@ -636,7 +670,7 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
bool HasPrefix = false;
if (const auto *F = dyn_cast<Function>(GO)) {
- if (Optional<StringRef> Prefix = F->getSectionPrefix()) {
+ if (std::optional<StringRef> Prefix = F->getSectionPrefix()) {
raw_svector_ostream(Name) << '.' << *Prefix;
HasPrefix = true;
}
@@ -1686,7 +1720,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
StringRef COMDATSymName = Sym->getName();
if (const auto *F = dyn_cast<Function>(GO))
- if (Optional<StringRef> Prefix = F->getSectionPrefix())
+ if (std::optional<StringRef> Prefix = F->getSectionPrefix())
raw_svector_ostream(Name) << '$' << *Prefix;
// Append "$symbol" to the section name *before* IR-level mangling is
@@ -1889,11 +1923,24 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
// string that sorts between .CRT$XCA and .CRT$XCU. In the general case, we
// make a name like ".CRT$XCT12345", since that runs before .CRT$XCU. Really
// low priorities need to sort before 'L', since the CRT uses that
- // internally, so we use ".CRT$XCA00001" for them.
+ // internally, so we use ".CRT$XCA00001" for them. We have a contract with
+ // the frontend that "init_seg(compiler)" corresponds to priority 200 and
+ // "init_seg(lib)" corresponds to priority 400, and those respectively use
+ // 'C' and 'L' without the priority suffix. Priorities between 200 and 400
+ // use 'C' with the priority as a suffix.
SmallString<24> Name;
+ char LastLetter = 'T';
+ bool AddPrioritySuffix = Priority != 200 && Priority != 400;
+ if (Priority < 200)
+ LastLetter = 'A';
+ else if (Priority < 400)
+ LastLetter = 'C';
+ else if (Priority == 400)
+ LastLetter = 'L';
raw_svector_ostream OS(Name);
- OS << ".CRT$X" << (IsCtor ? "C" : "T") <<
- (Priority < 200 ? 'A' : 'T') << format("%05u", Priority);
+ OS << ".CRT$X" << (IsCtor ? "C" : "T") << LastLetter;
+ if (AddPrioritySuffix)
+ OS << format("%05u", Priority);
MCSectionCOFF *Sec = Ctx.getCOFFSection(
Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
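// Editor's note: a standalone sketch of the priority-to-section-name
// mapping described above (ctor case). For example: 101 ->
// ".CRT$XCA00101", 200 -> ".CRT$XCC", 300 -> ".CRT$XCC00300",
// 400 -> ".CRT$XCL", and the default 65535 -> ".CRT$XCT65535".
#include <cstdio>
#include <string>

static std::string sketchCrtCtorSectionName(unsigned Priority) {
  char LastLetter = 'T';
  if (Priority < 200)
    LastLetter = 'A'; // sorts before the CRT's internal 'L' entries
  else if (Priority < 400)
    LastLetter = 'C';
  else if (Priority == 400)
    LastLetter = 'L';
  std::string Name = std::string(".CRT$XC") + LastLetter;
  if (Priority != 200 && Priority != 400) {
    char Buf[16];
    std::snprintf(Buf, sizeof(Buf), "%05u", Priority);
    Name += Buf;
  }
  return Name;
}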
@@ -2245,16 +2292,16 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV,
// function entry point. We choose to always return a function descriptor
// here.
if (const GlobalObject *GO = dyn_cast<GlobalObject>(GV)) {
+ if (GO->isDeclarationForLinker())
+ return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM))
+ ->getQualNameSymbol();
+
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->hasAttribute("toc-data"))
return cast<MCSectionXCOFF>(
SectionForGlobal(GVar, SectionKind::getData(), TM))
->getQualNameSymbol();
- if (GO->isDeclarationForLinker())
- return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM))
- ->getQualNameSymbol();
-
SectionKind GOKind = getKindForGlobal(GO, TM);
if (GOKind.isText())
return cast<MCSectionXCOFF>(
@@ -2313,6 +2360,10 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
if (GO->isThreadLocal())
SMC = XCOFF::XMC_UL;
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO))
+ if (GVar->hasAttribute("toc-data"))
+ SMC = XCOFF::XMC_TD;
+
// Externals go into a csect of type ER.
return getContext().getXCOFFSection(
Name, SectionKind::getMetadata(),
@@ -2469,6 +2520,13 @@ void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx,
PersonalityEncoding = 0;
LSDAEncoding = 0;
CallSiteEncoding = dwarf::DW_EH_PE_udata4;
+
+ // AIX debug support for thread-local locations is not ready yet. In
+ // integrated-assembler mode, the relocatable address for a thread-local
+ // variable causes a linker error. So disable location attribute generation
+ // for thread-local variables for now.
+ // FIXME: when TLS debug on AIX is ready, remove this setting.
+ SupportDebugThreadLocalLocation = false;
}
MCSection *TargetLoweringObjectFileXCOFF::getStaticCtorSection(
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
index 0bd229f4fc68..3127328c363e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -16,8 +16,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
-#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -49,6 +47,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include <cassert>
+#include <optional>
#include <string>
using namespace llvm;
@@ -208,18 +207,6 @@ static cl::opt<bool> MISchedPostRA(
static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
cl::desc("Run live interval analysis earlier in the pipeline"));
-// Experimental option to use CFL-AA in codegen
-static cl::opt<CFLAAType> UseCFLAA(
- "use-cfl-aa-in-codegen", cl::init(CFLAAType::None), cl::Hidden,
- cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"),
- cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"),
- clEnumValN(CFLAAType::Steensgaard, "steens",
- "Enable unification-based CFL-AA"),
- clEnumValN(CFLAAType::Andersen, "anders",
- "Enable inclusion-based CFL-AA"),
- clEnumValN(CFLAAType::Both, "both",
- "Enable both variants of CFL-AA")));
-
/// Option names for limiting the codegen pipeline.
/// Those are used in error reporting and we didn't want
/// to duplicate their names all over the place.
@@ -339,8 +326,8 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
static std::string getFSProfileFile(const TargetMachine *TM) {
if (!FSProfileFile.empty())
return FSProfileFile.getValue();
- const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
- if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+ const std::optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+ if (PGOOpt == std::nullopt || PGOOpt->Action != PGOOptions::SampleUse)
return std::string();
return PGOOpt->ProfileFile;
}
@@ -350,8 +337,8 @@ static std::string getFSProfileFile(const TargetMachine *TM) {
static std::string getFSRemappingFile(const TargetMachine *TM) {
if (!FSRemappingFile.empty())
return FSRemappingFile.getValue();
- const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
- if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+ const std::optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+ if (PGOOpt == std::nullopt || PGOOpt->Action != PGOOptions::SampleUse)
return std::string();
return PGOOpt->ProfileRemappingFile;
}
@@ -492,7 +479,6 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() {
SET_BOOLEAN_OPTION(EnableImplicitNullChecks)
SET_BOOLEAN_OPTION(EnableMachineOutliner)
SET_BOOLEAN_OPTION(MISchedPostRA)
- SET_BOOLEAN_OPTION(UseCFLAA)
SET_BOOLEAN_OPTION(DisableMergeICmps)
SET_BOOLEAN_OPTION(DisableLSR)
SET_BOOLEAN_OPTION(DisableConstantHoisting)
@@ -548,7 +534,7 @@ static void registerPartialPipelineCallback(PassInstrumentationCallbacks &PIC,
PIC.registerShouldRunOptionalPassCallback(
[=, EnableCurrent = StartBefore.empty() && StartAfter.empty(),
- EnableNext = Optional<bool>(), StartBeforeCount = 0u,
+ EnableNext = std::optional<bool>(), StartBeforeCount = 0u,
StartAfterCount = 0u, StopBeforeCount = 0u,
StopAfterCount = 0u](StringRef P, Any) mutable {
bool StartBeforePass = !StartBefore.empty() && P.contains(StartBefore);
@@ -857,21 +843,6 @@ void TargetPassConfig::addIRPasses() {
addPass(createVerifierPass());
if (getOptLevel() != CodeGenOpt::None) {
- switch (UseCFLAA) {
- case CFLAAType::Steensgaard:
- addPass(createCFLSteensAAWrapperPass());
- break;
- case CFLAAType::Andersen:
- addPass(createCFLAndersAAWrapperPass());
- break;
- case CFLAAType::Both:
- addPass(createCFLAndersAAWrapperPass());
- addPass(createCFLSteensAAWrapperPass());
- break;
- default:
- break;
- }
-
// Basic AliasAnalysis support.
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
@@ -904,7 +875,7 @@ void TargetPassConfig::addIRPasses() {
addPass(&ShadowStackGCLoweringID);
addPass(createLowerConstantIntrinsicsPass());
- // For MachO, lower @llvm.global_dtors into @llvm_global_ctors with
+ // For MachO, lower @llvm.global_dtors into @llvm.global_ctors with
// __cxa_atexit() calls to avoid emitting the deprecated __mod_term_func.
if (TM->getTargetTriple().isOSBinFormatMachO() &&
TM->Options.LowerGlobalDtorsViaCxaAtExit)
@@ -960,7 +931,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// pad is shared by multiple invokes and is also a target of a normal
// edge from elsewhere.
addPass(createSjLjEHPreparePass(TM));
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
case ExceptionHandling::AIX:
@@ -1058,13 +1029,13 @@ bool TargetPassConfig::addCoreISelPasses() {
// pass manager into two. GlobalISel with the fallback path disabled
// and -run-pass seem to be unaffected. The majority of GlobalISel
// testing uses -run-pass so this probably isn't too bad.
- SaveAndRestore<bool> SavedDebugifyIsSafe(DebugifyIsSafe);
+ SaveAndRestore SavedDebugifyIsSafe(DebugifyIsSafe);
if (Selector != SelectorType::GlobalISel || !isGlobalISelAbortEnabled())
DebugifyIsSafe = false;
// Add instruction selector passes.
if (Selector == SelectorType::GlobalISel) {
- SaveAndRestore<bool> SavedAddingMachinePasses(AddingMachinePasses, true);
+ SaveAndRestore SavedAddingMachinePasses(AddingMachinePasses, true);
if (addIRTranslator())
return true;
@@ -1113,6 +1084,8 @@ bool TargetPassConfig::addISelPasses() {
addPass(createPreISelIntrinsicLoweringPass());
PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+ addPass(createExpandLargeDivRemPass());
+ addPass(createExpandLargeFpConvertPass());
addIRPasses();
addCodeGenPrepare();
addPassesToHandleExceptions();
@@ -1267,6 +1240,7 @@ void TargetPassConfig::addMachinePasses() {
addPass(&StackMapLivenessID);
addPass(&LiveDebugValuesID);
+ addPass(&MachineSanitizerBinaryMetadataID);
if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
EnableMachineOutliner != RunOutliner::NeverOutline) {
@@ -1296,6 +1270,8 @@ void TargetPassConfig::addMachinePasses() {
if (!DisableCFIFixup && TM->Options.EnableCFIFixup)
addPass(createCFIFixup());
+ PM->add(createStackFrameLayoutAnalysisPass());
+
// Add passes that directly emit MI after all other MI passes.
addPreEmitPass2();
@@ -1519,6 +1495,9 @@ void TargetPassConfig::addOptimizedRegAlloc() {
/// Add passes that optimize machine instructions after register allocation.
void TargetPassConfig::addMachineLateOptimization() {
+ // Cleanup of redundant immediate/address loads.
+ addPass(&MachineLateInstrsCleanupID);
+
// Branch folding must be run after regalloc and prolog/epilog insertion.
addPass(&BranchFolderPassID);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index ac346585b0f8..a41d5999d961 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -115,7 +115,7 @@ Printable printReg(Register Reg, const TargetRegisterInfo *TRI,
OS << "$noreg";
else if (Register::isStackSlot(Reg))
OS << "SS#" << Register::stackSlot2Index(Reg);
- else if (Register::isVirtualRegister(Reg)) {
+ else if (Reg.isVirtual()) {
StringRef Name = MRI ? MRI->getVRegName(Reg) : "";
if (Name != "") {
OS << '%' << Name;
@@ -571,10 +571,14 @@ bool TargetRegisterInfo::getCoveringSubRegIndexes(
break;
}
- // Try to cover as much of the remaining lanes as possible but
- // as few of the already covered lanes as possible.
- int Cover = (SubRegMask & LanesLeft).getNumLanes() -
- (SubRegMask & ~LanesLeft).getNumLanes();
+ // Do not cover already-covered lanes to avoid creating cycles
+ // in copy bundles (= bundle contains copies that write to the
+ // registers).
+ if ((SubRegMask & ~LanesLeft).any())
+ continue;
+
+ // Try to cover as many of the remaining lanes as possible.
+ const int Cover = (SubRegMask & LanesLeft).getNumLanes();
if (Cover > BestCover) {
BestCover = Cover;
BestIdx = Idx;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp
index ac07c86cab85..dba84950f49d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -26,6 +26,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <numeric>
using namespace llvm;
@@ -43,22 +44,6 @@ bool TargetSchedModel::hasInstrItineraries() const {
return EnableSchedItins && !InstrItins.isEmpty();
}
-static unsigned gcd(unsigned Dividend, unsigned Divisor) {
- // Dividend and Divisor will be naturally swapped as needed.
- while (Divisor) {
- unsigned Rem = Dividend % Divisor;
- Dividend = Divisor;
- Divisor = Rem;
- };
- return Dividend;
-}
-
-static unsigned lcm(unsigned A, unsigned B) {
- unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
- assert((LCM >= A && LCM >= B) && "LCM overflow");
- return LCM;
-}
-
void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) {
STI = TSInfo;
SchedModel = TSInfo->getSchedModel();
@@ -71,7 +56,7 @@ void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) {
for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
if (NumUnits > 0)
- ResourceLCM = lcm(ResourceLCM, NumUnits);
+ ResourceLCM = std::lcm(ResourceLCM, NumUnits);
}
MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
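// Editor's note: the hand-rolled gcd/lcm helpers removed above are
// superseded by the C++17 <numeric> facilities, which are constexpr:
#include <numeric>
static_assert(std::lcm(4u, 6u) == 12u, "std::lcm(4, 6) == 12");
static_assert(std::gcd(4u, 6u) == 2u, "std::gcd(4, 6) == 2");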
@@ -237,9 +222,9 @@ unsigned TargetSchedModel::computeOperandLatency(
// If DefIdx does not exist in the model (e.g. implicit defs), then return
// unit latency (defaultDefLatency may be too conservative).
#ifndef NDEBUG
- if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
- && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
- && SchedModel.isComplete()) {
+ if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() &&
+ !DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() &&
+ SchedModel.isComplete()) {
errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
<< *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
llvm_unreachable("incomplete machine model");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 17fe819fa900..8cb3667aea28 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1657,13 +1657,50 @@ bool TwoAddressInstructionPass::processStatepoint(
if (RegA == RegB)
continue;
+ // CodeGenPrepare can sink a pointer compare past a statepoint, which
+ // breaks the assumption that a statepoint kills the tied-use register
+ // while in SSA form (see note in IR/SafepointIRVerifier.cpp). Fall back
+ // to generic tied-register handling to avoid assertion failures.
+ // TODO: Recompute LIS/LV information for new range here.
+ if (LIS) {
+ const auto &UseLI = LIS->getInterval(RegB);
+ const auto &DefLI = LIS->getInterval(RegA);
+ if (DefLI.overlaps(UseLI)) {
+ LLVM_DEBUG(dbgs() << "LIS: " << printReg(RegB, TRI, 0)
+ << " UseLI overlaps with DefLI\n");
+ NeedCopy = true;
+ continue;
+ }
+ } else if (LV && LV->getVarInfo(RegB).findKill(MI->getParent()) != MI) {
+ // Note that MachineOperand::isKill does not work here, because it
+ // is set only on the first register use in an instruction, and for a
+ // statepoint the tied-use register will usually be found in the
+ // preceding deopt bundle.
+ LLVM_DEBUG(dbgs() << "LV: " << printReg(RegB, TRI, 0)
+ << " not killed by statepoint\n");
+ NeedCopy = true;
+ continue;
+ }
+
+ if (!MRI->constrainRegClass(RegB, MRI->getRegClass(RegA))) {
+ LLVM_DEBUG(dbgs() << "MRI: couldn't constrain " << printReg(RegB, TRI, 0)
+ << " to register class of " << printReg(RegA, TRI, 0)
+ << '\n');
+ NeedCopy = true;
+ continue;
+ }
MRI->replaceRegWith(RegA, RegB);
if (LIS) {
VNInfo::Allocator &A = LIS->getVNInfoAllocator();
LiveInterval &LI = LIS->getInterval(RegB);
- for (auto &S : LIS->getInterval(RegA)) {
- VNInfo *VNI = LI.getNextValue(S.start, A);
+ LiveInterval &Other = LIS->getInterval(RegA);
+ SmallVector<VNInfo *> NewVNIs;
+ for (const VNInfo *VNI : Other.valnos) {
+ assert(VNI->id == NewVNIs.size() && "assumed");
+ NewVNIs.push_back(LI.createValueCopy(VNI, A));
+ }
+ for (auto &S : Other) {
+ VNInfo *VNI = NewVNIs[S.valno->id];
LiveRange::Segment NewSeg(S.start, S.end, VNI);
LI.addSegment(NewSeg);
}
@@ -1676,6 +1713,7 @@ bool TwoAddressInstructionPass::processStatepoint(
LiveVariables::VarInfo &SrcInfo = LV->getVarInfo(RegB);
LiveVariables::VarInfo &DstInfo = LV->getVarInfo(RegA);
SrcInfo.AliveBlocks |= DstInfo.AliveBlocks;
+ DstInfo.AliveBlocks.clear();
for (auto *KillMI : DstInfo.Kills)
LV->addVirtualRegisterKilled(RegB, *KillMI, false);
}
@@ -1857,11 +1895,6 @@ void TwoAddressInstructionPass::
eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
Register DstReg = MI.getOperand(0).getReg();
- if (MI.getOperand(0).getSubReg() || DstReg.isPhysical() ||
- !(MI.getNumOperands() & 1)) {
- LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
- llvm_unreachable(nullptr);
- }
SmallVector<Register, 4> OrigRegs;
if (LIS) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
index 36e3c1245f1c..e6c0b3242d67 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
@@ -15,8 +15,10 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/TypePromotion.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -106,9 +108,9 @@ class IRPromoter {
SetVector<Value *> &Sources;
SetVector<Instruction *> &Sinks;
SmallPtrSetImpl<Instruction *> &SafeWrap;
+ SmallPtrSetImpl<Instruction *> &InstsToRemove;
IntegerType *ExtTy = nullptr;
SmallPtrSet<Value *, 8> NewInsts;
- SmallPtrSet<Instruction *, 4> InstsToRemove;
DenseMap<Value *, SmallVector<Type *, 4>> TruncTysMap;
SmallPtrSet<Value *, 8> Promoted;
@@ -120,25 +122,26 @@ class IRPromoter {
void Cleanup();
public:
- IRPromoter(LLVMContext &C, unsigned Width,
- SetVector<Value *> &visited, SetVector<Value *> &sources,
- SetVector<Instruction *> &sinks,
- SmallPtrSetImpl<Instruction *> &wrap)
- : Ctx(C), PromotedWidth(Width), Visited(visited),
- Sources(sources), Sinks(sinks), SafeWrap(wrap) {
+ IRPromoter(LLVMContext &C, unsigned Width, SetVector<Value *> &visited,
+ SetVector<Value *> &sources, SetVector<Instruction *> &sinks,
+ SmallPtrSetImpl<Instruction *> &wrap,
+ SmallPtrSetImpl<Instruction *> &instsToRemove)
+ : Ctx(C), PromotedWidth(Width), Visited(visited), Sources(sources),
+ Sinks(sinks), SafeWrap(wrap), InstsToRemove(instsToRemove) {
ExtTy = IntegerType::get(Ctx, PromotedWidth);
}
void Mutate();
};
-class TypePromotion : public FunctionPass {
+class TypePromotionImpl {
unsigned TypeSize = 0;
LLVMContext *Ctx = nullptr;
unsigned RegisterBitWidth = 0;
SmallPtrSet<Value *, 16> AllVisited;
SmallPtrSet<Instruction *, 8> SafeToPromote;
SmallPtrSet<Instruction *, 4> SafeWrap;
+ SmallPtrSet<Instruction *, 4> InstsToRemove;
// Does V have the same size result type as TypeSize.
bool EqualTypeSize(Value *V);
@@ -166,17 +169,25 @@ class TypePromotion : public FunctionPass {
// Is V an instruction whose result can be trivially promoted, or one that
// has safe wrapping.
bool isLegalToPromote(Value *V);
- bool TryToPromote(Value *V, unsigned PromotedWidth);
+ bool TryToPromote(Value *V, unsigned PromotedWidth, const LoopInfo &LI);
+
+public:
+ bool run(Function &F, const TargetMachine *TM,
+ const TargetTransformInfo &TTI, const LoopInfo &LI);
+};
+class TypePromotionLegacy : public FunctionPass {
public:
static char ID;
- TypePromotion() : FunctionPass(ID) {}
+ TypePromotionLegacy() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
AU.setPreservesCFG();
+ AU.addPreserved<LoopInfoWrapperPass>();
}
StringRef getPassName() const override { return PASS_NAME; }
@@ -192,19 +203,19 @@ static bool GenerateSignBits(Instruction *I) {
Opc == Instruction::SRem || Opc == Instruction::SExt;
}
-bool TypePromotion::EqualTypeSize(Value *V) {
+bool TypePromotionImpl::EqualTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() == TypeSize;
}
-bool TypePromotion::LessOrEqualTypeSize(Value *V) {
+bool TypePromotionImpl::LessOrEqualTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() <= TypeSize;
}
-bool TypePromotion::GreaterThanTypeSize(Value *V) {
+bool TypePromotionImpl::GreaterThanTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() > TypeSize;
}
-bool TypePromotion::LessThanTypeSize(Value *V) {
+bool TypePromotionImpl::LessThanTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() < TypeSize;
}
@@ -215,7 +226,7 @@ bool TypePromotion::LessThanTypeSize(Value *V) {
/// return values because we only accept ones that guarantee a zeroext ret val.
/// Many arguments will have the zeroext attribute too, so those would be free
/// too.
-bool TypePromotion::isSource(Value *V) {
+bool TypePromotionImpl::isSource(Value *V) {
if (!isa<IntegerType>(V->getType()))
return false;
@@ -236,7 +247,7 @@ bool TypePromotion::isSource(Value *V) {
/// Return true if V will require any promoted values to be truncated for
/// the IR to remain valid. We can't mutate the value type of these
/// instructions.
-bool TypePromotion::isSink(Value *V) {
+bool TypePromotionImpl::isSink(Value *V) {
// TODO: The truncate also isn't actually necessary because we would already
// have proved that the data value is kept within the range of the original
// data type. We currently remove any truncs inserted for handling zext sinks.
@@ -262,7 +273,7 @@ bool TypePromotion::isSink(Value *V) {
}
/// Return whether this instruction can safely wrap.
-bool TypePromotion::isSafeWrap(Instruction *I) {
+bool TypePromotionImpl::isSafeWrap(Instruction *I) {
// We can support a potentially wrapping instruction (I) if:
// - It is only used by an unsigned icmp.
// - The icmp uses a constant.
@@ -368,7 +379,7 @@ bool TypePromotion::isSafeWrap(Instruction *I) {
return false;
}
-bool TypePromotion::shouldPromote(Value *V) {
+bool TypePromotionImpl::shouldPromote(Value *V) {
if (!isa<IntegerType>(V->getType()) || isSink(V))
return false;
@@ -551,8 +562,13 @@ void IRPromoter::TruncateSinks() {
}
// Don't insert a trunc for a zext which can still legally promote.
+ // Nor insert a trunc when the input value to that trunc has the same width
+ // as the zext we are inserting it for. When this happens, the input operand
+ // of the zext will be promoted to the same width as the zext's return type,
+ // rendering that zext unnecessary. The zext gets removed before the end
+ // of the pass.
if (auto ZExt = dyn_cast<ZExtInst>(I))
- if (ZExt->getType()->getScalarSizeInBits() > PromotedWidth)
+ if (ZExt->getType()->getScalarSizeInBits() >= PromotedWidth)
continue;
// Now handle the others.
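
A hypothetical illustration of the relaxed guard above, restated with commentary (invented IR names; PromotedWidth assumed to be 32):

// Sink before promotion:
//   %narrow = add i8 %x, 1
//   %wide   = zext i8 %narrow to i32
// After Mutate() the add is performed in i32, so the zext's operand already
// has the zext's own width; a trunc back to i8 would be immediately
// re-widened. Comparing with >= therefore skips trunc insertion for the
// equal-width case too, and the now-redundant zext is cleaned up before
// the pass finishes, as the comment above notes.
if (auto ZExt = dyn_cast<ZExtInst>(I))
  if (ZExt->getType()->getScalarSizeInBits() >= PromotedWidth)
    continue;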
@@ -599,7 +615,6 @@ void IRPromoter::Cleanup() {
for (auto *I : InstsToRemove) {
LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n");
I->dropAllReferences();
- I->eraseFromParent();
}
}
@@ -675,7 +690,7 @@ void IRPromoter::Mutate() {
/// We disallow booleans to make life easier when dealing with icmps but allow
/// any other integer that fits in a scalar register. Void types are accepted
/// so we can handle switches.
-bool TypePromotion::isSupportedType(Value *V) {
+bool TypePromotionImpl::isSupportedType(Value *V) {
Type *Ty = V->getType();
// Allow voids and pointers, these won't be promoted.
@@ -693,7 +708,7 @@ bool TypePromotion::isSupportedType(Value *V) {
/// Disallow casts other than zext and truncs and only allow calls if their
/// return value is zeroext. We don't allow opcodes that can introduce sign
/// bits.
-bool TypePromotion::isSupportedValue(Value *V) {
+bool TypePromotionImpl::isSupportedValue(Value *V) {
if (auto *I = dyn_cast<Instruction>(V)) {
switch (I->getOpcode()) {
default:
@@ -741,7 +756,7 @@ bool TypePromotion::isSupportedValue(Value *V) {
/// Check that the type of V would be promoted and that the original type is
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
-bool TypePromotion::isLegalToPromote(Value *V) {
+bool TypePromotionImpl::isLegalToPromote(Value *V) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
return true;
@@ -756,9 +771,10 @@ bool TypePromotion::isLegalToPromote(Value *V) {
return false;
}
-bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
+bool TypePromotionImpl::TryToPromote(Value *V, unsigned PromotedWidth,
+ const LoopInfo &LI) {
Type *OrigTy = V->getType();
- TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedSize();
+ TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedValue();
SafeToPromote.clear();
SafeWrap.clear();
@@ -850,95 +866,134 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
unsigned ToPromote = 0;
unsigned NonFreeArgs = 0;
+ unsigned NonLoopSources = 0, LoopSinks = 0;
SmallPtrSet<BasicBlock *, 4> Blocks;
- for (auto *V : CurrentVisited) {
- if (auto *I = dyn_cast<Instruction>(V))
+ for (auto *CV : CurrentVisited) {
+ if (auto *I = dyn_cast<Instruction>(CV))
Blocks.insert(I->getParent());
- if (Sources.count(V)) {
- if (auto *Arg = dyn_cast<Argument>(V))
+ if (Sources.count(CV)) {
+ if (auto *Arg = dyn_cast<Argument>(CV))
if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr())
++NonFreeArgs;
+ if (!isa<Instruction>(CV) ||
+ !LI.getLoopFor(cast<Instruction>(CV)->getParent()))
+ ++NonLoopSources;
continue;
}
- if (Sinks.count(cast<Instruction>(V)))
+ if (isa<PHINode>(CV))
+ continue;
+ if (LI.getLoopFor(cast<Instruction>(CV)->getParent()))
+ ++LoopSinks;
+ if (Sinks.count(cast<Instruction>(CV)))
continue;
++ToPromote;
}
// DAG optimizations should be able to handle these cases better, especially
// for function arguments.
- if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size())))
+ if (!isa<PHINode>(V) && !(LoopSinks && NonLoopSources) &&
+ (ToPromote < 2 || (Blocks.size() == 1 && NonFreeArgs > SafeWrap.size())))
return false;
IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks,
- SafeWrap);
+ SafeWrap, InstsToRemove);
Promoter.Mutate();
return true;
}
-bool TypePromotion::runOnFunction(Function &F) {
- if (skipFunction(F) || DisablePromotion)
+bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
+ const TargetTransformInfo &TTI,
+ const LoopInfo &LI) {
+ if (DisablePromotion)
return false;
LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n");
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
AllVisited.clear();
SafeToPromote.clear();
SafeWrap.clear();
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
- const TargetMachine &TM = TPC->getTM<TargetMachine>();
- const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F);
+ const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl(F);
const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
- const TargetTransformInfo &TII =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
RegisterBitWidth =
- TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize();
+ TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedValue();
Ctx = &F.getParent()->getContext();
- // Search up from icmps to try to promote their operands.
+ // Return the preferred integer width of the instruction, or zero if we
+ // shouldn't try.
+ auto GetPromoteWidth = [&](Instruction *I) -> uint32_t {
+ if (!isa<IntegerType>(I->getType()))
+ return 0;
+
+ EVT SrcVT = TLI->getValueType(DL, I->getType());
+ if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
+ return 0;
+
+ if (TLI->getTypeAction(*Ctx, SrcVT) != TargetLowering::TypePromoteInteger)
+ return 0;
+
+ EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
+ if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
+ << "for promoted type\n");
+ return 0;
+ }
+
+ // TODO: Should we prefer to use RegisterBitWidth instead?
+ return PromotedVT.getFixedSizeInBits();
+ };
+
+ auto BBIsInLoop = [&](BasicBlock *BB) -> bool {
+ for (auto *L : LI)
+ if (L->contains(BB))
+ return true;
+ return false;
+ };
+
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
if (AllVisited.count(&I))
continue;
- if (!isa<ICmpInst>(&I))
- continue;
-
- auto *ICmp = cast<ICmpInst>(&I);
- // Skip signed or pointer compares
- if (ICmp->isSigned() || !isa<IntegerType>(ICmp->getOperand(0)->getType()))
- continue;
-
- LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");
-
- for (auto &Op : ICmp->operands()) {
- if (auto *I = dyn_cast<Instruction>(Op)) {
- EVT SrcVT = TLI->getValueType(DL, I->getType());
- if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
- break;
-
- if (TLI->getTypeAction(*Ctx, SrcVT) !=
- TargetLowering::TypePromoteInteger)
- break;
- EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
- if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
- LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
- << "for promoted type\n");
- break;
+ if (isa<ZExtInst>(&I) && isa<PHINode>(I.getOperand(0)) &&
+ isa<IntegerType>(I.getType()) && BBIsInLoop(&BB)) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *I.getOperand(0)
+ << "\n");
+ EVT ZExtVT = TLI->getValueType(DL, I.getType());
+ Instruction *Phi = static_cast<Instruction *>(I.getOperand(0));
+ auto PromoteWidth = ZExtVT.getFixedSizeInBits();
+ if (RegisterBitWidth < PromoteWidth) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target "
+ << "register for ZExt type\n");
+ continue;
+ }
+ MadeChange |= TryToPromote(Phi, PromoteWidth, LI);
+ } else if (auto *ICmp = dyn_cast<ICmpInst>(&I)) {
+ // Search up from icmps to try to promote their operands.
+ // Skip signed or pointer compares
+ if (ICmp->isSigned())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");
+
+ for (auto &Op : ICmp->operands()) {
+ if (auto *OpI = dyn_cast<Instruction>(Op)) {
+ if (auto PromotedWidth = GetPromoteWidth(OpI)) {
+ MadeChange |= TryToPromote(OpI, PromotedWidth, LI);
+ break;
+ }
}
-
- MadeChange |= TryToPromote(I, PromotedVT.getFixedSizeInBits());
- break;
}
}
}
+ if (!InstsToRemove.empty()) {
+ for (auto *I : InstsToRemove)
+ I->eraseFromParent();
+ InstsToRemove.clear();
+ }
}
AllVisited.clear();
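
GetPromoteWidth above factors the old inline icmp-operand checks into a reusable query over the target's type-legalization action. A hedged standalone sketch of the same computation, specialized to i8 (assumes TLI, DL, and Ctx are obtained the usual way from the subtarget):

#include <cstdint>

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Sketch: the width i8 promotes to on this target, or 0 when the type is
// already legal or is not handled by integer promotion.
static uint32_t promotedWidthForI8(const TargetLowering &TLI,
                                   const DataLayout &DL, LLVMContext &Ctx) {
  EVT SrcVT = TLI.getValueType(DL, Type::getInt8Ty(Ctx));
  if (SrcVT.isSimple() && TLI.isTypeLegal(SrcVT.getSimpleVT()))
    return 0; // legal already; leave it to the DAG
  if (TLI.getTypeAction(Ctx, SrcVT) != TargetLowering::TypePromoteInteger)
    return 0;
  return TLI.getTypeToTransformTo(Ctx, SrcVT).getFixedSizeInBits();
}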
@@ -948,9 +1003,46 @@ bool TypePromotion::runOnFunction(Function &F) {
return MadeChange;
}
-INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
-INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_BEGIN(TypePromotionLegacy, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(TypePromotionLegacy, DEBUG_TYPE, PASS_NAME, false, false)
+
+char TypePromotionLegacy::ID = 0;
-char TypePromotion::ID = 0;
+bool TypePromotionLegacy::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
-FunctionPass *llvm::createTypePromotionPass() { return new TypePromotion(); }
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto *TM = &TPC->getTM<TargetMachine>();
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ TypePromotionImpl TP;
+ return TP.run(F, TM, TTI, LI);
+}
+
+FunctionPass *llvm::createTypePromotionLegacyPass() {
+ return new TypePromotionLegacy();
+}
+
+PreservedAnalyses TypePromotionPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ TypePromotionImpl TP;
+
+ bool Changed = TP.run(F, TM, TTI, LI);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
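
For completeness, a hedged sketch of driving the new-PM entry point (driver code invented; the TypePromotionPass constructor taking the TargetMachine is inferred from the stored TM that run() reads above):

#include "llvm/CodeGen/TypePromotion.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

// Sketch: run TypePromotionPass over one function. PassBuilder registers
// the TargetIRAnalysis and LoopAnalysis results that run() requests.
static void runTypePromotion(Function &F, TargetMachine *TM) {
  PassBuilder PB(TM);
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  FunctionPassManager FPM;
  FPM.addPass(TypePromotionPass(TM));
  FPM.run(F, FAM);
}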
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
index 8225d4ea6996..88460971338c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -582,7 +582,7 @@ int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
for (const auto &P : PD) {
if (!P.isValid())
continue;
- // The pressure differences are computed bottom-up, so the comparision for
+ // The pressure differences are computed bottom-up, so the comparison for
// an increase is positive in the bottom direction, but negative in the
// top-down direction.
if (HighPressureSets[P.getPSet()])
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
index f577aff39ea7..608434800bc3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
@@ -232,10 +232,16 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getInt1Ty(Context), 512);
case MVT::v1024i1:
return FixedVectorType::get(Type::getInt1Ty(Context), 1024);
+ case MVT::v2048i1:
+ return FixedVectorType::get(Type::getInt1Ty(Context), 2048);
case MVT::v128i2:
return FixedVectorType::get(Type::getIntNTy(Context, 2), 128);
+ case MVT::v256i2:
+ return FixedVectorType::get(Type::getIntNTy(Context, 2), 256);
case MVT::v64i4:
return FixedVectorType::get(Type::getIntNTy(Context, 4), 64);
+ case MVT::v128i4:
+ return FixedVectorType::get(Type::getIntNTy(Context, 4), 128);
case MVT::v1i8:
return FixedVectorType::get(Type::getInt8Ty(Context), 1);
case MVT::v2i8:
@@ -296,6 +302,14 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getInt32Ty(Context), 7);
case MVT::v8i32:
return FixedVectorType::get(Type::getInt32Ty(Context), 8);
+ case MVT::v9i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 9);
+ case MVT::v10i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 10);
+ case MVT::v11i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 11);
+ case MVT::v12i32:
+ return FixedVectorType::get(Type::getInt32Ty(Context), 12);
case MVT::v16i32:
return FixedVectorType::get(Type::getInt32Ty(Context), 16);
case MVT::v32i32:
@@ -388,6 +402,14 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getFloatTy(Context), 7);
case MVT::v8f32:
return FixedVectorType::get(Type::getFloatTy(Context), 8);
+ case MVT::v9f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 9);
+ case MVT::v10f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 10);
+ case MVT::v11f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 11);
+ case MVT::v12f32:
+ return FixedVectorType::get(Type::getFloatTy(Context), 12);
case MVT::v16f32:
return FixedVectorType::get(Type::getFloatTy(Context), 16);
case MVT::v32f32:
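
The added cases extend the MVT-to-IR mapping to the new odd element counts (v9 through v12 for i32/f32) and the widened i1/i2/i4 vectors. Illustrative only:

#include <cassert>

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Sketch: with the new case labels, an odd-count vector MVT round-trips to
// a concrete IR vector type.
static void demoTypeForEVT(LLVMContext &Ctx) {
  Type *T = EVT(MVT::v9i32).getTypeForEVT(Ctx);
  auto *VT = cast<FixedVectorType>(T);
  assert(VT->getNumElements() == 9 && VT->getScalarSizeInBits() == 32);
  (void)VT;
}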
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
index 069aca742da0..f80b06d7e9b7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -145,7 +145,7 @@ void VirtRegMap::assignVirt2StackSlot(Register virtReg, int SS) {
void VirtRegMap::print(raw_ostream &OS, const Module*) const {
OS << "********** REGISTER MAP **********\n";
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
OS << '[' << printReg(Reg, TRI) << " -> "
<< printReg(Virt2PhysMap[Reg], TRI) << "] "
@@ -154,7 +154,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
}
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
OS << '[' << printReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
<< "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
@@ -475,7 +475,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
// clobbering.
for (int E = MIs.size(), PrevE = E; E > 1; PrevE = E) {
for (int I = E; I--; )
- if (!anyRegsAlias(MIs[I], makeArrayRef(MIs).take_front(E), TRI)) {
+ if (!anyRegsAlias(MIs[I], ArrayRef(MIs).take_front(E), TRI)) {
if (I + 1 != E)
std::swap(MIs[I], MIs[E - 1]);
--E;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
index aa6645227edb..361f185243b1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -182,8 +182,7 @@ bool WasmEHPrepare::prepareThrows(Function &F) {
Changed = true;
auto *BB = ThrowI->getParent();
SmallVector<BasicBlock *, 4> Succs(successors(BB));
- auto &InstList = BB->getInstList();
- InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end());
+ BB->erase(std::next(BasicBlock::iterator(ThrowI)), BB->end());
IRB.SetInsertPoint(BB);
IRB.CreateUnreachable();
eraseDeadBBsAndChildren(Succs);
@@ -253,7 +252,7 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
auto *CPI = cast<CatchPadInst>(BB->getFirstNonPHI());
// In case of a single catch (...), we don't need to emit a personality
// function call
- if (CPI->getNumArgOperands() == 1 &&
+ if (CPI->arg_size() == 1 &&
cast<Constant>(CPI->getArgOperand(0))->isNullValue())
prepareEHPad(BB, false);
else
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
index b835503ee9ed..dfca2be0a114 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -556,8 +556,8 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
// Create the entry for this cleanup with the appropriate handler
// properties. Finally and fault handlers are distinguished by arity.
ClrHandlerType HandlerType =
- (Cleanup->getNumArgOperands() ? ClrHandlerType::Fault
- : ClrHandlerType::Finally);
+ (Cleanup->arg_size() ? ClrHandlerType::Fault
+ : ClrHandlerType::Finally);
int CleanupState = addClrEHHandler(FuncInfo, HandlerParentState, -1,
HandlerType, 0, Pad->getParent());
// Queue any child EH pads on the worklist.
@@ -733,7 +733,7 @@ void WinEHPrepare::demotePHIsOnFunclets(Function &F,
for (auto *PN : PHINodes) {
// There may be lingering uses on other EH PHIs being removed
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
PN->eraseFromParent();
}
}
@@ -847,10 +847,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
ColorVector &IncomingColors = BlockColors[IncomingBlock];
assert(!IncomingColors.empty() && "Block not colored!");
assert((IncomingColors.size() == 1 ||
- llvm::all_of(IncomingColors,
- [&](BasicBlock *Color) {
- return Color != FuncletPadBB;
- })) &&
+ !llvm::is_contained(IncomingColors, FuncletPadBB)) &&
"Cloning should leave this funclet's blocks monochromatic");
EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB);
}
@@ -1215,8 +1212,8 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator());
Goto->removeFromParent();
CatchRet->removeFromParent();
- IncomingBlock->getInstList().push_back(CatchRet);
- NewBlock->getInstList().push_back(Goto);
+ CatchRet->insertInto(IncomingBlock, IncomingBlock->end());
+ Goto->insertInto(NewBlock, NewBlock->end());
Goto->setSuccessor(0, PHIBlock);
CatchRet->setSuccessor(NewBlock);
// Update the color mapping for the newly split edge.
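
Both EH-prepare hunks migrate off the deprecated getInstList() accessor: range erasure goes through BasicBlock::erase, and re-attaching an unlinked instruction goes through Instruction::insertInto. The move pattern as a standalone sketch (helper name invented):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Sketch: relocate an instruction to the end of another block without
// touching the instruction list directly.
static void moveToBlockEnd(Instruction *I, BasicBlock *BB) {
  I->removeFromParent();        // unlink, but keep the instruction alive
  I->insertInto(BB, BB->end()); // relink at BB's end
}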
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
index b66429d8a5bf..13f45ae048bb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -151,19 +151,18 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
InstrAttr.getValueAsString() == "xray-never";
if (NeverInstrument && !AlwaysInstrument)
return false;
- auto ThresholdAttr = F.getFnAttribute("xray-instruction-threshold");
auto IgnoreLoopsAttr = F.getFnAttribute("xray-ignore-loops");
- unsigned int XRayThreshold = 0;
- if (!AlwaysInstrument) {
- if (!ThresholdAttr.isStringAttribute())
- return false; // XRay threshold attribute not found.
- if (ThresholdAttr.getValueAsString().getAsInteger(10, XRayThreshold))
- return false; // Invalid value for threshold.
+ uint64_t XRayThreshold = 0;
+ if (!AlwaysInstrument) {
bool IgnoreLoops = IgnoreLoopsAttr.isValid();
+ XRayThreshold = F.getFnAttributeAsParsedInteger(
+ "xray-instruction-threshold", std::numeric_limits<uint64_t>::max());
+ if (XRayThreshold == std::numeric_limits<uint64_t>::max())
+ return false;
// Count the number of MachineInstrs in the MachineFunction
- int64_t MICount = 0;
+ uint64_t MICount = 0;
for (const auto &MBB : MF)
MICount += MBB.size();
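
The threshold logic now funnels through Function::getFnAttributeAsParsedInteger, using the uint64_t maximum as a sentinel for an absent (or unusable) attribute. A condensed sketch of the pattern (helper name invented):

#include <cstdint>
#include <limits>

#include "llvm/IR/Function.h"

// Sketch: read a numeric function attribute, treating the sentinel default
// as "do not instrument", mirroring the threshold check above.
static bool overThreshold(const llvm::Function &F, uint64_t InstrCount) {
  uint64_t Threshold = F.getFnAttributeAsParsedInteger(
      "xray-instruction-threshold", std::numeric_limits<uint64_t>::max());
  if (Threshold == std::numeric_limits<uint64_t>::max())
    return false; // attribute missing: skip threshold-based instrumentation
  return InstrCount >= Threshold;
}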